Last active
June 25, 2025 14:42
-
-
Save garyachy/1ef8367784bedb3959d86e7a45db907e to your computer and use it in GitHub Desktop.
Transcriber
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class AudioTranscriber | |
{ | |
public: | |
AudioTranscriber( | |
std::string name, | |
const std::string &model_path, | |
uint32_t input_sample_rate, | |
uint32_t output_sample_rate, | |
uint32_t window_size_seconds, | |
G711ToPcmTranscoder &transcoder, | |
int transcription_interval_seconds | |
): | |
name_( std::move( name ) ), | |
model_path_( model_path ), | |
input_sample_rate_( input_sample_rate ), | |
output_sample_rate_( output_sample_rate ), | |
window_size_seconds_( window_size_seconds ), | |
max_pcm_samples_( output_sample_rate * window_size_seconds ), | |
transcoder_( transcoder ), | |
transcription_interval_seconds_( transcription_interval_seconds ) | |
{ | |
whisper_context_params cparams = whisper_context_default_params(); | |
ctx_ = whisper_init_from_file_with_params( model_path_.c_str(), cparams ); | |
if( !ctx_ ) | |
{ | |
throw std::runtime_error( | |
"Failed to initialize whisper context. Check model path " | |
"and whisper.cpp setup." | |
); | |
} | |
wparams_ = whisper_full_default_params( WHISPER_SAMPLING_GREEDY ); | |
wparams_.print_progress = false; | |
wparams_.print_special = false; | |
wparams_.print_timestamps = false; | |
wparams_.print_realtime = false; | |
wparams_.n_threads = 8; | |
} | |
~AudioTranscriber() | |
{ | |
whisper_free( ctx_ ); | |
} | |
void process_rtp_payload( const u_char *rtp_payload, int rtp_payload_size ) | |
{ | |
auto pcm_chunk = transcoder_.transcode( rtp_payload, rtp_payload_size ); | |
add_pcm_data( pcm_chunk ); | |
add_samples( rtp_payload_size ); // For G.711, 1 byte = 1 sample | |
if( get_accumulated_samples() >= | |
get_input_sample_rate() * transcription_interval_seconds_ ) | |
{ | |
transcribe(); | |
reset_accumulated_samples(); | |
} | |
} | |
void finalize() | |
{ | |
add_pcm_data( transcoder_.flush() ); | |
transcribe(); | |
} | |
void add_pcm_data( const std::vector<float> &pcm_chunk ) | |
{ | |
pcm_data_.insert( pcm_data_.end(), pcm_chunk.begin(), pcm_chunk.end() ); | |
// Keep only the last window_size_seconds_ seconds of audio | |
if( pcm_data_.size() > max_pcm_samples_ ) | |
{ | |
pcm_data_.erase( | |
pcm_data_.begin(), | |
pcm_data_.begin() + ( pcm_data_.size() - max_pcm_samples_ ) | |
); | |
} | |
} | |
void set_input_sample_rate( uint32_t rate ) | |
{ | |
input_sample_rate_ = rate; | |
} | |
uint32_t get_input_sample_rate() const | |
{ | |
return input_sample_rate_; | |
} | |
void add_samples( size_t count ) | |
{ | |
accumulated_samples_ += count; | |
} | |
size_t get_accumulated_samples() const | |
{ | |
return accumulated_samples_; | |
} | |
void reset_accumulated_samples() | |
{ | |
accumulated_samples_ = 0; | |
} | |
void transcribe() const | |
{ | |
if( pcm_data_.empty() ) | |
{ | |
return; | |
} | |
if( whisper_full( ctx_, wparams_, pcm_data_.data(), pcm_data_.size() ) == | |
0 ) | |
{ | |
const int n_segments = whisper_full_n_segments( ctx_ ); | |
std::string full_transcription; | |
for( int i = 0; i < n_segments; ++i ) | |
{ | |
const char *text = whisper_full_get_segment_text( ctx_, i ); | |
if( text ) | |
{ | |
full_transcription += text; | |
} | |
} | |
} | |
} | |
size_t pcm_data_size() const | |
{ | |
return pcm_data_.size(); | |
} | |
private: | |
std::string name_; | |
std::string model_path_; | |
std::vector<float> pcm_data_; | |
whisper_context *ctx_ = nullptr; | |
uint32_t input_sample_rate_; | |
uint32_t output_sample_rate_; | |
size_t accumulated_samples_ = 0; | |
const size_t max_pcm_samples_; | |
uint32_t window_size_seconds_; | |
whisper_full_params wparams_; | |
G711ToPcmTranscoder &transcoder_; | |
int transcription_interval_seconds_; | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment