/* * Convert the specified speech WAV file into text output * on the program's standard output. * * Diomidis Spinellis, October 2019 * Based on https://stackoverflow.com/a/40002268/20520 */ #include <iostream> #include <sapi.h> #include <sphelper.h> int main(int argc, char* argv[]) { if (argc != 2) { std::cerr << "Usage: " << argv[0] << " file.wav\n"; return 1; } ::CoInitialize(NULL); HRESULT hr = S_OK; CComPtr<ISpStream> cpInputStream; CComPtr<ISpRecognizer> cpRecognizer; CComPtr<ISpRecoContext> cpRecoContext; CComPtr<ISpRecoGrammar> cpRecoGrammar; hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer); hr = cpInputStream.CoCreateInstance(CLSID_SpStream); std::string sInputFileName(argv[1]); std::wstring wInputFileName(sInputFileName.begin(), sInputFileName.end()); hr = cpInputStream->BindToFile(wInputFileName.c_str(), SPFM_OPEN_READONLY, NULL, NULL, SPFEI_ALL_EVENTS); if (FAILED(hr)) { std::cerr << "Unable to open " << argv[1] << '\n'; return 1; } hr = cpRecognizer->SetInput(cpInputStream, TRUE); hr = cpRecognizer->CreateRecoContext(&cpRecoContext); hr = cpRecoContext->CreateGrammar(NULL, &cpRecoGrammar); hr = cpRecoGrammar->LoadDictation(NULL, SPLO_STATIC); hr = cpRecoContext->SetNotifyWin32Event(); hr = cpRecoContext->SetInterest(SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_END_SR_STREAM), SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_END_SR_STREAM)); hr = cpRecoGrammar->SetDictationState(SPRS_ACTIVE); BOOL fEndStreamReached = FALSE; while (!fEndStreamReached && cpRecoContext->WaitForNotifyEvent(INFINITE) == S_OK) { CSpEvent spEvent; ISpRecoResult *pPhrase; SPPHRASE *phrase; while (!fEndStreamReached && spEvent.GetFrom(cpRecoContext) == S_OK) { switch (spEvent.eEventId) { case SPEI_RECOGNITION: pPhrase = spEvent.RecoResult(); phrase = NULL; pPhrase->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, NULL, NULL); pPhrase->GetPhrase(&phrase); if (phrase == NULL || phrase->pElements == NULL) break; for (int i = 0; i < phrase->Rule.ulCountOfElements; i++) if (phrase->pElements[i].pszDisplayText != NULL) std::wcout << phrase->pElements[i].pszDisplayText << ' '; break; case SPEI_END_SR_STREAM: fEndStreamReached = TRUE; break; } spEvent.Clear(); } } hr = cpRecoGrammar->SetDictationState(SPRS_INACTIVE); hr = cpRecoGrammar->UnloadDictation(); hr = cpInputStream->Close(); ::CoUninitialize(); std::wcout << '\n'; return 0; }