/*
 * Convert the specified speech WAV file into text output
 * on the program's standard output.
 *
 * Diomidis Spinellis, October 2019
 * Based on https://stackoverflow.com/a/40002268/20520
 */

#include <iostream>
#include <string>

#include <sapi.h>
#include <sphelper.h>

int main(int argc, char* argv[])
{
	if (argc != 2) {
		std::cerr << "Usage: " << argv[0] << " file.wav\n";
		return 1;
	}

	::CoInitialize(NULL);

	HRESULT hr = S_OK;
	CComPtr<ISpStream> cpInputStream;
	CComPtr<ISpRecognizer> cpRecognizer;
	CComPtr<ISpRecoContext> cpRecoContext;
	CComPtr<ISpRecoGrammar> cpRecoGrammar;
	hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
	hr = cpInputStream.CoCreateInstance(CLSID_SpStream);
	std::string sInputFileName(argv[1]);
	std::wstring wInputFileName(sInputFileName.begin(), sInputFileName.end());
	hr = cpInputStream->BindToFile(wInputFileName.c_str(), SPFM_OPEN_READONLY, NULL, NULL, SPFEI_ALL_EVENTS);
	if (FAILED(hr)) {
		std::cerr << "Unable to open " << argv[1] << '\n';
		return 1;
	}
	hr = cpRecognizer->SetInput(cpInputStream, TRUE);
	hr = cpRecognizer->CreateRecoContext(&cpRecoContext);
	hr = cpRecoContext->CreateGrammar(NULL, &cpRecoGrammar);
	hr = cpRecoGrammar->LoadDictation(NULL, SPLO_STATIC);

	hr = cpRecoContext->SetNotifyWin32Event();
	hr = cpRecoContext->SetInterest(SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_END_SR_STREAM), SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_END_SR_STREAM));
	hr = cpRecoGrammar->SetDictationState(SPRS_ACTIVE);
	BOOL fEndStreamReached = FALSE;

	while (!fEndStreamReached && cpRecoContext->WaitForNotifyEvent(INFINITE) == S_OK) {
		CSpEvent spEvent;
		ISpRecoResult *pPhrase;
		SPPHRASE *phrase;

		 while (!fEndStreamReached && spEvent.GetFrom(cpRecoContext) == S_OK) {

			switch (spEvent.eEventId) {
			case SPEI_RECOGNITION:
				pPhrase = spEvent.RecoResult();
				phrase = NULL;
				pPhrase->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, NULL, NULL);
				pPhrase->GetPhrase(&phrase);

				if (phrase == NULL || phrase->pElements == NULL)
					break;

				for (int i = 0; i < phrase->Rule.ulCountOfElements; i++)
					if (phrase->pElements[i].pszDisplayText != NULL)
						std::wcout << phrase->pElements[i].pszDisplayText << ' ';
				break;
			case SPEI_END_SR_STREAM:
				fEndStreamReached = TRUE;
				break;
			}
			spEvent.Clear();
		}
	}
	hr = cpRecoGrammar->SetDictationState(SPRS_INACTIVE);
	hr = cpRecoGrammar->UnloadDictation();
	hr = cpInputStream->Close();


	::CoUninitialize();

	std::wcout << '\n';
	return 0;
}