sheikhfaisalanwar · August 1, 2017 15:36
diff --git a/ibm_audio_transcriber.py b/ibm_audio_transcriber.py

 import subprocess
 import argparse
 import base64
 import json
 import time
 import httplib2

 ## Going with IBM
 from watson_developer_cloud import SpeechToTextV1


 """
 You need the following
 pip install --upgrade watson-developer-cloud
 Credentials from IBM - Username and pass
 """

 ##IBM CREDS
 # NOTE(review): the literal fallbacks below are credentials committed to
 # source; they are kept only so existing runs keep working and should be
 # rotated and removed. Set WATSON_STT_USERNAME / WATSON_STT_PASSWORD in the
 # environment to override them without editing this file.
 import os

 speech_to_text = SpeechToTextV1(
    username=os.environ.get('WATSON_STT_USERNAME',
                            '07802403-20f7-477d-bd84-9c849f3658a4'),
    password=os.environ.get('WATSON_STT_PASSWORD', 'Q44hNwIZNEDW'),
    x_watson_learning_opt_out=False
 )

 def extract_audio(video_file, speech_file):
    """Run ffmpeg to write the audio track of ``video_file`` to ``speech_file``.

    The ffmpeg options request no video stream (``-vn``), one audio channel
    (``-ac 1``), a 16000 Hz sample rate (``-ar 16000``) and a 160k audio
    bitrate (``-ab 160k``), with logging reduced to ``panic`` level.

    Args:
        video_file: path of the input video, passed to ffmpeg via ``-i``.
        speech_file: path of the audio file ffmpeg should write.

    Raises:
        OSError: if the ``ffmpeg`` executable cannot be launched.

    The ffmpeg exit status is not checked; a failed conversion surfaces
    later when the caller tries to open ``speech_file``.
    """
    # An argument list (no shell) hands both paths to ffmpeg verbatim, so
    # spaces or shell metacharacters in a file name can neither split the
    # command nor be interpreted by a shell.
    command = [
        "ffmpeg", "-loglevel", "panic",
        "-i", str(video_file),
        "-ab", "160k", "-ac", "1", "-ar", "16000", "-vn",
        str(speech_file),
    ]
    subprocess.call(command)


 def main(video_file, speech_file):
    """Transcribe the audio extracted from a video file and print the result.

    Calls ``extract_audio`` to produce ``speech_file``, sends that file to
    the module-level ``speech_to_text`` client as ``audio/wav`` with word
    timestamps and word confidences enabled, then prints the JSON response
    followed by the elapsed time in seconds, framed by banner lines.

    Args:
        video_file: the name of the video stream to extract audio from
        speech_file: the name of the audio file.
    """
    extract_audio(video_file, speech_file)

    with open(speech_file, 'rb') as speech:
        start = time.time()
        result = speech_to_text.recognize(
            speech, content_type='audio/wav', timestamps=True,
            word_confidence=True)
        # Stop the clock as soon as the service answers so that JSON
        # formatting and console output do not inflate the latency.
        response_latency = str(time.time() - start)

    response = json.dumps(result, indent=2)

    print("####################")
    print("####################")
    print(response)
    # Parenthesized form parses on Python 3 and prints the same text on
    # Python 2 (the bare ``print x`` statement is Python 2 only).
    print(response_latency)
    print("####################")
    print("####################")



 if __name__ == '__main__':
    # Command-line entry point: two required positional paths, declared in
    # the order they must be given (video first, then audio).
    cli = argparse.ArgumentParser()
    for arg_name, arg_help in (
            ('video_file', 'Full path of video file to get audio from'),
            ('speech_file', 'Full path of audio file to be recognized')):
        cli.add_argument(arg_name, help=arg_help)
    options = cli.parse_args()
    main(options.video_file, options.speech_file)

	import subprocess
	import argparse
	import base64
	import json
	import time
	import httplib2

	## Going with IBM
	from watson_developer_cloud import SpeechToTextV1


	"""
	You need the following
	pip install --upgrade watson-developer-cloud
	Credentials from IBM - Username and pass
	"""

	##IBM CREDS
	# NOTE(review): the literal fallbacks below are credentials committed to
	# source; they are kept only so existing runs keep working and should be
	# rotated and removed. Set WATSON_STT_USERNAME / WATSON_STT_PASSWORD in the
	# environment to override them without editing this file.
	import os

	speech_to_text = SpeechToTextV1(
		username=os.environ.get('WATSON_STT_USERNAME',
			'07802403-20f7-477d-bd84-9c849f3658a4'),
		password=os.environ.get('WATSON_STT_PASSWORD', 'Q44hNwIZNEDW'),
		x_watson_learning_opt_out=False
	)

	def extract_audio(video_file, speech_file):
		"""Run ffmpeg to write the audio track of ``video_file`` to ``speech_file``.

		The ffmpeg options request no video stream (``-vn``), one audio channel
		(``-ac 1``), a 16000 Hz sample rate (``-ar 16000``) and a 160k audio
		bitrate (``-ab 160k``), with logging reduced to ``panic`` level.

		Args:
			video_file: path of the input video, passed to ffmpeg via ``-i``.
			speech_file: path of the audio file ffmpeg should write.

		Raises:
			OSError: if the ``ffmpeg`` executable cannot be launched.

		The ffmpeg exit status is not checked; a failed conversion surfaces
		later when the caller tries to open ``speech_file``.
		"""
		# An argument list (no shell) hands both paths to ffmpeg verbatim, so
		# spaces or shell metacharacters in a file name can neither split the
		# command nor be interpreted by a shell.
		command = [
			"ffmpeg", "-loglevel", "panic",
			"-i", str(video_file),
			"-ab", "160k", "-ac", "1", "-ar", "16000", "-vn",
			str(speech_file),
		]
		subprocess.call(command)


	def main(video_file, speech_file):
		"""Transcribe the audio extracted from a video file and print the result.

		Calls ``extract_audio`` to produce ``speech_file``, sends that file to
		the module-level ``speech_to_text`` client as ``audio/wav`` with word
		timestamps and word confidences enabled, then prints the JSON response
		followed by the elapsed time in seconds, framed by banner lines.

		Args:
			video_file: the name of the video stream to extract audio from
			speech_file: the name of the audio file.
		"""
		extract_audio(video_file, speech_file)

		with open(speech_file, 'rb') as speech:
			start = time.time()
			result = speech_to_text.recognize(
				speech, content_type='audio/wav', timestamps=True,
				word_confidence=True)
			# Stop the clock as soon as the service answers so that JSON
			# formatting and console output do not inflate the latency.
			response_latency = str(time.time() - start)

		response = json.dumps(result, indent=2)

		print("####################")
		print("####################")
		print(response)
		# Parenthesized form parses on Python 3 and prints the same text on
		# Python 2 (the bare ``print x`` statement is Python 2 only).
		print(response_latency)
		print("####################")
		print("####################")



	if __name__ == '__main__':
		# Command-line entry point: parse the two required positional paths
		# (video first, then audio) and hand them to main().
		parser = argparse.ArgumentParser()
		parser.add_argument('video_file', help='Full path of video file to get audio from')
		parser.add_argument('speech_file', help='Full path of audio file to be recognized')
		args = parser.parse_args()
		main(args.video_file, args.speech_file)