Skip to content

Instantly share code, notes, and snippets.

@sheikhfaisalanwar
Created August 1, 2017 15:36
Show Gist options
  • Save sheikhfaisalanwar/c69683e4843c76dd5e65aaa93c41364e to your computer and use it in GitHub Desktop.
Save sheikhfaisalanwar/c69683e4843c76dd5e65aaa93c41364e to your computer and use it in GitHub Desktop.
import argparse
import base64
import json
import os
import subprocess
import time

import httplib2
## Going with IBM
from watson_developer_cloud import SpeechToTextV1
# Requirements:
#   pip install --upgrade watson-developer-cloud
#   IBM Watson Speech to Text credentials (username and password), supplied
#   through the WATSON_USERNAME and WATSON_PASSWORD environment variables.
#
# SECURITY: credentials must never be hard-coded in this file. Any credential
# that was previously committed here should be treated as leaked and rotated.
try:
    _watson_username = os.environ['WATSON_USERNAME']
    _watson_password = os.environ['WATSON_PASSWORD']
except KeyError as missing:
    # Fail early with an actionable message instead of an opaque
    # authentication error on the first request.
    raise RuntimeError(
        'Missing required environment variable %s; set WATSON_USERNAME and '
        'WATSON_PASSWORD to your IBM Watson Speech to Text credentials.'
        % missing)

## IBM client shared by the rest of the module.
speech_to_text = SpeechToTextV1(
    username=_watson_username,
    password=_watson_password,
    x_watson_learning_opt_out=False
)
def extract_audio(video_file, speech_file):
    """Extract the audio track of a video into an audio file using ffmpeg.

    ffmpeg is invoked with video disabled (``-vn``), one audio channel
    (``-ac 1``), a 16000 Hz sample rate (``-ar 16000``) and a 160k audio
    bitrate (``-ab 160k``). Its logging is reduced to ``panic`` level.

    The command is passed as an argument list without a shell, so paths
    containing spaces or shell metacharacters are handled literally and
    cannot inject additional commands.

    Args:
        video_file: path of the video to read the audio from.
        speech_file: path of the audio file to write.

    Raises:
        OSError: if the ``ffmpeg`` executable cannot be started.

    Note:
        The ffmpeg exit status is not checked; a failed extraction surfaces
        later when the caller tries to use ``speech_file``.
    """
    command = [
        "ffmpeg",
        "-loglevel", "panic",
        "-i", str(video_file),
        "-ab", "160k",
        "-ac", "1",
        "-ar", "16000",
        "-vn",
        str(speech_file),
    ]
    subprocess.call(command)
def main(video_file, speech_file):
    """Transcribe the given audio file extracted from the video file.

    The audio is first extracted from ``video_file`` into ``speech_file``,
    then ``speech_file`` is sent to the speech-to-text service. The JSON
    response and the request latency (in seconds) are printed between
    banner lines.

    Args:
        video_file: the name of the video stream to extract audio from
        speech_file: the name of the audio file.
    """
    extract_audio(video_file, speech_file)
    with open(speech_file, 'rb') as speech:
        start = time.time()
        result = speech_to_text.recognize(
            speech, content_type='audio/wav', timestamps=True,
            word_confidence=True)
        # Stop the clock as soon as the service answers so the reported
        # latency does not include JSON formatting or console output.
        response_latency = str(time.time() - start)
    response = json.dumps(result, indent=2)
    print("####################")
    print("####################")
    print(response)
    # Call form of print: valid on both Python 2 and Python 3.
    print(response_latency)
    print("####################")
    print("####################")
if __name__ == '__main__':
    # Command-line entry point: two required positional paths, declared in
    # the order they are expected on the command line.
    cli = argparse.ArgumentParser()
    positional_args = (
        ('video_file', 'Full path of video file to get audio from'),
        ('speech_file', 'Full path of audio file to be recognized'),
    )
    for arg_name, arg_help in positional_args:
        cli.add_argument(arg_name, help=arg_help)
    options = cli.parse_args()
    main(options.video_file, options.speech_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment