Created
August 1, 2017 15:36
-
-
Save sheikhfaisalanwar/c69683e4843c76dd5e65aaa93c41364e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import base64
import json
import os
import subprocess
import time

import httplib2
## Going with IBM
from watson_developer_cloud import SpeechToTextV1
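"""
Requirements:
  * The IBM Watson SDK:  pip install --upgrade watson-developer-cloud
  * IBM Watson Speech to Text service credentials (username and password)
  * The ffmpeg executable available on PATH (used to extract the audio track)
"""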
## IBM CREDS
# Credentials are read from the environment first so that real secrets do not
# have to live in source control:
#   WATSON_STT_USERNAME / WATSON_STT_PASSWORD
# NOTE(security): the literal fallbacks below are kept only for backward
# compatibility with the original script. They are committed in plain text,
# so they should be treated as compromised: rotate them in the IBM console
# and delete the fallbacks once the environment variables are in place.
speech_to_text = SpeechToTextV1(
    username=os.environ.get(
        'WATSON_STT_USERNAME', '07802403-20f7-477d-bd84-9c849f3658a4'),
    password=os.environ.get('WATSON_STT_PASSWORD', 'Q44hNwIZNEDW'),
    x_watson_learning_opt_out=False
)
def extract_audio(video_file, speech_file):
    """Extract the audio track of ``video_file`` into ``speech_file`` using ffmpeg.

    Args:
        video_file: path of the input video; converted with ``str()``.
        speech_file: path of the audio file ffmpeg should write; converted
            with ``str()``.

    The ffmpeg exit status is not checked, matching the original behaviour.
    """
    # Pass an argument list and avoid the shell entirely: paths containing
    # spaces or shell metacharacters are handed to ffmpeg verbatim instead of
    # being word-split or interpreted as shell syntax (command injection).
    command = [
        "ffmpeg",
        "-loglevel", "panic",   # suppress ffmpeg's console output
        "-i", str(video_file),
        "-ab", "160k",          # audio bitrate
        "-ac", "1",             # single (mono) channel
        "-ar", "16000",         # 16 kHz sample rate
        "-vn",                  # drop the video stream
        str(speech_file),
    ]
    subprocess.call(command)
def main(video_file, speech_file):
    """Transcribe the given audio file extracted from the video file.

    The audio is first extracted with ``extract_audio``; the resulting file is
    then sent to ``speech_to_text.recognize`` as ``audio/wav`` with word
    timestamps and word confidences requested. The response is printed as
    indented JSON, followed by the elapsed time of the request in seconds,
    framed by banner lines.

    Args:
        video_file: the name of the video stream to extract audio from
        speech_file: the name of the audio file.
    """
    extract_audio(video_file, speech_file)

    with open(speech_file, 'rb') as speech:
        start = time.time()
        response = json.dumps(
            speech_to_text.recognize(
                speech, content_type='audio/wav', timestamps=True,
                word_confidence=True),
            indent=2)
        # Measure as soon as the response is available so that console
        # output does not inflate the reported latency.
        response_latency = str(time.time() - start)

    print("####################")
    print("####################")
    print(response)
    # Call-style print works on both Python 2 and Python 3; the original
    # statement form (`print response_latency`) is a SyntaxError on Python 3.
    print(response_latency)
    print("####################")
    print("####################")
if __name__ == '__main__':
    # Command-line entry point: two required positional paths, handed
    # straight to main().
    cli = argparse.ArgumentParser()
    for arg_name, arg_help in (
            ('video_file', 'Full path of video file to get audio from'),
            ('speech_file', 'Full path of audio file to be recognized'),
    ):
        cli.add_argument(arg_name, help=arg_help)
    options = cli.parse_args()
    main(options.video_file, options.speech_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment