maciejkorsan · June 19, 2024 13:33
diff --git a/transcribe.py b/transcribe.py
 import http.client
 import json
 import os
 import sys
 import mimetypes

 def transcribe_audio(api_key, audio_file_path):
    """Transcribe an audio file with the OpenAI ``whisper-1`` model.

    The file is sent as a hand-built ``multipart/form-data`` POST to
    ``/v1/audio/transcriptions`` on ``api.openai.com``.

    Args:
        api_key: Token sent in the ``Authorization: Bearer`` header.
        audio_file_path: Path of the audio file to upload.

    Returns:
        The value of the ``"text"`` field of the JSON response.

    Raises:
        OSError: If the audio file cannot be opened or read.
        ValueError: If the response body is not JSON, or is JSON without a
            ``"text"`` field (for example an API error object).
    """
    # Read the file first so a bad path fails before any connection exists.
    with open(audio_file_path, "rb") as audio_file:
        audio_data = audio_file.read()

    boundary = "----WebKitFormBoundary7MA4YWxkTrZu0gW"

    # guess_type() returns None for unknown extensions; without a fallback the
    # literal text "None" would be sent as the part's Content-Type.
    content_type = mimetypes.guess_type(audio_file_path)[0] or "application/octet-stream"

    # The filename sits inside a quoted header parameter, so quotes and line
    # breaks must be percent-escaped to keep the part header well-formed.
    filename = os.path.basename(audio_file_path)
    for raw, escaped in (('"', "%22"), ("\r", "%0D"), ("\n", "%0A")):
        filename = filename.replace(raw, escaped)

    file_part_header = (
        f"--{boundary}\r\n"
        f'Content-Disposition: form-data; name="file"; filename="{filename}"\r\n'
        f"Content-Type: {content_type}\r\n\r\n"
    ).encode("utf-8")
    model_part_and_close = (
        f"\r\n--{boundary}\r\n"
        'Content-Disposition: form-data; name="model"\r\n\r\n'
        f"whisper-1\r\n--{boundary}--"
    ).encode("utf-8")
    payload = file_part_header + audio_data + model_part_and_close

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": f"multipart/form-data; boundary={boundary}"
    }

    conn = http.client.HTTPSConnection("api.openai.com")
    try:
        conn.request("POST", "/v1/audio/transcriptions", body=payload, headers=headers)
        res = conn.getresponse()
        status = res.status
        data = res.read()
    finally:
        # Release the connection even when the request or the read fails.
        conn.close()

    try:
        response_json = json.loads(data.decode("utf-8"))
    except ValueError as exc:
        # Covers both invalid UTF-8 and invalid JSON (e.g. a proxy error page).
        raise ValueError(f"Non-JSON response (HTTP {status}): {data[:200]!r}") from exc

    if isinstance(response_json, dict) and "text" in response_json:
        return response_json["text"]
    raise ValueError(f"Unexpected response: {response_json}")

 def summarize_text(api_key, text):
    """Summarize *text* with the OpenAI ``gpt-3.5-turbo`` chat model.

    Sends a single chat-completion request to ``/v1/chat/completions`` on
    ``api.openai.com`` asking for a summary in the text's own language.

    Args:
        api_key: Token sent in the ``Authorization: Bearer`` header.
        text: The text to summarize; it is embedded in the user message.

    Returns:
        The ``content`` of the first choice's message in the JSON response.

    Raises:
        ValueError: If the response body is not JSON, or is JSON without a
            non-empty ``"choices"`` list (for example an API error object).
    """
    payload = json.dumps({
        "model": "gpt-3.5-turbo",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Summarize the following text, and don't translate it to english if it's not in english: {text}"}
        ]
    })

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    conn = http.client.HTTPSConnection("api.openai.com")
    try:
        conn.request("POST", "/v1/chat/completions", body=payload, headers=headers)
        res = conn.getresponse()
        status = res.status
        data = res.read()
    finally:
        # Release the connection even when the request or the read fails.
        conn.close()

    try:
        response_json = json.loads(data.decode("utf-8"))
    except ValueError as exc:
        # Covers both invalid UTF-8 and invalid JSON (e.g. a proxy error page).
        raise ValueError(f"Non-JSON response (HTTP {status}): {data[:200]!r}") from exc

    # Guard the shape explicitly so a null or missing "choices" is reported as
    # an unexpected response instead of surfacing as a TypeError.
    choices = response_json.get("choices") if isinstance(response_json, dict) else None
    if isinstance(choices, list) and choices:
        return choices[0]["message"]["content"]
    raise ValueError(f"Unexpected response: {response_json}")

 def save_text_to_file(text, file_path):
    """Write *text* to *file_path* as UTF-8, replacing any existing content.

    Args:
        text: The string to write.
        file_path: Destination path; the file is created or truncated.
    """
    # Pin the encoding: the platform default (e.g. a legacy Windows code page)
    # may be unable to encode characters of a non-English transcription.
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(text)

 def main(audio_file_path):
    """Transcribe an audio file, summarize the result, and save both texts.

    The API key is read from the ``OPENAI_API_KEY`` environment variable.
    Output goes to ``transcribed-<stem>.txt`` and ``summary-<stem>.txt``
    (relative paths), where ``<stem>`` is the audio file's base name without
    its extension. Any exception raised while transcribing, summarizing or
    saving is printed as ``Error: ...`` instead of propagating.

    Args:
        audio_file_path: Path of the audio file to process.

    Raises:
        ValueError: If ``OPENAI_API_KEY`` is unset or empty.
    """
    key = os.environ.get("OPENAI_API_KEY")
    if not key:
        raise ValueError("API key not found. Set the OPENAI_API_KEY environment variable.")

    stem, _extension = os.path.splitext(os.path.basename(audio_file_path))
    transcript_target = "transcribed-{}.txt".format(stem)
    summary_target = "summary-{}.txt".format(stem)

    try:
        # Step 1: speech to text; persisted before summarizing so the
        # transcription survives a later failure.
        transcript = transcribe_audio(key, audio_file_path)
        print("Transcription:", transcript)
        save_text_to_file(transcript, transcript_target)
        print(f"Transcription saved to: {transcript_target}")

        # Step 2: summarize the transcription.
        digest = summarize_text(key, transcript)
        print("Summary:", digest)
        save_text_to_file(digest, summary_target)
        print(f"Summary saved to: {summary_target}")
    except Exception as err:
        print(f"Error: {err}")

 if __name__ == "__main__":
    # Script entry point: exactly one argument, the audio file path, is
    # required; anything else prints the usage line and exits with status 1.
    if len(sys.argv[1:]) != 1:
        print("Usage: python script.py /path/to/audio_message.wav")
        raise SystemExit(1)

    audio_file_path = sys.argv[-1]
    main(audio_file_path)
	import http.client
	import json
	import os
	import sys
	import mimetypes

	def transcribe_audio(api_key, audio_file_path):
	    """Transcribe an audio file with the OpenAI ``whisper-1`` model.

	    The file is sent as a hand-built ``multipart/form-data`` POST to
	    ``/v1/audio/transcriptions`` on ``api.openai.com``.

	    Args:
	        api_key: Token sent in the ``Authorization: Bearer`` header.
	        audio_file_path: Path of the audio file to upload.

	    Returns:
	        The value of the ``"text"`` field of the JSON response.

	    Raises:
	        OSError: If the audio file cannot be opened or read.
	        ValueError: If the response body is not JSON, or is JSON without a
	            ``"text"`` field (for example an API error object).
	    """
	    # Read the file first so a bad path fails before any connection exists.
	    with open(audio_file_path, "rb") as audio_file:
	        audio_data = audio_file.read()

	    boundary = "----WebKitFormBoundary7MA4YWxkTrZu0gW"

	    # guess_type() returns None for unknown extensions; without a fallback the
	    # literal text "None" would be sent as the part's Content-Type.
	    content_type = mimetypes.guess_type(audio_file_path)[0] or "application/octet-stream"

	    # The filename sits inside a quoted header parameter, so quotes and line
	    # breaks must be percent-escaped to keep the part header well-formed.
	    filename = os.path.basename(audio_file_path)
	    for raw, escaped in (('"', "%22"), ("\r", "%0D"), ("\n", "%0A")):
	        filename = filename.replace(raw, escaped)

	    file_part_header = (
	        f"--{boundary}\r\n"
	        f'Content-Disposition: form-data; name="file"; filename="{filename}"\r\n'
	        f"Content-Type: {content_type}\r\n\r\n"
	    ).encode("utf-8")
	    model_part_and_close = (
	        f"\r\n--{boundary}\r\n"
	        'Content-Disposition: form-data; name="model"\r\n\r\n'
	        f"whisper-1\r\n--{boundary}--"
	    ).encode("utf-8")
	    payload = file_part_header + audio_data + model_part_and_close

	    headers = {
	        "Authorization": f"Bearer {api_key}",
	        "Content-Type": f"multipart/form-data; boundary={boundary}"
	    }

	    conn = http.client.HTTPSConnection("api.openai.com")
	    try:
	        conn.request("POST", "/v1/audio/transcriptions", body=payload, headers=headers)
	        res = conn.getresponse()
	        status = res.status
	        data = res.read()
	    finally:
	        # Release the connection even when the request or the read fails.
	        conn.close()

	    try:
	        response_json = json.loads(data.decode("utf-8"))
	    except ValueError as exc:
	        # Covers both invalid UTF-8 and invalid JSON (e.g. a proxy error page).
	        raise ValueError(f"Non-JSON response (HTTP {status}): {data[:200]!r}") from exc

	    if isinstance(response_json, dict) and "text" in response_json:
	        return response_json["text"]
	    raise ValueError(f"Unexpected response: {response_json}")

	def summarize_text(api_key, text):
	    """Summarize *text* with the OpenAI ``gpt-3.5-turbo`` chat model.

	    Sends a single chat-completion request to ``/v1/chat/completions`` on
	    ``api.openai.com`` asking for a summary in the text's own language.

	    Args:
	        api_key: Token sent in the ``Authorization: Bearer`` header.
	        text: The text to summarize; it is embedded in the user message.

	    Returns:
	        The ``content`` of the first choice's message in the JSON response.

	    Raises:
	        ValueError: If the response body is not JSON, or is JSON without a
	            non-empty ``"choices"`` list (for example an API error object).
	    """
	    payload = json.dumps({
	        "model": "gpt-3.5-turbo",
	        "messages": [
	            {"role": "system", "content": "You are a helpful assistant."},
	            {"role": "user", "content": f"Summarize the following text, and don't translate it to english if it's not in english: {text}"}
	        ]
	    })

	    headers = {
	        "Authorization": f"Bearer {api_key}",
	        "Content-Type": "application/json"
	    }

	    conn = http.client.HTTPSConnection("api.openai.com")
	    try:
	        conn.request("POST", "/v1/chat/completions", body=payload, headers=headers)
	        res = conn.getresponse()
	        status = res.status
	        data = res.read()
	    finally:
	        # Release the connection even when the request or the read fails.
	        conn.close()

	    try:
	        response_json = json.loads(data.decode("utf-8"))
	    except ValueError as exc:
	        # Covers both invalid UTF-8 and invalid JSON (e.g. a proxy error page).
	        raise ValueError(f"Non-JSON response (HTTP {status}): {data[:200]!r}") from exc

	    # Guard the shape explicitly so a null or missing "choices" is reported as
	    # an unexpected response instead of surfacing as a TypeError.
	    choices = response_json.get("choices") if isinstance(response_json, dict) else None
	    if isinstance(choices, list) and choices:
	        return choices[0]["message"]["content"]
	    raise ValueError(f"Unexpected response: {response_json}")

	def save_text_to_file(text, file_path):
	    """Write *text* to *file_path* as UTF-8, replacing any existing content.

	    Args:
	        text: The string to write.
	        file_path: Destination path; the file is created or truncated.
	    """
	    # Pin the encoding: the platform default (e.g. a legacy Windows code page)
	    # may be unable to encode characters of a non-English transcription.
	    with open(file_path, "w", encoding="utf-8") as file:
	        file.write(text)

	def main(audio_file_path):
	    """Transcribe an audio file, summarize the result, and save both texts.

	    The API key is read from the ``OPENAI_API_KEY`` environment variable.
	    Output goes to ``transcribed-<stem>.txt`` and ``summary-<stem>.txt``
	    (relative paths), where ``<stem>`` is the audio file's base name without
	    its extension. Any exception raised while transcribing, summarizing or
	    saving is printed as ``Error: ...`` instead of propagating.

	    Args:
	        audio_file_path: Path of the audio file to process.

	    Raises:
	        ValueError: If ``OPENAI_API_KEY`` is unset or empty.
	    """
	    api_key = os.getenv("OPENAI_API_KEY")
	    if not api_key:
	        raise ValueError("API key not found. Set the OPENAI_API_KEY environment variable.")

	    base_filename = os.path.splitext(os.path.basename(audio_file_path))[0]
	    transcription_file_path = f"transcribed-{base_filename}.txt"
	    summary_file_path = f"summary-{base_filename}.txt"

	    try:
	        # The transcription is saved before summarizing so it survives a
	        # failure in the summary step.
	        transcription = transcribe_audio(api_key, audio_file_path)
	        print("Transcription:", transcription)
	        save_text_to_file(transcription, transcription_file_path)
	        print(f"Transcription saved to: {transcription_file_path}")

	        summary = summarize_text(api_key, transcription)
	        print("Summary:", summary)
	        save_text_to_file(summary, summary_file_path)
	        print(f"Summary saved to: {summary_file_path}")
	    except Exception as e:
	        print(f"Error: {e}")

	if __name__ == "__main__":
	    # Script entry point: exactly one argument, the audio file path, is
	    # required; anything else prints the usage line and exits with status 1.
	    if len(sys.argv) != 2:
	        print("Usage: python script.py /path/to/audio_message.wav")
	        sys.exit(1)

	    audio_file_path = sys.argv[1]
	    main(audio_file_path)