lpabon · June 29, 2023 19:38
diff --git a/narrator.diff b/narrator.diff
 diff --git a/.gitignore b/.gitignore
 index cd5cc6c..13466b5 100644
 --- a/.gitignore
 +++ b/.gitignore
 @@ -3,4 +3,7 @@ venv
 __pycache__
 *.pyc
 *.txt
 +*.wav
 +*.mp3
 !requirements.txt
 +*.diff
 diff --git a/ai_presenter/ai_presenter.py b/ai_presenter/ai_presenter.py
 index 18c6df5..d20e135 100644
 --- a/ai_presenter/ai_presenter.py
 +++ b/ai_presenter/ai_presenter.py
 @@ -19,7 +19,7 @@ class AIPresenter:
             textai = self.generator.get_text()
             for key, scene in self.database.scenes.items():
                 logging.info(f"Working on scene: {scene.name} in " +
 -                             f"{scene.location}")
 +                            f"{scene.location}")
 
                 output = textai.generate(scene)
                 file.write(output + '\n')
 diff --git a/ai_presenter/config/voice.py b/ai_presenter/config/voice.py
 index c4ea3bd..646243b 100644
 --- a/ai_presenter/config/voice.py
 +++ b/ai_presenter/config/voice.py
 @@ -2,8 +2,8 @@
 # VoiceConfig can have passed in name, gender, age,
 # accent, and accent_strength and have these initialized
 class VoiceConfig:
 -    def __init__(self, name, gender, age, accent,
 -                 accent_strength, description):
 +    def __init__(self, name='', gender='', age='', accent='',
 +                 accent_strength=0, description=''):
         self.name = name
         self.gender = gender
         self.age = age
 diff --git a/ai_presenter/database.py b/ai_presenter/database.py
 index 02e05d4..7d8110e 100644
 --- a/ai_presenter/database.py
 +++ b/ai_presenter/database.py
 @@ -1,3 +1,4 @@
 +import logging
 from ai_presenter.config.config import Config
 
 
 @@ -28,6 +29,7 @@ class Actor:
         self.height = data['height']
         self.gender = data['gender']
         self.accent = data['accent']
 +        logging.info(f'actor: name:{self.name} gender:{self.gender}')
 
 
 class Scene:
 diff --git a/ai_presenter/text_ai/chatgpt.py b/ai_presenter/text_ai/chatgpt.py
 index 1278faf..a69faad 100644
 --- a/ai_presenter/text_ai/chatgpt.py
 +++ b/ai_presenter/text_ai/chatgpt.py
 @@ -17,7 +17,7 @@ class TextChatGPT(TextAi):
                 "content": "You will be provided with a set of characters, " +
                 "their description, and a scene in JSON format. " +
                 "Create dialogue using the plot and characters " +
 -                "provided and return it in JSON format."
 +                "provided and return it in JSON format. Add a narrator with key 'narrator' describing the characters, scene, and emotions"
             },
             {
                 "role": "user",
 @@ -33,10 +33,7 @@ class TextChatGPT(TextAi):
             },
             {
                 "role": "assistant",
 -                "content": '{"dialogue":[{"speaker":"Max Doe","message"' +
 -                ':"Joana, I must say, your taste in bagels is utterly ' +
 -                'appalling!"},{"speaker":"Joana Smith","message":' +
 -                '"Max, you are right."}]};'
 +                "content": '{"dialogue":[{"speaker":"narrator","message":"Max stood close to Joana."},{"speaker":"Max Doe","message":"Joana, I must say, your taste in bagels is utterly appalling!"},{"speaker":"Joana Smith","message":"Max, you are right."},{"speaker":"narrator","message":"Finally Max was happy."}]}'
             }
 
         ]
 @@ -53,21 +50,49 @@ class TextChatGPT(TextAi):
         self.messages.append(
             {"role": "user", "content": json.dumps(self.user_message)}
         )
 -        completion = openai.ChatCompletion.create(
 -            model="gpt-3.5-turbo",
 -            messages=self.messages,
 -        )
 -        # clear for next time
 -        # self.messages = []
 +
         self.user_message = {}
 +        messages = self.messages
 +        full_resp = ""
 
 -        resp = completion.choices[0].message.content
 -        self.messages.append(
 -            {"role": "assistant", "content": resp}
 -        )
 -        logging.info("Recieved " + resp)
 -        resp = json_trim(resp)
 -        try:
 -            return json.dumps(json.loads(resp))
 -        except Exception:
 -            return "{}"
 +        count = 5
 +        while count > 0:
 +            count -= 1
 +            completion = openai.ChatCompletion.create(
 +                model="gpt-3.5-turbo",
 +                messages=messages,
 +            )
 +
 +            resp = completion.choices[0].message.content
 +            finish_reason = completion.choices[0].finish_reason
 +            full_resp += resp
 +
 +            logging.info(">> Recieved: " + resp)
 +            logging.info(">> Have: " + full_resp)
 +            logging.info(">> finish_reason: " + completion.choices[0].finish_reason)
 +            if finish_reason == 'stop':
 +                logging.info('chatgpt: got all info')
 +                self.messages.append(
 +                    {"role": "assistant", "content": full_resp}
 +                )
 +                try:
 +                    resp = json_trim(resp)
 +                    return json.dumps(json.loads(resp))
 +                except Exception:
 +                    return "{}"
 +            elif finish_reason == 'length':
 +                logging.info(f'chatgpt: need more info, retrying, usage:{completion.choices[0].usage} count:{count}')
 +                messages += [
 +                    {"role": "assistant", "content": ""},
 +                ]
 +            else:
 +                raise Exception("finish reason is " + finish_reason)
 +
 +            ## DEBUG
 +            try:
 +                json.dumps(resp)
 +            except Exception:
 +                logging.critical("******* >> resp is not json but finish_reason: " + completion.choices[0].finish_reason)
 +
 +
 +        raise Exception('Tried too many times to talk to ChatGPT')
 diff --git a/ai_presenter/voice_ai/base.py b/ai_presenter/voice_ai/base.py
 index f03562f..48a76e2 100644
 --- a/ai_presenter/voice_ai/base.py
 +++ b/ai_presenter/voice_ai/base.py
 @@ -1,6 +1,6 @@
 from ai_presenter.database import Database
 from ai_presenter.config.voice import VoiceConfig
 -from elevenlabs import Iterator, Voice
 +from elevenlabs import Iterator, Voice, Voices
 import json
 import logging
 
 @@ -39,26 +39,27 @@ class VoiceAI:
 
     def create_character_db(self, line: str):
         json_string = line.strip()
 -        data = (json.loads(json_string))
 +        data = json.loads(json_string)
 
         for message in data['dialogue']:
             name = message['speaker']
 +
             if name not in self.characters:
 +                logging.info(f"creating character {name}")
                 try:
                     character_config = VoiceConfig(name,
 -                                                   self.actors[name].gender,
 -                                                   self.actors[name].age,
 -                                                   self.actors[name].accent,
 -                                                   1.99,
 -                                                   self.actors[name].
 -                                                   description)
 +                                                self.actors[name].gender,
 +                                                self.actors[name].age,
 +                                                self.actors[name].accent,
 +                                                1.99,
 +                                                self.actors[name].description)
                     self.characters[name] = self.new_actor(character_config)
                 except Exception:
                     character_config = VoiceConfig(name,
 -                                                   'male',
 -                                                   'middle_aged',
 -                                                   "british",
 -                                                   1.99,
 -                                                   f"This is the {name}")
 +                                                'male',
 +                                                'middle_aged',
 +                                                "british",
 +                                                1.99,
 +                                                f"This is the {name}")
                     self.characters[name] = self.new_actor(character_config)
         return data
 diff --git a/ai_presenter/voice_ai/elevenlabs.py b/ai_presenter/voice_ai/elevenlabs.py
 index 52aa8df..fb080ca 100644
 --- a/ai_presenter/voice_ai/elevenlabs.py
 +++ b/ai_presenter/voice_ai/elevenlabs.py
 @@ -7,6 +7,21 @@ import logging
 import os
 
 
 +class VoiceAIDefaultActorElevenLabs(VoiceAIActor):
 +    def __init__(self, config: VoiceConfig, voice: Voice):
 +        super().__init__(config)
 +        self.voice = voice
 +
 +    # .says takes the message and generates audio from that message
 +    # note: for the real voiceaiactor class, the elevenlabs generate
 +    # methods return raw data called audio which can be manipulated before
 +    # saving to a file(ie. concatenation)
 +    def says(self, message) -> (bytes | Iterator[bytes]):
 +        logging.info(f'{self.name} says {message}')
 +        audio = generate(text=message, model="eleven_monolingual_v1",
 +                         voice=self.voice)
 +        return audio
 +
 class VoiceAIActorElevenLabs(VoiceAIActor):
     def __init__(self, config: VoiceConfig):
         super().__init__(config)
 @@ -16,7 +31,7 @@ class VoiceAIActorElevenLabs(VoiceAIActor):
             f'I am {self.name}. I am a {self.age} year old ' + \
             f'{self.gender} with a {self.accent} accent.'
 
 -        logging.info(f"designing a voice for {self.name}")
 +        logging.info(f"designing a voice for {self.name} gender:{self.gender}")
         self.voice_design = VoiceDesign(name=self.name,
                                         text=self.sample_text,
                                         gender=self.gender,
 @@ -40,7 +55,12 @@ class ElevenLabs(VoiceAI):
     def __init__(self, db: Database):
         super().__init__(db)
 
 -    def new_actor(self, config):
 +    def new_actor(self, config) -> VoiceAIActor:
 +        if config.name == 'narrator':
 +            return VoiceAIDefaultActorElevenLabs(
 +                config, 
 +                Voice(voice_id='idofvoice', name='narrator'),
 +            )
         return VoiceAIActorElevenLabs(config)
 
     # make narrator actor
 @@ -68,7 +88,6 @@ class ElevenLabs(VoiceAI):
                 for message in data['dialogue']:
                     name = message['speaker']
                     text = message['message']
 -                    logging.info('ElevenLabs: Stitching together audio')
                     audio += self.characters[name].says(text)
         logging.info(f"ElevenLabs: Audio can be found in {output_file}")
         save(audio, output_file)
 diff --git a/ai_presenter/voice_ai/fake.py b/ai_presenter/voice_ai/fake.py
 index abee3bc..89a6c2e 100644
 --- a/ai_presenter/voice_ai/fake.py
 +++ b/ai_presenter/voice_ai/fake.py
 @@ -10,7 +10,7 @@ class VoiceAIActorFake(VoiceAIActor):
     def __init__(self, config: VoiceConfig):
         super().__init__(config)
 
 -    def says(self, message, emotion) -> (bytes | Iterator[bytes]):
 +    def says(self, message) -> (bytes | Iterator[bytes]):
         # .says takes the message and generates audio from that message
         # this audio gets saved to a file
         # personally don't think says needs a file passed to it bc
 @@ -18,7 +18,7 @@ class VoiceAIActorFake(VoiceAIActor):
         # methods return raw data called audio which can be manipulated before
         # saving to a file(ie. concatenation)
         logging.info(f'VoiceAIActorFake: {self.name} ' +
 -                     f'says {message} in a {emotion} way')
 +                     f'says {message}')
 
         audio = f'name: {self.name}\ngender: {self.gender}\n' + \
             f'age: {self.age}\naccent: {self.accent}\n' + \
 @@ -27,11 +27,10 @@ class VoiceAIActorFake(VoiceAIActor):
                 f'message: {message}\n\n'
         return audio
 
 -    def __get_voice(self, emotion) -> Voice:
 +    def __get_voice(self) -> Voice:
         logging.info(f'I am {self.name}. I am a {self.age} year old ' +
                      f'{self.gender} with a {self.accent} accent. I am ' +
 -                     f'currently speaking in a {emotion} tone because I' +
 -                     f' am {emotion}')
 +                     f'currently speaking')
 
 
 class VoiceAIFake(VoiceAI):
 @@ -62,9 +61,8 @@ class VoiceAIFake(VoiceAI):
                 for message in data['dialogue']:
                     name = message['speaker']
                     text = message['message']
 -                    emotion = message['emotion']
                     logging.info('VoiceAIFake: Stitching together audio')
 -                    audio += self.characters[name].says(text, emotion)
 +                    audio += self.characters[name].says(text)
 
         logging.info('VoiceAIFake: Generating audio file')
         with open(output_file, 'w') as out:
	diff --git a/.gitignore b/.gitignore
	index cd5cc6c..13466b5 100644
	--- a/.gitignore
	+++ b/.gitignore
	@@ -3,4 +3,7 @@ venv
	__pycache__
	*.pyc
	*.txt
	+*.wav
	+*.mp3
	!requirements.txt
	+*.diff
	diff --git a/ai_presenter/ai_presenter.py b/ai_presenter/ai_presenter.py
	index 18c6df5..d20e135 100644
	--- a/ai_presenter/ai_presenter.py
	+++ b/ai_presenter/ai_presenter.py
	@@ -19,7 +19,7 @@ class AIPresenter:
	textai = self.generator.get_text()
	for key, scene in self.database.scenes.items():
	logging.info(f"Working on scene: {scene.name} in " +
	- f"{scene.location}")
	+ f"{scene.location}")

	output = textai.generate(scene)
	file.write(output + '\n')
	diff --git a/ai_presenter/config/voice.py b/ai_presenter/config/voice.py
	index c4ea3bd..646243b 100644
	--- a/ai_presenter/config/voice.py
	+++ b/ai_presenter/config/voice.py
	@@ -2,8 +2,8 @@
	# VoiceConfig can have passed in name, gender, age,
	# accent, and accent_strength and have these initialized
	class VoiceConfig:
	- def __init__(self, name, gender, age, accent,
	- accent_strength, description):
	+ def __init__(self, name='', gender='', age='', accent='',
	+ accent_strength=0, description=''):
	self.name = name
	self.gender = gender
	self.age = age
	diff --git a/ai_presenter/database.py b/ai_presenter/database.py
	index 02e05d4..7d8110e 100644
	--- a/ai_presenter/database.py
	+++ b/ai_presenter/database.py
	@@ -1,3 +1,4 @@
	+import logging
	from ai_presenter.config.config import Config


	@@ -28,6 +29,7 @@ class Actor:
	self.height = data['height']
	self.gender = data['gender']
	self.accent = data['accent']
	+ logging.info(f'actor: name:{self.name} gender:{self.gender}')


	class Scene:
	diff --git a/ai_presenter/text_ai/chatgpt.py b/ai_presenter/text_ai/chatgpt.py
	index 1278faf..a69faad 100644
	--- a/ai_presenter/text_ai/chatgpt.py
	+++ b/ai_presenter/text_ai/chatgpt.py
	@@ -17,7 +17,7 @@ class TextChatGPT(TextAi):
	"content": "You will be provided with a set of characters, " +
	"their description, and a scene in JSON format. " +
	"Create dialogue using the plot and characters " +
	- "provided and return it in JSON format."
	+ "provided and return it in JSON format. Add a narrator with key 'narrator' describing the characters, scene, and emotions"
	},
	{
	"role": "user",
	@@ -33,10 +33,7 @@ class TextChatGPT(TextAi):
	},
	{
	"role": "assistant",
	- "content": '{"dialogue":[{"speaker":"Max Doe","message"' +
	- ':"Joana, I must say, your taste in bagels is utterly ' +
	- 'appalling!"},{"speaker":"Joana Smith","message":' +
	- '"Max, you are right."}]};'
	+ "content": '{"dialogue":[{"speaker":"narrator","message":"Max stood close to Joana."},{"speaker":"Max Doe","message":"Joana, I must say, your taste in bagels is utterly appalling!"},{"speaker":"Joana Smith","message":"Max, you are right."},{"speaker":"narrator","message":"Finally Max was happy."}]}'
	}

	]
	@@ -53,21 +50,49 @@ class TextChatGPT(TextAi):
	self.messages.append(
	{"role": "user", "content": json.dumps(self.user_message)}
	)
	- completion = openai.ChatCompletion.create(
	- model="gpt-3.5-turbo",
	- messages=self.messages,
	- )
	- # clear for next time
	- # self.messages = []
	+
	self.user_message = {}
	+ messages = self.messages
	+ full_resp = ""

	- resp = completion.choices[0].message.content
	- self.messages.append(
	- {"role": "assistant", "content": resp}
	- )
	- logging.info("Recieved " + resp)
	- resp = json_trim(resp)
	- try:
	- return json.dumps(json.loads(resp))
	- except Exception:
	- return "{}"
	+ count = 5
	+ while count > 0:
	+ count -= 1
	+ completion = openai.ChatCompletion.create(
	+ model="gpt-3.5-turbo",
	+ messages=messages,
	+ )
	+
	+ resp = completion.choices[0].message.content
	+ finish_reason = completion.choices[0].finish_reason
	+ full_resp += resp
	+
	+ logging.info(">> Recieved: " + resp)
	+ logging.info(">> Have: " + full_resp)
	+ logging.info(">> finish_reason: " + completion.choices[0].finish_reason)
	+ if finish_reason == 'stop':
	+ logging.info('chatgpt: got all info')
	+ self.messages.append(
	+ {"role": "assistant", "content": full_resp}
	+ )
	+ try:
	+ resp = json_trim(resp)
	+ return json.dumps(json.loads(resp))
	+ except Exception:
	+ return "{}"
	+ elif finish_reason == 'length':
	+ logging.info(f'chatgpt: need more info, retrying, usage:{completion.choices[0].usage} count:{count}')
	+ messages += [
	+ {"role": "assistant", "content": ""},
	+ ]
	+ else:
	+ raise Exception("finish reason is " + finish_reason)
	+
	+ ## DEBUG
	+ try:
	+ json.dumps(resp)
	+ except Exception:
	+ logging.critical("******* >> resp is not json but finish_reason: " + completion.choices[0].finish_reason)
	+
	+
	+ raise Exception('Tried too many times to talk to ChatGPT')
	diff --git a/ai_presenter/voice_ai/base.py b/ai_presenter/voice_ai/base.py
	index f03562f..48a76e2 100644
	--- a/ai_presenter/voice_ai/base.py
	+++ b/ai_presenter/voice_ai/base.py
	@@ -1,6 +1,6 @@
	from ai_presenter.database import Database
	from ai_presenter.config.voice import VoiceConfig
	-from elevenlabs import Iterator, Voice
	+from elevenlabs import Iterator, Voice, Voices
	import json
	import logging

	@@ -39,26 +39,27 @@ class VoiceAI:

	def create_character_db(self, line: str):
	json_string = line.strip()
	- data = (json.loads(json_string))
	+ data = json.loads(json_string)

	for message in data['dialogue']:
	name = message['speaker']
	+
	if name not in self.characters:
	+ logging.info(f"creating character {name}")
	try:
	character_config = VoiceConfig(name,
	- self.actors[name].gender,
	- self.actors[name].age,
	- self.actors[name].accent,
	- 1.99,
	- self.actors[name].
	- description)
	+ self.actors[name].gender,
	+ self.actors[name].age,
	+ self.actors[name].accent,
	+ 1.99,
	+ self.actors[name].description)
	self.characters[name] = self.new_actor(character_config)
	except Exception:
	character_config = VoiceConfig(name,
	- 'male',
	- 'middle_aged',
	- "british",
	- 1.99,
	- f"This is the {name}")
	+ 'male',
	+ 'middle_aged',
	+ "british",
	+ 1.99,
	+ f"This is the {name}")
	self.characters[name] = self.new_actor(character_config)
	return data
	diff --git a/ai_presenter/voice_ai/elevenlabs.py b/ai_presenter/voice_ai/elevenlabs.py
	index 52aa8df..fb080ca 100644
	--- a/ai_presenter/voice_ai/elevenlabs.py
	+++ b/ai_presenter/voice_ai/elevenlabs.py
	@@ -7,6 +7,21 @@ import logging
	import os


	+class VoiceAIDefaultActorElevenLabs(VoiceAIActor):
	+ def __init__(self, config: VoiceConfig, voice: Voice):
	+ super().__init__(config)
	+ self.voice = voice
	+
	+ # .says takes the message and generates audio from that message
	+ # note: for the real voiceaiactor class, the elevenlabs generate
	+ # methods return raw data called audio which can be manipulated before
	+ # saving to a file(ie. concatenation)
	+ def says(self, message) -> (bytes | Iterator[bytes]):
	+ logging.info(f'{self.name} says {message}')
	+ audio = generate(text=message, model="eleven_monolingual_v1",
	+ voice=self.voice)
	+ return audio
	+
	class VoiceAIActorElevenLabs(VoiceAIActor):
	def __init__(self, config: VoiceConfig):
	super().__init__(config)
	@@ -16,7 +31,7 @@ class VoiceAIActorElevenLabs(VoiceAIActor):
	f'I am {self.name}. I am a {self.age} year old ' + \
	f'{self.gender} with a {self.accent} accent.'

	- logging.info(f"designing a voice for {self.name}")
	+ logging.info(f"designing a voice for {self.name} gender:{self.gender}")
	self.voice_design = VoiceDesign(name=self.name,
	text=self.sample_text,
	gender=self.gender,
	@@ -40,7 +55,12 @@ class ElevenLabs(VoiceAI):
	def __init__(self, db: Database):
	super().__init__(db)

	- def new_actor(self, config):
	+ def new_actor(self, config) -> VoiceAIActor:
	+ if config.name == 'narrator':
	+ return VoiceAIDefaultActorElevenLabs(
	+ config,
	+ Voice(voice_id='idofvoice', name='narrator'),
	+ )
	return VoiceAIActorElevenLabs(config)

	# make narrator actor
	@@ -68,7 +88,6 @@ class ElevenLabs(VoiceAI):
	for message in data['dialogue']:
	name = message['speaker']
	text = message['message']
	- logging.info('ElevenLabs: Stitching together audio')
	audio += self.characters[name].says(text)
	logging.info(f"ElevenLabs: Audio can be found in {output_file}")
	save(audio, output_file)
	diff --git a/ai_presenter/voice_ai/fake.py b/ai_presenter/voice_ai/fake.py
	index abee3bc..89a6c2e 100644
	--- a/ai_presenter/voice_ai/fake.py
	+++ b/ai_presenter/voice_ai/fake.py
	@@ -10,7 +10,7 @@ class VoiceAIActorFake(VoiceAIActor):
	def __init__(self, config: VoiceConfig):
	super().__init__(config)

	- def says(self, message, emotion) -> (bytes | Iterator[bytes]):
	+ def says(self, message) -> (bytes | Iterator[bytes]):
	# .says takes the message and generates audio from that message
	# this audio gets saved to a file
	# personally don't think says needs a file passed to it bc
	@@ -18,7 +18,7 @@ class VoiceAIActorFake(VoiceAIActor):
	# methods return raw data called audio which can be manipulated before
	# saving to a file(ie. concatenation)
	logging.info(f'VoiceAIActorFake: {self.name} ' +
	- f'says {message} in a {emotion} way')
	+ f'says {message}')

	audio = f'name: {self.name}\ngender: {self.gender}\n' + \
	f'age: {self.age}\naccent: {self.accent}\n' + \
	@@ -27,11 +27,10 @@ class VoiceAIActorFake(VoiceAIActor):
	f'message: {message}\n\n'
	return audio

	- def __get_voice(self, emotion) -> Voice:
	+ def __get_voice(self) -> Voice:
	logging.info(f'I am {self.name}. I am a {self.age} year old ' +
	f'{self.gender} with a {self.accent} accent. I am ' +
	- f'currently speaking in a {emotion} tone because I' +
	- f' am {emotion}')
	+ f'currently speaking')


	class VoiceAIFake(VoiceAI):
	@@ -62,9 +61,8 @@ class VoiceAIFake(VoiceAI):
	for message in data['dialogue']:
	name = message['speaker']
	text = message['message']
	- emotion = message['emotion']
	logging.info('VoiceAIFake: Stitching together audio')
	- audio += self.characters[name].says(text, emotion)
	+ audio += self.characters[name].says(text)

	logging.info('VoiceAIFake: Generating audio file')
	with open(output_file, 'w') as out: