graylan0 · February 2, 2024 11:28
diff --git a/gistfile1.txt b/gistfile1.txt
 import asyncio
 from pydub import AudioSegment
 from kivy.lang import Builder
 from kivymd.app import MDApp
 from kivymd.uix.filemanager import MDFileManager
 from kivy.core.window import Window
 from concurrent.futures import ThreadPoolExecutor
 from kivy.clock import Clock
 import os
 import whisper
 from llama_cpp import Llama

 async def load_llama_model():
    try:
        return Llama(model_path="path/to/your/llama/model", n_gpu_layers=-1)
    except Exception as llama_load_error:
        print(f"Error loading LLaMA model: {llama_load_error}")
        return None

 # Use ThreadPoolExecutor to load the LLaMA model in a separate thread
 with ThreadPoolExecutor() as executor:
    llama_model = asyncio.run(executor.submit(load_llama_model))

 try:
    whisper_model = whisper.load_model("base")
 except Exception as whisper_load_error:
    print(f"Error loading Whisper model: {whisper_load_error}")
    whisper_model = None

 KV = '''
 BoxLayout:
    orientation: 'vertical'
    MDToolbar:
        title: "Audio Redactor"
        elevation: 10
    FloatLayout:
        MDRaisedButton:
            text: "Load Audio"
            pos_hint: {'center_x': 0.5, 'center_y': 0.6}
            on_release: app.file_manager_open()
        MDRaisedButton:
            text: "Process Audio"
            pos_hint: {'center_x': 0.5, 'center_y': 0.4}
            on_release: app.process_audio()
 '''

 class AudioRedactorApp(MDApp):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.audio_files = []
        Window.bind(on_dropfile=self.on_file_drop)
        self.file_manager = MDFileManager(exit_manager=self.exit_manager, select_path=self.select_path, preview=True)
        self.executor = ThreadPoolExecutor(max_workers=4)

    def build(self):
        return Builder.load_string(KV)

    def file_manager_open(self):
        self.file_manager.show('/')
        self.manager_open = True

    def select_path(self, path):
        self.audio_files.append(path)
        self.exit_manager()

    def exit_manager(self, *args):
        self.manager_open = False
        self.file_manager.close()

    def on_file_drop(self, window, file_path):
        self.audio_files.append(file_path.decode('utf-8'))

    def process_audio(self):
        asyncio.run(self.async_process_audio_files())

    async def async_process_audio_files(self):
        loop = asyncio.get_running_loop()
        for audio_path in self.audio_files:
            transcription = await loop.run_in_executor(self.executor, self.transcribe_audio, audio_path)
            sensitive_info = await loop.run_in_executor(self.executor, self.identify_sensitive_info, transcription)
            Clock.schedule_once(lambda dt: self.bleep_sensitive_parts(audio_path, sensitive_info), 0)

    def transcribe_audio(self, audio_path):
        if whisper_model:
            result = whisper_model.transcribe(audio_path)
            return result["text"]
        return ""

    def identify_sensitive_info(self, formatted_transcript):
        if llama_model:
            detailed_prompt = f"""
            Please read the following transcript carefully. Your task is to identify and list any sensitive information contained within the text according to the guidelines provided below:

            1. Personal Identifiable Information (PII): Any names, email addresses, phone numbers, or social security numbers.
            2. Financial Information: Credit card numbers, bank account details, or any financial transaction details.
            3. Health Information: Medical records, treatment history, or any health condition descriptions.
            4. Confidential Conversations: Any private discussions, business secrets, or confidential agreements.
            5. Explicit Content: Any explicit language or content not suitable for all audiences.
            6. Location Data: Specific addresses, geographical coordinates, or any location-specific details.

            Guidelines:
            - List each piece of sensitive information found, categorized by the rules above.
            - Provide the type of sensitive information and a brief description or the exact text, if applicable.
            - If no sensitive information is found, state "No sensitive information identified."

            Transcript:
            "{formatted_transcript}"

            Identified Sensitive Information:
            """
            output = llama_model(formatted_transcript, max_tokens=150, stop=["\n"], prompt=detailed_prompt)
            return output['choices'][0]['text'].strip()
        return ""

    def bleep_sensitive_parts(self, audio_path, sensitive_segments):
        if not sensitive_segments:
            return

        sound = AudioSegment.from_mp3(audio_path)
        bleep = AudioSegment.silent(duration=1000)
        for segment_info in sensitive_segments.split('\n'):
            if '-' in segment_info:
                start_time, end_time = map(lambda x: float(x) * 1000, segment_info.split('-'))
                sound = sound[:start_time] + bleep + sound[end_time:]
        output_path = "redacted_" + os.path.basename(audio_path)
        sound.export(output_path, format="mp3")

 if __name__ == '__main__':
    AudioRedactorApp().run()
	import asyncio
	from pydub import AudioSegment
	from kivy.lang import Builder
	from kivymd.app import MDApp
	from kivymd.uix.filemanager import MDFileManager
	from kivy.core.window import Window
	from concurrent.futures import ThreadPoolExecutor
	from kivy.clock import Clock
	import os
	import whisper
	from llama_cpp import Llama

	async def load_llama_model():
	try:
	return Llama(model_path="path/to/your/llama/model", n_gpu_layers=-1)
	except Exception as llama_load_error:
	print(f"Error loading LLaMA model: {llama_load_error}")
	return None

	# Use ThreadPoolExecutor to load the LLaMA model in a separate thread
	with ThreadPoolExecutor() as executor:
	llama_model = asyncio.run(executor.submit(load_llama_model))

	try:
	whisper_model = whisper.load_model("base")
	except Exception as whisper_load_error:
	print(f"Error loading Whisper model: {whisper_load_error}")
	whisper_model = None

	KV = '''
	BoxLayout:
	orientation: 'vertical'
	MDToolbar:
	title: "Audio Redactor"
	elevation: 10
	FloatLayout:
	MDRaisedButton:
	text: "Load Audio"
	pos_hint: {'center_x': 0.5, 'center_y': 0.6}
	on_release: app.file_manager_open()
	MDRaisedButton:
	text: "Process Audio"
	pos_hint: {'center_x': 0.5, 'center_y': 0.4}
	on_release: app.process_audio()
	'''

	class AudioRedactorApp(MDApp):
	def __init__(self, **kwargs):
	super().__init__(**kwargs)
	self.audio_files = []
	Window.bind(on_dropfile=self.on_file_drop)
	self.file_manager = MDFileManager(exit_manager=self.exit_manager, select_path=self.select_path, preview=True)
	self.executor = ThreadPoolExecutor(max_workers=4)

	def build(self):
	return Builder.load_string(KV)

	def file_manager_open(self):
	self.file_manager.show('/')
	self.manager_open = True

	def select_path(self, path):
	self.audio_files.append(path)
	self.exit_manager()

	def exit_manager(self, *args):
	self.manager_open = False
	self.file_manager.close()

	def on_file_drop(self, window, file_path):
	self.audio_files.append(file_path.decode('utf-8'))

	def process_audio(self):
	asyncio.run(self.async_process_audio_files())

	async def async_process_audio_files(self):
	loop = asyncio.get_running_loop()
	for audio_path in self.audio_files:
	transcription = await loop.run_in_executor(self.executor, self.transcribe_audio, audio_path)
	sensitive_info = await loop.run_in_executor(self.executor, self.identify_sensitive_info, transcription)
	Clock.schedule_once(lambda dt: self.bleep_sensitive_parts(audio_path, sensitive_info), 0)

	def transcribe_audio(self, audio_path):
	if whisper_model:
	result = whisper_model.transcribe(audio_path)
	return result["text"]
	return ""

	def identify_sensitive_info(self, formatted_transcript):
	if llama_model:
	detailed_prompt = f"""
	Please read the following transcript carefully. Your task is to identify and list any sensitive information contained within the text according to the guidelines provided below:

	1. Personal Identifiable Information (PII): Any names, email addresses, phone numbers, or social security numbers.
	2. Financial Information: Credit card numbers, bank account details, or any financial transaction details.
	3. Health Information: Medical records, treatment history, or any health condition descriptions.
	4. Confidential Conversations: Any private discussions, business secrets, or confidential agreements.
	5. Explicit Content: Any explicit language or content not suitable for all audiences.
	6. Location Data: Specific addresses, geographical coordinates, or any location-specific details.

	Guidelines:
	- List each piece of sensitive information found, categorized by the rules above.
	- Provide the type of sensitive information and a brief description or the exact text, if applicable.
	- If no sensitive information is found, state "No sensitive information identified."

	Transcript:
	"{formatted_transcript}"

	Identified Sensitive Information:
	"""
	output = llama_model(formatted_transcript, max_tokens=150, stop=["\n"], prompt=detailed_prompt)
	return output['choices'][0]['text'].strip()
	return ""

	def bleep_sensitive_parts(self, audio_path, sensitive_segments):
	if not sensitive_segments:
	return

	sound = AudioSegment.from_mp3(audio_path)
	bleep = AudioSegment.silent(duration=1000)
	for segment_info in sensitive_segments.split('\n'):
	if '-' in segment_info:
	start_time, end_time = map(lambda x: float(x) * 1000, segment_info.split('-'))
	sound = sound[:start_time] + bleep + sound[end_time:]
	output_path = "redacted_" + os.path.basename(audio_path)
	sound.export(output_path, format="mp3")

	if __name__ == '__main__':
	AudioRedactorApp().run()