# Source: narrator-david-attenburo/leageoflegends/narrator.py (112 lines, 3.0 KiB, Python)

import os
from openai import OpenAI
import base64
import json
import time
import simpleaudio as sa
import errno
from elevenlabs import generate, play, set_api_key, voices
from playsound import playsound
# Shared OpenAI client used by analyze_image. It is built with no arguments,
# so its credentials are presumably taken from the environment
# (e.g. OPENAI_API_KEY) -- TODO confirm against the openai package docs.
client = OpenAI()
# Hand the ElevenLabs key from the ELEVENLABS_API_KEY environment variable to
# the elevenlabs package; os.environ.get yields None when the variable is unset.
set_api_key(os.environ.get("ELEVENLABS_API_KEY"))
def encode_image(image_path, *, retry_delay=0.1, max_retries=None):
    """Return the contents of *image_path* as base64-encoded text.

    A read that fails with ``EACCES`` is treated as transient (the file is
    assumed to be in the middle of being written) and is retried after a
    short pause. Any other I/O error is re-raised immediately.

    Args:
        image_path: Path of the file to read.
        retry_delay: Seconds to sleep between attempts after an ``EACCES``
            failure. Defaults to the historic 0.1 s.
        max_retries: Maximum number of retries after the first failed
            attempt. ``None`` (the default) keeps the historic behaviour of
            retrying forever; pass an integer to avoid hanging on a file
            that is permanently unreadable.

    Returns:
        The file's bytes, base64-encoded and decoded to a ``str``.

    Raises:
        OSError: For any failure other than ``EACCES``, or for ``EACCES``
            once ``max_retries`` retries have been used up.
    """
    retries_done = 0
    while True:
        try:
            with open(image_path, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode("utf-8")
        except OSError as e:  # IOError is an alias of OSError on Python 3
            if e.errno != errno.EACCES:
                # Not a "file in use" error, re-raise
                raise
            if max_retries is not None and retries_done >= max_retries:
                # Retry budget exhausted: surface the error instead of spinning.
                raise
            retries_done += 1
            # File is being written to, wait a bit and retry
            time.sleep(retry_delay)
def play_audio(text):
    """Turn *text* into speech and save the clip under ``narration/``.

    The audio comes from ``generate`` using the voice named by the
    ``ELEVENLABS_VOICE_ID`` environment variable. Every call creates a new,
    randomly named sub-directory of ``narration`` and writes the result to
    ``audio.wav`` inside it.

    Playback is currently disabled, so despite its name this function only
    stores the clip on disk.

    Args:
        text: The sentence to synthesize.
    """
    voice_id = os.environ.get("ELEVENLABS_VOICE_ID")
    clip = generate(text, voice=voice_id)

    # Random URL-safe token used as the per-clip directory name; any "="
    # padding is stripped from it.
    random_bytes = os.urandom(30)
    token = base64.urlsafe_b64encode(random_bytes).decode("utf-8").rstrip("=")

    target_dir = os.path.join("narration", token)
    os.makedirs(target_dir, exist_ok=True)

    target_file = os.path.join(target_dir, "audio.wav")
    # Binary mode: `clip` is assumed to be a bytes-like object -- TODO confirm.
    with open(target_file, "wb") as out:
        out.write(clip)
def generate_new_line(base64_image):
    """Build the user turn that asks the model to describe one image.

    Args:
        base64_image: Base64 text of the image; it is embedded verbatim in a
            ``data:image/jpeg;base64,`` URL.

    Returns:
        A one-element list containing a ``user`` message. Its content is a
        text part ("Describe this image") followed by an ``image_url`` part
        carrying the data URL.
    """
    data_url = f"data:image/jpeg;base64,{base64_image}"
    text_part = {"type": "text", "text": "Describe this image"}
    image_part = {"type": "image_url", "image_url": data_url}
    user_message = {"role": "user", "content": [text_part, image_part]}
    return [user_message]
def analyze_image(base64_image, script):
    """Ask the vision model for the next line of commentary on an image.

    The request consists of a fixed system prompt, then the messages in
    *script*, then a fresh user turn (from ``generate_new_line``) that
    carries the image.

    Args:
        base64_image: Base64 text of the frame to comment on.
        script: List of earlier chat messages; it is concatenated between
            the system prompt and the new user turn.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    # Prompt text is reproduced verbatim, including the leading and trailing
    # newline of the original triple-quoted literal.
    system_prompt = (
        "\n"
        "You are real commentator, express more.\n"
        "Narrate with intensity.\n"
        "You are watching a game.\n"
        "Don't repeat yourself. Comment in one line.\n"
        "Focus on the minute details.\n"
        "Mention the individual by what the text near the head says.\n"
        "Focus on what's happenning in the game.\n"
        "Comments not more than 20 words.\n"
        "Don't explain about the game, but focus on activities in the game.\n"
    )
    system_message = {"role": "system", "content": system_prompt}

    # Order matters: system prompt, then prior commentary, then the new image.
    conversation = [system_message] + script
    conversation = conversation + generate_new_line(base64_image)

    completion = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=conversation,
        max_tokens=500,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def main():
    """Run the narration loop; it never returns on its own.

    Each pass reads ``./frames/frame.jpg`` under the current working
    directory, asks ``analyze_image`` for a comment given the comments made
    so far, prints it, passes it to ``play_audio`` and appends it to the
    running history. There is no pause between passes.
    """
    history = []

    while True:
        # The frame path is resolved against the working directory each pass.
        frame_path = os.path.join(os.getcwd(), "./frames/frame.jpg")
        frame_b64 = encode_image(frame_path)

        print("👀 Mervin Praison is watching...")
        commentary = analyze_image(frame_b64, script=history)

        print("🎙️ Mervin Praison says:")
        print(commentary)
        play_audio(commentary)

        # Record the line as an assistant turn so later requests include it.
        # NOTE(review): the history is never trimmed, so every request is
        # larger than the one before it.
        history.append({"role": "assistant", "content": commentary})
# Start the narration loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()