# narrator-david-attenburo/narrator.py

import os
from dotenv import load_dotenv
from openai import OpenAI
import base64
import json
import time
import simpleaudio as sa
import errno
from elevenlabs.client import ElevenLabs
from elevenlabs import play

# Load environment variables from the .env file first, so the lookups below
# can see the values it defines.
load_dotenv()

# Shared API clients used by the functions in this module. OpenAI() is built
# with no explicit arguments; the ElevenLabs key comes from ELEVENLABS_API_KEY
# and is None when that variable is unset.
client = OpenAI()
elevenlabs_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))


def encode_image(image_path, retries=3, delay=0.1):
    """Read an image file and return its contents as a base64 string.

    The file may be missing or unreadable for a moment (for example while
    another process is rewriting it), so I/O failures are retried.

    Args:
        image_path: Path of the image file to read.
        retries: Maximum number of read attempts.
        delay: Seconds to wait between two consecutive attempts.

    Returns:
        The base64-encoded file contents decoded as UTF-8 text, or None when
        every attempt failed or an unexpected (non-I/O) error occurred.
    """
    for attempt in range(retries):
        try:
            with open(image_path, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode("utf-8")
        except FileNotFoundError:
            problem = f"Error: Image file not found at {image_path}."
        except OSError as e:
            # Covers the remaining I/O errors, including permission errors.
            problem = f"IOError when trying to read {image_path}: {e}."
        except Exception as e:
            # Not an I/O problem, so trying again is not expected to help.
            print(f"An unexpected error occurred while encoding image {image_path}: {e}")
            return None

        if attempt + 1 < retries:
            print(f"{problem} Retrying...")
            time.sleep(delay)
        else:
            # Last attempt: no retry follows, so neither announce one nor
            # waste time sleeping before reporting the failure.
            print(problem)

    print(f"Failed to encode image {image_path} after {retries} retries.")
    return None


def play_audio(text):
    """Synthesize *text* with ElevenLabs, save the audio to disk and play it.

    The voice is taken from the ELEVEN_VOICE_ID environment variable, with a
    built-in fallback id. Each call writes the audio to
    ``narration/<random id>/audio.wav``. Failures are printed and swallowed
    so that the caller's loop keeps running.

    Args:
        text: The narration text to speak.

    Returns:
        None.
    """
    try:
        voice_id = os.environ.get("ELEVEN_VOICE_ID", "21m00Tcm4TlvDq8ikWAM")
        # Generate audio using the new ElevenLabs client API
        audio = elevenlabs_client.generate(
            text=text,
            voice=voice_id,
            model="eleven_turbo_v2"
        )
        # Depending on the SDK version, generate() may return a lazy iterator
        # of byte chunks instead of a single bytes object. Collect it once so
        # the same data can be both written to disk and played; a stream
        # could only be consumed a single time and cannot be passed to
        # file.write(). Doing this inside the try also surfaces API errors
        # that only occur while the stream is being read.
        if not isinstance(audio, (bytes, bytearray)):
            audio = b"".join(audio)
    except Exception as e: # Replace with specific ElevenLabs APIError if available
        print(f"Error generating audio with ElevenLabs: {e}")
        return

    try:
        # Random, URL-safe directory name so successive narrations never collide.
        unique_id = base64.urlsafe_b64encode(os.urandom(30)).decode("utf-8").rstrip("=")
        dir_path = os.path.join("narration", unique_id)
        os.makedirs(dir_path, exist_ok=True)
        # NOTE(review): the bytes are written exactly as received; whether
        # they are really WAV data depends on the ElevenLabs output format -
        # confirm before relying on the extension.
        file_path = os.path.join(dir_path, "audio.wav")

        with open(file_path, "wb") as f:
            f.write(audio)

        play(audio) # Assuming play() is blocking; if not, ensure file is written before next step
    except IOError as e:
        print(f"IOError saving or playing audio file: {e}")
    except Exception as e:
        print(f"An unexpected error occurred during audio playback: {e}")


def generate_new_line(base64_image):
    """Build the user message asking the model to describe one image.

    Args:
        base64_image: Base64 text of the image, embedded in a JPEG data URL.

    Returns:
        A one-element list holding a "user" chat message whose content is a
        text part followed by an image_url part.
    """
    data_url = f"data:image/jpeg;base64,{base64_image}"
    text_part = {"type": "text", "text": "Describe this image"}
    image_part = {"type": "image_url", "image_url": {"url": data_url}}
    user_message = {"role": "user", "content": [text_part, image_part]}
    return [user_message]


def analyze_image(base64_image, script):
    """Ask the OpenAI chat model to narrate an image, documentary style.

    The request consists of a fixed system prompt, the earlier messages in
    *script*, and a new user message carrying the image.

    Args:
        base64_image: Base64 text of the image to narrate.
        script: List of earlier chat messages (dicts with "role" and
            "content") sent as conversation history.

    Returns:
        The content of the model's first choice, or a string starting with
        "Error:" when the image is missing or the API call fails. Callers
        detect failure through that prefix.
    """
    if not base64_image:
        return "Error: Could not encode image for analysis."

    # Imported here (the file's top level only imports the OpenAI class) to
    # get at the exception classes. The guard above stays usable without it.
    import openai

    try:
        response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
            {
                "role": "system",
                "content": """
                Narrate the picture in the style of a nature documentary. Be observational, insightful, and use vivid language. Maintain a respectful and engaging tone. Keep it concise. If anything interesting or unusual is observed, highlight it with a sense of wonder or intrigue.
                Also do not exceed 300 characters.
                take a deep breath and do this step by step.
                """,
            },
        ]
        + script
        + generate_new_line(base64_image),
        max_tokens=500,
    )
        response_text = response.choices[0].message.content
        return response_text
    # Real exception classes are matched instead of substrings of the type
    # name, so subclasses (e.g. timeouts under APIConnectionError, 4xx/5xx
    # errors under APIStatusError) land in the right branch. The specific
    # status errors must be listed before their APIStatusError base class.
    except openai.APIConnectionError as e:
        print(f"OpenAI API Connection Error: {e}")
        return "Error: Could not connect to OpenAI API."
    except openai.RateLimitError as e:
        print(f"OpenAI API Rate Limit Error: {e}")
        return "Error: OpenAI API rate limit exceeded."
    except openai.AuthenticationError as e:
        print(f"OpenAI API Authentication Error: {e}")
        return "Error: Invalid OpenAI API key. Please check your .env file."
    except openai.APIStatusError as e:
        print(f"OpenAI API Status Error: {e}")
        return "Error: OpenAI API returned an error status."
    except Exception as e:
        # Top-level boundary for this call: report and let the caller's loop continue.
        print(f"An unexpected error occurred during OpenAI API call: {e}")
        return "Error: An unexpected error occurred during image analysis."


def main():
    """Run the narration loop forever.

    Each pass encodes ``frames/frame.jpg`` from the current working
    directory, asks for a narration, prints and speaks it, appends it to the
    running script as an assistant message, then waits five seconds.
    """
    script = []

    while True:
        # Location of the frame to narrate on this pass.
        image_path = os.path.join(os.getcwd(), "./frames/frame.jpg")
        base64_image = encode_image(image_path)

        if not base64_image:
            print("Skipping analysis due to image encoding failure.")
        else:
            print("👀 David is watching...")
            analysis = analyze_image(base64_image, script=script)

            # An empty result or an "Error:"-prefixed message is a failure.
            failed = not analysis or analysis.startswith("Error:")
            if failed:
                # Show the error text (or None) returned by analyze_image.
                print(analysis)
            else:
                print("🎙️ David says:")
                print(analysis)
                play_audio(analysis)
                # Remember the narration so later requests carry it as history.
                script = script + [{"role": "assistant", "content": analysis}]

        # Pause for 5 seconds before looking at the next frame.
        time.sleep(5)


# Start the narration loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()