narrator-david-attenburo/narrator.py

import os
from openai import OpenAI
import base64
import json
import time
import simpleaudio as sa
import errno
from elevenlabs import generate, play, voices, set_api_key

#Enter your Eleven labs API key
set_api_key("")


client = OpenAI()


def encode_image(image_path):
    while True:
        try:
            with open(image_path, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode("utf-8")
        except IOError as e:
            if e.errno != errno.EACCES:
                # Not a "file in use" error, re-raise
                raise
            # File is being written to, wait a bit and retry
            time.sleep(0.1)


def play_audio(text):
    audio = generate(text=text, voice="ENfvYmv6CRqDodDZTieQ", model="eleven_turbo_v2")

    unique_id = base64.urlsafe_b64encode(os.urandom(30)).decode("utf-8").rstrip("=")
    dir_path = os.path.join("narration", unique_id)
    os.makedirs(dir_path, exist_ok=True)
    file_path = os.path.join(dir_path, "audio.wav")

    with open(file_path, "wb") as f:
        f.write(audio)

    play(audio)


def generate_new_line(base64_image):
    return [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image"},
                {
                    "type": "image_url",
                    "image_url": f"data:image/jpeg;base64,{base64_image}",
                },
            ],
        },
    ]


def analyze_image(base64_image, script):
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[
            {
                "role": "system",
                "content": """
                You are Sir David Attenborough. Narrate the picture of the human as if it is a nature documentary.
                Make it snarky and funny. Don't repeat yourself. Make it short. If I do anything remotely interesting, make a big deal about it!
                """,
            },
        ]
        + script
        + generate_new_line(base64_image),
        max_tokens=500,
    )
    response_text = response.choices[0].message.content
    return response_text


def main():
    script = []

    while True:
        # path to your image
        image_path = os.path.join(os.getcwd(), "./frames/frame.jpg")

        # getting the base64 encoding
        base64_image = encode_image(image_path)

        # analyze posture
        print("👀 David is watching...")
        analysis = analyze_image(base64_image, script=script)

        print("🎙️ David says:")
        print(analysis)

        play_audio(analysis)

        script = script + [{"role": "assistant", "content": analysis}]

        # wait for 5 seconds
        time.sleep(5)


if __name__ == "__main__":
    main()
update gitignore, change judge to narrator 2023-11-15 09:43:29 -06:00			`import os`
			`from openai import OpenAI`
			`import base64`
			`import json`
			`import time`
			`import simpleaudio as sa`
			`import errno`
added api key in the program added api key in the program 2023-11-16 13:09:32 -06:00			`from elevenlabs import generate, play, voices, set_api_key`

			`#Enter your Eleven labs API key`
			`set_api_key("")`

update gitignore, change judge to narrator 2023-11-15 09:43:29 -06:00
			`client = OpenAI()`


			`def encode_image(image_path):`
			`while True:`
			`try:`
			`with open(image_path, "rb") as image_file:`
			`return base64.b64encode(image_file.read()).decode("utf-8")`
			`except IOError as e:`
			`if e.errno != errno.EACCES:`
			`# Not a "file in use" error, re-raise`
			`raise`
			`# File is being written to, wait a bit and retry`
			`time.sleep(0.1)`


			`def play_audio(text):`
			`audio = generate(text=text, voice="ENfvYmv6CRqDodDZTieQ", model="eleven_turbo_v2")`

			`unique_id = base64.urlsafe_b64encode(os.urandom(30)).decode("utf-8").rstrip("=")`
			`dir_path = os.path.join("narration", unique_id)`
			`os.makedirs(dir_path, exist_ok=True)`
			`file_path = os.path.join(dir_path, "audio.wav")`

			`with open(file_path, "wb") as f:`
			`f.write(audio)`

			`play(audio)`


			`def generate_new_line(base64_image):`
			`return [`
			`{`
			`"role": "user",`
			`"content": [`
			`{"type": "text", "text": "Describe this image"},`
			`{`
			`"type": "image_url",`
			`"image_url": f"data:image/jpeg;base64,{base64_image}",`
			`},`
			`],`
			`},`
			`]`


			`def analyze_image(base64_image, script):`
			`response = client.chat.completions.create(`
			`model="gpt-4-vision-preview",`
			`messages=[`
			`{`
			`"role": "system",`
			`"content": """`
			`You are Sir David Attenborough. Narrate the picture of the human as if it is a nature documentary.`
			`Make it snarky and funny. Don't repeat yourself. Make it short. If I do anything remotely interesting, make a big deal about it!`
			`""",`
			`},`
			`]`
			`+ script`
			`+ generate_new_line(base64_image),`
			`max_tokens=500,`
			`)`
			`response_text = response.choices[0].message.content`
			`return response_text`


			`def main():`
			`script = []`

			`while True:`
			`# path to your image`
			`image_path = os.path.join(os.getcwd(), "./frames/frame.jpg")`

			`# getting the base64 encoding`
			`base64_image = encode_image(image_path)`

			`# analyze posture`
			`print("👀 David is watching...")`
			`analysis = analyze_image(base64_image, script=script)`

			`print("🎙️ David says:")`
			`print(analysis)`

			`play_audio(analysis)`

			`script = script + [{"role": "assistant", "content": analysis}]`

			`# wait for 5 seconds`
			`time.sleep(5)`


			`if __name__ == "__main__":`
			`main()`