Fix/imagesave (#7)

* [Image] file saved alongside the corresponding audio file (#6) * [Image] file saved alongside the corresponding audio file. * [shutil] used instead of the native OS cp command, after Trunk flagged the security implications of invoking cp from the script. * [README] small update to the streaming section to mention that the image file is not saved either. * [README] small setup.sh wording refactor. * [Fix] narrator image-save logic when streaming is enabled.
2023-11-24 11:04:01 -08:00 · 2023-11-24 11:04:01 -08:00 · cee2b62b63
parent c3e86d8d14
commit cee2b62b63
1 changed file with 18 additions and 8 deletions
--- a/narrator.py
+++ b/narrator.py
@ -39,7 +39,7 @@ def encode_image(image_path):
            time.sleep(0.1)


-def play_audio(text):
+def play_audio(text, dir_path=None):
    audio = generate(
        text,
        voice=os.environ.get("ELEVENLABS_VOICE_ID"),
@ -52,10 +52,7 @@ def play_audio(text):
        stream(audio)
        return

-    # Save the audio to a file
-    unique_id = base64.urlsafe_b64encode(os.urandom(30)).decode("utf-8").rstrip("=")
-    dir_path = os.path.join("narration", unique_id)
-    os.makedirs(dir_path, exist_ok=True)
+    # Save the audio file to the directory
    file_path = os.path.join(dir_path, "audio.wav")

    with open(file_path, "wb") as f:
@ -113,17 +110,30 @@ def _main():
    # path to your image
    image_path = os.path.join(os.getcwd(), "./frames/frame.jpg")

+    dir_path = None
+    if not isStreaming:
+        # create a unique directory to store the audio and image
+        unique_id = base64.urlsafe_b64encode(os.urandom(30)).decode("utf-8").rstrip("=")
+        dir_path = os.path.join("narration", unique_id)
+        os.makedirs(dir_path, exist_ok=True)
+
+        # copy the image to the directory
+        new_image_path = os.path.join(dir_path, "image.jpg")
+        shutil.copy(image_path, new_image_path)
+        image_path = new_image_path
+
    # getting the base64 encoding
    base64_image = encode_image(image_path)

-    # analyze posture
+    # analyze the image
    print(f"👀 {narrator} is watching...")
    analysis = analyze_image(base64_image, script=script)

-    print("🎙️ David says:")
+    print(f"🎙️ {narrator} says:")
    print(analysis)

-    play_audio(analysis)
+    # generate and play audio
+    play_audio(analysis, dir_path)

    script = script + [{"role": "assistant", "content": analysis}]