From b1f9236bb6ea0a89193477b3bec93f3efb095dec Mon Sep 17 00:00:00 2001
From: Ray Smets <rayjsmets@gmail.com>
Date: Thu, 23 Nov 2023 01:21:43 -0800
Subject: [PATCH 1/4] [Setup] script. (#1)

---
 README.md |  7 +++++++
 setup.sh  | 16 ++++++++++++++++
 2 files changed, 23 insertions(+)
 create mode 100755 setup.sh
diff --git a/README.md b/README.md
index c10bdcb..1c9b150 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,13 @@ Make a new voice in Eleven and get the voice id of that voice using their [get v
 export ELEVENLABS_VOICE_ID=<voice-id>
 ```
 
+### Setup Script
+
+Alternatively, one can use the `setup.sh` script to facilitate getting the shell envs ready to rock by updating the API key values in `setup.sh` and run. 
+
+_Note: may have to manually run `source source venv/bin/activate` afterwards depending on shell env._
+
+
 ## Run it!
 
 In on terminal, run the webcam capture:
diff --git a/setup.sh b/setup.sh
new file mode 100755
index 0000000..bab016d
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+# create a virtual environment
+python3 -m pip install virtualenv
+python3 -m virtualenv venv
+
+# source the virtual environment
+source venv/bin/activate
+
+# install the dependencies
+pip install -r requirements.txt
+
+# set the environment variables
+export ELEVENLABS_VOICE_ID=
+export OPENAI_API_KEY=
+export ELEVENLABS_API_KEY=
\ No newline at end of file

From 4ab05a4b1d13dab4e047e000e78d9c897d02467d Mon Sep 17 00:00:00 2001
From: Ray Smets <rayjsmets@gmail.com>
Date: Thu, 23 Nov 2023 01:22:52 -0800
Subject: [PATCH 2/4] [Narrator] prompt to describe the image like David
 Attenborough for increased complex descriptors. (#2)

---
 narrator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/narrator.py b/narrator.py
index cd086f7..845158f 100644
--- a/narrator.py
+++ b/narrator.py
@@ -43,7 +43,7 @@ def generate_new_line(base64_image):
         {
             "role": "user",
             "content": [
-                {"type": "text", "text": "Describe this image"},
+                {"type": "text", "text": "Describe this image as if you David Attenborough"},
                 {
                     "type": "image_url",
                     "image_url": f"data:image/jpeg;base64,{base64_image}",

From 1bb728ada311c0892ac18f61718e6538279a3192 Mon Sep 17 00:00:00 2001
From: Ray Smets <rayjsmets@gmail.com>
Date: Thu, 23 Nov 2023 01:45:28 -0800
Subject: [PATCH 3/4] [Narrator] fix

---
 narrator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/narrator.py b/narrator.py
index 845158f..7eca4f5 100644
--- a/narrator.py
+++ b/narrator.py
@@ -43,7 +43,7 @@ def generate_new_line(base64_image):
         {
             "role": "user",
             "content": [
-                {"type": "text", "text": "Describe this image as if you David Attenborough"},
+                {"type": "text", "text": "Describe this image as if you are David Attenborough"},
                 {
                     "type": "image_url",
                     "image_url": f"data:image/jpeg;base64,{base64_image}",

From 94684fca3406adb745a29f89fd3d787cddc78852 Mon Sep 17 00:00:00 2001
From: Ray Smets <rayjsmets@gmail.com>
Date: Thu, 23 Nov 2023 14:45:43 -0800
Subject: [PATCH 4/4] [Narrator] streaming

---
 README.md   | 23 ++++++++++++++++-------
 narrator.py | 36 ++++++++++++++++++++++++++++--------
 setup.sh    |  4 +++-
 3 files changed, 47 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index 1c9b150..b2c7f25 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,9 @@
-# David Attenborough narrates your life. 
+# David Attenborough narrates your life.
 
 https://twitter.com/charliebholtz/status/1724815159590293764
 
 ## Want to make your own AI app?
+
 Check out [Replicate](https://replicate.com). We make it easy to run machine learning models with an API.
 
 ## Setup
@@ -20,33 +21,41 @@ Then, install the dependencies:
 
 Make a [Replicate](https://replicate.com), [OpenAI](https://beta.openai.com/), and [ElevenLabs](https://elevenlabs.io) account and set your tokens:
 
-```
+```bash
 export OPENAI_API_KEY=<token>
 export ELEVENLABS_API_KEY=<eleven-token>
 ```
 
 Make a new voice in Eleven and get the voice id of that voice using their [get voices](https://elevenlabs.io/docs/api-reference/voices) API, or by clicking the flask icon next to the voice in the VoiceLab tab.
 
-```
+```bash
 export ELEVENLABS_VOICE_ID=<voice-id>
 ```
 
-### Setup Script
+### Streaming
 
-Alternatively, one can use the `setup.sh` script to facilitate getting the shell envs ready to rock by updating the API key values in `setup.sh` and run. 
+If you would like the speech to start sooner, enable streaming by setting the environment variable below. The trade-off is that the audio snippet is not saved in the `/narration` directory.
+
+```bash
+export ELEVENLABS_STREAMING=true
+```
+
+### Script
+
+As an alternative to running the commands above individually, one can use the `setup.sh` script to get the two required shell envs ready: update the environment variable values in `setup.sh`, then execute the script.
 
 _Note: may have to manually run `source source venv/bin/activate` afterwards depending on shell env._
 
-
 ## Run it!
 
 In on terminal, run the webcam capture:
+
 ```bash
 python capture.py
 ```
+
 In another terminal, run the narrator:
 
 ```bash
 python narrator.py
 ```
-
diff --git a/narrator.py b/narrator.py
index 7eca4f5..d33da74 100644
--- a/narrator.py
+++ b/narrator.py
@@ -1,16 +1,24 @@
-import os
-from openai import OpenAI
 import base64
-import json
-import time
-import simpleaudio as sa
 import errno
-from elevenlabs import generate, play, set_api_key, voices
+import json
+import os
+import time
+
+import simpleaudio as sa
+from elevenlabs import generate, play, set_api_key, stream, voices
+from openai import OpenAI
 
 client = OpenAI()
 
 set_api_key(os.environ.get("ELEVENLABS_API_KEY"))
 
+
+# Initialize 'isStreaming' from the environment variable 'ELEVENLABS_STREAMING'.
+# If the value of 'ELEVENLABS_STREAMING' is exactly "true", 'isStreaming' is set to True.
+# Otherwise (including when the variable is unset), 'isStreaming' is set to False.
+isStreaming = os.environ.get("ELEVENLABS_STREAMING", "false") == "true"
+
+
 def encode_image(image_path):
     while True:
         try:
@@ -25,7 +33,16 @@ def encode_image(image_path):
 
 
 def play_audio(text):
-    audio = generate(text, voice=os.environ.get("ELEVENLABS_VOICE_ID"))
+    audio = generate(
+        text,
+        voice=os.environ.get("ELEVENLABS_VOICE_ID"),
+        model="eleven_turbo_v2",
+        stream=isStreaming,
+    )
+
+    if isStreaming:
+        stream(audio)
+        return
 
     unique_id = base64.urlsafe_b64encode(os.urandom(30)).decode("utf-8").rstrip("=")
     dir_path = os.path.join("narration", unique_id)
@@ -43,7 +60,10 @@ def generate_new_line(base64_image):
         {
             "role": "user",
             "content": [
-                {"type": "text", "text": "Describe this image as if you are David Attenborough"},
+                {
+                    "type": "text",
+                    "text": "Describe this image as if you are David Attenborough",
+                },
                 {
                     "type": "image_url",
                     "image_url": f"data:image/jpeg;base64,{base64_image}",
diff --git a/setup.sh b/setup.sh
index bab016d..823a544 100755
--- a/setup.sh
+++ b/setup.sh
@@ -13,4 +13,6 @@ pip install -r requirements.txt
 # set the environment variables
 export ELEVENLABS_VOICE_ID=
 export OPENAI_API_KEY=
-export ELEVENLABS_API_KEY=
\ No newline at end of file
+export ELEVENLABS_API_KEY=
+
+export ELEVENLABS_STREAMING=false