[Narrator] streaming
This commit is contained in:
parent
1bb728ada3
commit
94684fca34
21
README.md
21
README.md
|
@ -3,6 +3,7 @@
|
||||||
https://twitter.com/charliebholtz/status/1724815159590293764
|
https://twitter.com/charliebholtz/status/1724815159590293764
|
||||||
|
|
||||||
## Want to make your own AI app?
|
## Want to make your own AI app?
|
||||||
|
|
||||||
Check out [Replicate](https://replicate.com). We make it easy to run machine learning models with an API.
|
Check out [Replicate](https://replicate.com). We make it easy to run machine learning models with an API.
|
||||||
|
|
||||||
## Setup
|
## Setup
|
||||||
|
@ -20,33 +21,41 @@ Then, install the dependencies:
|
||||||
|
|
||||||
Make a [Replicate](https://replicate.com), [OpenAI](https://beta.openai.com/), and [ElevenLabs](https://elevenlabs.io) account and set your tokens:
|
Make a [Replicate](https://replicate.com), [OpenAI](https://beta.openai.com/), and [ElevenLabs](https://elevenlabs.io) account and set your tokens:
|
||||||
|
|
||||||
```
|
```bash
|
||||||
export OPENAI_API_KEY=<token>
|
export OPENAI_API_KEY=<token>
|
||||||
export ELEVENLABS_API_KEY=<eleven-token>
|
export ELEVENLABS_API_KEY=<eleven-token>
|
||||||
```
|
```
|
||||||
|
|
||||||
Make a new voice in Eleven and get the voice id of that voice using their [get voices](https://elevenlabs.io/docs/api-reference/voices) API, or by clicking the flask icon next to the voice in the VoiceLab tab.
|
Make a new voice in Eleven and get the voice id of that voice using their [get voices](https://elevenlabs.io/docs/api-reference/voices) API, or by clicking the flask icon next to the voice in the VoiceLab tab.
|
||||||
|
|
||||||
```
|
```bash
|
||||||
export ELEVENLABS_VOICE_ID=<voice-id>
|
export ELEVENLABS_VOICE_ID=<voice-id>
|
||||||
```
|
```
|
||||||
|
|
||||||
### Setup Script
|
### Streaming
|
||||||
|
|
||||||
Alternatively, one can use the `setup.sh` script to facilitate getting the shell envs ready to rock by updating the API key values in `setup.sh` and run.
|
If you would like the speech to start sooner, enable streaming by setting the environment variable below. The trade-off is that the audio snippet is not saved in the `/narration` directory.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export ELEVENLABS_STREAMING=true
|
||||||
|
```
|
||||||
|
|
||||||
|
### Script
|
||||||
|
|
||||||
|
As an alternative to running the commands above individually, you can use the `setup.sh` script to get the two required shell envs ready to rock: update the environment variable values in `setup.sh`, then execute the script.
|
||||||
|
|
||||||
_Note: you may have to manually run `source venv/bin/activate` afterwards, depending on your shell env._
|
_Note: you may have to manually run `source venv/bin/activate` afterwards, depending on your shell env._
|
||||||
|
|
||||||
|
|
||||||
## Run it!
|
## Run it!
|
||||||
|
|
||||||
In one terminal, run the webcam capture:
|
In one terminal, run the webcam capture:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python capture.py
|
python capture.py
|
||||||
```
|
```
|
||||||
|
|
||||||
In another terminal, run the narrator:
|
In another terminal, run the narrator:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python narrator.py
|
python narrator.py
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
36
narrator.py
36
narrator.py
|
@ -1,16 +1,24 @@
|
||||||
import os
|
|
||||||
from openai import OpenAI
|
|
||||||
import base64
|
import base64
|
||||||
import json
|
|
||||||
import time
|
|
||||||
import simpleaudio as sa
|
|
||||||
import errno
|
import errno
|
||||||
from elevenlabs import generate, play, set_api_key, voices
|
import json
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
|
import simpleaudio as sa
|
||||||
|
from elevenlabs import generate, play, set_api_key, stream, voices
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
client = OpenAI()
|
client = OpenAI()
|
||||||
|
|
||||||
set_api_key(os.environ.get("ELEVENLABS_API_KEY"))
|
set_api_key(os.environ.get("ELEVENLABS_API_KEY"))
|
||||||
|
|
||||||
|
|
||||||
|
# This code initializes the variable 'isStreaming' based on the value of the environment variable 'ELEVENLABS_STREAMING'.
|
||||||
|
# If the value of 'ELEVENLABS_STREAMING' is "true", then 'isStreaming' is set to True.
|
||||||
|
# Otherwise, 'isStreaming' is set to False.
|
||||||
|
isStreaming = os.environ.get("ELEVENLABS_STREAMING", "false") == "true"
|
||||||
|
|
||||||
|
|
||||||
def encode_image(image_path):
|
def encode_image(image_path):
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
|
@ -25,7 +33,16 @@ def encode_image(image_path):
|
||||||
|
|
||||||
|
|
||||||
def play_audio(text):
|
def play_audio(text):
|
||||||
audio = generate(text, voice=os.environ.get("ELEVENLABS_VOICE_ID"))
|
audio = generate(
|
||||||
|
text,
|
||||||
|
voice=os.environ.get("ELEVENLABS_VOICE_ID"),
|
||||||
|
model="eleven_turbo_v2",
|
||||||
|
stream=isStreaming,
|
||||||
|
)
|
||||||
|
|
||||||
|
if isStreaming:
|
||||||
|
stream(audio)
|
||||||
|
return
|
||||||
|
|
||||||
unique_id = base64.urlsafe_b64encode(os.urandom(30)).decode("utf-8").rstrip("=")
|
unique_id = base64.urlsafe_b64encode(os.urandom(30)).decode("utf-8").rstrip("=")
|
||||||
dir_path = os.path.join("narration", unique_id)
|
dir_path = os.path.join("narration", unique_id)
|
||||||
|
@ -43,7 +60,10 @@ def generate_new_line(base64_image):
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": [
|
"content": [
|
||||||
{"type": "text", "text": "Describe this image as if you are David Attenborough"},
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": "Describe this image as if you are David Attenborough",
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "image_url",
|
||||||
"image_url": f"data:image/jpeg;base64,{base64_image}",
|
"image_url": f"data:image/jpeg;base64,{base64_image}",
|
||||||
|
|
Loading…
Reference in New Issue