Fix/imagesave (#7)

* [Image] file saved alongside the corresponding audio file (#6)

* [Image] file saved alongside the corresponding audio file.

* [shutil] used, thanks to trunk calling out the security implications of running the native os cp command via the script.

* [README] small update to streaming section to mention image file is not saved either.

* [README] small setup.sh wording refactor.

* [Fix] narrator when streaming is enabled regarding image save logic
This commit is contained in:
Ray Smets 2023-11-24 11:04:01 -08:00 committed by GitHub
parent c3e86d8d14
commit cee2b62b63
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 18 additions and 8 deletions

View File

@ -39,7 +39,7 @@ def encode_image(image_path):
time.sleep(0.1)
def play_audio(text):
def play_audio(text, dir_path=None):
audio = generate(
text,
voice=os.environ.get("ELEVENLABS_VOICE_ID"),
@ -52,10 +52,7 @@ def play_audio(text):
stream(audio)
return
# Save the audio to a file
unique_id = base64.urlsafe_b64encode(os.urandom(30)).decode("utf-8").rstrip("=")
dir_path = os.path.join("narration", unique_id)
os.makedirs(dir_path, exist_ok=True)
# Save the audio file to the directory
file_path = os.path.join(dir_path, "audio.wav")
with open(file_path, "wb") as f:
@ -113,17 +110,30 @@ def _main():
# path to your image
image_path = os.path.join(os.getcwd(), "./frames/frame.jpg")
dir_path = None
if not isStreaming:
# create a unique directory to store the audio and image
unique_id = base64.urlsafe_b64encode(os.urandom(30)).decode("utf-8").rstrip("=")
dir_path = os.path.join("narration", unique_id)
os.makedirs(dir_path, exist_ok=True)
# copy the image to the directory
new_image_path = os.path.join(dir_path, "image.jpg")
shutil.copy(image_path, new_image_path)
image_path = new_image_path
# getting the base64 encoding
base64_image = encode_image(image_path)
# analyze posture
# analyze the image
print(f"👀 {narrator} is watching...")
analysis = analyze_image(base64_image, script=script)
print("🎙️ David says:")
print(f"🎙️ {narrator} says:")
print(analysis)
play_audio(analysis)
# generate and play audio
play_audio(analysis, dir_path)
script = script + [{"role": "assistant", "content": analysis}]