⚗️ improved performance of the script to better match expectations

This commit is contained in:
Paolo Pastore 2023-11-25 00:25:09 +01:00
parent df40788db2
commit 7b34c76a43
2 changed files with 11 additions and 10 deletions

View File

@@ -37,13 +37,13 @@ while True:
frame = cv2.cvtColor(np.array(resized_img), cv2.COLOR_RGB2BGR) frame = cv2.cvtColor(np.array(resized_img), cv2.COLOR_RGB2BGR)
# Save the frame as an image file # Save the frame as an image file
print("📸 Say cheese! Saving frame.") print("📸 Sorridi! Ti sto riprendendo.")
path = f"{folder}/frame.jpg" path = f"{folder}/frame.jpg"
# Showing captured frame # Showing captured frame
cv2.imshow("image", frame) cv2.imshow("image", frame)
# Keeps window open till the next cycle runs # Keeps window open till the next cycle runs
cv2.waitKey(4900) cv2.waitKey(1900)
# Writing frame on disk # Writing frame on disk
cv2.imwrite(path, frame) cv2.imwrite(path, frame)
@@ -51,7 +51,7 @@ while True:
print("Failed to capture image") print("Failed to capture image")
# Wait for 2 seconds # Wait for 2 seconds
time.sleep(2) # time.sleep(2)
# Release the camera and close all windows # Release the camera and close all windows
cap.release() cap.release()

View File

@@ -25,7 +25,7 @@ def encode_image(image_path):
def play_audio(text): def play_audio(text):
audio = generate(text, voice=os.environ.get("ELEVENLABS_VOICE_ID")) audio = generate(text, voice=os.environ.get("ELEVENLABS_VOICE_ID"), model="eleven_multilingual_v2")
unique_id = base64.urlsafe_b64encode(os.urandom(30)).decode("utf-8").rstrip("=") unique_id = base64.urlsafe_b64encode(os.urandom(30)).decode("utf-8").rstrip("=")
dir_path = os.path.join("narration", unique_id) dir_path = os.path.join("narration", unique_id)
@@ -43,7 +43,7 @@ def generate_new_line(base64_image):
{ {
"role": "user", "role": "user",
"content": [ "content": [
{"type": "text", "text": "Describe this image"}, {"type": "text", "text": "Continua il racconto del documentario naturalistico sull'essere umano nell'immagine."},
{ {
"type": "image_url", "type": "image_url",
"image_url": f"data:image/jpeg;base64,{base64_image}", "image_url": f"data:image/jpeg;base64,{base64_image}",
@@ -60,8 +60,9 @@ def analyze_image(base64_image, script):
{ {
"role": "system", "role": "system",
"content": """ "content": """
Sei Piero Angela. Narra ciò che fa l'essere umano nella foto come se fosse un documentario naturalistico. Sei Piero Angela. Descrivi in italiano le azioni dell'essere umano nell'immagine come se fosse il protagonista di un documentario naturalistico.
Rendilo ironico e divertente. Non ripeterti. Rendilo breve. Se fa qualcosa di anche lontanamente interessante, sottolinealo con enfasi! Sii ironico e divertente. Non ripeterti. Sii breve. Usa un linguaggio forbito e ricco di termini del gergo scientifico. Sottolinea con enfasi ogni minima cosa che fa!
Limitati a un massimo di 30 parole.
""", """,
}, },
] ]
@@ -84,7 +85,7 @@ def main():
base64_image = encode_image(image_path) base64_image = encode_image(image_path)
# analyze posture # analyze posture
print("👀 Piero ti sta guardando...") print("👀 Piero ti sta osservando...")
analysis = analyze_image(base64_image, script=script) analysis = analyze_image(base64_image, script=script)
print("🎙️ Piero dice:") print("🎙️ Piero dice:")
@@ -94,8 +95,8 @@ def main():
script = script + [{"role": "assistant", "content": analysis}] script = script + [{"role": "assistant", "content": analysis}]
# wait for 5 seconds # wait for 1 second
time.sleep(5) time.sleep(1)
if __name__ == "__main__": if __name__ == "__main__":