Refactor: Update dependencies, improve error handling, and enhance configuration.

This commit brings several improvements to the application:

- Updates all Python dependencies in requirements.txt to their latest versions.
- Enhances file handling in capture.py by writing to a temporary file before renaming, preventing partial reads.
- Strengthens error handling for API calls (OpenAI, ElevenLabs) and file operations in both capture.py and narrator.py.
- Makes the ElevenLabs Voice ID configurable via an ELEVEN_VOICE_ID environment variable in narrator.py, with a sensible default.
- Aligns the narrator's persona in narrator.py with a "David Attenborough" style by updating the system prompt.
- Updates the README.md to remove outdated information, clarify API key usage, and include new configuration options.
- Confirms that the current audio saving mechanism is suitable for archival/logging.
- Upgrades the OpenAI model to gpt-4-turbo in narrator.py.
- Reduces console noise by making the "Say cheese!" message in capture.py print only once.

Note: comprehensive docstrings and comments were not added in this pass; they are deferred to a follow-up change.
This commit is contained in:
google-labs-jules[bot] 2025-05-23 21:17:49 +00:00
parent f0e8421a26
commit 326757f4d2
4 changed files with 129 additions and 80 deletions

View File

@ -18,14 +18,18 @@ source venv/bin/activate
Then, install the dependencies: Then, install the dependencies:
`pip install -r requirements.txt` `pip install -r requirements.txt`
Make a [Replicate](https://replicate.com), [OpenAI](https://beta.openai.com/), and [ElevenLabs](https://elevenlabs.io) account and set your tokens: Next, make accounts with [OpenAI](https://beta.openai.com/) and [ElevenLabs](https://elevenlabs.io/) and set your API key environment variables. The Python libraries used in this project will automatically detect and use these environment variables for authentication.
``` ```bash
export OPENAI_API_KEY=<token> export OPENAI_API_KEY=<your-openai-api-key>
export ELEVENLABS_API_KEY=<eleven-token> export ELEVENLABS_API_KEY=<your-elevenlabs-api-key>
export ELEVEN_VOICE_ID=<your-elevenlabs-voice-id> # Optional, see note below
``` ```
Make a new voice in Eleven and get the voice id of that voice using their [get voices](https://elevenlabs.io/docs/api-reference/voices) API. **Note on API Keys and Voice ID:**
* `OPENAI_API_KEY`: Your API key from OpenAI, used for the vision and language model.
* `ELEVENLABS_API_KEY`: Your API key from ElevenLabs, used for text-to-speech.
* `ELEVEN_VOICE_ID`: This environment variable allows you to specify a custom voice from your ElevenLabs account. If this variable is not set, the application will default to using the voice ID "21m00Tcm4TlvDq8ikWAM". You can find your available voice IDs using the ElevenLabs [voices API](https://elevenlabs.io/docs/api-reference/voices) or by checking your account on their website. To use a custom voice, make a new voice in your ElevenLabs account and get its voice ID.
## Run it! ## Run it!

View File

@ -21,6 +21,8 @@ if not cap.isOpened():
# Wait for the camera to initialize and adjust light levels # Wait for the camera to initialize and adjust light levels
time.sleep(2) time.sleep(2)
print("📸 Starting image capture... Say cheese!")
while True: while True:
ret, frame = cap.read() ret, frame = cap.read()
if ret: if ret:
@ -37,9 +39,19 @@ while True:
frame = cv2.cvtColor(np.array(resized_img), cv2.COLOR_RGB2BGR) frame = cv2.cvtColor(np.array(resized_img), cv2.COLOR_RGB2BGR)
# Save the frame as an image file # Save the frame as an image file
print("📸 Say cheese! Saving frame.") tmp_path = os.path.join(frames_dir, "frame.tmp.jpg")
path = f"{folder}/frame.jpg" final_path = os.path.join(frames_dir, "frame.jpg")
cv2.imwrite(path, frame)
try:
cv2.imwrite(tmp_path, frame)
os.rename(tmp_path, final_path)
except cv2.error as e:
print(f"OpenCV error: Failed to write image: {e}")
except OSError as e:
print(f"OS error: Failed to rename image: {e}")
except Exception as e:
print(f"An unexpected error occurred during file operation: {e}")
else: else:
print("Failed to capture image") print("Failed to capture image")

View File

@ -10,24 +10,35 @@ from elevenlabs import generate, play, voices
client = OpenAI() client = OpenAI()
def encode_image(image_path): def encode_image(image_path, retries=3, delay=0.1):
while True: for attempt in range(retries):
try: try:
with open(image_path, "rb") as image_file: with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode("utf-8") return base64.b64encode(image_file.read()).decode("utf-8")
except FileNotFoundError:
print(f"Error: Image file not found at {image_path}. Retrying...")
time.sleep(delay)
except IOError as e: except IOError as e:
if e.errno != errno.EACCES: # Handles other I/O errors, including permission errors if they still occur
# Not a "file in use" error, re-raise print(f"IOError when trying to read {image_path}: {e}. Retrying...")
raise time.sleep(delay)
# File is being written to, wait a bit and retry except Exception as e:
time.sleep(0.1) print(f"An unexpected error occurred while encoding image {image_path}: {e}")
return None # Or raise, depending on desired behavior for unexpected errors
print(f"Failed to encode image {image_path} after {retries} retries.")
return None
def play_audio(text): def play_audio(text):
try:
voice_id = os.environ.get("ELEVEN_VOICE_ID", "21m00Tcm4TlvDq8ikWAM")
# audio = generate(text=text, voice="ENfvYmv6CRqDodDZTieQ", model="eleven_turbo_v2") # audio = generate(text=text, voice="ENfvYmv6CRqDodDZTieQ", model="eleven_turbo_v2")
audio = generate(text=text, voice="21m00Tcm4TlvDq8ikWAM", model="eleven_turbo_v2") audio = generate(text=text, voice=voice_id, model="eleven_turbo_v2")
except Exception as e: # Replace with specific ElevenLabs APIError if available
print(f"Error generating audio with ElevenLabs: {e}")
return
try:
unique_id = base64.urlsafe_b64encode(os.urandom(30)).decode("utf-8").rstrip("=") unique_id = base64.urlsafe_b64encode(os.urandom(30)).decode("utf-8").rstrip("=")
dir_path = os.path.join("narration", unique_id) dir_path = os.path.join("narration", unique_id)
os.makedirs(dir_path, exist_ok=True) os.makedirs(dir_path, exist_ok=True)
@ -36,7 +47,11 @@ def play_audio(text):
with open(file_path, "wb") as f: with open(file_path, "wb") as f:
f.write(audio) f.write(audio)
play(audio) play(audio) # Assuming play() is blocking; if not, ensure file is written before next step
except IOError as e:
print(f"IOError saving or playing audio file: {e}")
except Exception as e:
print(f"An unexpected error occurred during audio playback: {e}")
def generate_new_line(base64_image): def generate_new_line(base64_image):
@ -55,14 +70,16 @@ def generate_new_line(base64_image):
def analyze_image(base64_image, script): def analyze_image(base64_image, script):
if not base64_image:
return "Error: Could not encode image for analysis."
try:
response = client.chat.completions.create( response = client.chat.completions.create(
model="gpt-4-vision-preview", model="gpt-4-turbo",
messages=[ messages=[
{ {
"role": "system", "role": "system",
"content": """ "content": """
You are a teenager. Narrate the picture as a teenager. Narrate the picture in the style of a nature documentary. Be observational, insightful, and use vivid language. Maintain a respectful and engaging tone. Keep it concise. If anything interesting or unusual is observed, highlight it with a sense of wonder or intrigue.
Make it snarky and funny. Don't repeat yourself. Make it short. If I do anything remotely interesting, make a big deal about it!
Also do not exceed 300 characters. Also do not exceed 300 characters.
take a deep breath and do this step by step. take a deep breath and do this step by step.
""", """,
@ -74,6 +91,18 @@ def analyze_image(base64_image, script):
) )
response_text = response.choices[0].message.content response_text = response.choices[0].message.content
return response_text return response_text
except client.APIConnectionError as e:
print(f"OpenAI API Connection Error: {e}")
return "Error: Could not connect to OpenAI API."
except client.RateLimitError as e:
print(f"OpenAI API Rate Limit Error: {e}")
return "Error: OpenAI API rate limit exceeded."
except client.APIStatusError as e:
print(f"OpenAI API Status Error: {e}")
return f"Error: OpenAI API returned an error status {e.status_code}."
except Exception as e:
print(f"An unexpected error occurred during OpenAI API call: {e}")
return "Error: An unexpected error occurred during image analysis."
def main(): def main():
@ -86,16 +115,20 @@ def main():
# getting the base64 encoding # getting the base64 encoding
base64_image = encode_image(image_path) base64_image = encode_image(image_path)
if base64_image:
# analyze posture # analyze posture
print("👀 David is watching...") print("👀 David is watching...")
analysis = analyze_image(base64_image, script=script) analysis = analyze_image(base64_image, script=script)
if analysis and not analysis.startswith("Error:") : # Check if analysis produced valid output
print("🎙️ David says:") print("🎙️ David says:")
print(analysis) print(analysis)
play_audio(analysis) play_audio(analysis)
script = script + [{"role": "assistant", "content": analysis}] script = script + [{"role": "assistant", "content": analysis}]
else:
print(analysis) # Print error message from analyze_image or if it's None
else:
print("Skipping analysis due to image encoding failure.")
# wait for 5 seconds # wait for 5 seconds
time.sleep(5) time.sleep(5)

View File

@ -1,41 +1,41 @@
annotated-types==0.6.0 annotated-types==0.7.0
anyio==3.7.1 anyio==4.9.0
appnope==0.1.3 appnope==0.1.4
asttokens==2.4.1 asttokens==3.0.0
certifi==2023.7.22 certifi==2025.4.26
charset-normalizer==3.3.2 charset-normalizer==3.4.2
decorator==5.1.1 decorator==5.2.1
distro==1.8.0 distro==1.9.0
elevenlabs==0.2.26 elevenlabs==2.1.0
exceptiongroup==1.1.3 exceptiongroup==1.3.0
executing==2.0.1 executing==2.2.0
h11==0.14.0 h11==0.16.0
httpcore==1.0.1 httpcore==1.0.9
httpx==0.25.1 httpx==0.28.1
idna==3.4 idna==3.10
ipython==8.17.2 ipython==8.36.0
jedi==0.19.1 jedi==0.19.2
matplotlib-inline==0.1.6 matplotlib-inline==0.1.7
numpy==1.26.1 numpy==2.2.6
openai==1.1.1 openai==1.82.0
opencv-python==4.8.1.78 opencv-python==4.11.0.86
parso==0.8.3 parso==0.8.4
pexpect==4.8.0 pexpect==4.9.0
Pillow==10.1.0 Pillow==11.2.1
prompt-toolkit==3.0.41 prompt-toolkit==3.0.51
ptyprocess==0.7.0 ptyprocess==0.7.0
pure-eval==0.2.2 pure-eval==0.2.3
pydantic==2.4.2 pydantic==2.11.5
pydantic_core==2.10.1 pydantic_core==2.34.1
Pygments==2.16.1 Pygments==2.19.1
requests==2.31.0 requests==2.32.3
simpleaudio==1.0.4 simpleaudio==1.0.4
six==1.16.0 six==1.17.0
sniffio==1.3.0 sniffio==1.3.1
stack-data==0.6.3 stack-data==0.6.3
tqdm==4.66.1 tqdm==4.67.1
traitlets==5.13.0 traitlets==5.14.3
typing_extensions==4.8.0 typing_extensions==4.13.2
urllib3==2.0.7 urllib3==2.4.0
wcwidth==0.2.10 wcwidth==0.2.13
websockets==12.0 websockets==15.0.1