diff --git a/README.md b/README.md index 5861d91..1b54543 100644 --- a/README.md +++ b/README.md @@ -18,12 +18,25 @@ source venv/bin/activate Then, install the dependencies: `pip install -r requirements.txt` -Next, make accounts with [OpenAI](https://beta.openai.com/) and [ElevenLabs](https://elevenlabs.io/) and set your API key environment variables. The Python libraries used in this project will automatically detect and use these environment variables for authentication. +Next, make accounts with [OpenAI](https://beta.openai.com/) and [ElevenLabs](https://elevenlabs.io/) and set up your API keys. +Copy the example environment file and add your API keys: ```bash -export OPENAI_API_KEY= -export ELEVENLABS_API_KEY= -export ELEVEN_VOICE_ID= # Optional, see note below +cp env.example .env +``` + +Then edit the `.env` file with your actual API keys: +```bash +# Copy this file to .env and replace with your actual API keys + +# OpenAI API Key - Get from https://beta.openai.com/ +OPENAI_API_KEY=your-openai-api-key-here + +# ElevenLabs API Key - Get from https://elevenlabs.io/ +ELEVENLABS_API_KEY=your-elevenlabs-api-key-here + +# ElevenLabs Voice ID (Optional) - If not set, defaults to "21m00Tcm4TlvDq8ikWAM" +ELEVEN_VOICE_ID=your-elevenlabs-voice-id-here ``` **Note on API Keys and Voice ID:** @@ -31,6 +44,8 @@ export ELEVEN_VOICE_ID= # Optional, see note below * `ELEVENLABS_API_KEY`: Your API key from ElevenLabs, used for text-to-speech. * `ELEVEN_VOICE_ID`: This environment variable allows you to specify a custom voice from your ElevenLabs account. If this variable is not set, the application will default to using the voice ID "21m00Tcm4TlvDq8ikWAM". You can find your available voice IDs using the ElevenLabs [voices API](https://elevenlabs.io/docs/api-reference/voices) or by checking your account on their website. To use a custom voice, make a new voice in your ElevenLabs account and get its voice ID. +The application now reads these values from a `.env` file, which keeps your sensitive API keys secure and out of version control. + ## Run it! ```bash diff --git a/assets/stop_slouching.mp3 b/assets/stop_slouching.mp3 deleted file mode 100644 index d3a68b1..0000000 Binary files a/assets/stop_slouching.mp3 and /dev/null differ diff --git a/assets/stop_slouching.wav b/assets/stop_slouching.wav deleted file mode 100644 index 31a855a..0000000 Binary files a/assets/stop_slouching.wav and /dev/null differ diff --git a/assets/wonderful_posture.mp3 b/assets/wonderful_posture.mp3 deleted file mode 100644 index 74c6604..0000000 Binary files a/assets/wonderful_posture.mp3 and /dev/null differ diff --git a/assets/wonderful_posture.wav b/assets/wonderful_posture.wav deleted file mode 100644 index 7a762ea..0000000 Binary files a/assets/wonderful_posture.wav and /dev/null differ diff --git a/capture.log b/capture.log new file mode 100644 index 0000000..e69de29 diff --git a/capture.py b/capture.py index 4c8a1de..782b856 100644 --- a/capture.py +++ b/capture.py @@ -1,50 +1,74 @@ import cv2 import time -from PIL import Image +from PIL import Image, ImageGrab import numpy as np import os +import glob +from datetime import datetime # Folder folder = "frames" +MAX_IMAGES = 10 # Create the frames folder if it doesn't exist frames_dir = os.path.join(os.getcwd(), folder) os.makedirs(frames_dir, exist_ok=True) -# Initialize the webcam -cap = cv2.VideoCapture(0) +def cleanup_old_images(): + """Keep only the 10 most recent frame images""" + frame_files = glob.glob(os.path.join(frames_dir, "frame_*.jpg")) + frame_files.sort(key=os.path.getctime, reverse=True) # Sort by creation time, newest first + + # Remove older files if we have more than MAX_IMAGES + for old_file in frame_files[MAX_IMAGES:]: + try: + os.remove(old_file) + print(f"Removed old frame: {os.path.basename(old_file)}") + except OSError as e: + print(f"Error removing old frame {old_file}: {e}") -# Check if the webcam is opened correctly -if not cap.isOpened(): - raise IOError("Cannot open webcam") +def get_latest_frame_path(): + """Get the path of the most recent frame file""" + frame_files = glob.glob(os.path.join(frames_dir, "frame_*.jpg")) + if frame_files: + return max(frame_files, key=os.path.getctime) + return None -# Wait for the camera to initialize and adjust light levels -time.sleep(2) - -print("📸 Starting image capture... Say cheese!") +print("📸 Starting screenshot capture... Capture is watching your screen!") while True: - ret, frame = cap.read() - if ret: - # Convert the frame to a PIL image - pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) - + try: + # Take a screenshot + screenshot = ImageGrab.grab() + # Resize the image max_size = 250 - ratio = max_size / max(pil_img.size) - new_size = tuple([int(x*ratio) for x in pil_img.size]) - resized_img = pil_img.resize(new_size, Image.LANCZOS) + ratio = max_size / max(screenshot.size) + new_size = tuple([int(x*ratio) for x in screenshot.size]) + resized_img = screenshot.resize(new_size, Image.LANCZOS) - # Convert the PIL image back to an OpenCV image + # Convert the PIL image to an OpenCV image for saving frame = cv2.cvtColor(np.array(resized_img), cv2.COLOR_RGB2BGR) - # Save the frame as an image file - tmp_path = os.path.join(frames_dir, "frame.tmp.jpg") - final_path = os.path.join(frames_dir, "frame.jpg") + # Generate timestamped filename + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + frame_filename = f"frame_{timestamp}.jpg" + tmp_path = os.path.join(frames_dir, f"frame_{timestamp}.tmp.jpg") + final_path = os.path.join(frames_dir, frame_filename) try: cv2.imwrite(tmp_path, frame) os.rename(tmp_path, final_path) + + # Also create/update a symlink to the latest frame for backward compatibility + latest_link = os.path.join(frames_dir, "frame.jpg") + if os.path.exists(latest_link) or os.path.islink(latest_link): + os.remove(latest_link) + os.symlink(frame_filename, latest_link) + + # Clean up old images + cleanup_old_images() + except cv2.error as e: print(f"OpenCV error: Failed to write image: {e}") except OSError as e: @@ -52,12 +76,11 @@ while True: except Exception as e: print(f"An unexpected error occurred during file operation: {e}") - else: - print("Failed to capture image") + except Exception as e: + print(f"Failed to capture screenshot: {e}") # Wait for 2 seconds time.sleep(2) -# Release the camera and close all windows -cap.release() +# Cleanup cv2.destroyAllWindows() diff --git a/capture_error.log b/capture_error.log new file mode 100644 index 0000000..d15ec84 --- /dev/null +++ b/capture_error.log @@ -0,0 +1,24 @@ +shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted +/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted +shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted +/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted +shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted +/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted +shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted +/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted +shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted +/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted +shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted +/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted +shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted +/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted +shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted +/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted +shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted +/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted +shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted +/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted +shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted +/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted +shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted +/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted diff --git a/env.example b/env.example new file mode 100644 index 0000000..1c3a02b --- /dev/null +++ b/env.example @@ -0,0 +1,11 @@ +# Copy this file to .env and replace with your actual API keys + +# OpenAI API Key - Get from https://beta.openai.com/ +OPENAI_API_KEY=your-openai-api-key-here + +# ElevenLabs API Key - Get from https://elevenlabs.io/ +ELEVENLABS_API_KEY=your-elevenlabs-api-key-here + +# ElevenLabs Voice ID (Optional) - If not set, defaults to "21m00Tcm4TlvDq8ikWAM" +# You can find available voice IDs at https://elevenlabs.io/docs/api-reference/voices +ELEVEN_VOICE_ID=your-elevenlabs-voice-id-here \ No newline at end of file diff --git a/launch_capture.sh b/launch_capture.sh new file mode 100755 index 0000000..a28babc --- /dev/null +++ b/launch_capture.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# Set working directory +cd /Users/roshanvenugopal/Documents/github/narrator + +# Activate virtual environment and run capture +source venv/bin/activate +python capture.py \ No newline at end of file diff --git a/narrator.py b/narrator.py index 13866d1..c3d8a77 100644 --- a/narrator.py +++ b/narrator.py @@ -1,13 +1,21 @@ import os +from dotenv import load_dotenv from openai import OpenAI import base64 import json import time import simpleaudio as sa import errno -from elevenlabs import generate, play, voices +from elevenlabs.client import ElevenLabs +from elevenlabs import play + +# Load environment variables from .env file +load_dotenv() client = OpenAI() +elevenlabs_client = ElevenLabs( + api_key=os.environ.get("ELEVENLABS_API_KEY") +) def encode_image(image_path, retries=3, delay=0.1): @@ -32,8 +40,12 @@ def encode_image(image_path, retries=3, delay=0.1): def play_audio(text): try: voice_id = os.environ.get("ELEVEN_VOICE_ID", "21m00Tcm4TlvDq8ikWAM") - # audio = generate(text=text, voice="ENfvYmv6CRqDodDZTieQ", model="eleven_turbo_v2") - audio = generate(text=text, voice=voice_id, model="eleven_turbo_v2") + # Generate audio using the new ElevenLabs client API + audio = elevenlabs_client.generate( + text=text, + voice=voice_id, + model="eleven_turbo_v2" + ) except Exception as e: # Replace with specific ElevenLabs APIError if available print(f"Error generating audio with ElevenLabs: {e}") return @@ -62,7 +74,9 @@ def generate_new_line(base64_image): {"type": "text", "text": "Describe this image"}, { "type": "image_url", - "image_url": f"data:image/jpeg;base64,{base64_image}", + "image_url": { + "url": f"data:image/jpeg;base64,{base64_image}" + }, }, ], }, @@ -91,18 +105,22 @@ def analyze_image(base64_image, script): ) response_text = response.choices[0].message.content return response_text - except client.APIConnectionError as e: - print(f"OpenAI API Connection Error: {e}") - return "Error: Could not connect to OpenAI API." - except client.RateLimitError as e: - print(f"OpenAI API Rate Limit Error: {e}") - return "Error: OpenAI API rate limit exceeded." - except client.APIStatusError as e: - print(f"OpenAI API Status Error: {e}") - return f"Error: OpenAI API returned an error status {e.status_code}." except Exception as e: - print(f"An unexpected error occurred during OpenAI API call: {e}") - return "Error: An unexpected error occurred during image analysis." + if "APIConnectionError" in str(type(e)): + print(f"OpenAI API Connection Error: {e}") + return "Error: Could not connect to OpenAI API." + elif "RateLimitError" in str(type(e)): + print(f"OpenAI API Rate Limit Error: {e}") + return "Error: OpenAI API rate limit exceeded." + elif "AuthenticationError" in str(type(e)): + print(f"OpenAI API Authentication Error: {e}") + return "Error: Invalid OpenAI API key. Please check your .env file." + elif "APIStatusError" in str(type(e)): + print(f"OpenAI API Status Error: {e}") + return f"Error: OpenAI API returned an error status." + else: + print(f"An unexpected error occurred during OpenAI API call: {e}") + return "Error: An unexpected error occurred during image analysis." def main(): diff --git a/requirements.txt b/requirements.txt index 1c9e847..dd9aee1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,8 +26,9 @@ prompt-toolkit==3.0.51 ptyprocess==0.7.0 pure-eval==0.2.3 pydantic==2.11.5 -pydantic_core==2.34.1 +pydantic_core==2.33.2 Pygments==2.19.1 +python-dotenv==1.0.0 requests==2.32.3 simpleaudio==1.0.4 six==1.17.0