major changes
This commit is contained in:
parent
326757f4d2
commit
2ecf20fcfa
23
README.md
23
README.md
|
@ -18,12 +18,25 @@ source venv/bin/activate
|
||||||
Then, install the dependencies:
|
Then, install the dependencies:
|
||||||
`pip install -r requirements.txt`
|
`pip install -r requirements.txt`
|
||||||
|
|
||||||
Next, make accounts with [OpenAI](https://beta.openai.com/) and [ElevenLabs](https://elevenlabs.io/) and set your API key environment variables. The Python libraries used in this project will automatically detect and use these environment variables for authentication.
|
Next, make accounts with [OpenAI](https://beta.openai.com/) and [ElevenLabs](https://elevenlabs.io/) and set up your API keys.
|
||||||
|
|
||||||
|
Copy the example environment file and add your API keys:
|
||||||
```bash
|
```bash
|
||||||
export OPENAI_API_KEY=<your-openai-api-key>
|
cp env.example .env
|
||||||
export ELEVENLABS_API_KEY=<your-elevenlabs-api-key>
|
```
|
||||||
export ELEVEN_VOICE_ID=<your-elevenlabs-voice-id> # Optional, see note below
|
|
||||||
|
Then edit the `.env` file with your actual API keys:
|
||||||
|
```bash
|
||||||
|
# Copy this file to .env and replace with your actual API keys
|
||||||
|
|
||||||
|
# OpenAI API Key - Get from https://beta.openai.com/
|
||||||
|
OPENAI_API_KEY=your-openai-api-key-here
|
||||||
|
|
||||||
|
# ElevenLabs API Key - Get from https://elevenlabs.io/
|
||||||
|
ELEVENLABS_API_KEY=your-elevenlabs-api-key-here
|
||||||
|
|
||||||
|
# ElevenLabs Voice ID (Optional) - Remove or comment out the line below to use the default voice "21m00Tcm4TlvDq8ikWAM"
|
||||||
|
ELEVEN_VOICE_ID=your-elevenlabs-voice-id-here
|
||||||
```
|
```
|
||||||
|
|
||||||
**Note on API Keys and Voice ID:**
|
**Note on API Keys and Voice ID:**
|
||||||
|
@ -31,6 +44,8 @@ export ELEVEN_VOICE_ID=<your-elevenlabs-voice-id> # Optional, see note below
|
||||||
* `ELEVENLABS_API_KEY`: Your API key from ElevenLabs, used for text-to-speech.
|
* `ELEVENLABS_API_KEY`: Your API key from ElevenLabs, used for text-to-speech.
|
||||||
* `ELEVEN_VOICE_ID`: This environment variable allows you to specify a custom voice from your ElevenLabs account. If this variable is not set, the application will default to using the voice ID "21m00Tcm4TlvDq8ikWAM". You can find your available voice IDs using the ElevenLabs [voices API](https://elevenlabs.io/docs/api-reference/voices) or by checking your account on their website. To use a custom voice, make a new voice in your ElevenLabs account and get its voice ID.
|
* `ELEVEN_VOICE_ID`: This environment variable allows you to specify a custom voice from your ElevenLabs account. If this variable is not set, the application will default to using the voice ID "21m00Tcm4TlvDq8ikWAM". You can find your available voice IDs using the ElevenLabs [voices API](https://elevenlabs.io/docs/api-reference/voices) or by checking your account on their website. To use a custom voice, make a new voice in your ElevenLabs account and get its voice ID.
|
||||||
|
|
||||||
|
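The application reads these values from the `.env` file at startup (loaded with `python-dotenv`), so you do not need to export them in your shell. Make sure `.env` is listed in your `.gitignore` so your API keys are never committed to version control.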
|
||||||
|
|
||||||
## Run it!
|
## Run it!
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
71
capture.py
71
capture.py
|
@ -1,50 +1,74 @@
|
||||||
import cv2
|
import cv2
|
||||||
import time
|
import time
|
||||||
from PIL import Image
|
from PIL import Image, ImageGrab
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import os
|
import os
|
||||||
|
import glob
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
# Folder
|
# Folder
|
||||||
folder = "frames"
|
folder = "frames"
|
||||||
|
MAX_IMAGES = 10
|
||||||
|
|
||||||
# Create the frames folder if it doesn't exist
|
# Create the frames folder if it doesn't exist
|
||||||
frames_dir = os.path.join(os.getcwd(), folder)
|
frames_dir = os.path.join(os.getcwd(), folder)
|
||||||
os.makedirs(frames_dir, exist_ok=True)
|
os.makedirs(frames_dir, exist_ok=True)
|
||||||
|
|
||||||
# Initialize the webcam
|
def cleanup_old_images():
|
||||||
cap = cv2.VideoCapture(0)
|
"""Keep only the 10 most recent frame images"""
|
||||||
|
frame_files = glob.glob(os.path.join(frames_dir, "frame_*.jpg"))
|
||||||
|
frame_files.sort(key=os.path.getctime, reverse=True) # Sort by creation time, newest first
|
||||||
|
|
||||||
# Check if the webcam is opened correctly
|
# Remove older files if we have more than MAX_IMAGES
|
||||||
if not cap.isOpened():
|
for old_file in frame_files[MAX_IMAGES:]:
|
||||||
raise IOError("Cannot open webcam")
|
try:
|
||||||
|
os.remove(old_file)
|
||||||
|
print(f"Removed old frame: {os.path.basename(old_file)}")
|
||||||
|
except OSError as e:
|
||||||
|
print(f"Error removing old frame {old_file}: {e}")
|
||||||
|
|
||||||
# Wait for the camera to initialize and adjust light levels
|
def get_latest_frame_path():
|
||||||
time.sleep(2)
|
"""Get the path of the most recent frame file"""
|
||||||
|
frame_files = glob.glob(os.path.join(frames_dir, "frame_*.jpg"))
|
||||||
|
if frame_files:
|
||||||
|
return max(frame_files, key=os.path.getctime)
|
||||||
|
return None
|
||||||
|
|
||||||
print("📸 Starting image capture... Say cheese!")
|
print("📸 Starting screenshot capture... Capture is watching your screen!")
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
ret, frame = cap.read()
|
try:
|
||||||
if ret:
|
# Take a screenshot
|
||||||
# Convert the frame to a PIL image
|
screenshot = ImageGrab.grab()
|
||||||
pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
|
||||||
|
|
||||||
# Resize the image
|
# Resize the image
|
||||||
max_size = 250
|
max_size = 250
|
||||||
ratio = max_size / max(pil_img.size)
|
ratio = max_size / max(screenshot.size)
|
||||||
new_size = tuple([int(x*ratio) for x in pil_img.size])
|
new_size = tuple([int(x*ratio) for x in screenshot.size])
|
||||||
resized_img = pil_img.resize(new_size, Image.LANCZOS)
|
resized_img = screenshot.resize(new_size, Image.LANCZOS)
|
||||||
|
|
||||||
# Convert the PIL image back to an OpenCV image
|
# Convert the PIL image to an OpenCV image for saving
|
||||||
frame = cv2.cvtColor(np.array(resized_img), cv2.COLOR_RGB2BGR)
|
frame = cv2.cvtColor(np.array(resized_img), cv2.COLOR_RGB2BGR)
|
||||||
|
|
||||||
# Save the frame as an image file
|
# Generate timestamped filename
|
||||||
tmp_path = os.path.join(frames_dir, "frame.tmp.jpg")
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||||
final_path = os.path.join(frames_dir, "frame.jpg")
|
frame_filename = f"frame_{timestamp}.jpg"
|
||||||
|
tmp_path = os.path.join(frames_dir, f"frame_{timestamp}.tmp.jpg")
|
||||||
|
final_path = os.path.join(frames_dir, frame_filename)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
cv2.imwrite(tmp_path, frame)
|
cv2.imwrite(tmp_path, frame)
|
||||||
os.rename(tmp_path, final_path)
|
os.rename(tmp_path, final_path)
|
||||||
|
|
||||||
|
# Also create/update a symlink to the latest frame for backward compatibility
|
||||||
|
latest_link = os.path.join(frames_dir, "frame.jpg")
|
||||||
|
if os.path.exists(latest_link) or os.path.islink(latest_link):
|
||||||
|
os.remove(latest_link)
|
||||||
|
os.symlink(frame_filename, latest_link)
|
||||||
|
|
||||||
|
# Clean up old images
|
||||||
|
cleanup_old_images()
|
||||||
|
|
||||||
except cv2.error as e:
|
except cv2.error as e:
|
||||||
print(f"OpenCV error: Failed to write image: {e}")
|
print(f"OpenCV error: Failed to write image: {e}")
|
||||||
except OSError as e:
|
except OSError as e:
|
||||||
|
@ -52,12 +76,11 @@ while True:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"An unexpected error occurred during file operation: {e}")
|
print(f"An unexpected error occurred during file operation: {e}")
|
||||||
|
|
||||||
else:
|
except Exception as e:
|
||||||
print("Failed to capture image")
|
print(f"Failed to capture screenshot: {e}")
|
||||||
|
|
||||||
# Wait for 2 seconds
|
# Wait for 2 seconds
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
||||||
# Release the camera and close all windows
|
# Cleanup
|
||||||
cap.release()
|
|
||||||
cv2.destroyAllWindows()
|
cv2.destroyAllWindows()
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted
|
||||||
|
/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted
|
||||||
|
shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted
|
||||||
|
/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted
|
||||||
|
shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted
|
||||||
|
/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted
|
||||||
|
shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted
|
||||||
|
/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted
|
||||||
|
shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted
|
||||||
|
/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted
|
||||||
|
shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted
|
||||||
|
/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted
|
||||||
|
shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted
|
||||||
|
/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted
|
||||||
|
shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted
|
||||||
|
/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted
|
||||||
|
shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted
|
||||||
|
/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted
|
||||||
|
shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted
|
||||||
|
/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted
|
||||||
|
shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted
|
||||||
|
/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted
|
||||||
|
shell-init: error retrieving current directory: getcwd: cannot access parent directories: Operation not permitted
|
||||||
|
/bin/bash: /Users/roshanvenugopal/Documents/github/narrator/launch_capture.sh: Operation not permitted
|
|
@ -0,0 +1,11 @@
|
||||||
|
# Copy this file to .env and replace with your actual API keys
|
||||||
|
|
||||||
|
# OpenAI API Key - Get from https://beta.openai.com/
|
||||||
|
OPENAI_API_KEY=your-openai-api-key-here
|
||||||
|
|
||||||
|
# ElevenLabs API Key - Get from https://elevenlabs.io/
|
||||||
|
ELEVENLABS_API_KEY=your-elevenlabs-api-key-here
|
||||||
|
|
||||||
|
# ElevenLabs Voice ID (Optional) - Remove or comment out the ELEVEN_VOICE_ID line below to use the default voice "21m00Tcm4TlvDq8ikWAM"
|
||||||
|
# You can find available voice IDs at https://elevenlabs.io/docs/api-reference/voices
|
||||||
|
ELEVEN_VOICE_ID=your-elevenlabs-voice-id-here
|
|
@ -0,0 +1,8 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Set working directory
|
||||||
|
cd /Users/roshanvenugopal/Documents/github/narrator
|
||||||
|
|
||||||
|
# Activate virtual environment and run capture
|
||||||
|
source venv/bin/activate
|
||||||
|
python capture.py
|
36
narrator.py
36
narrator.py
|
@ -1,13 +1,21 @@
|
||||||
import os
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
import base64
|
import base64
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
import simpleaudio as sa
|
import simpleaudio as sa
|
||||||
import errno
|
import errno
|
||||||
from elevenlabs import generate, play, voices
|
from elevenlabs.client import ElevenLabs
|
||||||
|
from elevenlabs import play
|
||||||
|
|
||||||
|
# Load environment variables from .env file
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
client = OpenAI()
|
client = OpenAI()
|
||||||
|
elevenlabs_client = ElevenLabs(
|
||||||
|
api_key=os.environ.get("ELEVENLABS_API_KEY")
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def encode_image(image_path, retries=3, delay=0.1):
|
def encode_image(image_path, retries=3, delay=0.1):
|
||||||
|
@ -32,8 +40,12 @@ def encode_image(image_path, retries=3, delay=0.1):
|
||||||
def play_audio(text):
|
def play_audio(text):
|
||||||
try:
|
try:
|
||||||
voice_id = os.environ.get("ELEVEN_VOICE_ID", "21m00Tcm4TlvDq8ikWAM")
|
voice_id = os.environ.get("ELEVEN_VOICE_ID", "21m00Tcm4TlvDq8ikWAM")
|
||||||
# audio = generate(text=text, voice="ENfvYmv6CRqDodDZTieQ", model="eleven_turbo_v2")
|
# Generate audio using the new ElevenLabs client API
|
||||||
audio = generate(text=text, voice=voice_id, model="eleven_turbo_v2")
|
audio = elevenlabs_client.generate(
|
||||||
|
text=text,
|
||||||
|
voice=voice_id,
|
||||||
|
model="eleven_turbo_v2"
|
||||||
|
)
|
||||||
except Exception as e: # Replace with specific ElevenLabs APIError if available
|
except Exception as e: # Replace with specific ElevenLabs APIError if available
|
||||||
print(f"Error generating audio with ElevenLabs: {e}")
|
print(f"Error generating audio with ElevenLabs: {e}")
|
||||||
return
|
return
|
||||||
|
@ -62,7 +74,9 @@ def generate_new_line(base64_image):
|
||||||
{"type": "text", "text": "Describe this image"},
|
{"type": "text", "text": "Describe this image"},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "image_url",
|
||||||
"image_url": f"data:image/jpeg;base64,{base64_image}",
|
"image_url": {
|
||||||
|
"url": f"data:image/jpeg;base64,{base64_image}"
|
||||||
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
@ -91,16 +105,20 @@ def analyze_image(base64_image, script):
|
||||||
)
|
)
|
||||||
response_text = response.choices[0].message.content
|
response_text = response.choices[0].message.content
|
||||||
return response_text
|
return response_text
|
||||||
except client.APIConnectionError as e:
|
except Exception as e:
|
||||||
|
if "APIConnectionError" in str(type(e)):
|
||||||
print(f"OpenAI API Connection Error: {e}")
|
print(f"OpenAI API Connection Error: {e}")
|
||||||
return "Error: Could not connect to OpenAI API."
|
return "Error: Could not connect to OpenAI API."
|
||||||
except client.RateLimitError as e:
|
elif "RateLimitError" in str(type(e)):
|
||||||
print(f"OpenAI API Rate Limit Error: {e}")
|
print(f"OpenAI API Rate Limit Error: {e}")
|
||||||
return "Error: OpenAI API rate limit exceeded."
|
return "Error: OpenAI API rate limit exceeded."
|
||||||
except client.APIStatusError as e:
|
elif "AuthenticationError" in str(type(e)):
|
||||||
|
print(f"OpenAI API Authentication Error: {e}")
|
||||||
|
return "Error: Invalid OpenAI API key. Please check your .env file."
|
||||||
|
elif "APIStatusError" in str(type(e)):
|
||||||
print(f"OpenAI API Status Error: {e}")
|
print(f"OpenAI API Status Error: {e}")
|
||||||
return f"Error: OpenAI API returned an error status {e.status_code}."
|
return f"Error: OpenAI API returned an error status."
|
||||||
except Exception as e:
|
else:
|
||||||
print(f"An unexpected error occurred during OpenAI API call: {e}")
|
print(f"An unexpected error occurred during OpenAI API call: {e}")
|
||||||
return "Error: An unexpected error occurred during image analysis."
|
return "Error: An unexpected error occurred during image analysis."
|
||||||
|
|
||||||
|
|
|
@ -26,8 +26,9 @@ prompt-toolkit==3.0.51
|
||||||
ptyprocess==0.7.0
|
ptyprocess==0.7.0
|
||||||
pure-eval==0.2.3
|
pure-eval==0.2.3
|
||||||
pydantic==2.11.5
|
pydantic==2.11.5
|
||||||
pydantic_core==2.34.1
|
pydantic_core==2.33.2
|
||||||
Pygments==2.19.1
|
Pygments==2.19.1
|
||||||
|
python-dotenv==1.0.0
|
||||||
requests==2.32.3
|
requests==2.32.3
|
||||||
simpleaudio==1.0.4
|
simpleaudio==1.0.4
|
||||||
six==1.17.0
|
six==1.17.0
|
||||||
|
|
Loading…
Reference in New Issue