This commit is contained in:
taradepan 2023-12-18 10:22:30 +05:30
parent b44e7a1269
commit d234f73b46
2 changed files with 14 additions and 10 deletions

View File

@ -40,8 +40,10 @@ python capture.py
```
In another terminal, run the narrator:
```bash
```bash
python narrator.py
```
Choose the model by entering 1 or 2 when prompted.
The default model is GPT-4. To use Gemini Pro Vision instead, pass the `-m` option:
```bash
python narrator.py -m gemini
```

View File

@ -8,11 +8,11 @@ import errno
from elevenlabs import generate, play, set_api_key, voices
import google.generativeai as genai
import PIL.Image
import argparse
client = OpenAI()
genai.configure(api_key = os.environ.get("GEMINI_API_KEY"))
set_api_key(os.environ.get("ELEVENLABS_API_KEY"))
@ -79,11 +79,12 @@ def analyze_image(base64_image, script):
def main():
parser = argparse.ArgumentParser(description="Image narration script with model selection.")
parser.add_argument("-m", "--model", choices=["gpt-4", "gemini"], default="gpt-4", help="Select the AI model (default: gpt-4)")
args = parser.parse_args()
script = []
model = input("Which model would you like to use? 1. GPT-4 Vision 2. Gemini Pro Vision \n")
if model == "1":
if args.model.lower() == "gpt-4":
print("using GPT-4 Vision")
print("👀 David is watching...")
@ -107,7 +108,8 @@ def main():
# wait for 5 seconds
time.sleep(5)
elif model == "2":
elif args.model.lower() == "gemini":
genai.configure(api_key = os.environ.get("GEMINI_API_KEY"))
print("using Gemini Pro Vision")
print("👀 David is watching...")
@ -137,7 +139,7 @@ def main():
time.sleep(5)
else:
print("Please enter a valid model number")
print("Please enter a valid argument")
if __name__ == "__main__":