147 lines
4.2 KiB
Python
Executable File
147 lines
4.2 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import os
|
|
import sys
|
|
import argparse
|
|
import base64
|
|
from pathlib import Path
|
|
import requests
|
|
|
|
|
|
def load_env():
    """Load ``KEY=VALUE`` pairs from the ``.env`` file next to this script.

    Blank lines, lines starting with ``#``, and lines without ``=`` are
    ignored. A leading ``export`` keyword before the name is accepted and
    dropped. A value wrapped in one matching pair of single or double quotes
    has that pair removed; quotes inside the value are kept. Variables that
    are already present in ``os.environ`` are never overwritten. Nothing
    happens when the file does not exist.
    """
    env_path = Path(__file__).parent / ".env"
    if not env_path.exists():
        return

    # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM, so the
    # first key is not polluted by an invisible character.
    for raw_line in env_path.read_text(encoding="utf-8-sig").splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#") or "=" not in entry:
            continue

        key, _, value = entry.partition("=")
        name_parts = key.split(None, 1)
        if len(name_parts) == 2 and name_parts[0] == "export":
            # Shell-style "export NAME=value" line: keep only the name.
            key = name_parts[1]
        key = key.strip()
        if not key:
            # "=value" has no variable name; an empty name cannot be set.
            continue

        value = value.strip()
        # Remove exactly one pair of matching surrounding quotes so that
        # quote characters belonging to the value itself survive.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]

        os.environ.setdefault(key, value)
|
|
|
|
|
|
# Copy settings from the sibling .env file into os.environ first, so the
# token lookup below can see them.
load_env()

# Bearer token sent with each request; None when CHUTES_API_TOKEN is unset.
API_TOKEN = os.getenv("CHUTES_API_TOKEN")
# Chat-completions endpoint that receives the request.
API_URL = "https://llm.chutes.ai/v1/chat/completions"
# Model used when the caller does not name one.
DEFAULT_MODEL = "moonshotai/Kimi-K2.5-TEE"
|
|
|
|
|
|
def image_to_base64_url(image_path):
    """Encode an image file as a ``data:`` URL carrying a base64 payload.

    The MIME type is chosen from the file suffix (case-insensitive). Common
    web formats are mapped explicitly; any other suffix is looked up with the
    standard ``mimetypes`` module, and ``image/jpeg`` is used when that lookup
    does not yield an ``image/*`` type.

    Args:
        image_path: Path of the image to read (``str`` or ``os.PathLike``).

    Returns:
        A string of the form ``data:<mime-type>;base64,<payload>``.

    Raises:
        FileNotFoundError: If nothing exists at ``image_path``.
        OSError: If the path exists but cannot be opened or read.
    """
    import mimetypes  # local import: only this function needs it

    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    known_types = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".webp": "image/webp",
        ".bmp": "image/bmp",
    }

    suffix = Path(image_path).suffix.lower()
    mime_type = known_types.get(suffix)
    if mime_type is None:
        # Formats outside the table (e.g. .tiff, .svg) should not be
        # mislabelled as JPEG; ask the stdlib registry before defaulting.
        guessed, _ = mimetypes.guess_type(f"file{suffix}")
        if guessed and guessed.startswith("image/"):
            mime_type = guessed
        else:
            mime_type = "image/jpeg"

    with open(image_path, "rb") as f:
        image_bytes = f.read()

    # Base64 output is pure ASCII, so the decode cannot fail.
    encoded = base64.b64encode(image_bytes).decode("ascii")
    return f"data:{mime_type};base64,{encoded}"
|
|
|
|
|
|
def analyze_image(
    image_path,
    prompt="Analyze this image in detail. Describe what you see, including objects, people, text, colors, composition, and any relevant context.",
    max_tokens=1024,
    temperature=0.7,
    model=None,
):
    """Send an image plus a text prompt to the chat API and print the reply.

    The image is inlined as a base64 data URL inside a single user message.
    On success the reply text is written to stdout. Every failure path prints
    an ``Error: ...`` line to stderr and exits the process with status 1.

    Args:
        image_path: Path of the image file to analyze.
        prompt: Instruction text sent alongside the image.
        max_tokens: Value sent as ``max_tokens`` in the request.
        temperature: Value sent as ``temperature`` in the request.
        model: Model identifier; ``DEFAULT_MODEL`` is used when falsy.

    Raises:
        SystemExit: With code 1 when the token is missing, the image cannot
            be read, the request fails, or the response has no usable text.
    """
    if not API_TOKEN:
        print("Error: CHUTES_API_TOKEN not set in environment", file=sys.stderr)
        sys.exit(1)

    # Reading the image can fail for reasons other than a missing file
    # (directory, permissions); report those the same way instead of letting
    # a traceback escape.
    try:
        image_url = image_to_base64_url(image_path)
    except FileNotFoundError:
        print(f"Error: Image file not found: {image_path}", file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        print(f"Error: Could not read image file - {e}", file=sys.stderr)
        sys.exit(1)

    payload = {
        "model": model or DEFAULT_MODEL,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": image_url}},
                ],
            }
        ],
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": False,
    }
    headers = {
        "Authorization": f"Bearer {API_TOKEN}",
        "Content-Type": "application/json",
    }

    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        result = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error: API request failed - {e}", file=sys.stderr)
        sys.exit(1)
    except ValueError as e:
        # Older requests releases raise a plain ValueError for a non-JSON body.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    # Anything other than a JSON object with a non-empty "choices" list is
    # treated as a malformed response.
    choices = result.get("choices") if isinstance(result, dict) else None
    if not choices:
        print("Error: Invalid response format", file=sys.stderr)
        sys.exit(1)

    try:
        # "message" may be absent or null; both count as "no content".
        message = choices[0].get("message") or {}
        content = message.get("content", "")
    except (AttributeError, IndexError, KeyError, TypeError):
        print("Error: Invalid response format", file=sys.stderr)
        sys.exit(1)

    if not content:
        print("Error: No content in response", file=sys.stderr)
        sys.exit(1)

    print(content)
|
|
|
|
|
|
def main():
    """Parse the command line and run one image analysis."""
    cli = argparse.ArgumentParser(description="Analyze images with vision AI")
    cli.add_argument("image_path", help="Path to image file")
    cli.add_argument("prompt", nargs="?", default="", help="Custom analysis prompt")
    cli.add_argument(
        "--max-tokens", type=int, default=1024, help="Max tokens in response"
    )
    cli.add_argument(
        "--temperature", type=float, default=0.7, help="Response creativity (0-2)"
    )
    cli.add_argument("--model", type=str, default=None, help="Vision model to use")
    opts = cli.parse_args()

    # An omitted or empty positional prompt falls back to the generic request.
    fallback_prompt = "Analyze this image in detail. Describe what you see, including objects, people, text, colors, composition, and any relevant context."
    chosen_prompt = opts.prompt or fallback_prompt

    analyze_image(
        image_path=opts.image_path,
        prompt=chosen_prompt,
        max_tokens=opts.max_tokens,
        temperature=opts.temperature,
        model=opts.model,
    )
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|