#!/usr/bin/env python3
"""Command-line tool that sends an image and a text prompt to a vision model.

The image is read from disk, embedded in the request as a base64 ``data:``
URL, and posted to the chat-completions endpoint at ``API_URL``. The model's
reply is printed to stdout; every failure is reported on stderr and ends the
process with exit status 1.

The bearer token is read from the ``CHUTES_API_TOKEN`` environment variable,
which may also be supplied through a ``.env`` file placed next to this script.
"""
import os
import sys
import argparse
import base64
from pathlib import Path

import requests


def load_env():
    """Load ``KEY=VALUE`` pairs from a ``.env`` file next to this script.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored. Surrounding whitespace and surrounding quote characters are
    stripped from the value. Variables that are already present in the
    process environment are never overwritten.
    """
    env_path = Path(__file__).parent / ".env"
    # is_file() rather than exists(): a directory named ".env" must not make
    # the import of this module fail.
    if not env_path.is_file():
        return

    for raw_line in env_path.read_text().splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        key = key.strip()
        if not key:
            # A line such as "=value" has no variable name to assign to.
            continue
        os.environ.setdefault(key, value.strip().strip("\"'"))


load_env()

API_TOKEN = os.environ.get("CHUTES_API_TOKEN")
API_URL = "https://llm.chutes.ai/v1/chat/completions"
DEFAULT_MODEL = "moonshotai/Kimi-K2.5-TEE"

# Single source of truth for the prompt used when the caller supplies none.
DEFAULT_PROMPT = (
    "Analyze this image in detail. Describe what you see, including objects, "
    "people, text, colors, composition, and any relevant context."
)

# File suffix -> MIME type used in the data URL.
_MIME_TYPES = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
    ".bmp": "image/bmp",
}


def image_to_base64_url(image_path):
    """Return the file at *image_path* encoded as a base64 ``data:`` URL.

    The MIME type is chosen from the file suffix (case-insensitive); an
    unrecognised suffix falls back to ``image/jpeg``.

    Raises:
        FileNotFoundError: if *image_path* does not exist.
        OSError: if the path exists but cannot be opened or read
            (for example, it is a directory).
    """
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    suffix = Path(image_path).suffix.lower()
    mime_type = _MIME_TYPES.get(suffix, "image/jpeg")

    with open(image_path, "rb") as f:
        image_bytes = f.read()

    encoded = base64.b64encode(image_bytes).decode("utf-8")
    return f"data:{mime_type};base64,{encoded}"


def analyze_image(
    image_path,
    prompt=DEFAULT_PROMPT,
    max_tokens=1024,
    temperature=0.7,
    model=None,
):
    """Send an image plus a text prompt to the API and print the reply.

    Args:
        image_path: Path of the image file to analyze.
        prompt: Text instruction sent alongside the image.
        max_tokens: Value sent as ``max_tokens`` in the request payload.
        temperature: Value sent as ``temperature`` in the request payload.
        model: Model identifier; ``DEFAULT_MODEL`` is used when falsy.

    Returns:
        None. The reply text is written to stdout.

    Exits:
        Calls ``sys.exit(1)`` after printing a message to stderr when the API
        token is missing, the image is missing or unreadable, the request
        fails, or the response carries no usable content.
    """
    if not API_TOKEN:
        print("Error: CHUTES_API_TOKEN not set in environment", file=sys.stderr)
        sys.exit(1)

    if not os.path.exists(image_path):
        print(f"Error: Image file not found: {image_path}", file=sys.stderr)
        sys.exit(1)

    # The path can exist and still be unreadable (a directory, missing
    # permissions, ...). Report that like every other failure instead of
    # letting a traceback escape.
    try:
        image_url = image_to_base64_url(image_path)
    except OSError as e:
        print(f"Error: Could not read image file: {e}", file=sys.stderr)
        sys.exit(1)

    payload = {
        "model": model or DEFAULT_MODEL,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": image_url}},
                ],
            }
        ],
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": False,
    }
    headers = {
        "Authorization": f"Bearer {API_TOKEN}",
        "Content-Type": "application/json",
    }

    # sys.exit() raises SystemExit, which is not an Exception subclass, so the
    # exits inside this try block are not intercepted by the handlers below.
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        result = response.json()

        if "choices" not in result or len(result["choices"]) == 0:
            print("Error: Invalid response format", file=sys.stderr)
            sys.exit(1)

        # "message" may be absent or null; both count as "no content".
        message = result["choices"][0].get("message") or {}
        content = message.get("content", "")
        if not content:
            print("Error: No content in response", file=sys.stderr)
            sys.exit(1)

        print(content)
    except requests.exceptions.RequestException as e:
        print(f"Error: API request failed - {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary of the CLI: report anything unexpected (for
        # example a response body of an unexpected shape) and exit non-zero.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


def main():
    """Parse command-line arguments and run :func:`analyze_image`."""
    parser = argparse.ArgumentParser(description="Analyze images with vision AI")
    parser.add_argument("image_path", help="Path to image file")
    parser.add_argument("prompt", nargs="?", default="", help="Custom analysis prompt")
    parser.add_argument(
        "--max-tokens", type=int, default=1024, help="Max tokens in response"
    )
    parser.add_argument(
        "--temperature", type=float, default=0.7, help="Response creativity (0-2)"
    )
    parser.add_argument("--model", type=str, default=None, help="Vision model to use")
    args = parser.parse_args()

    analyze_image(
        image_path=args.image_path,
        # An omitted or empty positional prompt selects the default prompt.
        prompt=args.prompt or DEFAULT_PROMPT,
        max_tokens=args.max_tokens,
        temperature=args.temperature,
        model=args.model,
    )


if __name__ == "__main__":
    main()