Auto-sync from website-creator
This commit is contained in:
57
skills/image-analyze/SKILL.md
Normal file
57
skills/image-analyze/SKILL.md
Normal file
@@ -0,0 +1,57 @@
|
||||
---
|
||||
name: image-analyze
|
||||
description: Analyze images using vision AI when the current model doesn't support image input. Use this skill when you need to understand, describe, or extract information from images.
|
||||
---
|
||||
|
||||
# Image Analyze
|
||||
|
||||
Analyze images with vision AI via `python3 scripts/analyze_image.py <image_path> [prompt]`.
|
||||
|
||||
## Commands
|
||||
|
||||
| Command | Args | Description |
|
||||
|---------|------|-------------|
|
||||
| `analyze` | `<image_path> [prompt]` | Analyze image with optional custom prompt |
|
||||
|
||||
## Options
|
||||
|
||||
| Option | Default | Description |
|
||||
|--------|---------|-------------|
|
||||
| `--max-tokens` | 1024 | Maximum tokens in response |
|
||||
| `--temperature` | 0.7 | Response creativity (0-2) |
|
||||
| `--model` | moonshotai/Kimi-K2.5-TEE | Vision model to use |
|
||||
|
||||
## Examples
|
||||
|
||||
```bash
|
||||
# Basic analysis
|
||||
python3 scripts/analyze_image.py photo.jpg
|
||||
|
||||
# With custom prompt
|
||||
python3 scripts/analyze_image.py diagram.png "Extract all text and explain the workflow"
|
||||
|
||||
# Detailed analysis
|
||||
python3 scripts/analyze_image.py screenshot.png "Describe all UI elements and their positions"
|
||||
|
||||
# OCR-like extraction
|
||||
python3 scripts/analyze_image.py document.jpg "Transcribe all text exactly as shown"
|
||||
```
|
||||
|
||||
## Workflow
|
||||
|
||||
1. Provide image path (PNG, JPG, JPEG, GIF, WEBP, BMP)
|
||||
2. Optionally provide custom analysis prompt
|
||||
3. Script converts image to base64 and sends to vision API
|
||||
4. Returns detailed analysis text
|
||||
|
||||
## Output Format
|
||||
|
||||
- Success: Analysis text directly
|
||||
- Error: `Error: message` (to stderr)
|
||||
|
||||
## Notes
|
||||
|
||||
- Requires `CHUTES_API_TOKEN` in environment
|
||||
- Uses Kimi-K2.5-TEE vision model via Chutes AI
|
||||
- Supports common image formats
|
||||
- Best for: image description, OCR, UI analysis, diagram interpretation
|
||||
7
skills/image-analyze/scripts/.env.example
Normal file
7
skills/image-analyze/scripts/.env.example
Normal file
@@ -0,0 +1,7 @@
|
||||
# Chutes AI API Token
|
||||
# Same token as image-generation and image-edit skills
|
||||
# Get your token from your Chutes AI account
|
||||
#
|
||||
# WARNING: Never commit actual credentials!
|
||||
|
||||
CHUTES_API_TOKEN=your_chutes_api_token_here
|
||||
146
skills/image-analyze/scripts/analyze_image.py
Executable file
146
skills/image-analyze/scripts/analyze_image.py
Executable file
@@ -0,0 +1,146 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import base64
|
||||
from pathlib import Path
|
||||
import requests
|
||||
|
||||
|
||||
def load_env():
    """Populate os.environ from a .env file located beside this script.

    Each non-blank, non-comment line of the form KEY=VALUE is applied via
    setdefault, so variables already present in the environment are never
    overwritten. Surrounding single or double quotes on values are stripped.
    Silently does nothing when no .env file exists.
    """
    dotenv = Path(__file__).parent / ".env"
    if not dotenv.exists():
        return
    for raw in dotenv.read_text().splitlines():
        entry = raw.strip()
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        key, _, value = entry.partition("=")
        os.environ.setdefault(key.strip(), value.strip().strip("\"'"))


load_env()

# API configuration. The token must come from the environment (or the .env
# file loaded above); the model can be overridden per call via --model.
API_TOKEN = os.environ.get("CHUTES_API_TOKEN")
API_URL = "https://llm.chutes.ai/v1/chat/completions"
DEFAULT_MODEL = "moonshotai/Kimi-K2.5-TEE"
|
||||
|
||||
|
||||
def image_to_base64_url(image_path):
    """Return the image at *image_path* encoded as a base64 data: URL.

    The MIME type is inferred from the file extension (png/jpg/jpeg/gif/
    webp/bmp); any unrecognized extension falls back to image/jpeg.

    Raises:
        FileNotFoundError: if *image_path* does not exist.
    """
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    extension_to_mime = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".webp": "image/webp",
        ".bmp": "image/bmp",
    }
    mime_type = extension_to_mime.get(
        Path(image_path).suffix.lower(), "image/jpeg"
    )

    payload = base64.b64encode(Path(image_path).read_bytes()).decode("utf-8")
    return f"data:{mime_type};base64,{payload}"
|
||||
|
||||
|
||||
def analyze_image(
    image_path,
    prompt="Analyze this image in detail. Describe what you see, including objects, people, text, colors, composition, and any relevant context.",
    max_tokens=1024,
    temperature=0.7,
    model=None,
):
    """Send an image plus prompt to the Chutes vision API and print the reply.

    Args:
        image_path: Path to a local image file (see image_to_base64_url for
            the supported formats).
        prompt: Text instruction sent alongside the image.
        max_tokens: Upper bound on tokens in the model response.
        temperature: Sampling temperature forwarded to the API.
        model: Vision model identifier; falls back to DEFAULT_MODEL when None.

    On success the model's text answer is printed to stdout. On any failure
    (missing token, missing file, HTTP failure, malformed response) an error
    message goes to stderr and the process exits with status 1.
    """
    if not API_TOKEN:
        print("Error: CHUTES_API_TOKEN not set in environment", file=sys.stderr)
        sys.exit(1)

    if not os.path.exists(image_path):
        print(f"Error: Image file not found: {image_path}", file=sys.stderr)
        sys.exit(1)

    # Build the OpenAI-compatible chat payload: one user message carrying
    # both the text prompt and the base64-encoded image.
    request_body = {
        "model": model or DEFAULT_MODEL,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": image_to_base64_url(image_path)},
                    },
                ],
            }
        ],
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": False,
    }

    try:
        response = requests.post(
            API_URL,
            headers={
                "Authorization": f"Bearer {API_TOKEN}",
                "Content-Type": "application/json",
            },
            json=request_body,
            timeout=120,
        )
        response.raise_for_status()
        result = response.json()

        if "choices" in result and len(result["choices"]) > 0:
            content = result["choices"][0].get("message", {}).get("content", "")
            if not content:
                print("Error: No content in response", file=sys.stderr)
                sys.exit(1)
            print(content)
        else:
            print("Error: Invalid response format", file=sys.stderr)
            sys.exit(1)

    except requests.exceptions.RequestException as e:
        print(f"Error: API request failed - {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Catch-all for unexpected failures (e.g. non-JSON body); SystemExit
        # from the branches above is a BaseException and passes through.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse command-line arguments and run the analysis."""
    parser = argparse.ArgumentParser(description="Analyze images with vision AI")
    parser.add_argument("image_path", help="Path to image file")
    parser.add_argument("prompt", nargs="?", default="", help="Custom analysis prompt")
    parser.add_argument(
        "--max-tokens", type=int, default=1024, help="Max tokens in response"
    )
    parser.add_argument(
        "--temperature", type=float, default=0.7, help="Response creativity (0-2)"
    )
    parser.add_argument("--model", type=str, default=None, help="Vision model to use")
    args = parser.parse_args()

    # An empty positional prompt means "use the generic descriptive prompt".
    chosen_prompt = (
        args.prompt
        or "Analyze this image in detail. Describe what you see, including objects, people, text, colors, composition, and any relevant context."
    )

    analyze_image(
        image_path=args.image_path,
        prompt=chosen_prompt,
        max_tokens=args.max_tokens,
        temperature=args.temperature,
        model=args.model,
    )


if __name__ == "__main__":
    main()
|
||||
1
skills/image-analyze/scripts/requirements.txt
Normal file
1
skills/image-analyze/scripts/requirements.txt
Normal file
@@ -0,0 +1 @@
|
||||
requests>=2.28.0
|
||||
Reference in New Issue
Block a user