Remove image-analyze, image-edit, image-generation skills

2026-03-22 11:08:12 +07:00
parent 12515acd5d
commit 1d2a5514d4
15 changed files with 3 additions and 749 deletions
--- a/skills/image-analyze/SKILL.md
+++ b/skills/image-analyze/SKILL.md
@@ -1,57 +0,0 @@
---
-name: image-analyze
-description: Analyze images using vision AI when the current model doesn't support image input. Use this skill when you need to understand, describe, or extract information from images.
---
-
-# Image Analyze
-
-Analyze images with vision AI via `python3 scripts/analyze_image.py <image_path> [prompt]`.
-
-## Commands
-
-| Command | Args | Description |
-|---------|------|-------------|
-| `analyze_image.py` | `<image_path> [prompt]` | Analyze image with optional custom prompt (arguments are passed directly to the script; there is no subcommand) |
-
-## Options
-
-| Option | Default | Description |
-|--------|---------|-------------|
-| `--max-tokens` | 1024 | Maximum tokens in response |
-| `--temperature` | 0.7 | Response creativity (0-2) |
-| `--model` | moonshotai/Kimi-K2.5-TEE | Vision model to use |
-
-## Examples
-
-```bash
-# Basic analysis
-python3 scripts/analyze_image.py photo.jpg
-
-# With custom prompt
-python3 scripts/analyze_image.py diagram.png "Extract all text and explain the workflow"
-
-# Detailed analysis
-python3 scripts/analyze_image.py screenshot.png "Describe all UI elements and their positions"
-
-# OCR-like extraction
-python3 scripts/analyze_image.py document.jpg "Transcribe all text exactly as shown"
-```
-
-## Workflow
-
-1. Provide image path (PNG, JPG, JPEG, GIF, WEBP, BMP)
-2. Optionally provide custom analysis prompt
-3. Script converts image to base64 and sends to vision API
-4. Returns detailed analysis text
-
-## Output Format
-
- Success: Analysis text directly
- Error: `Error: message` (to stderr)
-
-## Notes
-
- Requires `CHUTES_API_TOKEN` in environment
- Uses Kimi-K2.5-TEE vision model via Chutes AI
- Supports common image formats
- Best for: image description, OCR, UI analysis, diagram interpretation
--- a/skills/image-analyze/scripts/.env.example
+++ b/skills/image-analyze/scripts/.env.example
@@ -1,7 +0,0 @@
-# Chutes AI API Token
-# Same token as image-generation and image-edit skills
-# Get your token from your Chutes AI account
-#
-# WARNING: Never commit actual credentials!
-
-CHUTES_API_TOKEN=your_chutes_api_token_here
--- a/skills/image-analyze/scripts/analyze_image.py
+++ b/skills/image-analyze/scripts/analyze_image.py
@@ -1,146 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import sys
-import argparse
-import base64
-from pathlib import Path
-import requests
-
-
-def load_env():
-    env_path = Path(__file__).parent / ".env"
-    if env_path.exists():
-        for line in env_path.read_text().splitlines():
-            line = line.strip()
-            if line and not line.startswith("#") and "=" in line:
-                k, v = line.split("=", 1)
-                os.environ.setdefault(k.strip(), v.strip().strip("\"'"))
-
-
-load_env()
-
-API_TOKEN = os.environ.get("CHUTES_API_TOKEN")
-API_URL = "https://llm.chutes.ai/v1/chat/completions"
-DEFAULT_MODEL = "moonshotai/Kimi-K2.5-TEE"
-
-
-def image_to_base64_url(image_path):
-    if not os.path.exists(image_path):
-        raise FileNotFoundError(f"Image file not found: {image_path}")
-
-    suffix = Path(image_path).suffix.lower()
-    mime_types = {
-        ".png": "image/png",
-        ".jpg": "image/jpeg",
-        ".jpeg": "image/jpeg",
-        ".gif": "image/gif",
-        ".webp": "image/webp",
-        ".bmp": "image/bmp",
-    }
-
-    mime_type = mime_types.get(suffix, "image/jpeg")
-
-    with open(image_path, "rb") as f:
-        image_bytes = f.read()
-
-    encoded = base64.b64encode(image_bytes).decode("utf-8")
-    return f"data:{mime_type};base64,{encoded}"
-
-
-def analyze_image(
-    image_path,
-    prompt="Analyze this image in detail. Describe what you see, including objects, people, text, colors, composition, and any relevant context.",
-    max_tokens=1024,
-    temperature=0.7,
-    model=None,
-):
-    if not API_TOKEN:
-        print("Error: CHUTES_API_TOKEN not set in environment", file=sys.stderr)
-        sys.exit(1)
-
-    if not os.path.exists(image_path):
-        print(f"Error: Image file not found: {image_path}", file=sys.stderr)
-        sys.exit(1)
-
-    image_url = image_to_base64_url(image_path)
-
-    use_model = model or DEFAULT_MODEL
-
-    payload = {
-        "model": use_model,
-        "messages": [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": prompt},
-                    {"type": "image_url", "image_url": {"url": image_url}},
-                ],
-            }
-        ],
-        "max_tokens": max_tokens,
-        "temperature": temperature,
-        "stream": False,
-    }
-
-    try:
-        headers = {
-            "Authorization": f"Bearer {API_TOKEN}",
-            "Content-Type": "application/json",
-        }
-
-        response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
-        response.raise_for_status()
-
-        result = response.json()
-
-        if "choices" in result and len(result["choices"]) > 0:
-            content = result["choices"][0].get("message", {}).get("content", "")
-            if content:
-                print(content)
-            else:
-                print("Error: No content in response", file=sys.stderr)
-                sys.exit(1)
-        else:
-            print("Error: Invalid response format", file=sys.stderr)
-            sys.exit(1)
-
-    except requests.exceptions.RequestException as e:
-        print(f"Error: API request failed - {e}", file=sys.stderr)
-        sys.exit(1)
-    except Exception as e:
-        print(f"Error: {e}", file=sys.stderr)
-        sys.exit(1)
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Analyze images with vision AI")
-    parser.add_argument("image_path", help="Path to image file")
-    parser.add_argument("prompt", nargs="?", default="", help="Custom analysis prompt")
-    parser.add_argument(
-        "--max-tokens", type=int, default=1024, help="Max tokens in response"
-    )
-    parser.add_argument(
-        "--temperature", type=float, default=0.7, help="Response creativity (0-2)"
-    )
-    parser.add_argument("--model", type=str, default=None, help="Vision model to use")
-
-    args = parser.parse_args()
-
-    prompt = (
-        args.prompt
-        if args.prompt
-        else "Analyze this image in detail. Describe what you see, including objects, people, text, colors, composition, and any relevant context."
-    )
-
-    analyze_image(
-        image_path=args.image_path,
-        prompt=prompt,
-        max_tokens=args.max_tokens,
-        temperature=args.temperature,
-        model=args.model,
-    )
-
-
-if __name__ == "__main__":
-    main()
--- a/skills/image-analyze/scripts/requirements.txt
+++ b/skills/image-analyze/scripts/requirements.txt
@@ -1 +0,0 @@
-requests>=2.28.0
--- a/skills/image-edit/SKILL.md
+++ b/skills/image-edit/SKILL.md
@@ -1,63 +0,0 @@
---
-name: image-edit
-description: Edit images using AI with text prompts and input images. Use this skill when the user wants to modify or transform an existing image with AI editing.
---
-
-# Image Edit
-
-Edit images with AI by combining a source image with a text prompt via `python3 scripts/image_edit.py <prompt> <image_path> [options]`.
-
-## Commands
-
-| Command | Args | Description |
-|---------|------|-------------|
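-| `image_edit.py` | `<prompt> <image_path> [--width W] [--height H] [--steps N] [--seed N] [--cfg-scale N] [--negative-prompt TEXT]` | Edit image with prompt (no subcommand; the prompt is the first positional argument) |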
-
-## Options
-
-| Option | Default | Range | Description |
-|--------|---------|-------|-------------|
-| `--width` | 1024 | 128-2048 | Output image width in pixels |
-| `--height` | 1024 | 128-2048 | Output image height in pixels |
-| `--steps` | 40 | 5-100 | Number of inference steps |
-| `--seed` | null | 0-4294967295 | Random seed (null = random) |
-| `--cfg-scale` | 4 | 0-10 | True CFG scale for guidance |
-| `--negative-prompt` | "" | - | Negative prompt to avoid |
-
-## Examples
-
-```bash
-# Basic edit
-python3 scripts/image_edit.py "make it look like oil painting" photo.jpg
-
-# Style transfer
-python3 scripts/image_edit.py "convert to anime style" portrait.png
-
-# Object modification
-python3 scripts/image_edit.py "change the car color to red" street.jpg --steps 50
-
-# With negative prompt
-python3 scripts/image_edit.py "add a sunset background" landscape.png --negative-prompt "water, ocean"
-```
-
-## Workflow
-
-1. Provide a `prompt` describing the desired edit
-2. Provide an `image_path` to the source image (PNG, JPG, etc.)
-3. Script converts image to base64 and sends to API
-4. Saves edited image as `edited_[timestamp].jpg`
-5. Prints the result line to stdout: `Image saved: edited_1234567890.jpg [1234567890]`
-
-## Output Format
-
- Success: `Image saved: filename.jpg [id]` (to stdout; `id` is the same Unix timestamp used in the filename)
- Error: `Error: message` (to stderr)
- Images saved to current working directory as JPEG files
-
-## Notes
-
- Requires `CHUTES_API_TOKEN` in environment
- Supports up to 3 input images (currently uses first image)
- Input file must be a valid image format (PNG, JPG, etc.)
- Output is always written with a `.jpg` extension; the image bytes returned by the API are saved unchanged (no format conversion is performed)
- Images are saved locally, not returned as base64 to save memory
--- a/skills/image-edit/scripts/.env.example
+++ b/skills/image-edit/scripts/.env.example
@@ -1,7 +0,0 @@
-# Chutes AI API Token
-# Get your token from your Chutes AI account
-#
-# WARNING: Never commit this file with actual credentials!
-# Keep your .env file private and add it to .gitignore
-
-CHUTES_API_TOKEN=your_chutes_api_token_here
--- a/skills/image-edit/scripts/image_edit.py
+++ b/skills/image-edit/scripts/image_edit.py
@@ -1,165 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import sys
-import argparse
-import time
-import base64
-from pathlib import Path
-import requests
-
-
-def load_env():
-    env_path = Path(__file__).parent / ".env"
-    if env_path.exists():
-        for line in env_path.read_text().splitlines():
-            line = line.strip()
-            if line and not line.startswith("#") and "=" in line:
-                k, v = line.split("=", 1)
-                os.environ.setdefault(k.strip(), v.strip().strip("\"'"))
-
-
-load_env()
-
-API_TOKEN = os.environ.get("CHUTES_API_TOKEN")
-API_URL = "https://chutes-qwen-image-edit-2511.chutes.ai/generate"
-
-
-def image_to_base64(image_path):
-    if not os.path.exists(image_path):
-        raise FileNotFoundError(f"Image file not found: {image_path}")
-
-    with open(image_path, "rb") as f:
-        image_bytes = f.read()
-
-    return base64.b64encode(image_bytes).decode("utf-8")
-
-
-def edit_image(
-    prompt,
-    image_path,
-    width=1024,
-    height=1024,
-    steps=40,
-    seed=None,
-    cfg_scale=4,
-    negative_prompt="",
-):
-    if not API_TOKEN:
-        print("Error: CHUTES_API_TOKEN not set in environment", file=sys.stderr)
-        sys.exit(1)
-
-    if not os.path.exists(image_path):
-        print(f"Error: Image file not found: {image_path}", file=sys.stderr)
-        sys.exit(1)
-
-    if not prompt:
-        print("Error: Prompt cannot be empty", file=sys.stderr)
-        sys.exit(1)
-
-    image_b64 = image_to_base64(image_path)
-
-    payload = {
-        "seed": seed,
-        "width": width,
-        "height": height,
-        "prompt": prompt,
-        "image_b64s": [image_b64],
-        "true_cfg_scale": cfg_scale,
-        "negative_prompt": negative_prompt,
-        "num_inference_steps": steps,
-    }
-
-    try:
-        headers = {
-            "Authorization": f"Bearer {API_TOKEN}",
-            "Content-Type": "application/json",
-        }
-
-        response = requests.post(API_URL, headers=headers, json=payload, timeout=300)
-        response.raise_for_status()
-
-        content_type = response.headers.get("Content-Type", "")
-
-        if "image/" in content_type:
-            image_bytes = response.content
-        else:
-            result = response.json()
-            if isinstance(result, list) and len(result) > 0:
-                item = result[0]
-                image_data = item.get("data", "")
-                if image_data.startswith("data:image"):
-                    image_bytes = base64.b64decode(image_data.split(",", 1)[1])
-                else:
-                    image_bytes = base64.b64decode(image_data)
-            else:
-                print("Error: Invalid response format", file=sys.stderr)
-                sys.exit(1)
-
-        timestamp = int(time.time())
-        filename = f"edited_{timestamp}.jpg"
-
-        with open(filename, "wb") as f:
-            f.write(image_bytes)
-
-        print(f"Image saved: {filename} [{timestamp}]")
-
-    except requests.exceptions.RequestException as e:
-        print(f"Error: API request failed - {e}", file=sys.stderr)
-        sys.exit(1)
-    except Exception as e:
-        print(f"Error: {e}", file=sys.stderr)
-        sys.exit(1)
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Edit images with AI")
-    parser.add_argument("prompt", help="Text prompt describing the edit")
-    parser.add_argument("image_path", help="Path to input image file")
-    parser.add_argument(
-        "--width", type=int, default=1024, help="Output width (128-2048)"
-    )
-    parser.add_argument(
-        "--height", type=int, default=1024, help="Output height (128-2048)"
-    )
-    parser.add_argument("--steps", type=int, default=40, help="Inference steps (5-100)")
-    parser.add_argument("--seed", type=int, default=None, help="Random seed")
-    parser.add_argument(
-        "--cfg-scale", type=float, default=4, help="True CFG scale (0-10)"
-    )
-    parser.add_argument(
-        "--negative-prompt", type=str, default="", help="Negative prompt"
-    )
-
-    args = parser.parse_args()
-
-    if not (128 <= args.width <= 2048):
-        print("Error: width must be between 128 and 2048", file=sys.stderr)
-        sys.exit(1)
-    if not (128 <= args.height <= 2048):
-        print("Error: height must be between 128 and 2048", file=sys.stderr)
-        sys.exit(1)
-    if not (5 <= args.steps <= 100):
-        print("Error: steps must be between 5 and 100", file=sys.stderr)
-        sys.exit(1)
-    if args.seed is not None and not (0 <= args.seed <= 4294967295):
-        print("Error: seed must be between 0 and 4294967295", file=sys.stderr)
-        sys.exit(1)
-    if not (0 <= args.cfg_scale <= 10):
-        print("Error: cfg-scale must be between 0 and 10", file=sys.stderr)
-        sys.exit(1)
-
-    edit_image(
-        prompt=args.prompt,
-        image_path=args.image_path,
-        width=args.width,
-        height=args.height,
-        steps=args.steps,
-        seed=args.seed,
-        cfg_scale=args.cfg_scale,
-        negative_prompt=args.negative_prompt,
-    )
-
-
-if __name__ == "__main__":
-    main()
--- a/skills/image-edit/scripts/requirements.txt
+++ b/skills/image-edit/scripts/requirements.txt
@@ -1 +0,0 @@
-requests>=2.28.0
--- a/skills/image-generation/SKILL.md
+++ b/skills/image-generation/SKILL.md
@@ -1,61 +0,0 @@
---
-name: image-generation
-description: Generate images from text prompts using Chutes AI image generation. Use this skill when the user wants to create AI-generated images from descriptions.
---
-
-# Image Generation
-
-Generate AI images from text prompts via `python3 scripts/image_gen.py <prompt> [options]`.
-
-## Commands
-
-| Command | Args | Description |
-|---------|------|-------------|
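-| `image_gen.py` | `<prompt> [--width W] [--height H] [--steps N] [--seed N] [--guidance-scale N] [--shift N] [--max-seq-len N]` | Generate image from prompt (no subcommand; the prompt is the only positional argument) |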
-
-## Options
-
-| Option | Default | Range | Description |
-|--------|---------|-------|-------------|
-| `--width` | 1024 | 576-2048 | Image width in pixels |
-| `--height` | 1024 | 576-2048 | Image height in pixels |
-| `--steps` | 9 | 1-100 | Number of inference steps |
-| `--seed` | null | 0-4294967295 | Random seed (null = random) |
-| `--guidance-scale` | 0 | 0-5 | Guidance scale for generation |
-| `--shift` | 3 | 1-10 | Shift parameter |
-| `--max-seq-len` | 512 | 256-2048 | Max sequence length |
-
-## Examples
-
-```bash
-# Basic generation
-python3 scripts/image_gen.py "a high quality photo of a sunrise over the mountains"
-
-# Custom dimensions
-python3 scripts/image_gen.py "a futuristic city at night" --width 1280 --height 720
-
-# With seed for reproducibility
-python3 scripts/image_gen.py "a cute cat sitting on a windowsill" --seed 42
-
-# High quality with more steps
-python3 scripts/image_gen.py "a detailed portrait of a woman in renaissance style" --steps 20
-```
-
-## Workflow
-
-1. Run `image_gen.py` with your prompt as the first argument
-2. Script saves image as `generated_[timestamp].png`
-3. Prints the result line to stdout: `Image saved: generated_1234567890.png [1234567890]`
-
-## Output Format
-
- Success: `Image saved: filename.png [id]`
- Error: `Error: message` (to stderr)
- Images saved to current working directory as PNG files
-
-## Notes
-
- Requires `CHUTES_API_TOKEN` in environment
- Prompt length: 3-1200 characters
- Large images (2048x2048) take longer to generate
- Images are saved locally, not returned as base64 to save memory
--- a/skills/image-generation/scripts/.env.example
+++ b/skills/image-generation/scripts/.env.example
@@ -1,7 +0,0 @@
-# Chutes AI API Token
-# Get your token from your Chutes AI account
-#
-# WARNING: Never commit this file with actual credentials!
-# Keep your .env file private and add it to .gitignore
-
-CHUTES_API_TOKEN=your_chutes_api_token_here
--- a/skills/image-generation/scripts/image_gen.py
+++ b/skills/image-generation/scripts/image_gen.py
@@ -1,160 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import sys
-import argparse
-import time
-from pathlib import Path
-import requests
-import base64
-
-
-def load_env():
-    env_path = Path(__file__).parent / ".env"
-    if env_path.exists():
-        for line in env_path.read_text().splitlines():
-            line = line.strip()
-            if line and not line.startswith("#") and "=" in line:
-                k, v = line.split("=", 1)
-                os.environ.setdefault(k.strip(), v.strip().strip("\"'"))
-
-
-load_env()
-
-API_TOKEN = os.environ.get("CHUTES_API_TOKEN")
-API_URL = "https://chutes-z-image-turbo.chutes.ai/generate"
-
-
-def generate_image(
-    prompt,
-    width=1024,
-    height=1024,
-    steps=9,
-    seed=None,
-    guidance_scale=0,
-    shift=3,
-    max_seq_len=512,
-):
-    if not API_TOKEN:
-        print("Error: CHUTES_API_TOKEN not set in environment", file=sys.stderr)
-        sys.exit(1)
-
-    if not prompt or len(prompt) < 3:
-        print("Error: Prompt must be at least 3 characters", file=sys.stderr)
-        sys.exit(1)
-    if len(prompt) > 1200:
-        print(
-            "Error: Prompt exceeds maximum length of 1200 characters", file=sys.stderr
-        )
-        sys.exit(1)
-
-    payload = {
-        "prompt": prompt,
-        "width": width,
-        "height": height,
-        "num_inference_steps": steps,
-        "guidance_scale": guidance_scale,
-        "shift": shift,
-        "max_sequence_length": max_seq_len,
-        "seed": seed,
-    }
-
-    try:
-        headers = {
-            "Authorization": f"Bearer {API_TOKEN}",
-            "Content-Type": "application/json",
-        }
-
-        response = requests.post(API_URL, headers=headers, json=payload, timeout=300)
-        response.raise_for_status()
-
-        content_type = response.headers.get("Content-Type", "")
-
-        if "image/" in content_type:
-            image_bytes = response.content
-        else:
-            result = response.json()
-            if isinstance(result, list) and len(result) > 0:
-                item = result[0]
-                image_data = item.get("data", "")
-                if image_data.startswith("data:image"):
-                    image_bytes = base64.b64decode(image_data.split(",", 1)[1])
-                else:
-                    image_bytes = base64.b64decode(image_data)
-            else:
-                print("Error: Invalid response format", file=sys.stderr)
-                sys.exit(1)
-
-        timestamp = int(time.time())
-        filename = f"generated_{timestamp}.png"
-
-        with open(filename, "wb") as f:
-            f.write(image_bytes)
-
-        print(f"Image saved: {filename} [{timestamp}]")
-
-    except requests.exceptions.RequestException as e:
-        print(f"Error: API request failed - {e}", file=sys.stderr)
-        sys.exit(1)
-    except Exception as e:
-        print(f"Error: {e}", file=sys.stderr)
-        sys.exit(1)
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Generate images from text prompts")
-    parser.add_argument("prompt", help="Text prompt for image generation")
-    parser.add_argument(
-        "--width", type=int, default=1024, help="Image width (576-2048)"
-    )
-    parser.add_argument(
-        "--height", type=int, default=1024, help="Image height (576-2048)"
-    )
-    parser.add_argument("--steps", type=int, default=9, help="Inference steps (1-100)")
-    parser.add_argument("--seed", type=int, default=None, help="Random seed")
-    parser.add_argument(
-        "--guidance-scale", type=float, default=0, help="Guidance scale (0-5)"
-    )
-    parser.add_argument("--shift", type=float, default=3, help="Shift parameter (1-10)")
-    parser.add_argument(
-        "--max-seq-len", type=int, default=512, help="Max sequence length (256-2048)"
-    )
-
-    args = parser.parse_args()
-
-    if not (576 <= args.width <= 2048):
-        print("Error: width must be between 576 and 2048", file=sys.stderr)
-        sys.exit(1)
-    if not (576 <= args.height <= 2048):
-        print("Error: height must be between 576 and 2048", file=sys.stderr)
-        sys.exit(1)
-    if not (1 <= args.steps <= 100):
-        print("Error: steps must be between 1 and 100", file=sys.stderr)
-        sys.exit(1)
-    if args.seed is not None and not (0 <= args.seed <= 4294967295):
-        print("Error: seed must be between 0 and 4294967295", file=sys.stderr)
-        sys.exit(1)
-    if not (0 <= args.guidance_scale <= 5):
-        print("Error: guidance-scale must be between 0 and 5", file=sys.stderr)
-        sys.exit(1)
-    if not (1 <= args.shift <= 10):
-        print("Error: shift must be between 1 and 10", file=sys.stderr)
-        sys.exit(1)
-    if not (256 <= args.max_seq_len <= 2048):
-        print("Error: max-seq-len must be between 256 and 2048", file=sys.stderr)
-        sys.exit(1)
-
-    generate_image(
-        prompt=args.prompt,
-        width=args.width,
-        height=args.height,
-        steps=args.steps,
-        seed=args.seed,
-        guidance_scale=args.guidance_scale,
-        shift=args.shift,
-        max_seq_len=args.max_seq_len,
-    )
-
-
-if __name__ == "__main__":
-    main()
--- a/skills/image-generation/scripts/requirements.txt
+++ b/skills/image-generation/scripts/requirements.txt
@@ -1 +0,0 @@
-requests>=2.28.0