Remove image-analyze, image-edit, image-generation skills

This commit is contained in:
Kunthawat Greethong
2026-03-22 11:08:12 +07:00
parent 12515acd5d
commit 1d2a5514d4
15 changed files with 3 additions and 749 deletions

View File

@@ -1,57 +0,0 @@
---
name: image-analyze
description: Analyze images using vision AI when the current model doesn't support image input. Use this skill when you need to understand, describe, or extract information from images.
---
# Image Analyze
Analyze images with vision AI via `python3 scripts/analyze_image.py <image_path> [prompt]`.
## Commands
| Command | Args | Description |
|---------|------|-------------|
| `analyze` | `<image_path> [prompt]` | Analyze image with optional custom prompt |
## Options
| Option | Default | Description |
|--------|---------|-------------|
| `--max-tokens` | 1024 | Maximum tokens in response |
| `--temperature` | 0.7 | Response creativity (0-2) |
| `--model` | moonshotai/Kimi-K2.5-TEE | Vision model to use |
## Examples
```bash
# Basic analysis
python3 scripts/analyze_image.py photo.jpg
# With custom prompt
python3 scripts/analyze_image.py diagram.png "Extract all text and explain the workflow"
# Detailed analysis
python3 scripts/analyze_image.py screenshot.png "Describe all UI elements and their positions"
# OCR-like extraction
python3 scripts/analyze_image.py document.jpg "Transcribe all text exactly as shown"
```
## Workflow
1. Provide image path (PNG, JPG, JPEG, GIF, WEBP, BMP)
2. Optionally provide custom analysis prompt
3. Script converts image to base64 and sends to vision API
4. Returns detailed analysis text
## Output Format
- Success: Analysis text directly
- Error: `Error: message` (to stderr)
## Notes
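- Requires `CHUTES_API_TOKEN` in the environment, or in a `.env` file placed next to the script (a variable already set in the environment takes precedence)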
- Uses Kimi-K2.5-TEE vision model via Chutes AI
- Supports common image formats
- Best for: image description, OCR, UI analysis, diagram interpretation

View File

@@ -1,7 +0,0 @@
# Chutes AI API Token
# Same token as image-generation and image-edit skills
# Get your token from your Chutes AI account
#
# WARNING: Never commit actual credentials!
CHUTES_API_TOKEN=your_chutes_api_token_here

View File

@@ -1,146 +0,0 @@
#!/usr/bin/env python3
import os
import sys
import argparse
import base64
from pathlib import Path
import requests
def load_env(env_path=None):
    """Copy ``KEY=VALUE`` pairs from a ``.env`` file into ``os.environ``.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped.  Each line is split on its first ``=``; surrounding whitespace
    and any leading/trailing quote characters are stripped from the value.
    Variables that already exist in the environment are left untouched.

    Args:
        env_path: Optional path of the file to read.  Defaults to ``.env``
            in the directory that contains this script.
    """
    path = Path(__file__).parent / ".env" if env_path is None else Path(env_path)
    # is_file() rather than exists(): a directory named ".env" must not be read.
    if not path.is_file():
        return
    # Explicit encoding so parsing does not depend on the platform locale.
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        key, _, value = entry.partition("=")
        key = key.strip()
        if not key:
            # "=value" has no name; os.environ rejects empty names.
            continue
        os.environ.setdefault(key, value.strip().strip("\"'"))
# Import any variables from a local .env file before the token is read below.
load_env()
# Bearer token for the API; None when CHUTES_API_TOKEN is not set.
API_TOKEN = os.environ.get("CHUTES_API_TOKEN")
# Chat-completions endpoint that analyze_image() posts to.
API_URL = "https://llm.chutes.ai/v1/chat/completions"
# Model used when the caller does not supply one.
DEFAULT_MODEL = "moonshotai/Kimi-K2.5-TEE"
def image_to_base64_url(image_path):
    """Return the file at *image_path* encoded as a base64 ``data:`` URL.

    The MIME type is taken from the file extension.  Extensions outside the
    built-in table are looked up with :mod:`mimetypes`; if that does not
    yield an ``image/*`` type, ``image/jpeg`` is used as before.

    Args:
        image_path: Path of the image file to encode.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        FileNotFoundError: If *image_path* is not an existing regular file.
    """
    # Local import keeps this block self-contained; only the fallback needs it.
    import mimetypes

    path = Path(image_path)
    # is_file() also rejects directories, which would otherwise pass an
    # exists() check and fail later with an unrelated error.
    if not path.is_file():
        raise FileNotFoundError(f"Image file not found: {image_path}")

    known_types = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".webp": "image/webp",
        ".bmp": "image/bmp",
    }
    mime_type = known_types.get(path.suffix.lower())
    if mime_type is None:
        guessed, _ = mimetypes.guess_type(path.name)
        if guessed and guessed.startswith("image/"):
            mime_type = guessed
        else:
            mime_type = "image/jpeg"

    encoded = base64.b64encode(path.read_bytes()).decode("utf-8")
    return f"data:{mime_type};base64,{encoded}"
def analyze_image(
    image_path,
    prompt="Analyze this image in detail. Describe what you see, including objects, people, text, colors, composition, and any relevant context.",
    max_tokens=1024,
    temperature=0.7,
    model=None,
):
    """Send an image plus a text prompt to the vision API and print the reply.

    On success the reply text is printed to stdout.  Every failure prints a
    single ``Error: ...`` line to stderr and exits the process with status 1.

    Args:
        image_path: Path of the image file to analyze.
        prompt: Instruction sent alongside the image.
        max_tokens: Value of the request's ``max_tokens`` field.
        temperature: Value of the request's ``temperature`` field.
        model: Model name; ``DEFAULT_MODEL`` is used when falsy.
    """
    if not API_TOKEN:
        print("Error: CHUTES_API_TOKEN not set in environment", file=sys.stderr)
        sys.exit(1)
    if not os.path.exists(image_path):
        print(f"Error: Image file not found: {image_path}", file=sys.stderr)
        sys.exit(1)

    # Reading the file can still fail (directory, permissions, ...); report
    # that in the same "Error: ..." format instead of an uncaught traceback.
    try:
        image_url = image_to_base64_url(image_path)
    except OSError as exc:
        print(f"Error: Could not read image file - {exc}", file=sys.stderr)
        sys.exit(1)

    payload = {
        "model": model or DEFAULT_MODEL,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": image_url}},
                ],
            }
        ],
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": False,
    }
    headers = {
        "Authorization": f"Bearer {API_TOKEN}",
        "Content-Type": "application/json",
    }

    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        result = response.json()

        choices = result.get("choices") if isinstance(result, dict) else None
        if not choices:
            print("Error: Invalid response format", file=sys.stderr)
            sys.exit(1)

        # "message" may be present but null; treat that like a missing reply.
        message = choices[0].get("message") or {}
        content = message.get("content", "")
        if not content:
            print("Error: No content in response", file=sys.stderr)
            sys.exit(1)
        print(content)
    except requests.exceptions.RequestException as e:
        print(f"Error: API request failed - {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Last-resort boundary for the CLI: keep the one-line error contract.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
def main(argv=None):
    """Parse command-line arguments and run :func:`analyze_image`.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Out-of-range ``--max-tokens`` or ``--temperature`` values print an
    ``Error: ...`` line to stderr and exit with status 1 before any request
    is made.
    """
    parser = argparse.ArgumentParser(description="Analyze images with vision AI")
    parser.add_argument("image_path", help="Path to image file")
    parser.add_argument("prompt", nargs="?", default="", help="Custom analysis prompt")
    parser.add_argument(
        "--max-tokens", type=int, default=1024, help="Max tokens in response"
    )
    parser.add_argument(
        "--temperature", type=float, default=0.7, help="Response creativity (0-2)"
    )
    parser.add_argument("--model", type=str, default=None, help="Vision model to use")
    args = parser.parse_args(argv)

    # Enforce the ranges the help text advertises before spending an API call.
    if args.max_tokens < 1:
        print("Error: max-tokens must be at least 1", file=sys.stderr)
        sys.exit(1)
    if not (0 <= args.temperature <= 2):
        print("Error: temperature must be between 0 and 2", file=sys.stderr)
        sys.exit(1)

    # An omitted or empty prompt falls back to the generic description request.
    prompt = (
        args.prompt
        if args.prompt
        else "Analyze this image in detail. Describe what you see, including objects, people, text, colors, composition, and any relevant context."
    )
    analyze_image(
        image_path=args.image_path,
        prompt=prompt,
        max_tokens=args.max_tokens,
        temperature=args.temperature,
        model=args.model,
    )


if __name__ == "__main__":
    main()

View File

@@ -1 +0,0 @@
requests>=2.28.0

View File

@@ -1,63 +0,0 @@
---
name: image-edit
description: Edit images using AI with text prompts and input images. Use this skill when the user wants to modify or transform an existing image with AI editing.
---
# Image Edit
Edit images with AI by combining source images with text prompts via `python3 scripts/image_edit.py edit <prompt> <image_path> [options]`.
## Commands
| Command | Args | Description |
|---------|------|-------------|
| `edit` | `<prompt> <image_path> [--width W] [--height H] [--steps N] [--cfg-scale N]` | Edit image with prompt |
## Options
| Option | Default | Range | Description |
|--------|---------|-------|-------------|
| `--width` | 1024 | 128-2048 | Output image width in pixels |
| `--height` | 1024 | 128-2048 | Output image height in pixels |
| `--steps` | 40 | 5-100 | Number of inference steps |
| `--seed` | null | 0-4294967295 | Random seed (null = random) |
| `--cfg-scale` | 4 | 0-10 | True CFG scale for guidance |
| `--negative-prompt` | "" | - | Negative prompt to avoid |
## Examples
```bash
# Basic edit
python3 scripts/image_edit.py edit "make it look like oil painting" photo.jpg
# Style transfer
python3 scripts/image_edit.py edit "convert to anime style" portrait.png
# Object modification
python3 scripts/image_edit.py edit "change the car color to red" street.jpg --steps 50
# With negative prompt
python3 scripts/image_edit.py edit "add a sunset background" landscape.png --negative-prompt "water, ocean"
```
## Workflow
1. Provide a `prompt` describing the desired edit
2. Provide an `image_path` to the source image (PNG, JPG, etc.)
3. Script converts image to base64 and sends to API
4. Saves edited image as `edited_[timestamp].jpg`
5. Prints the result line: `Image saved: edited_1234567890.jpg [1234567890]` (the bracketed id is the same Unix timestamp used in the filename)
## Output Format
- Success: `Image saved: filename.jpg [id]`
- Error: `Error: message` (to stderr)
- Images saved to current working directory as JPEG files
## Notes
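- Requires `CHUTES_API_TOKEN` in the environment, or in a `.env` file placed next to the script (a variable already set in the environment takes precedence)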
- Supports up to 3 input images (currently uses first image)
- Input file must be a valid image format (PNG, JPG, etc.)
- Output is always JPEG format to save memory
- Images are saved locally, not returned as base64 to save memory

View File

@@ -1,7 +0,0 @@
# Chutes AI API Token
# Get your token from your Chutes AI account
#
# WARNING: Never commit this file with actual credentials!
# Keep your .env file private and add it to .gitignore
CHUTES_API_TOKEN=your_chutes_api_token_here

View File

@@ -1,165 +0,0 @@
#!/usr/bin/env python3
import os
import sys
import argparse
import time
import base64
from pathlib import Path
import requests
def load_env(env_path=None):
    """Copy ``KEY=VALUE`` pairs from a ``.env`` file into ``os.environ``.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped.  Each line is split on its first ``=``; surrounding whitespace
    and any leading/trailing quote characters are stripped from the value.
    Variables that already exist in the environment are left untouched.

    Args:
        env_path: Optional path of the file to read.  Defaults to ``.env``
            in the directory that contains this script.
    """
    path = Path(__file__).parent / ".env" if env_path is None else Path(env_path)
    # is_file() rather than exists(): a directory named ".env" must not be read.
    if not path.is_file():
        return
    # Explicit encoding so parsing does not depend on the platform locale.
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        key, _, value = entry.partition("=")
        key = key.strip()
        if not key:
            # "=value" has no name; os.environ rejects empty names.
            continue
        os.environ.setdefault(key, value.strip().strip("\"'"))
# Import any variables from a local .env file before the token is read below.
load_env()
# Bearer token for the API; None when CHUTES_API_TOKEN is not set.
API_TOKEN = os.environ.get("CHUTES_API_TOKEN")
# Image-edit endpoint that edit_image() posts to.
API_URL = "https://chutes-qwen-image-edit-2511.chutes.ai/generate"
def image_to_base64(image_path):
    """Return the contents of *image_path* as a base64-encoded string.

    Args:
        image_path: Path of the image file to encode.

    Returns:
        The file's bytes, base64-encoded and decoded to ``str``.

    Raises:
        FileNotFoundError: If *image_path* is not an existing regular file.
    """
    path = Path(image_path)
    # is_file() also rejects directories, which would otherwise pass an
    # exists() check and fail later with an unrelated error.
    if not path.is_file():
        raise FileNotFoundError(f"Image file not found: {image_path}")
    return base64.b64encode(path.read_bytes()).decode("utf-8")
def _image_bytes_from_response(response):
    """Return the image bytes in *response*, or ``None`` if its shape is unexpected."""
    if "image/" in response.headers.get("Content-Type", ""):
        return response.content
    result = response.json()
    if not (isinstance(result, list) and result and isinstance(result[0], dict)):
        return None
    image_data = result[0].get("data") or ""
    if image_data.startswith("data:image"):
        # Drop the "data:image/...;base64," prefix; partition() cannot raise
        # if the comma is missing.
        _, _, image_data = image_data.partition(",")
    return base64.b64decode(image_data)


def edit_image(
    prompt,
    image_path,
    width=1024,
    height=1024,
    steps=40,
    seed=None,
    cfg_scale=4,
    negative_prompt="",
):
    """Send an image and an edit prompt to the API and save the returned image.

    On success the image is written to ``edited_<unix-timestamp>.jpg`` in the
    current working directory and ``Image saved: <file> [<timestamp>]`` is
    printed.  Every failure prints a single ``Error: ...`` line to stderr and
    exits the process with status 1.

    Args:
        prompt: Text describing the desired edit; must be non-empty.
        image_path: Path of the source image.
        width: Value of the request's ``width`` field.
        height: Value of the request's ``height`` field.
        steps: Value of the request's ``num_inference_steps`` field.
        seed: Value of the request's ``seed`` field (``None`` is sent as null).
        cfg_scale: Value of the request's ``true_cfg_scale`` field.
        negative_prompt: Value of the request's ``negative_prompt`` field.
    """
    if not API_TOKEN:
        print("Error: CHUTES_API_TOKEN not set in environment", file=sys.stderr)
        sys.exit(1)
    if not os.path.exists(image_path):
        print(f"Error: Image file not found: {image_path}", file=sys.stderr)
        sys.exit(1)
    if not prompt:
        print("Error: Prompt cannot be empty", file=sys.stderr)
        sys.exit(1)

    # Reading the file can still fail (directory, permissions, ...); report
    # that in the same "Error: ..." format instead of an uncaught traceback.
    try:
        image_b64 = image_to_base64(image_path)
    except OSError as exc:
        print(f"Error: Could not read image file - {exc}", file=sys.stderr)
        sys.exit(1)

    payload = {
        "seed": seed,
        "width": width,
        "height": height,
        "prompt": prompt,
        "image_b64s": [image_b64],
        "true_cfg_scale": cfg_scale,
        "negative_prompt": negative_prompt,
        "num_inference_steps": steps,
    }
    headers = {
        "Authorization": f"Bearer {API_TOKEN}",
        "Content-Type": "application/json",
    }

    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=300)
        response.raise_for_status()

        image_bytes = _image_bytes_from_response(response)
        if image_bytes is None:
            print("Error: Invalid response format", file=sys.stderr)
            sys.exit(1)
        if not image_bytes:
            # Without this check a zero-byte file was written and reported
            # as a successful edit.
            print("Error: API returned no image data", file=sys.stderr)
            sys.exit(1)

        timestamp = int(time.time())
        filename = f"edited_{timestamp}.jpg"
        with open(filename, "wb") as f:
            f.write(image_bytes)
        print(f"Image saved: {filename} [{timestamp}]")
    except requests.exceptions.RequestException as e:
        print(f"Error: API request failed - {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Last-resort boundary for the CLI: keep the one-line error contract.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
def main(argv=None):
    """Parse command-line arguments, validate ranges and run :func:`edit_image`.

    Both ``<prompt> <image_path>`` and the documented
    ``edit <prompt> <image_path>`` forms are accepted.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Out-of-range option values print an ``Error: ...`` line to stderr and
    exit with status 1; unparseable command lines exit through argparse
    (status 2).
    """
    parser = argparse.ArgumentParser(description="Edit images with AI")
    parser.add_argument("prompt", help="Text prompt describing the edit")
    parser.add_argument("image_path", help="Path to input image file")
    parser.add_argument(
        "--width", type=int, default=1024, help="Output width (128-2048)"
    )
    parser.add_argument(
        "--height", type=int, default=1024, help="Output height (128-2048)"
    )
    parser.add_argument("--steps", type=int, default=40, help="Inference steps (5-100)")
    parser.add_argument("--seed", type=int, default=None, help="Random seed")
    parser.add_argument(
        "--cfg-scale", type=float, default=4, help="True CFG scale (0-10)"
    )
    parser.add_argument(
        "--negative-prompt", type=str, default="", help="Negative prompt"
    )

    args, extras = parser.parse_known_args(argv)
    if extras:
        # "edit <prompt> <image>" parses as prompt="edit", image_path=<prompt>
        # with the real image path left over; shift the positionals back.
        is_command_form = (
            args.prompt == "edit"
            and len(extras) == 1
            and not extras[0].startswith("-")
        )
        if not is_command_form:
            parser.error("unrecognized arguments: " + " ".join(extras))
        args.prompt, args.image_path = args.image_path, extras[0]

    # (label, value, low, high) in the order the checks are reported.
    limits = (
        ("width", args.width, 128, 2048),
        ("height", args.height, 128, 2048),
        ("steps", args.steps, 5, 100),
        ("seed", args.seed, 0, 4294967295),
        ("cfg-scale", args.cfg_scale, 0, 10),
    )
    for label, value, low, high in limits:
        # Only --seed can be None (meaning "let the API choose").
        if value is not None and not (low <= value <= high):
            print(f"Error: {label} must be between {low} and {high}", file=sys.stderr)
            sys.exit(1)

    edit_image(
        prompt=args.prompt,
        image_path=args.image_path,
        width=args.width,
        height=args.height,
        steps=args.steps,
        seed=args.seed,
        cfg_scale=args.cfg_scale,
        negative_prompt=args.negative_prompt,
    )


if __name__ == "__main__":
    main()

View File

@@ -1 +0,0 @@
requests>=2.28.0

View File

@@ -1,61 +0,0 @@
---
name: image-generation
description: Generate images from text prompts using Chutes AI image generation. Use this skill when the user wants to create AI-generated images from descriptions.
---
# Image Generation
Generate AI images from text prompts via `python3 scripts/image_gen.py generate <prompt> [options]`.
## Commands
| Command | Args | Description |
|---------|------|-------------|
| `generate` | `<prompt> [--width W] [--height H] [--steps N] [--seed N]` | Generate image from prompt |
## Options
| Option | Default | Range | Description |
|--------|---------|-------|-------------|
| `--width` | 1024 | 576-2048 | Image width in pixels |
| `--height` | 1024 | 576-2048 | Image height in pixels |
| `--steps` | 9 | 1-100 | Number of inference steps |
| `--seed` | null | 0-4294967295 | Random seed (null = random) |
| `--guidance-scale` | 0 | 0-5 | Guidance scale for generation |
| `--shift` | 3 | 1-10 | Shift parameter |
| `--max-seq-len` | 512 | 256-2048 | Max sequence length |
## Examples
```bash
# Basic generation
python3 scripts/image_gen.py generate "a high quality photo of a sunrise over the mountains"
# Custom dimensions
python3 scripts/image_gen.py generate "a futuristic city at night" --width 1280 --height 720
# With seed for reproducibility
python3 scripts/image_gen.py generate "a cute cat sitting on a windowsill" --seed 42
# High quality with more steps
python3 scripts/image_gen.py generate "a detailed portrait of a woman in renaissance style" --steps 20
```
## Workflow
1. Run `generate` with your prompt
2. Script saves image as `generated_[timestamp].png`
3. Prints the result line: `Image saved: generated_1234567890.png [1234567890]` (the bracketed id is the same Unix timestamp used in the filename)
## Output Format
- Success: `Image saved: filename.png [id]`
- Error: `Error: message` (to stderr)
- Images saved to current working directory as PNG files
## Notes
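- Requires `CHUTES_API_TOKEN` in the environment, or in a `.env` file placed next to the script (a variable already set in the environment takes precedence)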
- Prompt length: 3-1200 characters
- Large images (2048x2048) take longer to generate
- Images are saved locally, not returned as base64 to save memory

View File

@@ -1,7 +0,0 @@
# Chutes AI API Token
# Get your token from your Chutes AI account
#
# WARNING: Never commit this file with actual credentials!
# Keep your .env file private and add it to .gitignore
CHUTES_API_TOKEN=your_chutes_api_token_here

View File

@@ -1,160 +0,0 @@
#!/usr/bin/env python3
import os
import sys
import argparse
import time
from pathlib import Path
import requests
import base64
def load_env(env_path=None):
    """Copy ``KEY=VALUE`` pairs from a ``.env`` file into ``os.environ``.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped.  Each line is split on its first ``=``; surrounding whitespace
    and any leading/trailing quote characters are stripped from the value.
    Variables that already exist in the environment are left untouched.

    Args:
        env_path: Optional path of the file to read.  Defaults to ``.env``
            in the directory that contains this script.
    """
    path = Path(__file__).parent / ".env" if env_path is None else Path(env_path)
    # is_file() rather than exists(): a directory named ".env" must not be read.
    if not path.is_file():
        return
    # Explicit encoding so parsing does not depend on the platform locale.
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        key, _, value = entry.partition("=")
        key = key.strip()
        if not key:
            # "=value" has no name; os.environ rejects empty names.
            continue
        os.environ.setdefault(key, value.strip().strip("\"'"))
# Import any variables from a local .env file before the token is read below.
load_env()
# Bearer token for the API; None when CHUTES_API_TOKEN is not set.
API_TOKEN = os.environ.get("CHUTES_API_TOKEN")
# Image-generation endpoint that generate_image() posts to.
API_URL = "https://chutes-z-image-turbo.chutes.ai/generate"
def _image_bytes_from_response(response):
    """Return the image bytes in *response*, or ``None`` if its shape is unexpected."""
    if "image/" in response.headers.get("Content-Type", ""):
        return response.content
    result = response.json()
    if not (isinstance(result, list) and result and isinstance(result[0], dict)):
        return None
    image_data = result[0].get("data") or ""
    if image_data.startswith("data:image"):
        # Drop the "data:image/...;base64," prefix; partition() cannot raise
        # if the comma is missing.
        _, _, image_data = image_data.partition(",")
    return base64.b64decode(image_data)


def generate_image(
    prompt,
    width=1024,
    height=1024,
    steps=9,
    seed=None,
    guidance_scale=0,
    shift=3,
    max_seq_len=512,
):
    """Request an image for *prompt* from the API and save it locally.

    On success the image is written to ``generated_<unix-timestamp>.png`` in
    the current working directory and ``Image saved: <file> [<timestamp>]``
    is printed.  Every failure prints a single ``Error: ...`` line to stderr
    and exits the process with status 1.

    Args:
        prompt: Text prompt, 3 to 1200 characters long.
        width: Value of the request's ``width`` field.
        height: Value of the request's ``height`` field.
        steps: Value of the request's ``num_inference_steps`` field.
        seed: Value of the request's ``seed`` field (``None`` is sent as null).
        guidance_scale: Value of the request's ``guidance_scale`` field.
        shift: Value of the request's ``shift`` field.
        max_seq_len: Value of the request's ``max_sequence_length`` field.
    """
    if not API_TOKEN:
        print("Error: CHUTES_API_TOKEN not set in environment", file=sys.stderr)
        sys.exit(1)
    if not prompt or len(prompt) < 3:
        print("Error: Prompt must be at least 3 characters", file=sys.stderr)
        sys.exit(1)
    if len(prompt) > 1200:
        print(
            "Error: Prompt exceeds maximum length of 1200 characters", file=sys.stderr
        )
        sys.exit(1)

    payload = {
        "prompt": prompt,
        "width": width,
        "height": height,
        "num_inference_steps": steps,
        "guidance_scale": guidance_scale,
        "shift": shift,
        "max_sequence_length": max_seq_len,
        "seed": seed,
    }
    headers = {
        "Authorization": f"Bearer {API_TOKEN}",
        "Content-Type": "application/json",
    }

    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=300)
        response.raise_for_status()

        image_bytes = _image_bytes_from_response(response)
        if image_bytes is None:
            print("Error: Invalid response format", file=sys.stderr)
            sys.exit(1)
        if not image_bytes:
            # Without this check a zero-byte file was written and reported
            # as a successful generation.
            print("Error: API returned no image data", file=sys.stderr)
            sys.exit(1)

        timestamp = int(time.time())
        filename = f"generated_{timestamp}.png"
        with open(filename, "wb") as f:
            f.write(image_bytes)
        print(f"Image saved: {filename} [{timestamp}]")
    except requests.exceptions.RequestException as e:
        print(f"Error: API request failed - {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Last-resort boundary for the CLI: keep the one-line error contract.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
def main(argv=None):
    """Parse command-line arguments, validate ranges and run :func:`generate_image`.

    Both ``<prompt>`` and the documented ``generate <prompt>`` forms are
    accepted.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Out-of-range option values print an ``Error: ...`` line to stderr and
    exit with status 1; unparseable command lines exit through argparse
    (status 2).
    """
    parser = argparse.ArgumentParser(description="Generate images from text prompts")
    parser.add_argument("prompt", help="Text prompt for image generation")
    parser.add_argument(
        "--width", type=int, default=1024, help="Image width (576-2048)"
    )
    parser.add_argument(
        "--height", type=int, default=1024, help="Image height (576-2048)"
    )
    parser.add_argument("--steps", type=int, default=9, help="Inference steps (1-100)")
    parser.add_argument("--seed", type=int, default=None, help="Random seed")
    parser.add_argument(
        "--guidance-scale", type=float, default=0, help="Guidance scale (0-5)"
    )
    parser.add_argument("--shift", type=float, default=3, help="Shift parameter (1-10)")
    parser.add_argument(
        "--max-seq-len", type=int, default=512, help="Max sequence length (256-2048)"
    )

    args, extras = parser.parse_known_args(argv)
    if extras:
        # "generate <prompt>" parses as prompt="generate" with the real
        # prompt left over; move it into place.
        is_command_form = (
            args.prompt == "generate"
            and len(extras) == 1
            and not extras[0].startswith("-")
        )
        if not is_command_form:
            parser.error("unrecognized arguments: " + " ".join(extras))
        args.prompt = extras[0]

    # (label, value, low, high) in the order the checks are reported.
    limits = (
        ("width", args.width, 576, 2048),
        ("height", args.height, 576, 2048),
        ("steps", args.steps, 1, 100),
        ("seed", args.seed, 0, 4294967295),
        ("guidance-scale", args.guidance_scale, 0, 5),
        ("shift", args.shift, 1, 10),
        ("max-seq-len", args.max_seq_len, 256, 2048),
    )
    for label, value, low, high in limits:
        # Only --seed can be None (meaning "let the API choose").
        if value is not None and not (low <= value <= high):
            print(f"Error: {label} must be between {low} and {high}", file=sys.stderr)
            sys.exit(1)

    generate_image(
        prompt=args.prompt,
        width=args.width,
        height=args.height,
        steps=args.steps,
        seed=args.seed,
        guidance_scale=args.guidance_scale,
        shift=args.shift,
        max_seq_len=args.max_seq_len,
    )


if __name__ == "__main__":
    main()

View File

@@ -1 +0,0 @@
requests>=2.28.0