feat: Import 35+ skills, merge duplicates, add openclaw installer
Major updates: - Added 35+ new skills from awesome-opencode-skills and antigravity repos - Merged SEO skills into seo-master - Merged architecture skills into architecture - Merged security skills into security-auditor and security-coder - Merged testing skills into testing-master and testing-patterns - Merged pentesting skills into pentesting - Renamed website-creator to thai-frontend-dev - Replaced skill-creator with github version - Removed Chutes references (use MiniMax API instead) - Added install-openclaw-skills.sh for cross-platform installation - Updated .env.example with MiniMax API credentials
This commit is contained in:
156
skills/minimax-multimodal-toolkit/scripts/check_environment.sh
Executable file
156
skills/minimax-multimodal-toolkit/scripts/check_environment.sh
Executable file
@@ -0,0 +1,156 @@
|
||||
#!/usr/bin/env bash
|
||||
# MiniMax Multi-Modal Toolkit — Environment Check
|
||||
#
|
||||
# Usage:
|
||||
# bash scripts/check_environment.sh
|
||||
# bash scripts/check_environment.sh --test-api
|
||||
set -euo pipefail

# Running tallies maintained by check(): how many checks succeeded, how many
# failed, and how many were attempted in total.
PASSED=0
FAILED=0
TOTAL=0
|
||||
|
||||
#######################################
# Run one check command and record its outcome in the global tallies.
# Globals:   TOTAL, PASSED, FAILED (written)
# Arguments: the check command followed by its arguments
# Returns:   0 in both branches, so a failing check does not trip `set -e`
#######################################
check() {
  TOTAL=$((TOTAL + 1))
  if "$@"; then
    PASSED=$((PASSED + 1))
  else
    FAILED=$((FAILED + 1))
  fi
}
|
||||
|
||||
#######################################
# Report whether curl can be found by the shell.
# Outputs: one "[OK]" or "[FAIL]" status line on stdout
# Returns: 0 when curl is found, 1 otherwise
#######################################
check_curl() {
  if ! command -v curl &>/dev/null; then
    echo "[FAIL] curl not installed"
    return 1
  fi
  echo "[OK] curl installed"
  return 0
}
|
||||
|
||||
#######################################
# Report whether ffmpeg can be found by the shell.
# Outputs: one "[OK]" or "[FAIL]" status line on stdout
# Returns: 0 when ffmpeg is found, 1 otherwise
#######################################
check_ffmpeg() {
  if ! command -v ffmpeg &>/dev/null; then
    echo "[FAIL] FFmpeg not installed"
    return 1
  fi
  echo "[OK] FFmpeg installed"
  return 0
}
|
||||
|
||||
#######################################
# Report whether ffprobe can be found by the shell.
# Outputs: one "[OK]" or "[FAIL]" status line on stdout
# Returns: 0 when ffprobe is found, 1 otherwise
#######################################
check_ffprobe() {
  if ! command -v ffprobe &>/dev/null; then
    echo "[FAIL] ffprobe not installed"
    return 1
  fi
  echo "[OK] ffprobe installed"
  return 0
}
|
||||
|
||||
#######################################
# Report whether jq can be found by the shell.
# Outputs: one "[OK]" or "[FAIL]" status line on stdout; the failure line
#          includes install hints for brew and apt
# Returns: 0 when jq is found, 1 otherwise
#######################################
check_jq() {
  if ! command -v jq &>/dev/null; then
    echo "[FAIL] jq not installed (brew install jq / apt install jq)"
    return 1
  fi
  echo "[OK] jq installed"
  return 0
}
|
||||
|
||||
#######################################
# Report whether xxd can be found by the shell.
# Outputs: one "[OK]" or "[FAIL]" status line on stdout
# Returns: 0 when xxd is found, 1 otherwise
#######################################
check_xxd() {
  if ! command -v xxd &>/dev/null; then
    echo "[FAIL] xxd not installed"
    return 1
  fi
  echo "[OK] xxd installed"
  return 0
}
|
||||
|
||||
#######################################
# Verify that MINIMAX_API_HOST is set and report whether it is one of the
# two known endpoints.
# Globals: MINIMAX_API_HOST (read)
# Outputs: status lines on stdout ([OK], [WARN] or [FAIL] plus hints)
# Returns: 1 when the variable is unset or empty; 0 otherwise (an
#          unrecognised host is only a warning, not a failure)
#######################################
check_api_host() {
  local api_host="${MINIMAX_API_HOST:-}"

  if [[ -z "$api_host" ]]; then
    echo "[FAIL] MINIMAX_API_HOST not set"
    echo " China Mainland: export MINIMAX_API_HOST='https://api.minimaxi.com'"
    echo " Global: export MINIMAX_API_HOST='https://api.minimax.io'"
    return 1
  fi

  case "$api_host" in
    "https://api.minimaxi.com" | "https://api.minimax.io")
      echo "[OK] MINIMAX_API_HOST set ($api_host)"
      ;;
    *)
      echo "[WARN] MINIMAX_API_HOST has non-standard value: $api_host"
      echo " Expected: https://api.minimaxi.com (China) or https://api.minimax.io (Global)"
      ;;
  esac
  return 0
}
|
||||
|
||||
#######################################
# Verify that MINIMAX_API_KEY is set and starts with a recognised prefix.
# Globals: MINIMAX_API_KEY (read)
# Outputs: status lines on stdout; the key itself is never printed in full
# Returns: 0 when the key is set and starts with "sk-api" or "sk-cp",
#          1 otherwise
#######################################
check_api_key() {
  local api_key="${MINIMAX_API_KEY:-}"

  if [[ -z "$api_key" ]]; then
    echo "[FAIL] MINIMAX_API_KEY not set"
    echo " export MINIMAX_API_KEY='your-key'"
    return 1
  fi

  case "$api_key" in
    sk-api* | sk-cp*)
      echo "[OK] MINIMAX_API_KEY set (${#api_key} chars)"
      return 0
      ;;
  esac

  echo "[FAIL] Invalid API key format"
  echo " Expected: sk-api-xxx... or sk-cp-xxx..."
  # Show only as many characters as the prefix check looks at, so a
  # mistyped but real secret does not end up in logs or terminals.
  echo " Got: ${api_key:0:6}..."
  return 1
}
|
||||
|
||||
#######################################
# Probe the configured API host with one authenticated HTTP request.
# Globals: MINIMAX_API_HOST, MINIMAX_API_KEY (read)
# Outputs: one "[OK]" or "[FAIL]" status line on stdout
# Returns: 0 when the host answers with an HTTP status of 100-499,
#          1 when a variable is missing, the request fails, or the
#          status is 5xx
#######################################
check_api_connectivity() {
  local api_host="${MINIMAX_API_HOST:-}"
  local api_key="${MINIMAX_API_KEY:-}"

  if [[ -z "$api_key" ]]; then
    echo "[FAIL] API connectivity skipped (MINIMAX_API_KEY not set)"
    return 1
  fi
  if [[ -z "$api_host" ]]; then
    echo "[FAIL] API connectivity skipped (MINIMAX_API_HOST not set)"
    return 1
  fi

  # curl's exit status is ignored on purpose; the decision is made from the
  # reported status code alone.
  local http_code
  http_code=$(curl -s -o /dev/null -w "%{http_code}" \
    -H "Authorization: Bearer $api_key" \
    --max-time 10 \
    "$api_host" 2>/dev/null) || true

  # When no HTTP response was received curl writes "000". A plain numeric
  # "< 500" test would accept that as success, so require a real status
  # code in the 1xx-4xx range instead.
  if [[ "$http_code" =~ ^[1-4][0-9][0-9]$ ]]; then
    echo "[OK] API host reachable (HTTP $http_code)"
    return 0
  fi

  echo "[FAIL] API host unreachable ($api_host)"
  return 1
}
|
||||
|
||||
# --- Main ---
|
||||
# Parse arguments: --test-api additionally runs the live connectivity probe.
TEST_API=false
for arg in "$@"; do
  case "$arg" in
    --test-api) TEST_API=true ;;
    # Anything else is reported instead of being dropped silently, so a
    # typo such as "--test_api" is noticed. The run still continues.
    *) echo "[WARN] Ignoring unknown argument: $arg" >&2 ;;
  esac
done

echo "MiniMax Multi-Modal Toolkit — Environment Check"
echo "========================================"

# Each check prints its own status line; check() tallies the result.
check check_curl
check check_ffmpeg
check check_ffprobe
check check_jq
check check_xxd
check check_api_host
check check_api_key

if [[ "$TEST_API" == true ]]; then
  check check_api_connectivity
fi

echo ""
echo "========================================"

# Exit status mirrors the overall result: 0 only when nothing failed.
if [[ $FAILED -ne 0 ]]; then
  echo "$FAILED check(s) failed out of $TOTAL"
  exit 1
fi

echo "All $TOTAL checks passed!"
exit 0
|
||||
277
skills/minimax-multimodal-toolkit/scripts/image/generate_image.sh
Executable file
277
skills/minimax-multimodal-toolkit/scripts/image/generate_image.sh
Executable file
@@ -0,0 +1,277 @@
|
||||
#!/usr/bin/env bash
|
||||
# MiniMax Image Generation CLI (pure bash)
|
||||
#
|
||||
# Usage:
|
||||
# bash scripts/image/generate_image.sh --prompt "A cat on a rooftop at sunset" -o minimax-output/cat.png
|
||||
# bash scripts/image/generate_image.sh --mode i2i --prompt "A girl reading in a library" --ref-image face.jpg -o minimax-output/girl.png
|
||||
# bash scripts/image/generate_image.sh --prompt "Mountain landscape" --aspect-ratio 16:9 -n 3 -o minimax-output/landscape.png
|
||||
set -euo pipefail

# SCRIPT_DIR:   absolute directory that contains this script.
# PROJECT_ROOT: the directory two levels above SCRIPT_DIR.
# CDPATH is cleared for the `cd` calls because a CDPATH hit makes `cd` print
# the directory it chose, which would be captured together with `pwd`.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(CDPATH='' cd -- "$SCRIPT_DIR/../.." && pwd)"
|
||||
|
||||
# ============================================================================
|
||||
# Common functions
|
||||
# ============================================================================
|
||||
|
||||
#######################################
# Load KEY=VALUE pairs from "$PROJECT_ROOT/.env" and then "$(pwd)/.env".
# A variable that is already set to a non-empty value is never overwritten,
# so the real environment wins over the files and the first file wins over
# the second.
#
# Parsing rules:
#   - everything from the first '#' on a line is dropped (so values cannot
#     contain '#')
#   - lines without '=' are skipped
#   - whitespace around key and value is trimmed
#   - an optional leading "export " on the key is accepted
#   - one pair of surrounding single or double quotes is removed from the
#     value
#   - keys that are not valid shell identifiers are skipped
#
# Globals: PROJECT_ROOT (read); variables named in the files (exported)
# Returns: always 0. This matters under `set -e`: ending on a
#          "[[ ... ]] && export" list made the function return 1 whenever
#          the last key was already set, which silently aborted the script.
#######################################
load_env() {
  local env_file line key val

  for env_file in "$PROJECT_ROOT/.env" "$(pwd)/.env"; do
    [[ -f "$env_file" ]] || continue

    # The "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      line="${line%%#*}"
      [[ "$line" == *=* ]] || continue

      key="${line%%=*}"
      val="${line#*=}"

      # Trim leading/trailing whitespace with parameter expansion only.
      # Unlike piping through xargs this cannot fail on quotes or
      # backslashes inside the value.
      key="${key#"${key%%[![:space:]]*}"}"
      key="${key%"${key##*[![:space:]]}"}"
      val="${val#"${val%%[![:space:]]*}"}"
      val="${val%"${val##*[![:space:]]}"}"

      # Accept "export KEY=value" lines.
      key="${key#export[[:space:]]}"
      key="${key#"${key%%[![:space:]]*}"}"

      # ${!key} and export both need a valid identifier.
      [[ "$key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue

      if [[ ${#val} -ge 2 ]]; then
        case "$val" in
          \"*\" | \'*\') val="${val:1:${#val}-2}" ;;
        esac
      fi

      if [[ -z "${!key:-}" ]]; then
        export "$key=$val"
      fi
    done < "$env_file"
  done

  return 0
}
|
||||
|
||||
#######################################
# Abort the script unless MINIMAX_API_KEY is set to a non-empty value.
# Globals: MINIMAX_API_KEY (read)
# Outputs: an error message on stderr when the key is missing
# Returns: 0 when the key is present; exits with status 1 otherwise
#######################################
check_api_key() {
  if [[ -n "${MINIMAX_API_KEY:-}" ]]; then
    return 0
  fi
  echo "Error: MINIMAX_API_KEY environment variable is not set." >&2
  exit 1
}
|
||||
|
||||
#######################################
# Encode a local image file as a single-line "data:" URL.
# Arguments: $1 - path to the image file
# Outputs:   "data:<mime>;base64,<payload>" on stdout
# Returns:   0 on success; exits with status 1 when the file is missing
#######################################
image_to_data_url() {
  local path="$1"

  if [[ ! -f "$path" ]]; then
    echo "Error: Image not found: $path" >&2
    exit 1
  fi

  # Fall back to image/jpeg when `file` is unavailable, fails, or prints
  # nothing.
  local mime
  mime="$(file -b --mime-type "$path" 2>/dev/null)" || mime="image/jpeg"
  [[ -n "$mime" ]] || mime="image/jpeg"

  # GNU base64 wraps its output at 76 columns. Line breaks inside the
  # payload would end up in the URL, so they are stripped here, which also
  # behaves the same with implementations that do not wrap.
  local b64
  b64="$(base64 < "$path" | tr -d '\r\n')"

  printf 'data:%s;base64,%s\n' "$mime" "$b64"
}
|
||||
|
||||
#######################################
# Turn an image reference into something that can be embedded in a request.
# Arguments: $1 - an http(s) URL, a data: URL, or a local file path
# Outputs:   URLs are echoed unchanged; a local path is converted with
#            image_to_data_url; an empty argument prints nothing
# Returns:   0, or whatever image_to_data_url returns for local files
#######################################
resolve_image() {
  local input="${1:-}"

  if [[ -z "$input" ]]; then
    return 0
  fi

  case "$input" in
    http://* | https://* | data:*)
      printf '%s\n' "$input"
      ;;
    *)
      image_to_data_url "$input"
      ;;
  esac
}
|
||||
|
||||
# ============================================================================
|
||||
# Main
|
||||
# ============================================================================
|
||||
|
||||
#######################################
# Entry point: parse options, call the image generation endpoint and save
# (or print) the resulting images.
#
# Globals:   MINIMAX_API_KEY, MINIMAX_API_HOST (read, after load_env)
# Arguments: command-line options, see --help
# Outputs:   progress on stdout, errors on stderr, image files on disk
# Returns:   exits 0 on success and 1 on any usage, API or download error
#
# Notes on non-obvious choices:
#   - The JSON payload and the reference image are passed to jq and curl
#     through stdin, not as arguments. A base64-encoded photo can exceed
#     the operating system's per-argument size limit.
#   - Downloads use "curl -f" so an HTTP error page is never saved as if it
#     were an image.
#######################################
main() {
  load_env
  check_api_key

  local mode="t2i" prompt="" model="image-01"
  local aspect_ratio="" width="" height=""
  local response_format="url" n=1 seed=""
  local prompt_optimizer=false aigc_watermark=false
  local ref_image=""
  local output="" download=true

  while [[ $# -gt 0 ]]; do
    # Value-taking options must be followed by a value; without this check
    # "$2" would abort under `set -u` with an unhelpful message.
    case "$1" in
      --mode | --prompt | --aspect-ratio | --ratio | --width | --height | \
        --response-format | -n | --count | --seed | --ref-image | -o | --output)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value" >&2
          exit 1
        fi
        ;;
    esac

    case "$1" in
      --mode) mode="$2"; shift 2 ;;
      --prompt) prompt="$2"; shift 2 ;;
      --aspect-ratio | --ratio) aspect_ratio="$2"; shift 2 ;;
      --width) width="$2"; shift 2 ;;
      --height) height="$2"; shift 2 ;;
      --response-format) response_format="$2"; shift 2 ;;
      -n | --count) n="$2"; shift 2 ;;
      --seed) seed="$2"; shift 2 ;;
      --prompt-optimizer) prompt_optimizer=true; shift ;;
      --aigc-watermark) aigc_watermark=true; shift ;;
      --ref-image) ref_image="$2"; shift 2 ;;
      --no-download) download=false; shift ;;
      -o | --output) output="$2"; shift 2 ;;
      -h | --help)
        cat <<'USAGE'
MiniMax Image Generation CLI (model: image-01)

Usage:
  generate_image.sh [--mode MODE] [options] -o OUTPUT

Modes:
  t2i    Text-to-image (default) — generate image from text prompt
  i2i    Image-to-image — generate image using a character reference photo

Options:
  --mode MODE            Generation mode: t2i (default), i2i
  --prompt TEXT          Text description of the image (max 1500 chars, required)
  --aspect-ratio RATIO   Aspect ratio: 1:1, 16:9, 4:3, 3:2, 2:3, 3:4, 9:16, 21:9
  --width PX             Custom width in pixels (512-2048, multiple of 8)
  --height PX            Custom height in pixels (512-2048, multiple of 8)
  -n, --count N          Number of images to generate (1-9, default: 1)
  --seed N               Random seed for reproducibility
  --prompt-optimizer     Enable automatic prompt optimization
  --aigc-watermark       Add AIGC watermark to generated images
  --ref-image FILE       Character reference image (local file or URL, i2i mode)
  --response-format FMT  Response format: url (default), base64
  --no-download          Don't download, just print URL(s)
  -o, --output FILE      Output file path (required)

Examples:
  # Text-to-image (default)
  generate_image.sh --prompt "A cat on a rooftop at sunset, cinematic" -o cat.png

  # Custom aspect ratio
  generate_image.sh --prompt "Mountain landscape" --aspect-ratio 16:9 -o landscape.png

  # Multiple images
  generate_image.sh --prompt "Abstract art" -n 3 -o art.png

  # Image-to-image with character reference
  generate_image.sh --mode i2i --prompt "A girl reading in a library" --ref-image face.jpg -o girl.png
USAGE
        exit 0
        ;;
      *) echo "Unknown option: $1" >&2; exit 1 ;;
    esac
  done

  # ---- Validation -------------------------------------------------------
  if [[ -z "$prompt" ]]; then
    echo "Error: --prompt is required" >&2
    exit 1
  fi
  if [[ -z "$output" ]]; then
    echo "Error: --output / -o is required" >&2
    exit 1
  fi

  case "$mode" in
    t2i | i2i) ;;
    *) echo "Error: --mode must be t2i or i2i" >&2; exit 1 ;;
  esac
  case "$response_format" in
    url | base64) ;;
    *) echo "Error: --response-format must be url or base64" >&2; exit 1 ;;
  esac

  # A pattern match rejects non-numeric input cleanly; an arithmetic test
  # would abort silently on it under `set -u`.
  if [[ ! "$n" =~ ^[1-9]$ ]]; then
    echo "Error: -n must be between 1 and 9" >&2
    exit 1
  fi

  # These values are handed to jq as raw JSON numbers, so make sure they
  # are numbers before jq produces a cryptic parse error.
  local uint_re='^[0-9]+$' int_re='^-?[0-9]+$'
  if [[ -n "$width" && ! "$width" =~ $uint_re ]]; then
    echo "Error: --width must be a positive integer" >&2
    exit 1
  fi
  if [[ -n "$height" && ! "$height" =~ $uint_re ]]; then
    echo "Error: --height must be a positive integer" >&2
    exit 1
  fi
  if [[ -n "$seed" && ! "$seed" =~ $int_re ]]; then
    echo "Error: --seed must be an integer" >&2
    exit 1
  fi
  if [[ "$mode" == "i2i" && -z "$ref_image" ]]; then
    echo "Error: --ref-image is required for i2i mode" >&2
    exit 1
  fi

  # ---- Build payload ----------------------------------------------------
  local payload
  payload="$(jq -n \
    --arg model "$model" \
    --arg prompt "$prompt" \
    --arg rf "$response_format" \
    --argjson n "$n" \
    --argjson po "$prompt_optimizer" \
    --argjson aw "$aigc_watermark" \
    '{model: $model, prompt: $prompt, response_format: $rf, n: $n, prompt_optimizer: $po, aigc_watermark: $aw}')"

  if [[ -n "$aspect_ratio" ]]; then
    payload="$(printf '%s' "$payload" | jq --arg ar "$aspect_ratio" '. + {aspect_ratio: $ar}')"
  fi
  if [[ -n "$width" ]]; then
    payload="$(printf '%s' "$payload" | jq --argjson w "$width" '. + {width: $w}')"
  fi
  if [[ -n "$height" ]]; then
    payload="$(printf '%s' "$payload" | jq --argjson h "$height" '. + {height: $h}')"
  fi
  if [[ -n "$seed" ]]; then
    payload="$(printf '%s' "$payload" | jq --argjson s "$seed" '. + {seed: $s}')"
  fi

  # Subject reference (i2i mode). The image is turned into a JSON string by
  # "jq -Rs" and merged via a slurped two-document stream, so the possibly
  # very large data URL never appears on a command line.
  if [[ "$mode" == "i2i" ]]; then
    local ref_url
    ref_url="$(resolve_image "$ref_image")"
    payload="$(
      {
        printf '%s\n' "$payload"
        printf '%s' "$ref_url" | jq -Rs '.'
      } | jq -s '.[0] + {subject_reference: [{type: "character", image_file: .[1]}]}'
    )"
  fi

  # ---- Call the API -----------------------------------------------------
  local api_host="${MINIMAX_API_HOST:-https://api.minimaxi.com}"
  local api_url="${api_host}/v1/image_generation"

  echo "Mode: $mode"
  echo "Model: $model"
  echo "Generating $n image(s)..."

  # The HTTP status is appended on its own last line so body and status can
  # be separated afterwards.
  local raw_output http_code response
  raw_output="$(
    printf '%s' "$payload" | curl -s -w "\n%{http_code}" \
      -X POST "$api_url" \
      -H "Authorization: Bearer ${MINIMAX_API_KEY}" \
      -H "Content-Type: application/json" \
      --max-time 120 \
      --data-binary @- 2>/dev/null
  )" || {
    echo "Error: curl request failed" >&2
    exit 1
  }

  http_code="${raw_output##*$'\n'}"
  response="${raw_output%$'\n'*}"

  if [[ "$http_code" =~ ^[0-9]+$ ]] && ((10#$http_code >= 400)); then
    echo "Error: API returned HTTP $http_code" >&2
    echo "$response" >&2
    exit 1
  fi

  # The API reports application-level errors inside a 200 response.
  local status_code
  status_code="$(printf '%s' "$response" | jq -r '.base_resp.status_code // 0' 2>/dev/null)" || true
  if [[ -n "$status_code" && "$status_code" != "0" ]]; then
    local status_msg
    status_msg="$(printf '%s' "$response" | jq -r '.base_resp.status_msg // "Unknown error"')"
    echo "Error: API error (code $status_code): $status_msg" >&2
    exit 1
  fi

  local success_count failed_count
  success_count="$(printf '%s' "$response" | jq -r '.metadata.success_count // 0' 2>/dev/null)" || true
  failed_count="$(printf '%s' "$response" | jq -r '.metadata.failed_count // 0' 2>/dev/null)" || true
  echo "Success: $success_count, Failed: $failed_count"

  # ---- Save results -----------------------------------------------------
  mkdir -p "$(dirname "$output")"

  # Split the output path into stem and extension for numbered files
  # ("art.png" -> "art_1.png"). A name without an extension simply gets
  # "_N" appended.
  local out_name="${output##*/}" base suffix
  if [[ "$out_name" == ?*.* ]]; then
    base="${output%.*}"
    suffix=".${output##*.}"
  else
    base="$output"
    suffix=""
  fi

  local count i out_file img_url
  if [[ "$response_format" == "base64" ]]; then
    count="$(printf '%s' "$response" | jq '.data.image_base64 | length' 2>/dev/null)" || count=0
    [[ "$count" =~ ^[0-9]+$ ]] || count=0
    if ((count == 0)); then
      echo "Error: No image data in response" >&2
      exit 1
    fi

    for ((i = 0; i < count; i++)); do
      if ((count == 1)); then
        out_file="$output"
      else
        out_file="${base}_$((i + 1))${suffix}"
      fi
      printf '%s' "$response" \
        | jq -r --argjson i "$i" '.data.image_base64[$i]' \
        | base64 -d > "$out_file"
      echo "Image saved to: $out_file"
    done
  else
    count="$(printf '%s' "$response" | jq '.data.image_urls | length' 2>/dev/null)" || count=0
    [[ "$count" =~ ^[0-9]+$ ]] || count=0
    if ((count == 0)); then
      echo "Error: No image URLs in response" >&2
      # Pretty-print when the body is JSON, otherwise show it verbatim.
      printf '%s' "$response" | jq . >&2 2>/dev/null || printf '%s\n' "$response" >&2
      exit 1
    fi

    for ((i = 0; i < count; i++)); do
      img_url="$(printf '%s' "$response" | jq -r --argjson i "$i" '.data.image_urls[$i]')"

      if [[ "$download" != true ]]; then
        echo "Image URL $((i + 1)): $img_url"
        continue
      fi

      if ((count == 1)); then
        out_file="$output"
        echo "URL: $img_url"
      else
        out_file="${base}_$((i + 1))${suffix}"
        echo "URL $((i + 1)): $img_url"
      fi

      if ! curl -fsSL -o "$out_file" --max-time 120 "$img_url"; then
        echo "Error: Failed to download image: $img_url" >&2
        exit 1
      fi
      echo "Image downloaded to: $out_file"
    done

    if [[ "$download" != true ]]; then
      echo "Use without --no-download to save files automatically."
    fi
  fi

  echo "Done!"
}

main "$@"
|
||||
543
skills/minimax-multimodal-toolkit/scripts/media_tools.sh
Executable file
543
skills/minimax-multimodal-toolkit/scripts/media_tools.sh
Executable file
@@ -0,0 +1,543 @@
|
||||
#!/usr/bin/env bash
|
||||
# MiniMax Multi-Modal Toolkit Media Tools CLI (pure bash)
|
||||
#
|
||||
# FFmpeg-based utilities for audio/video format conversion, concatenation,
|
||||
# extraction, and trimming.
|
||||
#
|
||||
# Usage:
|
||||
# bash scripts/media_tools.sh convert-video input.webm -o output.mp4
|
||||
# bash scripts/media_tools.sh convert-audio input.wav -o output.mp3
|
||||
# bash scripts/media_tools.sh concat-video seg1.mp4 seg2.mp4 -o merged.mp4
|
||||
# bash scripts/media_tools.sh concat-audio part1.mp3 part2.mp3 -o combined.mp3
|
||||
# bash scripts/media_tools.sh extract-audio input.mp4 -o audio.mp3
|
||||
# bash scripts/media_tools.sh trim-video input.mp4 --start 5 --end 15 -o clip.mp4
|
||||
# bash scripts/media_tools.sh add-audio --video video.mp4 --audio bgm.mp3 -o output.mp4
|
||||
# bash scripts/media_tools.sh probe input.mp4
|
||||
set -euo pipefail
|
||||
|
||||
# ============================================================================
|
||||
# Probe / info helpers
|
||||
# ============================================================================
|
||||
|
||||
#######################################
# Print ffprobe's format and stream description of a media file as JSON.
# Arguments: $1 - path to the media file
# Outputs:   ffprobe's JSON on stdout; ffprobe's stderr is discarded
# Returns:   ffprobe's exit status
#######################################
probe_media() {
  ffprobe -v error -show_format -show_streams -of json "$1" 2>/dev/null
}
|
||||
|
||||
#######################################
# Print the container duration of a media file as reported by ffprobe.
# Arguments: $1 - path to the media file
# Outputs:   the value of .format.duration on stdout, or "0" when that
#            field is absent from the probe result
#######################################
get_duration() {
  probe_media "$1" | jq -r '.format.duration // "0"'
}
|
||||
|
||||
#######################################
# Print the frame rate of the first video stream, rounded to an integer.
# Arguments: $1 - path to the media file
# Outputs:   the rounded frame rate on stdout; 25 when ffprobe fails or its
#            answer cannot be interpreted (empty, "0/0", rate below 1)
# Returns:   always 0
#######################################
get_video_fps() {
  local fallback=25 rate
  rate="$(ffprobe -v error -select_streams v:0 -show_entries stream=r_frame_rate -of csv=p=0 "$1" 2>/dev/null)" || rate=""

  # Keep the first line only and cut at the first character that is not a
  # digit or a slash, so trailing separators or whitespace do not matter.
  rate="${rate%%$'\n'*}"
  rate="${rate%%[!0-9/]*}"

  local fps="$fallback"
  if [[ "$rate" =~ ^([0-9]+)/([0-9]+)$ ]]; then
    # Rational form such as 30000/1001. "10#" forces base 10 so a leading
    # zero is not read as octal; a zero denominator keeps the fallback.
    local num="${BASH_REMATCH[1]}" den="${BASH_REMATCH[2]}"
    if ((10#$den > 0)); then
      fps=$(((10#$num + 10#$den / 2) / 10#$den))
    fi
  elif [[ "$rate" =~ ^[0-9]+$ ]]; then
    # Plain integer without a denominator.
    fps=$((10#$rate))
  fi

  if ((fps < 1)); then
    fps="$fallback"
  fi
  echo "$fps"
}
|
||||
|
||||
#######################################
# Test whether a media file contains at least one audio stream.
# Arguments: $1 - path to the media file
# Returns:   0 when ffprobe lists an audio stream, 1 otherwise (including
#            when ffprobe itself fails)
#######################################
has_audio_stream() {
  local listing
  listing="$(ffprobe -v error -select_streams a -show_entries stream=codec_type -of csv=p=0 "$1" 2>/dev/null)" || true
  [[ "$listing" == *audio* ]]
}
|
||||
|
||||
#######################################
# Test whether a media file contains at least one video stream.
# Arguments: $1 - path to the media file
# Returns:   0 when ffprobe lists a video stream, 1 otherwise (including
#            when ffprobe itself fails)
#######################################
has_video_stream() {
  local listing
  listing="$(ffprobe -v error -select_streams v -show_entries stream=codec_type -of csv=p=0 "$1" 2>/dev/null)" || true
  [[ "$listing" == *video* ]]
}
|
||||
|
||||
# ============================================================================
|
||||
# Video codec maps
|
||||
# ============================================================================
|
||||
|
||||
#######################################
# Map a container extension to the video encoder used for it.
# Arguments: $1 - lower-case file extension without the dot
# Outputs:   encoder name on stdout; libx264 for anything not listed
#######################################
video_codec_for() {
  case "$1" in
    webm) echo "libvpx-vp9" ;;
    mp4 | mov | mkv | avi | ts | flv) echo "libx264" ;;
    *) echo "libx264" ;;
  esac
}
|
||||
|
||||
#######################################
# Map a video container extension to the audio encoder used inside it.
# Arguments: $1 - lower-case file extension without the dot
# Outputs:   encoder name on stdout; aac for anything not listed
#######################################
audio_codec_for_container() {
  case "$1" in
    webm) echo "libopus" ;;
    avi) echo "mp3" ;;
    mp4 | mov | mkv | ts | flv) echo "aac" ;;
    *) echo "aac" ;;
  esac
}
|
||||
|
||||
#######################################
# Map an audio file extension to the encoder used to produce it.
# Arguments: $1 - lower-case file extension without the dot
# Outputs:   encoder name on stdout; libmp3lame for anything not listed
#######################################
audio_codec_for_format() {
  case "$1" in
    mp3) echo "libmp3lame" ;;
    wav) echo "pcm_s16le" ;;
    flac) echo "flac" ;;
    ogg) echo "libvorbis" ;;
    aac | m4a) echo "aac" ;;
    opus) echo "libopus" ;;
    wma) echo "wmav2" ;;
    *) echo "libmp3lame" ;;
  esac
}
|
||||
|
||||
#######################################
# Print the lower-cased extension of a file name.
# Arguments: $1 - file name or path
# Outputs:   the text after the last dot of the final path component,
#            lower-cased; an empty line when that component has no dot
#######################################
get_ext() {
  # Look at the final path component only, so a dot in a directory name
  # ("out.v2/clip") is not mistaken for an extension.
  local name="${1##*/}"

  if [[ "$name" != *.* ]]; then
    echo ""
    return 0
  fi

  # tr, not ${var,,}, is used for the case conversion because the latter
  # needs Bash 4.
  echo "${name##*.}" | tr '[:upper:]' '[:lower:]'
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: convert-video
|
||||
# ============================================================================
|
||||
#######################################
# Subcommand convert-video: re-encode a video into the container implied by
# the output file's extension.
#
# Arguments: INPUT -o OUTPUT [--crf N] [--preset NAME]
#            [--resolution WxH] [--fps N]
#            The first argument that is not a recognised option is taken as
#            INPUT; further unrecognised arguments are ignored.
# Outputs:   progress on stdout, errors (including ffmpeg's own) on stderr
# Returns:   exits 1 on usage errors or when ffmpeg fails
#######################################
cmd_convert_video() {
  local input="" output="" crf=18 preset="medium" resolution="" fps=""

  while [[ $# -gt 0 ]]; do
    # Value-taking options need a value; otherwise "$2" aborts under
    # `set -u` without a useful message.
    case "$1" in
      -o | --output | --crf | --preset | --resolution | --fps)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value" >&2
          exit 1
        fi
        ;;
    esac

    case "$1" in
      -o | --output) output="$2"; shift 2 ;;
      --crf) crf="$2"; shift 2 ;;
      --preset) preset="$2"; shift 2 ;;
      --resolution) resolution="$2"; shift 2 ;;
      --fps) fps="$2"; shift 2 ;;
      *)
        if [[ -z "$input" ]]; then
          input="$1"
        fi
        shift
        ;;
    esac
  done

  if [[ -z "$input" || ! -f "$input" ]]; then
    echo "Error: Input file not found: ${input:-<none>}" >&2
    exit 1
  fi
  if [[ -z "$output" ]]; then
    echo "Error: -o/--output required" >&2
    exit 1
  fi

  # WxH, where either side may be negative (ffmpeg's scale filter uses
  # -1 / -2 for "keep aspect ratio").
  local res_re='^-?[0-9]+x-?[0-9]+$'
  if [[ -n "$resolution" && ! "$resolution" =~ $res_re ]]; then
    echo "Error: --resolution must look like WIDTHxHEIGHT (e.g. 1280x720)" >&2
    exit 1
  fi

  local ext v_codec a_codec
  ext="$(get_ext "$output")"
  v_codec="$(video_codec_for "$ext")"
  a_codec="$(audio_codec_for_container "$ext")"

  mkdir -p "$(dirname "$output")"

  # -loglevel error keeps ffmpeg quiet on success but lets real errors
  # through; -nostdin stops it from consuming the caller's stdin.
  local cmd=(ffmpeg -hide_banner -loglevel error -nostdin -y -i "$input")

  if [[ -n "$resolution" ]]; then
    cmd+=(-vf "scale=${resolution%%x*}:${resolution##*x}")
  fi

  cmd+=(-c:v "$v_codec")
  case "$v_codec" in
    libx264 | libx265) cmd+=(-crf "$crf" -preset "$preset" -pix_fmt yuv420p) ;;
    # "-b:v 0" switches libvpx-vp9 to constant-quality mode driven by -crf.
    libvpx-vp9) cmd+=(-crf "$crf" -b:v 0) ;;
  esac

  if [[ -n "$fps" ]]; then
    cmd+=(-r "$fps")
  fi

  if has_audio_stream "$input"; then
    cmd+=(-c:a "$a_codec" -b:a 192k)
  else
    cmd+=(-an)
  fi

  cmd+=("$output")

  echo "Converting: $input -> $output ($v_codec/$a_codec)"
  if ! "${cmd[@]}"; then
    echo "Error: ffmpeg failed to convert $input" >&2
    exit 1
  fi
  echo " Done: $output"
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: convert-audio
|
||||
# ============================================================================
|
||||
#######################################
# Subcommand convert-audio: re-encode an audio file into the format implied
# by the output file's extension.
#
# Arguments: INPUT -o OUTPUT [--bitrate RATE] [--sample-rate HZ]
#            [--channels N]
#            The first argument that is not a recognised option is taken as
#            INPUT; further unrecognised arguments are ignored.
# Outputs:   progress on stdout, errors (including ffmpeg's own) on stderr
# Returns:   exits 1 on usage errors or when ffmpeg fails
#######################################
cmd_convert_audio() {
  local input="" output="" bitrate="192k" sample_rate="" channels=""

  while [[ $# -gt 0 ]]; do
    # Value-taking options need a value; otherwise "$2" aborts under
    # `set -u` without a useful message.
    case "$1" in
      -o | --output | --bitrate | --sample-rate | --channels)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value" >&2
          exit 1
        fi
        ;;
    esac

    case "$1" in
      -o | --output) output="$2"; shift 2 ;;
      --bitrate) bitrate="$2"; shift 2 ;;
      --sample-rate) sample_rate="$2"; shift 2 ;;
      --channels) channels="$2"; shift 2 ;;
      *)
        if [[ -z "$input" ]]; then
          input="$1"
        fi
        shift
        ;;
    esac
  done

  if [[ -z "$input" || ! -f "$input" ]]; then
    echo "Error: Input file not found: ${input:-<none>}" >&2
    exit 1
  fi
  if [[ -z "$output" ]]; then
    echo "Error: -o/--output required" >&2
    exit 1
  fi

  local ext codec
  ext="$(get_ext "$output")"
  codec="$(audio_codec_for_format "$ext")"

  mkdir -p "$(dirname "$output")"

  # -loglevel error keeps ffmpeg quiet on success but lets real errors
  # through; -nostdin stops it from consuming the caller's stdin.
  local cmd=(ffmpeg -hide_banner -loglevel error -nostdin -y
    -i "$input" -c:a "$codec" -b:a "$bitrate")
  if [[ -n "$sample_rate" ]]; then
    cmd+=(-ar "$sample_rate")
  fi
  if [[ -n "$channels" ]]; then
    cmd+=(-ac "$channels")
  fi
  cmd+=("$output")

  echo "Converting audio: $input -> $output ($codec)"
  if ! "${cmd[@]}"; then
    echo "Error: ffmpeg failed to convert $input" >&2
    exit 1
  fi
  echo " Done: $output"
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: concat-video
|
||||
# ============================================================================
|
||||
#######################################
# Subcommand concat-video: join two or more videos, with a crossfade
# between consecutive clips when --crossfade is greater than zero.
#
# Arguments: FILE FILE [FILE...] -o OUTPUT [--crossfade SECONDS]
#            (default crossfade: 0.5)
# Outputs:   progress on stdout, errors (including ffmpeg's own) on stderr
# Returns:   exits non-zero on usage errors or when the final ffmpeg run
#            fails
#
# Strategy: first try an xfade/acrossfade filter graph. If that fails (or
# crossfade is 0) fall back to ffmpeg's concat demuxer with a re-encode.
# Numeric work is done with awk so the script has no dependency on bc.
#######################################
cmd_concat_video() {
  local output="" crossfade=0.5
  local inputs=()

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -o | --output | --crossfade)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value" >&2
          exit 1
        fi
        ;;
    esac

    case "$1" in
      -o | --output) output="$2"; shift 2 ;;
      --crossfade) crossfade="$2"; shift 2 ;;
      *) inputs+=("$1"); shift ;;
    esac
  done

  if [[ ${#inputs[@]} -lt 2 ]]; then
    echo "Error: At least 2 input files required" >&2
    exit 1
  fi
  if [[ -z "$output" ]]; then
    echo "Error: -o/--output required" >&2
    exit 1
  fi

  local num_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
  if [[ ! "$crossfade" =~ $num_re ]]; then
    echo "Error: --crossfade must be a non-negative number" >&2
    exit 1
  fi

  # Resolve every input to an absolute path once; both strategies need it.
  local vp dir
  local abs_inputs=()
  for vp in "${inputs[@]}"; do
    if [[ ! -f "$vp" ]]; then
      echo "Error: Input file not found: $vp" >&2
      exit 1
    fi
    dir="$(cd "$(dirname "$vp")" && pwd)"
    abs_inputs+=("$dir/$(basename "$vp")")
  done

  mkdir -p "$(dirname "$output")"

  local fps
  fps="$(get_video_fps "${inputs[0]}")"

  # Audio is only carried through the crossfade graph when every clip has
  # an audio stream.
  local has_audio=true
  for vp in "${inputs[@]}"; do
    if ! has_audio_stream "$vp"; then
      has_audio=false
      break
    fi
  done

  local n=${#inputs[@]} i

  if awk -v x="$crossfade" 'BEGIN { exit !(x > 0) }'; then
    local durations=()
    for vp in "${inputs[@]}"; do
      durations+=("$(get_duration "$vp")")
    done

    local ff_inputs=()
    for vp in "${abs_inputs[@]}"; do
      ff_inputs+=(-i "$vp")
    done

    # offsets[i] is where transition i starts on the output timeline:
    # previous offset + duration of clip i - crossfade length.
    local offsets=() cumulative=0 offset
    for ((i = 0; i < n - 1; i++)); do
      offset="$(awk -v c="$cumulative" -v d="${durations[$i]}" -v x="$crossfade" \
        'BEGIN { printf "%.6f", c + d - x }')"
      offsets+=("$offset")
      cumulative="$offset"
    done

    # Chain the clips pairwise: step i fades the running result into clip
    # i. Intermediate results are labelled [xvN]/[xaN]; the last step
    # writes [vout]/[aout].
    local vf_parts=() af_parts=()
    local in_v in_a out_v out_a
    for ((i = 1; i < n; i++)); do
      if ((i == 1)); then
        in_v="[0:v]"
        in_a="[0:a]"
      else
        in_v="[xv$((i - 1))]"
        in_a="[xa$((i - 1))]"
      fi
      if ((i == n - 1)); then
        out_v="[vout]"
        out_a="[aout]"
      else
        out_v="[xv${i}]"
        out_a="[xa${i}]"
      fi
      vf_parts+=("${in_v}[${i}:v]xfade=transition=fade:duration=${crossfade}:offset=${offsets[$((i - 1))]}${out_v}")
      if [[ "$has_audio" == true ]]; then
        af_parts+=("${in_a}[${i}:a]acrossfade=d=${crossfade}:c1=tri:c2=tri${out_a}")
      fi
    done

    local graph=("${vf_parts[@]}")
    if [[ "$has_audio" == true ]]; then
      graph+=("${af_parts[@]}")
    fi
    local fc
    fc="$(IFS=';'; printf '%s' "${graph[*]}")"

    local cmd=(ffmpeg -hide_banner -loglevel error -nostdin -y
      "${ff_inputs[@]}" -filter_complex "$fc" -map "[vout]")
    if [[ "$has_audio" == true ]]; then
      cmd+=(-map "[aout]")
    fi
    cmd+=(-c:v libx264 -preset medium -crf 18 -pix_fmt yuv420p -r "$fps")
    if [[ "$has_audio" == true ]]; then
      cmd+=(-c:a aac -b:a 192k)
    fi
    cmd+=("$output")

    echo "Concatenating $n videos with ${crossfade}s crossfade..."
    if "${cmd[@]}"; then
      echo " Done: $output"
      return 0
    fi
    echo " Crossfade failed, falling back to re-encode..."
  fi

  # Fallback: concat demuxer driven by a list file.
  # The template ends in X's because BSD/macOS mktemp only randomises
  # trailing X's.
  local concat_file
  concat_file="$(mktemp "${TMPDIR:-/tmp}/concat_XXXXXX")" || {
    echo "Error: could not create temporary file" >&2
    exit 1
  }

  # Inside the list file paths are single-quoted; an embedded single quote
  # has to be written as '\'' .
  local sq="'" esc="'\\''"
  for vp in "${abs_inputs[@]}"; do
    printf "file '%s'\n" "${vp//$sq/$esc}" >> "$concat_file"
  done

  # Capture the status, not relying on `set -e`, so the temporary file is
  # removed on failure too.
  local rc=0
  ffmpeg -hide_banner -loglevel error -nostdin -y \
    -f concat -safe 0 -i "$concat_file" \
    -c:v libx264 -preset medium -crf 18 -pix_fmt yuv420p -r "$fps" \
    -c:a aac -b:a 192k "$output" || rc=$?
  rm -f -- "$concat_file"

  if ((rc != 0)); then
    echo "Error: ffmpeg failed to concatenate videos" >&2
    exit "$rc"
  fi
  echo " Done: $output"
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: concat-audio
|
||||
# ============================================================================
|
||||
#######################################
# Subcommand concat-audio: join audio files, optionally crossfading
# between consecutive files.
#
# Arguments: FILE [FILE...] -o OUTPUT [--crossfade SECONDS]
#            (default crossfade: 0, i.e. plain concatenation)
# Outputs:   progress on stdout, errors (including ffmpeg's own) on stderr
# Returns:   exits non-zero on usage errors or when the final ffmpeg run
#            fails
#
# A single input is simply copied to OUTPUT. With a crossfade an
# acrossfade filter chain is tried first; otherwise, or if that fails,
# ffmpeg's concat demuxer is used. Numeric work is done with awk so the
# script has no dependency on bc.
#######################################
cmd_concat_audio() {
  local output="" crossfade=0
  local inputs=()

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -o | --output | --crossfade)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value" >&2
          exit 1
        fi
        ;;
    esac

    case "$1" in
      -o | --output) output="$2"; shift 2 ;;
      --crossfade) crossfade="$2"; shift 2 ;;
      *) inputs+=("$1"); shift ;;
    esac
  done

  if [[ ${#inputs[@]} -lt 1 ]]; then
    echo "Error: At least 1 input file required" >&2
    exit 1
  fi
  if [[ -z "$output" ]]; then
    echo "Error: -o/--output required" >&2
    exit 1
  fi

  local num_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
  if [[ ! "$crossfade" =~ $num_re ]]; then
    echo "Error: --crossfade must be a non-negative number" >&2
    exit 1
  fi

  local ap
  for ap in "${inputs[@]}"; do
    if [[ ! -f "$ap" ]]; then
      echo "Error: Input file not found: $ap" >&2
      exit 1
    fi
  done

  mkdir -p "$(dirname "$output")"

  if [[ ${#inputs[@]} -eq 1 ]]; then
    cp "${inputs[0]}" "$output"
    echo " Done: $output"
    return 0
  fi

  # Resolve every input to an absolute path once; both strategies need it.
  local dir
  local abs_inputs=()
  for ap in "${inputs[@]}"; do
    dir="$(cd "$(dirname "$ap")" && pwd)"
    abs_inputs+=("$dir/$(basename "$ap")")
  done

  local ext codec
  ext="$(get_ext "$output")"
  codec="$(audio_codec_for_format "$ext")"
  local n=${#inputs[@]} i

  if awk -v x="$crossfade" 'BEGIN { exit !(x > 0) }'; then
    local ff_inputs=()
    for ap in "${abs_inputs[@]}"; do
      ff_inputs+=(-i "$ap")
    done

    # Chain the files pairwise: step i fades the running result into file
    # i. Intermediate results are labelled [xaN]; the last step writes
    # [aout].
    local af_parts=() in_a out_a
    for ((i = 1; i < n; i++)); do
      if ((i == 1)); then
        in_a="[0:a]"
      else
        in_a="[xa$((i - 1))]"
      fi
      if ((i == n - 1)); then
        out_a="[aout]"
      else
        out_a="[xa${i}]"
      fi
      af_parts+=("${in_a}[${i}:a]acrossfade=d=${crossfade}:c1=tri:c2=tri${out_a}")
    done

    local fc
    fc="$(IFS=';'; printf '%s' "${af_parts[*]}")"

    echo "Concatenating $n audio files with ${crossfade}s crossfade..."
    if ffmpeg -hide_banner -loglevel error -nostdin -y \
      "${ff_inputs[@]}" -filter_complex "$fc" -map "[aout]" \
      -c:a "$codec" -b:a 192k "$output"; then
      echo " Done: $output"
      return 0
    fi
    echo " Crossfade failed, falling back..."
  fi

  # Fallback: concat demuxer driven by a list file.
  # The template ends in X's because BSD/macOS mktemp only randomises
  # trailing X's.
  local concat_file
  concat_file="$(mktemp "${TMPDIR:-/tmp}/concat_XXXXXX")" || {
    echo "Error: could not create temporary file" >&2
    exit 1
  }

  # Inside the list file paths are single-quoted; an embedded single quote
  # has to be written as '\'' .
  local sq="'" esc="'\\''"
  for ap in "${abs_inputs[@]}"; do
    printf "file '%s'\n" "${ap//$sq/$esc}" >> "$concat_file"
  done

  # Capture the status, not relying on `set -e`, so the temporary file is
  # removed on failure too.
  local rc=0
  ffmpeg -hide_banner -loglevel error -nostdin -y \
    -f concat -safe 0 -i "$concat_file" \
    -c:a "$codec" -b:a 192k "$output" || rc=$?
  rm -f -- "$concat_file"

  if ((rc != 0)); then
    echo "Error: ffmpeg failed to concatenate audio files" >&2
    exit "$rc"
  fi
  echo " Done: $output"
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: extract-audio
|
||||
# ============================================================================
|
||||
#######################################
# Subcommand extract-audio: write the audio of a media file to OUTPUT,
# encoded according to OUTPUT's extension.
#
# Arguments: INPUT -o OUTPUT [--bitrate RATE]
#            The first argument that is not a recognised option is taken as
#            INPUT; further unrecognised arguments are ignored.
# Outputs:   progress on stdout, errors (including ffmpeg's own) on stderr
# Returns:   exits 1 on usage errors, when INPUT has no audio stream, or
#            when ffmpeg fails
#######################################
cmd_extract_audio() {
  local input="" output="" bitrate="192k"

  while [[ $# -gt 0 ]]; do
    # Value-taking options need a value; otherwise "$2" aborts under
    # `set -u` without a useful message.
    case "$1" in
      -o | --output | --bitrate)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value" >&2
          exit 1
        fi
        ;;
    esac

    case "$1" in
      -o | --output) output="$2"; shift 2 ;;
      --bitrate) bitrate="$2"; shift 2 ;;
      *)
        if [[ -z "$input" ]]; then
          input="$1"
        fi
        shift
        ;;
    esac
  done

  if [[ -z "$input" || ! -f "$input" ]]; then
    echo "Error: Input not found: ${input:-<none>}" >&2
    exit 1
  fi
  if [[ -z "$output" ]]; then
    echo "Error: -o/--output required" >&2
    exit 1
  fi
  if ! has_audio_stream "$input"; then
    echo "Error: No audio stream in $input" >&2
    exit 1
  fi

  local ext codec
  ext="$(get_ext "$output")"
  codec="$(audio_codec_for_format "$ext")"

  mkdir -p "$(dirname "$output")"

  echo "Extracting audio: $input -> $output"
  # -vn drops video; -loglevel error keeps ffmpeg quiet on success but
  # lets real errors through.
  if ! ffmpeg -hide_banner -loglevel error -nostdin -y \
    -i "$input" -vn -c:a "$codec" -b:a "$bitrate" "$output"; then
    echo "Error: ffmpeg failed to extract audio from $input" >&2
    exit 1
  fi
  echo " Done: $output"
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: trim-video
|
||||
# ============================================================================
|
||||
#######################################
# Subcommand trim-video: cut a section out of a video and re-encode it.
#
# Arguments: INPUT -o OUTPUT [--start S] [--end S | --duration S]
#            --duration wins over --end when both are given. When --end is
#            used, --start and --end must be plain numbers of seconds
#            because the clip length is computed from them.
#            The first argument that is not a recognised option is taken as
#            INPUT; further unrecognised arguments are ignored.
# Outputs:   progress on stdout, errors (including ffmpeg's own) on stderr
# Returns:   exits 1 on usage errors or when ffmpeg fails
#######################################
cmd_trim_video() {
  local input="" output="" start="" end="" duration=""

  while [[ $# -gt 0 ]]; do
    # Value-taking options need a value; otherwise "$2" aborts under
    # `set -u` without a useful message.
    case "$1" in
      -o | --output | --start | --end | --duration)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value" >&2
          exit 1
        fi
        ;;
    esac

    case "$1" in
      -o | --output) output="$2"; shift 2 ;;
      --start) start="$2"; shift 2 ;;
      --end) end="$2"; shift 2 ;;
      --duration) duration="$2"; shift 2 ;;
      *)
        if [[ -z "$input" ]]; then
          input="$1"
        fi
        shift
        ;;
    esac
  done

  if [[ -z "$input" || ! -f "$input" ]]; then
    echo "Error: Input not found: ${input:-<none>}" >&2
    exit 1
  fi
  if [[ -z "$output" ]]; then
    echo "Error: -o/--output required" >&2
    exit 1
  fi

  mkdir -p "$(dirname "$output")"

  # -ss is placed before -i so ffmpeg seeks in the input, which makes the
  # following -t count from the start point.
  local cmd=(ffmpeg -hide_banner -loglevel error -nostdin -y)
  if [[ -n "$start" ]]; then
    cmd+=(-ss "$start")
  fi
  cmd+=(-i "$input")

  if [[ -n "$duration" ]]; then
    cmd+=(-t "$duration")
  elif [[ -n "$end" ]]; then
    local from="${start:-0}"
    local num_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
    if [[ ! "$from" =~ $num_re || ! "$end" =~ $num_re ]]; then
      echo "Error: --start and --end must be numbers of seconds when --end is used" >&2
      exit 1
    fi
    # awk replaces bc here so no extra tool is needed for the subtraction.
    local dur
    dur="$(awk -v e="$end" -v s="$from" 'BEGIN { printf "%.6f", e - s }')"
    if ! awk -v d="$dur" 'BEGIN { exit !(d > 0) }'; then
      echo "Error: --end must be greater than --start" >&2
      exit 1
    fi
    cmd+=(-t "$dur")
  fi

  cmd+=(-c:v libx264 -preset medium -crf 18 -pix_fmt yuv420p)
  if has_audio_stream "$input"; then
    cmd+=(-c:a aac -b:a 192k)
  fi
  cmd+=("$output")

  # Human-readable description of the selected range for the log line.
  local start_str="${start:-0}s" end_str
  if [[ -n "$end" ]]; then
    end_str="${end}s"
  elif [[ -n "$duration" ]]; then
    end_str="+${duration}s"
  else
    end_str="end"
  fi

  echo "Trimming: $input [$start_str - $end_str] -> $output"
  if ! "${cmd[@]}"; then
    echo "Error: ffmpeg failed to trim $input" >&2
    exit 1
  fi
  echo " Done: $output"
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: add-audio
|
||||
# ============================================================================
|
||||
#######################################
# Subcommand add-audio: put an audio track on a video, either mixed with
# the video's existing audio or replacing it.
#
# Arguments: --video FILE --audio FILE -o OUTPUT [--volume X]
#            [--fade-in S] [--fade-out S] [--replace]
#            Mixing happens only when the video already has audio and
#            --replace was not given; otherwise the new track is the only
#            audio in OUTPUT.
# Outputs:   progress on stdout, errors (including ffmpeg's own) on stderr
# Returns:   exits 1 on usage errors or when ffmpeg fails
#
# Numeric work is done with awk so the script has no dependency on bc.
#######################################
cmd_add_audio() {
  local video="" audio="" output="" volume=1.0 fade_in=0 fade_out=0 replace=false

  while [[ $# -gt 0 ]]; do
    # Value-taking options need a value; otherwise "$2" aborts under
    # `set -u` without a useful message.
    case "$1" in
      --video | --audio | -o | --output | --volume | --fade-in | --fade-out)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value" >&2
          exit 1
        fi
        ;;
    esac

    case "$1" in
      --video) video="$2"; shift 2 ;;
      --audio) audio="$2"; shift 2 ;;
      -o | --output) output="$2"; shift 2 ;;
      --volume) volume="$2"; shift 2 ;;
      --fade-in) fade_in="$2"; shift 2 ;;
      --fade-out) fade_out="$2"; shift 2 ;;
      --replace) replace=true; shift ;;
      *) echo "Unknown option: $1" >&2; exit 1 ;;
    esac
  done

  if [[ -z "$video" || ! -f "$video" ]]; then
    echo "Error: Video not found: ${video:-<none>}" >&2
    exit 1
  fi
  if [[ -z "$audio" || ! -f "$audio" ]]; then
    echo "Error: Audio not found: ${audio:-<none>}" >&2
    exit 1
  fi
  if [[ -z "$output" ]]; then
    echo "Error: -o/--output required" >&2
    exit 1
  fi

  # These values are interpolated into the filter graph, so only plain
  # non-negative numbers are accepted.
  local num_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
  local opt_name opt_value
  for opt_name in volume fade_in fade_out; do
    opt_value="${!opt_name}"
    if [[ ! "$opt_value" =~ $num_re ]]; then
      echo "Error: --${opt_name//_/-} must be a non-negative number" >&2
      exit 1
    fi
  done

  mkdir -p "$(dirname "$output")"

  local duration
  duration="$(get_duration "$video")"

  local video_audio=false
  if has_audio_stream "$video"; then
    video_audio=true
  fi

  # Build the filter chain applied to the new audio (input 1).
  local af="[1:a]volume=${volume}"
  if awk -v x="$fade_in" 'BEGIN { exit !(x > 0) }'; then
    af+=",afade=t=in:d=${fade_in}"
  fi
  if awk -v x="$fade_out" 'BEGIN { exit !(x > 0) }'; then
    # The fade-out is timed against the video's duration and clamped so it
    # never starts before 0.
    local fo_start
    fo_start="$(awk -v d="$duration" -v f="$fade_out" \
      'BEGIN { s = d - f; if (s < 0) s = 0; printf "%.6f", s }')"
    af+=",afade=t=out:st=${fo_start}:d=${fade_out}"
  fi

  local mode
  if [[ "$video_audio" == true && "$replace" != true ]]; then
    af+="[newaudio];[0:a][newaudio]amix=inputs=2:duration=first:dropout_transition=2[aout]"
    mode="mixing with"
  else
    af+="[aout]"
    mode="replacing"
  fi

  echo "Adding audio ($mode original): $output"
  # The video stream is copied untouched; only audio is encoded.
  if ! ffmpeg -hide_banner -loglevel error -nostdin -y \
    -i "$video" -i "$audio" \
    -filter_complex "$af" \
    -map 0:v -map "[aout]" \
    -c:v copy -c:a aac -b:a 192k -shortest "$output"; then
    echo "Error: ffmpeg failed to add audio to $video" >&2
    exit 1
  fi
  echo " Done: $output"
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: probe
|
||||
# ============================================================================
|
||||
# Print a short human-readable summary of a media file: container format,
# duration, size, bitrate, and one line per video/audio stream.
#
# Usage: cmd_probe FILE
# Reads the JSON produced by probe_media and extracts fields with jq.
# Exits 1 when FILE is missing.
cmd_probe() {
  local input="${1:-}"

  if [[ -z "$input" || ! -f "$input" ]]; then
    echo "Error: File not found: ${input:-<none>}" >&2
    exit 1
  fi

  local info
  info="$(probe_media "$input")"

  local fmt_name dur size br
  fmt_name="$(echo "$info" | jq -r '.format.format_long_name // "unknown"')"
  dur="$(echo "$info" | jq -r '.format.duration // "0"')"
  size="$(echo "$info" | jq -r '.format.size // "0"')"
  br="$(echo "$info" | jq -r '.format.bit_rate // "0"')"

  # A field may be present but non-numeric (for example "N/A"); fall back to
  # 0 so that printf and bc do not fail on it.
  local num_re='^[0-9]+(\.[0-9]+)?$'
  [[ "$dur" =~ $num_re ]] || dur=0
  [[ "$size" =~ $num_re ]] || size=0
  [[ "$br" =~ $num_re ]] || br=0

  echo "File: $input"
  echo "Format: $fmt_name"
  # LC_ALL=C: the values use "." as the decimal separator, which printf
  # rejects in locales that expect a decimal comma.
  LC_ALL=C printf "Duration: %.2fs\n" "$dur"
  LC_ALL=C printf "Size: %.2f MB\n" "$(echo "$size / 1048576" | bc -l)"
  LC_ALL=C printf "Bitrate: %.0f kbps\n" "$(echo "$br / 1000" | bc -l)"

  echo "$info" | jq -r '.streams[] | if .codec_type == "video" then "Video: \(.codec_name) \(.width)x\(.height) @ \(.r_frame_rate) fps" elif .codec_type == "audio" then "Audio: \(.codec_name) \(.sample_rate)Hz \(.channels)ch" else empty end'
}
|
||||
|
||||
# ============================================================================
|
||||
# Main dispatcher
|
||||
# ============================================================================
|
||||
|
||||
# Print the command overview and example invocations to stdout.
usage() {
  local -a help_lines=(
    'MiniMax Multi-Modal Toolkit Media Tools'
    ''
    'Usage:'
    'media_tools.sh <command> [options]'
    ''
    'Commands:'
    'convert-video Convert video format'
    'convert-audio Convert audio format'
    'concat-video Concatenate videos with crossfade'
    'concat-audio Concatenate audio files'
    'extract-audio Extract audio from video'
    'trim-video Trim video by time range'
    'add-audio Add/overlay audio on video'
    'probe Show media file info'
    ''
    'Examples:'
    'media_tools.sh convert-video input.webm -o output.mp4'
    'media_tools.sh convert-audio input.wav -o output.mp3'
    'media_tools.sh concat-video seg1.mp4 seg2.mp4 -o merged.mp4'
    'media_tools.sh extract-audio video.mp4 -o audio.mp3'
    'media_tools.sh trim-video input.mp4 --start 5 --end 15 -o clip.mp4'
    'media_tools.sh add-audio --video video.mp4 --audio bgm.mp3 -o output.mp4'
    'media_tools.sh probe input.mp4'
  )
  printf '%s\n' "${help_lines[@]}"
}
|
||||
|
||||
# Dispatch the first argument to the matching cmd_* subcommand and forward
# the remaining arguments to it.
#   no arguments       -> print usage, exit 0
#   -h | --help | help -> print usage
#   unknown command    -> error plus usage on stderr, exit 1
main() {
  (( $# > 0 )) || { usage; exit 0; }

  local subcommand="$1"
  shift

  case "$subcommand" in
    -h|--help|help)
      usage
      ;;
    convert-video|convert-audio|concat-video|concat-audio|extract-audio|trim-video|add-audio|probe)
      # Handler names are the subcommand with dashes turned into underscores.
      "cmd_${subcommand//-/_}" "$@"
      ;;
    *)
      echo "Unknown command: $subcommand" >&2
      usage >&2
      exit 1
      ;;
  esac
}
|
||||
|
||||
# Script entry point: pass every command-line argument through to main.
main "$@"
|
||||
266
skills/minimax-multimodal-toolkit/scripts/music/generate_music.sh
Executable file
266
skills/minimax-multimodal-toolkit/scripts/music/generate_music.sh
Executable file
@@ -0,0 +1,266 @@
|
||||
#!/usr/bin/env bash
|
||||
# MiniMax Music Generation CLI (pure bash)
|
||||
#
|
||||
# Usage:
|
||||
# bash scripts/music/generate_music.sh --lyrics "[verse]\nHello world" --output output/song.mp3 --download
|
||||
# bash scripts/music/generate_music.sh --instrumental --prompt "ambient electronic" -o output/ambient.mp3 --download
|
||||
# bash scripts/music/generate_music.sh --lyrics "[verse]\nStars" --genre pop --mood happy -o output/happy.mp3 --download
|
||||
set -euo pipefail
|
||||
|
||||
# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
# Directory two levels above SCRIPT_DIR; load_env looks for a .env file here.
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
|
||||
# ============================================================================
|
||||
# Common functions (shared with generate_voice.sh)
|
||||
# ============================================================================
|
||||
|
||||
# Load KEY=VALUE pairs from the first .env file found, checking
# "$PROJECT_ROOT/.env" and then "$(pwd)/.env".
#
# Parsing rules:
#   - everything from the first '#' on a line is dropped as a comment
#   - whitespace around the key and around the value is trimmed
#   - one pair of matching surrounding quotes is removed from the value
#   - lines without '=' or with a key that is not a valid shell identifier
#     are skipped
#   - a variable that is already set to a non-empty value is left untouched
# Globals:  PROJECT_ROOT (read); exports the variables found in the file.
# Returns:  0 always.
load_env() {
  local env_file line key val
  for env_file in "$PROJECT_ROOT/.env" "$(pwd)/.env"; do
    [[ -f "$env_file" ]] || continue

    while IFS= read -r line || [[ -n "$line" ]]; do
      line="${line%%#*}"
      [[ "$line" == *=* ]] || continue

      key="${line%%=*}"
      val="${line#*=}"

      # Trim with parameter expansion. Piping through xargs would also
      # interpret quotes and backslashes, and it fails on a lone ' or ".
      key="${key#"${key%%[![:space:]]*}"}"
      key="${key%"${key##*[![:space:]]}"}"
      val="${val#"${val%%[![:space:]]*}"}"
      val="${val%"${val##*[![:space:]]}"}"

      # ${!key} and export abort on names that are not identifiers.
      [[ "$key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue

      if [[ ${#val} -ge 2 ]]; then
        case "$val" in
          \"*\"|\'*\') val="${val:1:${#val}-2}" ;;
        esac
      fi

      if [[ -z "${!key:-}" ]]; then
        export "$key=$val"
      fi
    done < "$env_file"

    # Only the first existing file is used.
    return 0
  done
  return 0
}
|
||||
|
||||
# Abort with exit status 1 unless MINIMAX_API_KEY is set to a non-empty value.
check_api_key() {
  [[ -n "${MINIMAX_API_KEY:-}" ]] && return 0

  echo "Error: MINIMAX_API_KEY environment variable is not set." >&2
  exit 1
}
|
||||
|
||||
# ============================================================================
|
||||
# Main
|
||||
# ============================================================================
|
||||
|
||||
# Generate music through the MiniMax music_generation endpoint.
#
# Flow: load .env and check the API key, parse options, fold the structured
# fields (--genre, --mood, ...) into the prompt, build the JSON payload with
# jq, POST it with curl, then save the result according to --output-format:
#   hex  decode .data.audio with xxd into OUTPUT
#   url  print the audio URL; with --download fetch it into OUTPUT,
#        otherwise write the URL itself into OUTPUT
# Globals: MINIMAX_API_KEY, MINIMAX_API_HOST (read).
# Exits 1 on bad arguments, transport errors, HTTP >= 400, a non-zero
# base_resp.status_code, or missing audio data in the response.
main() {
  load_env
  check_api_key

  local lyrics="" prompt="" model="music-2.5" instrumental=false
  local genre="" mood="" tempo="" bpm="" key="" instruments="" vocals=""
  local use_case="" structure="" avoid="" references=""
  local output="" output_format="url" stream=false download=false
  local sample_rate="" bitrate="" format="" aigc_watermark=""

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --lyrics) lyrics="$2"; shift 2 ;;
      --prompt) prompt="$2"; shift 2 ;;
      --model) model="$2"; shift 2 ;;
      --instrumental) instrumental=true; shift ;;
      --genre) genre="$2"; shift 2 ;;
      --mood) mood="$2"; shift 2 ;;
      --tempo) tempo="$2"; shift 2 ;;
      --bpm) bpm="$2"; shift 2 ;;
      --key) key="$2"; shift 2 ;;
      --instruments) instruments="$2"; shift 2 ;;
      --vocals) vocals="$2"; shift 2 ;;
      --use-case) use_case="$2"; shift 2 ;;
      --structure) structure="$2"; shift 2 ;;
      --avoid) avoid="$2"; shift 2 ;;
      --references) references="$2"; shift 2 ;;
      -o|--output) output="$2"; shift 2 ;;
      --output-format) output_format="$2"; shift 2 ;;
      --stream) stream=true; shift ;;
      --download) download=true; shift ;;
      --sample-rate) sample_rate="$2"; shift 2 ;;
      --bitrate) bitrate="$2"; shift 2 ;;
      --format) format="$2"; shift 2 ;;
      --aigc-watermark) aigc_watermark="$2"; shift 2 ;;
      -h|--help)
        cat <<'USAGE'
MiniMax Music Generation CLI

Usage:
generate_music.sh [options]

Options:
--lyrics TEXT Song lyrics (with [verse]/[chorus] tags)
--prompt TEXT Music style/description prompt
--instrumental Generate instrumental (no vocals)
--model MODEL Model name (default: music-2.5)
--genre TEXT Genre (e.g. pop, rock, jazz)
--mood TEXT Mood (e.g. happy, melancholic)
--tempo TEXT Tempo description (e.g. fast, slow)
--bpm NUMBER Beats per minute
--key TEXT Musical key (e.g. C major, A minor)
--instruments TEXT Instruments to include
--vocals TEXT Vocal style description
--use-case TEXT Use case (e.g. background, theme song)
--structure TEXT Song structure
--avoid TEXT Elements to avoid
--references TEXT Reference tracks/artists
--output-format FMT Output format: url (default) or hex
--stream Request a streamed response
--download Download audio file (for url format)
--sample-rate N Audio sample rate
--bitrate N Audio bitrate
--format FMT Audio format (mp3, wav, etc.)
--aigc-watermark BOOL Set aigc_watermark in the request (true or false)
-o, --output FILE Output file path (required)

Examples:
generate_music.sh --instrumental --prompt "ambient electronic" -o ambient.mp3 --download
generate_music.sh --lyrics "[verse]\nHello world" -o song.mp3 --download
generate_music.sh --lyrics "[verse]\nStars" --genre pop --mood happy -o happy.mp3 --download
USAGE
        exit 0
        ;;
      *) echo "Unknown option: $1" >&2; exit 1 ;;
    esac
  done

  if [[ -z "$output" ]]; then
    echo "Error: --output / -o is required" >&2
    exit 1
  fi

  # Reject unsupported formats up front; otherwise the request would be sent
  # and its result silently discarded.
  case "$output_format" in
    url|hex) ;;
    *)
      echo "Error: --output-format must be 'url' or 'hex' (got: $output_format)" >&2
      exit 1
      ;;
  esac

  # The documented usage passes line breaks as a literal backslash-n
  # (--lyrics "[verse]\nHello"); turn those into real newlines so jq does not
  # encode them as the two characters '\' and 'n'.
  local newline=$'\n' escaped_newline='\n'
  lyrics=${lyrics//"$escaped_newline"/$newline}

  # Build prompt from structured fields
  local field_parts=()
  if [[ -n "$genre" ]]; then field_parts+=("Genre: $genre"); fi
  if [[ -n "$mood" ]]; then field_parts+=("Mood: $mood"); fi
  if [[ -n "$tempo" ]]; then field_parts+=("Tempo: $tempo"); fi
  if [[ -n "$bpm" ]]; then field_parts+=("BPM: $bpm"); fi
  if [[ -n "$key" ]]; then field_parts+=("Key: $key"); fi
  if [[ -n "$instruments" ]]; then field_parts+=("Instruments: $instruments"); fi
  if [[ -n "$vocals" ]]; then field_parts+=("Vocals: $vocals"); fi
  if [[ -n "$use_case" ]]; then field_parts+=("Use case: $use_case"); fi
  if [[ -n "$structure" ]]; then field_parts+=("Structure: $structure"); fi
  if [[ -n "$avoid" ]]; then field_parts+=("Avoid: $avoid"); fi
  if [[ -n "$references" ]]; then field_parts+=("References: $references"); fi

  # Join with ". ". "${arr[*]}" only ever uses the first character of IFS,
  # so IFS='. ' would glue the parts together with a bare ".".
  local field_prompt="" part
  if [[ ${#field_parts[@]} -gt 0 ]]; then
    for part in "${field_parts[@]}"; do
      field_prompt+="${field_prompt:+. }$part"
    done
  fi

  if [[ -n "$field_prompt" ]]; then
    prompt="${prompt:+$prompt. }$field_prompt"
  fi

  local request_lyrics="$lyrics"
  if $instrumental; then
    # music-2.5 does not support is_instrumental — use lyrics workaround
    request_lyrics="[intro] [outro]"
    prompt="${prompt:+$prompt. }pure music, no lyrics"
  fi

  # Build payload
  local payload
  payload=$(jq -n \
    --arg model "$model" \
    --arg prompt "$prompt" \
    --arg of "$output_format" \
    --argjson stream "$stream" \
    --arg lyrics "$request_lyrics" \
    '{model: $model, prompt: $prompt, output_format: $of, stream: $stream, lyrics: $lyrics}')

  # Audio settings
  local audio_setting="{}"
  if [[ -n "$sample_rate" ]]; then
    audio_setting=$(echo "$audio_setting" | jq --argjson sr "$sample_rate" '. + {sample_rate: $sr}')
  fi
  if [[ -n "$bitrate" ]]; then
    audio_setting=$(echo "$audio_setting" | jq --argjson br "$bitrate" '. + {bitrate: $br}')
  fi
  if [[ -n "$format" ]]; then
    audio_setting=$(echo "$audio_setting" | jq --arg f "$format" '. + {format: $f}')
  fi
  if [[ "$audio_setting" != "{}" ]]; then
    payload=$(echo "$payload" | jq --argjson as "$audio_setting" '. + {audio_setting: $as}')
  fi

  if [[ -n "$aigc_watermark" ]]; then
    payload=$(echo "$payload" | jq --argjson aw "$aigc_watermark" '. + {aigc_watermark: $aw}')
  fi

  local api_host="${MINIMAX_API_HOST:-https://api.minimaxi.com}"
  local api_url="${api_host}/v1/music_generation"

  echo "Generating music with model: $model"
  echo "Output format: $output_format"

  # Send request via curl; -w appends the HTTP status on its own last line.
  local raw_output http_code response
  raw_output="$(curl -s -w "\n%{http_code}" \
    -X POST "$api_url" \
    -H "Authorization: Bearer ${MINIMAX_API_KEY}" \
    -H "Content-Type: application/json" \
    --max-time 300 \
    -d "$payload" 2>/dev/null)" || {
    echo "Error: curl request failed" >&2
    exit 1
  }

  http_code="${raw_output##*$'\n'}"
  response="${raw_output%$'\n'*}"

  if [[ "$http_code" -ge 400 ]] 2>/dev/null; then
    echo "Error: API returned HTTP $http_code" >&2
    echo "$response" >&2
    exit 1
  fi

  local status_code
  status_code="$(echo "$response" | jq -r '.base_resp.status_code // 0' 2>/dev/null)" || true
  if [[ "$status_code" != "0" && -n "$status_code" ]]; then
    echo "API error: $(echo "$response" | jq '.base_resp')" >&2
    exit 1
  fi

  mkdir -p "$(dirname "$output")"

  if [[ "$output_format" == "hex" ]]; then
    local audio_hex
    audio_hex="$(echo "$response" | jq -r '.data.audio // empty')"
    if [[ -z "$audio_hex" ]]; then
      echo "Error: No audio hex data in response." >&2
      exit 1
    fi
    echo "$audio_hex" | xxd -r -p > "$output"
    echo "Audio saved to: $output"

  elif [[ "$output_format" == "url" ]]; then
    local audio_url
    audio_url="$(echo "$response" | jq -r '.data.audio_url // .data.audio // .data.audio_file.download_url // empty')"
    if [[ -z "$audio_url" ]]; then
      echo "Error: No audio URL in response." >&2
      echo "$response" | jq . >&2
      exit 1
    fi
    echo "Audio URL: $audio_url"
    if $download; then
      # -f: treat HTTP errors as failures instead of saving the error page as
      # audio; -L: follow redirects; -S: show the reason on failure.
      if ! curl -fsSL -o "$output" --max-time 120 "$audio_url"; then
        echo "Error: failed to download audio from $audio_url" >&2
        rm -f -- "$output"
        exit 1
      fi
      echo "Audio downloaded to: $output"
    else
      echo "Use --download to save the file, or download manually from the URL above."
      echo "$audio_url" > "$output"
      echo "URL written to: $output"
    fi
  fi

  # Print extra info if present
  local extra
  extra="$(echo "$response" | jq -r '.extra_info // .data.extra_info // empty' 2>/dev/null)" || true
  if [[ -n "$extra" && "$extra" != "null" ]]; then
    echo "Extra info: $extra"
  fi
}
|
||||
|
||||
# Script entry point: pass every command-line argument through to main.
main "$@"
|
||||
934
skills/minimax-multimodal-toolkit/scripts/tts/generate_voice.sh
Executable file
934
skills/minimax-multimodal-toolkit/scripts/tts/generate_voice.sh
Executable file
@@ -0,0 +1,934 @@
|
||||
#!/usr/bin/env bash
|
||||
# MiniMax Voice CLI — Unified TTS command-line interface (pure bash)
|
||||
#
|
||||
# Usage:
|
||||
# bash scripts/tts/generate_voice.sh tts "Hello world" -o hello.mp3
|
||||
# bash scripts/tts/generate_voice.sh clone my_voice.mp3 --voice-id my-custom-voice
|
||||
# bash scripts/tts/generate_voice.sh design "A gentle female voice" --voice-id designed-voice-1
|
||||
# bash scripts/tts/generate_voice.sh list-voices
|
||||
# bash scripts/tts/generate_voice.sh validate segments.json
|
||||
# bash scripts/tts/generate_voice.sh generate segments.json -o output.mp3
|
||||
# bash scripts/tts/generate_voice.sh merge file1.mp3 file2.mp3 -o combined.mp3
|
||||
# bash scripts/tts/generate_voice.sh convert input.wav -o output.mp3
|
||||
# bash scripts/tts/generate_voice.sh check-env
|
||||
set -euo pipefail
|
||||
|
||||
# ============================================================================
|
||||
# Configuration
|
||||
# ============================================================================
|
||||
# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
# Directory two levels above SCRIPT_DIR; load_env looks for a .env file here.
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
|
||||
# ============================================================================
|
||||
# Common functions
|
||||
# ============================================================================
|
||||
|
||||
# Load KEY=VALUE pairs from the first .env file found, checking
# "$PROJECT_ROOT/.env" and then "$(pwd)/.env".
#
# Parsing rules:
#   - everything from the first '#' on a line is dropped as a comment
#   - whitespace around the key and around the value is trimmed
#   - one pair of matching surrounding quotes is removed from the value
#   - lines without '=' or with a key that is not a valid shell identifier
#     are skipped
#   - a variable that is already set to a non-empty value is left untouched
# Globals:  PROJECT_ROOT (read); exports the variables found in the file.
# Returns:  0 always.
load_env() {
  local env_file line key val
  for env_file in "$PROJECT_ROOT/.env" "$(pwd)/.env"; do
    [[ -f "$env_file" ]] || continue

    while IFS= read -r line || [[ -n "$line" ]]; do
      line="${line%%#*}"   # strip comments
      [[ "$line" == *=* ]] || continue

      key="${line%%=*}"
      val="${line#*=}"

      # Trim with parameter expansion. Piping through xargs would also
      # interpret quotes and backslashes, and it fails on a lone ' or ".
      key="${key#"${key%%[![:space:]]*}"}"
      key="${key%"${key##*[![:space:]]}"}"
      val="${val#"${val%%[![:space:]]*}"}"
      val="${val%"${val##*[![:space:]]}"}"

      # ${!key} and export abort on names that are not identifiers.
      [[ "$key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue

      # Remove surrounding quotes
      if [[ ${#val} -ge 2 ]]; then
        case "$val" in
          \"*\"|\'*\') val="${val:1:${#val}-2}" ;;
        esac
      fi

      # Only set if not already in environment
      if [[ -z "${!key:-}" ]]; then
        export "$key=$val"
      fi
    done < "$env_file"

    # Only the first existing file is used.
    return 0
  done
  return 0
}
|
||||
|
||||
# Abort with exit status 1 (and a hint on how to set it) unless
# MINIMAX_API_KEY is set to a non-empty value.
check_api_key() {
  [[ -n "${MINIMAX_API_KEY:-}" ]] && return 0

  {
    echo "Error: MINIMAX_API_KEY environment variable is not set"
    echo " export MINIMAX_API_KEY='your-key'"
  } >&2
  exit 1
}
|
||||
|
||||
# ensure_dir DIR
# Create DIR (and any missing parents). An empty DIR is a successful no-op,
# so callers running under `set -e` are not aborted by it.
# Returns: 0 when nothing had to be done, otherwise mkdir's exit status.
ensure_dir() {
  local dir="${1:-}"
  if [[ -n "$dir" ]]; then
    mkdir -p -- "$dir"
  fi
}
|
||||
|
||||
# Base URL for every API call: MINIMAX_API_HOST (default
# https://api.minimaxi.com) followed by "/v1".
# NOTE(review): this is expanded when this line executes, so a
# MINIMAX_API_HOST that load_env exports afterwards would not be reflected
# here; confirm where load_env is called relative to this assignment.
API_BASE="${MINIMAX_API_HOST:-https://api.minimaxi.com}/v1"
|
||||
|
||||
# _api_finish RAW
# Shared tail of api_request and api_upload. RAW is curl output produced with
# -w "\n%{http_code}": the response body, a newline, then the HTTP status.
# Prints the body to stdout on success. Exits 1 when the HTTP status is
# >= 400 or the JSON body carries a non-zero base_resp.status_code.
_api_finish() {
  local raw="$1"
  local http_code="${raw##*$'\n'}"
  local response="${raw%$'\n'*}"

  # 2>/dev/null hides the complaint [[ ]] prints for a non-numeric status;
  # such a status is simply not treated as an HTTP error.
  if [[ "$http_code" -ge 400 ]] 2>/dev/null; then
    echo "Error: API returned HTTP $http_code" >&2
    echo "$response" >&2
    exit 1
  fi

  # Check API-level error. A body that is not JSON leaves status_code empty
  # and is passed through unchanged.
  local status_code
  status_code="$(echo "$response" | jq -r '.base_resp.status_code // 0' 2>/dev/null)" || true
  if [[ "$status_code" != "0" && -n "$status_code" ]]; then
    local status_msg
    status_msg="$(echo "$response" | jq -r '.base_resp.status_msg // "Unknown error"')"
    echo "Error: API error [$status_code]: $status_msg" >&2
    exit 1
  fi

  echo "$response"
}

# api_request METHOD ENDPOINT [JSON_BODY]
# Send an authenticated request to ${API_BASE}/ENDPOINT and print the raw
# JSON response to stdout. JSON_BODY, when given, is sent as
# application/json.
# Globals: API_BASE, MINIMAX_API_KEY (read).
# Exits 1 on transport failure, HTTP >= 400, or an API-level error.
api_request() {
  local method="$1" endpoint="$2" body="${3:-}"

  local -a curl_args=(
    -sS -w "\n%{http_code}"
    -X "$method"
    -H "Authorization: Bearer ${MINIMAX_API_KEY}"
    -H "Accept-Encoding: gzip, deflate"
    --compressed
    --max-time 120
  )
  if [[ -n "$body" ]]; then
    curl_args+=(-H "Content-Type: application/json" -d "$body")
  fi
  curl_args+=("${API_BASE}/${endpoint#/}")

  # -sS: no progress meter, but curl still explains a failure on stderr.
  local raw rc=0
  raw="$(curl "${curl_args[@]}")" || rc=$?
  if [[ $rc -ne 0 ]]; then
    echo "Error: curl request failed (curl exit code $rc)" >&2
    exit 1
  fi

  _api_finish "$raw"
}

# api_upload ENDPOINT FILE_PATH PURPOSE
# Upload FILE_PATH as multipart form data (fields "file" and "purpose") to
# ${API_BASE}/ENDPOINT and print the raw JSON response to stdout.
# Globals: API_BASE, MINIMAX_API_KEY (read).
# Exits 1 on transport failure, HTTP >= 400, or an API-level error.
api_upload() {
  local endpoint="$1" file_path="$2" purpose="$3"
  local url="${API_BASE}/${endpoint#/}"

  local raw rc=0
  raw="$(curl -sS -w "\n%{http_code}" \
    -X POST \
    -H "Authorization: Bearer ${MINIMAX_API_KEY}" \
    -H "Accept-Encoding: gzip, deflate" \
    --compressed \
    -F "file=@${file_path}" \
    -F "purpose=${purpose}" \
    --max-time 120 \
    "$url")" || rc=$?
  if [[ $rc -ne 0 ]]; then
    echo "Error: curl upload failed (curl exit code $rc)" >&2
    exit 1
  fi

  _api_finish "$raw"
}
|
||||
|
||||
# hex_to_file HEX_STRING OUTPUT_PATH
# Decode a plain hex dump with `xxd -r -p` and write the bytes to
# OUTPUT_PATH, creating the parent directory first via ensure_dir.
hex_to_file() {
  local hex_payload="$1" dest="$2"
  local dest_dir
  dest_dir="$(dirname "$dest")"

  ensure_dir "$dest_dir"
  xxd -r -p <<<"$hex_payload" > "$dest"
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: tts
|
||||
# ============================================================================
|
||||
# Synthesize speech for one piece of text through the t2a_v2 endpoint.
#
# Usage: cmd_tts TEXT [-v|--voice-id ID] [-o|--output FILE] [--model M]
#                [--speed N] [--volume N] [--pitch N] [--emotion E]
#                [--format FMT] [--sample-rate N] [--language-boost L]
# The audio is requested as hex. With -o it is decoded into FILE via
# hex_to_file; without -o only the number of hex characters is reported.
# Any argument that is not a known option replaces TEXT.
# Exits 1 when TEXT is missing or the API returns no audio.
cmd_tts() {
  local text="" voice_id="male-qn-qingse" output="" model="speech-2.8-hd"
  local speed=1.0 volume=1.0 pitch=0 emotion="" audio_format="mp3"
  local sample_rate=32000 language_boost=""

  # First positional arg is text
  if (( $# > 0 )) && [[ "$1" != -* ]]; then
    text="$1"
    shift
  fi

  while (( $# > 0 )); do
    case "$1" in
      -v|--voice-id)    voice_id="$2";       shift 2 ;;
      -o|--output)      output="$2";         shift 2 ;;
      --model)          model="$2";          shift 2 ;;
      --speed)          speed="$2";          shift 2 ;;
      --volume)         volume="$2";         shift 2 ;;
      --pitch)          pitch="$2";          shift 2 ;;
      --emotion)        emotion="$2";        shift 2 ;;
      --format)         audio_format="$2";   shift 2 ;;
      --sample-rate)    sample_rate="$2";    shift 2 ;;
      --language-boost) language_boost="$2"; shift 2 ;;
      *)                text="$1";           shift ;;
    esac
  done

  if [[ -z "$text" ]]; then
    echo "Error: text is required" >&2
    echo "Usage: $(basename "$0") tts \"Text to speak\" -o output.mp3" >&2
    exit 1
  fi

  # Assemble the whole request in one jq program. The optional keys
  # (emotion, language_boost) are added only when a value was supplied.
  local request_body
  request_body=$(jq -n \
    --arg model "$model" \
    --arg text "$text" \
    --arg vid "$voice_id" \
    --argjson spd "$speed" \
    --argjson vol "$volume" \
    --argjson pit "$pitch" \
    --arg emo "$emotion" \
    --arg fmt "$audio_format" \
    --argjson sr "$sample_rate" \
    --arg lb "$language_boost" \
    '{
      model: $model,
      text: $text,
      voice_setting: ({voice_id: $vid, speed: $spd, vol: $vol, pitch: $pit}
        + (if $emo == "" then {} else {emotion: $emo} end)),
      audio_setting: {sample_rate: $sr, bitrate: 128000, format: $fmt, channel: 1},
      stream: false,
      subtitle_enable: false,
      output_format: "hex"
    } + (if $lb == "" then {} else {language_boost: $lb} end)')

  echo "Synthesizing: ${text:0:50}..."
  local api_reply
  api_reply="$(api_request POST t2a_v2 "$request_body")"

  # Extract hex audio
  local hex_audio
  hex_audio="$(echo "$api_reply" | jq -r '.data.audio // .extra_info.audio // empty')"
  if [[ -z "$hex_audio" ]]; then
    echo "Error: No audio data returned from API" >&2
    exit 1
  fi

  if [[ -z "$output" ]]; then
    echo "Generated ${#hex_audio} hex chars of audio"
    return 0
  fi

  hex_to_file "$hex_audio" "$output"
  echo "Done: $output"
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: clone
|
||||
# ============================================================================
|
||||
# Clone a voice from an audio sample.
#
# Usage: cmd_clone AUDIO_FILE --voice-id ID [--preview TEXT]
#                  [--preview-output FILE]
# Steps: upload AUDIO_FILE (purpose "voice_clone"), register the returned
# file_id under ID via the voice_clone endpoint, and, when --preview is
# given, synthesize TEXT with the new voice through cmd_tts.
# Exits 1 when the audio file or voice id is missing, or no file_id comes back.
cmd_clone() {
  local audio_file="" voice_id="" preview_text="" preview_output=""

  # First positional arg is audio file
  if (( $# > 0 )) && [[ "$1" != -* ]]; then
    audio_file="$1"
    shift
  fi

  while (( $# > 0 )); do
    case "$1" in
      --voice-id)       voice_id="$2";       shift 2 ;;
      --preview)        preview_text="$2";   shift 2 ;;
      --preview-output) preview_output="$2"; shift 2 ;;
      *)
        # Only the first stray argument is taken as the audio file.
        if [[ -z "$audio_file" ]]; then audio_file="$1"; fi
        shift
        ;;
    esac
  done

  if [[ -z "$audio_file" ]]; then
    echo "Error: audio file is required" >&2
    echo "Usage: $(basename "$0") clone audio.mp3 --voice-id my-voice" >&2
    exit 1
  elif [[ ! -f "$audio_file" ]]; then
    echo "Error: Audio file not found: $audio_file" >&2
    exit 1
  elif [[ -z "$voice_id" ]]; then
    echo "Error: --voice-id is required" >&2
    exit 1
  fi

  echo "Cloning voice from: $audio_file"
  echo "Voice ID: $voice_id"

  # Step 1: Upload audio
  local upload_reply uploaded_id
  upload_reply="$(api_upload files/upload "$audio_file" voice_clone)"
  uploaded_id="$(jq -r '.file.file_id // .file_id // empty' <<<"$upload_reply")"
  if [[ -z "$uploaded_id" ]]; then
    echo "Error: Upload succeeded but no file_id was returned" >&2
    exit 1
  fi

  # Step 2: Clone voice (file_id is passed to jq as a JSON value, not a string)
  local clone_body
  clone_body=$(jq -n --arg vid "$voice_id" --argjson fid "$uploaded_id" \
    '{voice_id: $vid, file_id: $fid}')
  api_request POST voice_clone "$clone_body" > /dev/null
  echo "Voice cloned successfully: $voice_id"

  # Step 3: Preview if requested
  if [[ -z "$preview_text" ]]; then
    return 0
  fi
  echo "Generating preview..."
  local preview_path="${preview_output:-${voice_id}_preview.mp3}"
  cmd_tts "$preview_text" -v "$voice_id" -o "$preview_path"
  echo "Preview saved to: $preview_path"
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: design
|
||||
# ============================================================================
|
||||
# Design a new voice from a text description via the voice_design endpoint.
#
# Usage: cmd_design DESCRIPTION [--voice-id ID] [--preview TEXT]
#                   [--preview-output FILE]
#   DESCRIPTION       prompt describing the voice (required)
#   --voice-id ID     id to request; otherwise the id in the response is used
#   --preview TEXT    preview sentence (a default sentence is sent otherwise)
#   --preview-output  where to save the trial audio
#                     (default: <voice_id>_preview.mp3)
# When the response carries trial_audio (hex), it is decoded with hex_to_file.
# Exits 1 when DESCRIPTION is missing.
cmd_design() {
  local description="" voice_id="" preview_text="" preview_output=""

  if [[ $# -gt 0 && "$1" != -* ]]; then
    description="$1"; shift
  fi

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --voice-id) voice_id="$2"; shift 2 ;;
      --preview) preview_text="$2"; shift 2 ;;
      --preview-output) preview_output="$2"; shift 2 ;;
      *)
        # Only the first stray argument is taken as the description.
        if [[ -z "$description" ]]; then description="$1"; fi
        shift
        ;;
    esac
  done

  if [[ -z "$description" ]]; then
    echo "Error: description is required" >&2
    # Plain quotes inside $(...): escaped ones would reach basename as
    # literal characters and leave a stray '"' in the printed script name.
    echo "Usage: $(basename "$0") design \"A warm female voice\" --voice-id narrator" >&2
    exit 1
  fi

  local ptext="${preview_text:-This is a preview of the designed voice.}"

  echo "Designing voice from: \"$description\""
  if [[ -n "$voice_id" ]]; then
    echo "Voice ID: $voice_id"
  fi

  local payload
  payload=$(jq -n \
    --arg prompt "$description" \
    --arg pt "$ptext" \
    '{prompt: $prompt, preview_text: $pt}')

  if [[ -n "$voice_id" ]]; then
    payload=$(echo "$payload" | jq --arg vid "$voice_id" '. + {voice_id: $vid}')
  fi

  local response
  response="$(api_request POST voice_design "$payload")"

  local actual_voice_id
  actual_voice_id="${voice_id:-$(echo "$response" | jq -r '.voice_id // "unknown"')}"
  echo "Voice designed: $actual_voice_id"

  local trial_audio
  trial_audio="$(echo "$response" | jq -r '.trial_audio // empty')"
  if [[ -n "$trial_audio" ]]; then
    local pout="${preview_output:-${actual_voice_id}_preview.mp3}"
    hex_to_file "$trial_audio" "$pout"
    echo "Preview saved to: $pout"
  fi
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: list-voices
|
||||
# ============================================================================
|
||||
# List voices known to the API: up to ten system voices (with a count of the
# remainder), followed by the ids of cloned and designed custom voices.
# Each list is fetched with api_request POST voice/list; a failed fetch is
# tolerated and reported as "could not fetch" / "none found".
cmd_list_voices() {
  echo "=== System Voices ==="
  local system_reply
  system_reply="$(api_request POST voice/list '{"voice_type":"system"}' 2>/dev/null)" || true

  if [[ -z "$system_reply" ]]; then
    echo " (Could not fetch system voices)"
  else
    local total
    total="$(echo "$system_reply" | jq '.voice_list | length')" 2>/dev/null || total=0
    if [[ "$total" -gt 0 ]]; then
      echo "$system_reply" | jq -r '.voice_list[:10][] | " \(.voice_id): \(.name // "N/A")"'
      if [[ "$total" -gt 10 ]]; then
        echo " ... and $((total - 10)) more"
      fi
    else
      echo " (None found)"
    fi
  fi

  echo ""
  echo "=== Custom Voices ==="

  # Fetch both custom lists before printing anything about them.
  local cloned_reply designed_reply
  cloned_reply="$(api_request POST voice/list '{"voice_type":"voice_cloning"}' 2>/dev/null)" || true
  designed_reply="$(api_request POST voice/list '{"voice_type":"voice_generation"}' 2>/dev/null)" || true

  local found_any=false heading reply n
  for heading in Cloned Designed; do
    if [[ "$heading" == "Cloned" ]]; then
      reply="$cloned_reply"
    else
      reply="$designed_reply"
    fi
    [[ -n "$reply" ]] || continue

    n="$(echo "$reply" | jq '.voice_list | length')" 2>/dev/null || n=0
    if [[ "$n" -gt 0 ]]; then
      found_any=true
      echo "$heading ($n):"
      echo "$reply" | jq -r '.voice_list[] | " \(.voice_id)"'
    fi
  done

  if ! $found_any; then
    echo " (None found)"
  fi
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: validate
|
||||
# ============================================================================
|
||||
# Validate a segments JSON file before generation.
#
# Usage: cmd_validate SEGMENTS_FILE [--model M] [--strict] [-v|--verbose]
#                     [--validate-voices]
# SEGMENTS_FILE holds either a JSON array of segments or an object with a
# "segments" array. For every segment:
#   - "text" must be present and non-empty
#   - "voice_id" must be present
#   - "emotion", when given, must be exactly one of the valid emotions
# --strict and --validate-voices are accepted but have no effect here.
# With --verbose a one-line summary per segment is printed after a pass.
# Returns 0 when all segments are valid, 1 otherwise; exits 1 when the file
# is missing, is not valid JSON, or contains no segments.
cmd_validate() {
  local segments_file="" model="speech-2.8-hd" verbose=false

  if [[ $# -gt 0 && "$1" != -* ]]; then
    segments_file="$1"; shift
  fi

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --model) model="$2"; shift 2 ;;
      --strict) shift ;;  # Accepted for compatibility; no effect
      -v|--verbose) verbose=true; shift ;;
      --validate-voices) shift ;;  # Not implemented in bash version
      *)
        if [[ -z "$segments_file" ]]; then segments_file="$1"; fi
        shift
        ;;
    esac
  done

  if [[ -z "$segments_file" || ! -f "$segments_file" ]]; then
    echo "Error: Segments file not found: ${segments_file:-<none>}" >&2
    exit 1
  fi

  echo "Validating: $segments_file"
  echo "Model: $model"

  local valid_emotions="happy sad angry fearful disgusted surprised calm fluent whisper"
  echo "Valid emotions: $valid_emotions"
  echo ""

  # Parse JSON
  local segments count
  segments="$(jq -r 'if type == "array" then . elif type == "object" and has("segments") then .segments else empty end' "$segments_file" 2>/dev/null)" || {
    echo "Error: Invalid JSON in $segments_file" >&2
    exit 1
  }

  if [[ -z "$segments" || "$segments" == "null" ]]; then
    echo "Error: No segments found in file" >&2
    exit 1
  fi

  count="$(echo "$segments" | jq 'length')"
  local errors=0
  local i text voice_id emotion

  for ((i = 0; i < count; i++)); do
    text="$(echo "$segments" | jq -r ".[$i].text // \"\"")"
    voice_id="$(echo "$segments" | jq -r ".[$i].voice_id // \"\"")"
    emotion="$(echo "$segments" | jq -r ".[$i].emotion // \"\"")"

    if [[ -z "$text" ]]; then
      echo " - Segment $i: 'text' is required and must not be empty"
      errors=$((errors + 1))
    fi
    if [[ -z "$voice_id" ]]; then
      echo " - Segment $i: 'voice_id' is required"
      errors=$((errors + 1))
    fi
    if [[ -n "$emotion" ]]; then
      # Exact, literal membership test: the value must be a single word from
      # the list. Handing it to grep would treat it as a regex or an option.
      if [[ "$emotion" == *[[:space:]]* || " $valid_emotions " != *" $emotion "* ]]; then
        echo " - Segment $i: invalid emotion '$emotion'"
        errors=$((errors + 1))
      fi
    fi
  done

  if [[ $errors -ne 0 ]]; then
    echo "Validation failed ($errors errors)"
    return 1
  fi

  echo "Validation passed: $count segments"
  if $verbose; then
    echo ""
    echo "=== Segment Summary ==="
    local elabel
    for ((i = 0; i < count; i++)); do
      text="$(echo "$segments" | jq -r ".[$i].text // \"\"")"
      voice_id="$(echo "$segments" | jq -r ".[$i].voice_id // \"\"")"
      emotion="$(echo "$segments" | jq -r ".[$i].emotion // \"\"")"
      elabel="${emotion:-AUTO}"
      printf " %d: [%-10s] voice=%-20s \"%s\"\n" "$i" "${elabel^^}" "${voice_id:0:20}" "${text:0:40}"
    done
  fi
  return 0
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: generate (multi-segment pipeline)
|
||||
# ============================================================================
|
||||
# Generate one audio file from a segments JSON file (multi-segment pipeline).
#
# Each segment is synthesized through api_request into its own MP3 inside the
# temp directory; the per-segment files are then copied (one segment) or merged
# (several) into the output file.
#
# Usage:   cmd_generate [FILE] -o OUTPUT [--model NAME] [--crossfade MS]
#                       [--no-normalize] [--temp-dir DIR] [--continue-on-error]
# Outputs: progress on stdout; fatal errors and warnings on stderr.
# Exits:   1 on bad arguments, unreadable JSON, or when no segment succeeded.
#
# Without --continue-on-error the loop stops at the first failing segment, but
# whatever was generated before it is still merged into OUTPUT.
cmd_generate() {
  local segments_file="" output="" model="speech-2.8-hd" crossfade=200
  local no_normalize=false temp_dir="" continue_on_error=false
  local i

  if [[ $# -gt 0 && "$1" != -* ]]; then
    segments_file="$1"
    shift
  fi

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -o|--output|--model|--crossfade|--temp-dir)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value" >&2
          exit 1
        fi
        case "$1" in
          -o|--output) output="$2" ;;
          --model) model="$2" ;;
          --crossfade) crossfade="$2" ;;
          --temp-dir) temp_dir="$2" ;;
        esac
        shift 2
        ;;
      --no-normalize) no_normalize=true; shift ;;
      --continue-on-error) continue_on_error=true; shift ;;
      *)
        if [[ -z "$segments_file" ]]; then
          segments_file="$1"
        fi
        shift
        ;;
    esac
  done

  if [[ -z "$segments_file" || ! -f "$segments_file" ]]; then
    echo "Error: Segments file not found: ${segments_file:-<none>}" >&2
    exit 1
  fi
  if [[ -z "$output" ]]; then
    echo "Error: -o/--output is required" >&2
    exit 1
  fi

  # Validate first
  echo "Validating segments file..."
  local segments count=0
  if ! segments="$(jq -c 'if type == "array" then . elif type == "object" and (.segments | type) == "array" then .segments else empty end' "$segments_file" 2>/dev/null)"; then
    echo "Error: Invalid JSON in $segments_file" >&2
    exit 1
  fi
  if [[ -n "$segments" ]]; then
    count="$(jq 'length' <<<"$segments")"
  fi

  if [[ "$count" -eq 0 ]]; then
    echo "Error: No segments found" >&2
    exit 1
  fi
  echo "Found $count valid segments"
  echo ""

  # Setup temp dir: "<absolute output dir>/tmp", or "./tmp" when the output
  # directory does not exist yet.
  if [[ -z "$temp_dir" ]]; then
    local out_dir_abs
    if out_dir_abs="$(cd "$(dirname "$output")" 2>/dev/null && pwd)"; then
      temp_dir="$out_dir_abs/tmp"
    else
      temp_dir="./tmp"
    fi
  fi
  mkdir -p "$temp_dir"
  echo "Temp directory: $temp_dir"

  # Generate each segment
  local succeeded=0 failed=0
  local -a segment_files=()
  local text voice_id emotion speed vol pitch
  local seg_output voice_setting payload response audio_hex seg_error

  for ((i = 0; i < count; i++)); do
    text="$(jq -r --argjson i "$i" '.[$i].text? // ""' <<<"$segments")"
    voice_id="$(jq -r --argjson i "$i" '.[$i].voice_id? // ""' <<<"$segments")"
    emotion="$(jq -r --argjson i "$i" '.[$i].emotion? // ""' <<<"$segments")"
    speed="$(jq -r --argjson i "$i" '.[$i].speed? // 1.0' <<<"$segments")"
    vol="$(jq -r --argjson i "$i" '.[$i].volume? // 1.0' <<<"$segments")"
    pitch="$(jq -r --argjson i "$i" '.[$i].pitch? // 0' <<<"$segments")"

    printf " Generating segment %d/%d: %s...\n" "$((i + 1))" "$count" "${text:0:40}"

    seg_output="$temp_dir/segment_$(printf '%04d' "$i").mp3"
    seg_error=""

    # Every failure mode funnels into seg_error so that --continue-on-error
    # is honoured uniformly instead of some failures aborting the script.
    if [[ -z "$text" || -z "$voice_id" ]]; then
      seg_error="Segment is missing 'text' or 'voice_id'"
    elif ! voice_setting="$(jq -n \
        --arg vid "$voice_id" \
        --arg emo "$emotion" \
        --argjson spd "$speed" \
        --argjson vol "$vol" \
        --argjson pit "$pitch" \
        '{voice_id: $vid, speed: $spd, vol: $vol, pitch: $pit}
         + (if $emo == "" then {} else {emotion: $emo} end)' 2>/dev/null)"; then
      # --argjson rejects anything that is not valid JSON (e.g. speed "fast").
      seg_error="Invalid speed/volume/pitch value"
    elif ! payload="$(jq -n \
        --arg model "$model" \
        --arg text "$text" \
        --argjson vs "$voice_setting" \
        '{
          model: $model,
          text: $text,
          voice_setting: $vs,
          audio_setting: {sample_rate: 32000, bitrate: 128000, format: "mp3", channel: 1},
          stream: false,
          output_format: "hex"
        }')"; then
      seg_error="Could not build request payload"
    elif ! response="$(api_request POST t2a_v2 "$payload" 2>&1)"; then
      seg_error="$response"
    else
      audio_hex="$(jq -r '.data.audio // .extra_info.audio // empty' <<<"$response" 2>/dev/null)" || audio_hex=""
      if [[ -z "$audio_hex" ]]; then
        seg_error="No audio data in response"
      elif ! hex_to_file "$audio_hex" "$seg_output"; then
        # NOTE(review): assumes hex_to_file returns non-zero on failure.
        seg_error="Could not write $seg_output"
      fi
    fi

    if [[ -z "$seg_error" ]]; then
      segment_files+=("$seg_output")
      succeeded=$((succeeded + 1))
      echo " ✓ Saved: $seg_output"
    else
      failed=$((failed + 1))
      echo " ✗ Error: $seg_error"
      if ! $continue_on_error; then
        break
      fi
    fi
  done

  if [[ ${#segment_files[@]} -eq 0 ]]; then
    echo "Error: No segments were generated successfully" >&2
    exit 1
  fi
  if [[ $failed -gt 0 ]]; then
    echo "Warning: $failed segment(s) failed; output contains only the $succeeded generated segment(s)" >&2
  fi

  # Merge segments
  ensure_dir "$(dirname "$output")"

  if [[ ${#segment_files[@]} -eq 1 ]]; then
    cp "${segment_files[0]}" "$output"
  else
    _merge_audio_files "$output" "$crossfade" "$no_normalize" "${segment_files[@]}"
  fi

  echo ""
  echo "Audio saved to: $output"
  echo " Generated: $succeeded/$count segments"
  echo ""
  echo " Intermediate files in: $temp_dir"
  echo " Delete with: rm -rf $temp_dir"
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: merge
|
||||
# ============================================================================
|
||||
# Merge two or more existing audio files into one output file.
#
# Usage:   cmd_merge FILE1 FILE2 [FILE...] -o OUTPUT [--crossfade MS]
#                    [--no-normalize] [--format FMT]
# Outputs: progress on stdout; errors and warnings on stderr.
# Exits:   1 on bad arguments or when an input file does not exist.
#
# --format is accepted for compatibility but is not passed on to the merge
# step, so it has no effect on the encoding of OUTPUT.
cmd_merge() {
  local output="" format="mp3" crossfade=300 normalize=true
  local -a input_files=()
  local f

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -o|--output|--format|--crossfade)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value" >&2
          exit 1
        fi
        case "$1" in
          -o|--output) output="$2" ;;
          --format) format="$2" ;;
          --crossfade) crossfade="$2" ;;
        esac
        shift 2
        ;;
      --no-normalize) normalize=false; shift ;;
      *) input_files+=("$1"); shift ;;
    esac
  done

  if [[ ${#input_files[@]} -lt 2 ]]; then
    echo "Error: At least 2 input files required" >&2
    exit 1
  fi
  if [[ -z "$output" ]]; then
    echo "Error: -o/--output is required" >&2
    exit 1
  fi
  # The merge step compares the crossfade arithmetically, so reject anything
  # that is not a plain non-negative integer up front.
  if [[ ! "$crossfade" =~ ^[0-9]+$ ]]; then
    echo "Error: --crossfade must be a non-negative integer (milliseconds): $crossfade" >&2
    exit 1
  fi
  if [[ "$format" != "mp3" ]]; then
    echo "Warning: --format $format is ignored by merge" >&2
  fi

  for f in "${input_files[@]}"; do
    if [[ ! -f "$f" ]]; then
      echo "Error: File not found: $f" >&2
      exit 1
    fi
  done

  echo "Merging ${#input_files[@]} files..."
  # _merge_audio_files takes the inverted flag ("true" means skip loudnorm).
  local no_norm="false"
  if ! $normalize; then
    no_norm="true"
  fi
  _merge_audio_files "$output" "$crossfade" "$no_norm" "${input_files[@]}"
  echo "Merged audio saved to: $output"
}
|
||||
|
||||
# _merge_audio_files OUTPUT CROSSFADE_MS NO_NORMALIZE FILE1 FILE2 ...
#
# Join the given audio files into OUTPUT with ffmpeg.
#   CROSSFADE_MS  crossfade between neighbours in milliseconds; 0 (or a value
#                 that is not a non-negative integer) disables crossfading
#   NO_NORMALIZE  "true" skips the loudnorm pass, anything else applies it
#
# With a crossfade and at least two files an acrossfade filter chain is tried
# first; if ffmpeg rejects it, or crossfading is disabled, the concat demuxer
# is used instead.
# Returns: 0 on success, non-zero when the fallback ffmpeg run fails.
_merge_audio_files() {
  local output="$1" crossfade_ms="$2" no_normalize="$3"
  shift 3
  local -a files=("$@")
  local n=${#files[@]}
  local i f

  ensure_dir "$(dirname "$output")"

  local ms=0
  if [[ "$crossfade_ms" =~ ^[0-9]+$ ]]; then
    ms=$((10#$crossfade_ms))  # force base 10 so "010" is not read as octal
  fi

  if [[ $ms -gt 0 && $n -ge 2 ]]; then
    # Use acrossfade filter for crossfade between segments
    local crossfade_sec
    printf -v crossfade_sec '%d.%03d' "$((ms / 1000))" "$((ms % 1000))"

    local -a inputs=() filter_parts=()
    for ((i = 0; i < n; i++)); do
      inputs+=(-i "${files[$i]}")
      filter_parts+=("[${i}:a]aresample=32000,aformat=sample_fmts=fltp:channel_layouts=mono[s${i}]")
    done

    # Build acrossfade chain: [s0][s1]->[m1], [m1][s2]->[m2], ... the last
    # link is labelled [merged].
    local prev="[s0]" label
    for ((i = 1; i < n; i++)); do
      if [[ $i -eq $((n - 1)) ]]; then
        label="[merged]"
      else
        label="[m${i}]"
      fi
      filter_parts+=("${prev}[s${i}]acrossfade=d=${crossfade_sec}${label}")
      prev="$label"
    done

    local final_filter="[merged]aformat=sample_fmts=fltp"
    if [[ "$no_normalize" != "true" ]]; then
      final_filter+=",loudnorm=I=-16:TP=-1.5:LRA=11"
    fi
    final_filter+="[final]"
    filter_parts+=("$final_filter")

    local filter_complex
    printf -v filter_complex '%s;' "${filter_parts[@]}"
    filter_complex="${filter_complex%;}"

    if ffmpeg -y "${inputs[@]}" \
      -filter_complex "$filter_complex" \
      -map "[final]" \
      -ar 32000 -ac 1 -acodec libmp3lame \
      "$output" 2>/dev/null; then
      return 0
    fi
    echo " Crossfade merge failed, falling back to concat demuxer..." >&2
  fi

  # Fallback: concat demuxer (no crossfade). All scratch files live in one
  # private directory so a single rm cleans up on every path.
  local work_dir
  work_dir="$(mktemp -d "${TMPDIR:-/tmp}/merge_audio_XXXXXX")" || {
    echo "Error: could not create temporary directory" >&2
    return 1
  }
  local concat_file="$work_dir/list.txt"
  local tmp_concat="$work_dir/joined.mp3"
  local abs_path escaped

  for f in "${files[@]}"; do
    abs_path="$(cd "$(dirname "$f")" && pwd)/$(basename "$f")"
    # Inside the single-quoted concat entry a literal ' is written as '\''.
    escaped="$(printf '%s' "$abs_path" | sed "s/'/'\\\\''/g")"
    printf "file '%s'\n" "$escaped" >> "$concat_file"
  done

  local status=0
  if [[ "$no_normalize" != "true" ]]; then
    # Two passes: stream-copy join first, then re-encode with loudnorm.
    if ! {
      ffmpeg -y -f concat -safe 0 -i "$concat_file" -c copy "$tmp_concat" 2>/dev/null \
        && ffmpeg -y -i "$tmp_concat" -af "loudnorm=I=-16:TP=-1.5:LRA=11" -acodec libmp3lame "$output" 2>/dev/null
    }; then
      status=1
    fi
  else
    if ! ffmpeg -y -f concat -safe 0 -i "$concat_file" -c copy "$output" 2>/dev/null; then
      status=1
    fi
  fi

  rm -rf -- "$work_dir"

  if [[ $status -ne 0 ]]; then
    echo "Error: ffmpeg failed to merge audio into $output" >&2
  fi
  return "$status"
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: convert
|
||||
# ============================================================================
|
||||
# Convert an audio file to another format with ffmpeg.
#
# Usage:   cmd_convert [INPUT] -o OUTPUT [--format FMT] [--sample-rate HZ]
#                      [--bitrate RATE] [--channels N]
# Outputs: progress on stdout; errors on stderr.
# Exits:   1 on bad arguments or when ffmpeg fails.
#
# When --format is not given the format is taken from OUTPUT's extension
# (falling back to mp3 when there is none), so "-o out.wav" is encoded as WAV
# rather than as MP3 data in a .wav file.
cmd_convert() {
  local input_file="" output="" format="" sample_rate="" bitrate="" channels=""

  if [[ $# -gt 0 && "$1" != -* ]]; then
    input_file="$1"
    shift
  fi

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -o|--output|--format|--sample-rate|--bitrate|--channels)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value" >&2
          exit 1
        fi
        case "$1" in
          -o|--output) output="$2" ;;
          --format) format="$2" ;;
          --sample-rate) sample_rate="$2" ;;
          --bitrate) bitrate="$2" ;;
          --channels) channels="$2" ;;
        esac
        shift 2
        ;;
      *)
        if [[ -z "$input_file" ]]; then
          input_file="$1"
        fi
        shift
        ;;
    esac
  done

  if [[ -z "$input_file" || ! -f "$input_file" ]]; then
    echo "Error: Input file not found: ${input_file:-<none>}" >&2
    exit 1
  fi
  if [[ -z "$output" ]]; then
    echo "Error: -o/--output is required" >&2
    exit 1
  fi

  # Derive the format from the output file name when it was not given.
  if [[ -z "$format" ]]; then
    local base="${output##*/}"
    if [[ "$base" == *.* && -n "${base##*.}" ]]; then
      format="$(printf '%s' "${base##*.}" | tr '[:upper:]' '[:lower:]')"
    else
      format="mp3"
    fi
  fi

  ensure_dir "$(dirname "$output")"

  # Determine codec; unknown formats are stream-copied.
  local codec
  case "$format" in
    mp3) codec="libmp3lame" ;;
    wav) codec="pcm_s16le" ;;
    flac) codec="flac" ;;
    ogg) codec="libvorbis" ;;
    aac|m4a) codec="aac" ;;
    *) codec="copy" ;;
  esac

  local -a args=(-y -i "$input_file" -acodec "$codec")
  if [[ -n "$sample_rate" ]]; then
    args+=(-ar "$sample_rate")
  fi
  if [[ -n "$channels" ]]; then
    args+=(-ac "$channels")
  fi
  if [[ -n "$bitrate" ]]; then
    args+=(-b:a "$bitrate")
  fi
  args+=("$output")

  echo "Converting $input_file to $format..."
  if ! ffmpeg "${args[@]}" 2>/dev/null; then
    echo "Error: ffmpeg failed to convert $input_file" >&2
    exit 1
  fi
  echo "Converted audio saved to: $output"
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: check-env
|
||||
# ============================================================================
|
||||
# Run the toolkit's environment check script, forwarding all arguments.
#
# Globals: SCRIPT_DIR (read) - the check script is looked up one directory
#          above it.
# Exits:   1 when check_environment.sh cannot be found.
cmd_check_env() {
  local check_script="$SCRIPT_DIR/../check_environment.sh"

  if [[ ! -f "$check_script" ]]; then
    echo "check_environment.sh not found" >&2
    exit 1
  fi

  bash "$check_script" "$@"
}
|
||||
|
||||
# ============================================================================
|
||||
# Main dispatcher
|
||||
# ============================================================================
|
||||
# Print the top-level help text (command list and examples) to stdout.
usage() {
  cat <<'EOF'
MiniMax Voice CLI — Unified TTS interface

Usage:
  generate_voice.sh <command> [options]

Commands:
  tts          Basic text-to-speech
  clone        Clone voice from audio sample
  design       Design voice from description
  list-voices  List available voices
  validate     Validate segments.json file
  generate     Generate audio from segments.json
  merge        Merge multiple audio files
  convert      Convert audio format
  check-env    Check environment setup

Examples:
  generate_voice.sh tts "Hello world" -o hello.mp3
  generate_voice.sh tts "你好" -v female-shaonv -o hello_cn.mp3
  generate_voice.sh clone my_voice.mp3 --voice-id my-custom-voice
  generate_voice.sh design "A warm female voice" --voice-id narrator-1
  generate_voice.sh list-voices
  generate_voice.sh validate segments.json --verbose
  generate_voice.sh generate segments.json -o output.mp3
  generate_voice.sh merge part1.mp3 part2.mp3 -o combined.mp3
  generate_voice.sh convert input.wav -o output.mp3
  generate_voice.sh check-env --test-api
EOF
}
|
||||
|
||||
# Entry point: load .env settings, then dispatch the first argument to the
# matching cmd_* function with the remaining arguments.
#
# Commands that talk to the API run check_api_key first; validate, merge,
# convert and check-env do not. With no arguments the help text is printed and
# the script exits 0; an unknown command prints help to stderr and exits 1.
main() {
  load_env

  if [[ $# -eq 0 ]]; then
    usage
    exit 0
  fi

  local command="$1"
  shift

  case "$command" in
    tts|clone|design|list-voices|generate)
      check_api_key
      # list-voices -> cmd_list_voices, and so on.
      "cmd_${command//-/_}" "$@"
      ;;
    validate|merge|convert|check-env)
      "cmd_${command//-/_}" "$@"
      ;;
    -h|--help|help)
      usage
      ;;
    *)
      echo "Unknown command: $command" >&2
      usage >&2
      exit 1
      ;;
  esac
}
|
||||
|
||||
main "$@"
|
||||
221
skills/minimax-multimodal-toolkit/scripts/video/add_bgm.sh
Executable file
221
skills/minimax-multimodal-toolkit/scripts/video/add_bgm.sh
Executable file
@@ -0,0 +1,221 @@
|
||||
#!/usr/bin/env bash
|
||||
# Add background music to a video file (pure bash)
|
||||
#
|
||||
# Usage:
|
||||
# bash scripts/video/add_bgm.sh --video input.mp4 --audio bgm.mp3 -o output.mp4
|
||||
# bash scripts/video/add_bgm.sh --video input.mp4 --generate-bgm --music-prompt "upbeat pop" -o output.mp4
|
||||
# bash scripts/video/add_bgm.sh --video input.mp4 --audio bgm.mp3 --replace-audio -o output.mp4
|
||||
set -euo pipefail

# Directory holding this script, and the directory two levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Music generation endpoint. This is evaluated once, at load time, i.e. before
# main() calls load_env, so only a MINIMAX_API_HOST that is already present in
# the environment is reflected in this value.
MUSIC_API_URL="${MINIMAX_API_HOST:-https://api.minimaxi.com}/v1/music_generation"
|
||||
|
||||
# ============================================================================
|
||||
# Common functions
|
||||
# ============================================================================
|
||||
|
||||
# Load KEY=VALUE pairs from "$PROJECT_ROOT/.env" and then "$(pwd)/.env" into
# the environment.
#
# Globals: PROJECT_ROOT (read); exports every key found in the files.
# Returns: always 0.
#
# Rules:
#   - blank lines and lines starting with '#' are skipped
#   - an optional leading "export " is ignored
#   - keys that are not valid shell identifiers are skipped
#   - a value wrapped in matching single or double quotes is unquoted
#   - on an unquoted value, " # ..." starts a trailing comment
#   - variables that already have a non-empty value are never overwritten, so
#     the real environment wins over .env and the first file wins over the
#     second
load_env() {
  local env_file line key val
  local dq='"' sq="'"

  for env_file in "$PROJECT_ROOT/.env" "$(pwd)/.env"; do
    [[ -f "$env_file" ]] || continue

    while IFS= read -r line || [[ -n "$line" ]]; do
      line="${line%$'\r'}"                       # tolerate CRLF files
      line="${line#"${line%%[![:space:]]*}"}"    # trim leading whitespace
      line="${line%"${line##*[![:space:]]}"}"    # trim trailing whitespace
      if [[ -z "$line" || "$line" == \#* || "$line" != *=* ]]; then
        continue
      fi

      key="${line%%=*}"
      val="${line#*=}"

      key="${key#export[[:space:]]}"
      key="${key#"${key%%[![:space:]]*}"}"
      key="${key%"${key##*[![:space:]]}"}"
      # ${!key} below fails hard on anything that is not an identifier.
      if [[ ! "$key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
        continue
      fi

      val="${val#"${val%%[![:space:]]*}"}"
      case "$val" in
        "$dq"*"$dq"*)
          val="${val#"$dq"}"
          val="${val%%"$dq"*}"
          ;;
        "$sq"*"$sq"*)
          val="${val#"$sq"}"
          val="${val%%"$sq"*}"
          ;;
        *)
          val="${val%%[[:space:]]#*}"
          val="${val%"${val##*[![:space:]]}"}"
          ;;
      esac

      # A plain 'if' (not '[[ ... ]] && export') so that an already-set
      # variable on the last line cannot make the loop, and with it this
      # function, return non-zero under 'set -e'.
      if [[ -z "${!key:-}" ]]; then
        export "$key=$val"
      fi
    done < "$env_file"
  done

  return 0
}
|
||||
|
||||
# Print the container duration of a media file in seconds (e.g. "12.500000").
#
# Arguments: $1 - path to the media file
# Outputs:   the duration on stdout; an error message on stderr on failure
# Returns:   0 on success, 1 when ffprobe fails or reports no numeric duration
get_video_duration() {
  local media="$1" duration

  # The default writer with noprint_wrappers/nokey prints the bare value, so
  # no JSON post-processing is needed.
  duration="$(ffprobe -v error -show_entries format=duration \
    -of default=noprint_wrappers=1:nokey=1 "$media" 2>/dev/null)" || duration=""

  if [[ ! "$duration" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
    echo "Error: Could not determine duration of: $media" >&2
    return 1
  fi

  printf '%s\n' "$duration"
}
|
||||
|
||||
# Succeed when ffprobe lists at least one audio stream for the given file.
#
# Arguments: $1 - path to the media file
# Returns:   0 when an audio stream is reported, non-zero otherwise
video_has_audio() {
  local stream_types
  stream_types="$(ffprobe -v error -select_streams a \
    -show_entries stream=codec_type -of csv=p=0 "$1" 2>/dev/null)"
  [[ "$stream_types" == *audio* ]]
}
|
||||
|
||||
# Generate background music through the music_generation API and download it.
#
# Arguments: $1 - style prompt (a generic cinematic prompt is used when empty)
#            $2 - path the audio is written to (parent directories are created)
#            $3 - "true" to request instrumental music (default: false)
# Globals:   MINIMAX_API_KEY (read), MINIMAX_API_HOST (read), MUSIC_API_URL (read)
# Outputs:   progress on stdout; errors on stderr
# Returns:   0 when the file was downloaded, 1 on any API or download failure
generate_music() {
  local prompt="$1" output_path="$2" instrumental="${3:-false}"
  local effective_prompt="${prompt:-background music, cinematic, ambient}"

  # Resolve the endpoint at call time: MINIMAX_API_HOST may only have been
  # exported by load_env, which runs after MUSIC_API_URL was computed.
  local api_url="${MUSIC_API_URL:-https://api.minimaxi.com/v1/music_generation}"
  if [[ -n "${MINIMAX_API_HOST:-}" ]]; then
    api_url="${MINIMAX_API_HOST%/}/v1/music_generation"
  fi

  local payload kind_label=""
  if [[ "$instrumental" == "true" ]]; then
    kind_label="instrumental "
    payload=$(jq -n \
      --arg p "$effective_prompt. pure music, no lyrics" \
      '{model: "music-2.5", prompt: $p, lyrics: "[intro] [outro]", output_format: "url"}')
  else
    payload=$(jq -n \
      --arg p "$effective_prompt" \
      '{model: "music-2.5", prompt: $p, lyrics: "[Intro]\nla da da\nla la la", output_format: "url"}')
  fi

  echo "Generating ${kind_label}music..."
  echo " Prompt: $effective_prompt"

  # The HTTP status is appended on its own last line and split off below.
  local raw http_code response
  raw="$(curl -s -w "\n%{http_code}" -X POST "$api_url" \
    -H "Authorization: Bearer ${MINIMAX_API_KEY}" \
    -H "Content-Type: application/json" \
    --max-time 300 -d "$payload")" || {
    echo "Error: Music API request failed" >&2
    return 1
  }
  http_code="${raw##*$'\n'}"
  response="${raw%$'\n'*}"

  if [[ "$http_code" =~ ^[0-9]+$ && "$http_code" -ge 400 ]]; then
    echo "Error: Music API HTTP $http_code" >&2
    return 1
  fi

  local sc
  sc="$(jq -r '.base_resp.status_code // 0' <<<"$response" 2>/dev/null)" || sc=""
  if [[ -n "$sc" && "$sc" != "0" ]]; then
    echo "Error: Music API error: $(jq '.base_resp' <<<"$response")" >&2
    return 1
  fi

  local audio_url
  audio_url="$(jq -r '.data.audio_url // .data.audio // .data.audio_file.download_url // empty' <<<"$response" 2>/dev/null)" || audio_url=""
  if [[ -z "$audio_url" ]]; then
    echo "Error: No audio URL in music response" >&2
    return 1
  fi

  mkdir -p "$(dirname "$output_path")"

  # Download with retry. -f turns HTTP errors into failures so an error page
  # is never kept as audio; an empty file also counts as a failed attempt.
  local attempt delay size
  for attempt in 1 2 3; do
    if curl -fsL -o "$output_path" --max-time 120 "$audio_url" 2>/dev/null \
      && [[ -s "$output_path" ]]; then
      size="$(wc -c < "$output_path" | tr -d ' ')"
      echo " Downloaded: $output_path ($size bytes)"
      return 0
    fi
    if [[ $attempt -lt 3 ]]; then
      delay=$((2 ** attempt))
      echo " Download attempt $attempt failed. Retrying in ${delay}s..."
      sleep "$delay"
    fi
  done

  echo "Error: Download failed after 3 attempts" >&2
  return 1
}
|
||||
|
||||
# ============================================================================
|
||||
# Main
|
||||
# ============================================================================
|
||||
|
||||
# Entry point: add background music to a video.
#
# Parses the command line, optionally generates the music through the API,
# then either mixes it with the video's own audio track or uses it as the only
# audio track (when --replace-audio is given or the video has no audio).
#
# Outputs: progress on stdout; errors on stderr.
# Exits:   0 on success or --help, 1 on bad arguments or any processing error.
main() {
  load_env

  local video="" audio="" output=""
  local generate_bgm=false instrumental=false music_prompt=""
  local bgm_volume=0.3 fade_in=0 fade_out=0 replace_audio=false

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --video|--audio|--music-prompt|--bgm-volume|--fade-in|--fade-out|-o|--output)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value" >&2
          exit 1
        fi
        case "$1" in
          --video) video="$2" ;;
          --audio) audio="$2" ;;
          --music-prompt) music_prompt="$2" ;;
          --bgm-volume) bgm_volume="$2" ;;
          --fade-in) fade_in="$2" ;;
          --fade-out) fade_out="$2" ;;
          -o|--output) output="$2" ;;
        esac
        shift 2
        ;;
      --generate-bgm) generate_bgm=true; shift ;;
      --instrumental) instrumental=true; shift ;;
      --replace-audio) replace_audio=true; shift ;;
      -h|--help)
        cat <<'USAGE'
Add Background Music to Video

Usage:
  add_bgm.sh --video INPUT --audio BGM -o OUTPUT
  add_bgm.sh --video INPUT --generate-bgm --music-prompt "style" -o OUTPUT

Options:
  --video FILE         Input video file (required)
  --audio FILE         Background music audio file
  --generate-bgm       Generate BGM via MiniMax API
  --instrumental       Make generated BGM instrumental
  --music-prompt TEXT  Prompt for BGM generation
  --bgm-volume FLOAT   BGM volume level (default: 0.3)
  --fade-in SECS       BGM fade-in duration
  --fade-out SECS      BGM fade-out duration
  --replace-audio      Replace original audio instead of mixing
  -o, --output FILE    Output video file (required)

Examples:
  add_bgm.sh --video input.mp4 --audio bgm.mp3 -o output.mp4
  add_bgm.sh --video input.mp4 --generate-bgm --music-prompt "upbeat pop" -o output.mp4
  add_bgm.sh --video input.mp4 --audio bgm.mp3 --replace-audio -o output.mp4
USAGE
        exit 0
        ;;
      *) echo "Unknown option: $1" >&2; exit 1 ;;
    esac
  done

  if [[ -z "$video" || ! -f "$video" ]]; then
    echo "Error: Video file not found: ${video:-<none>}" >&2
    exit 1
  fi
  if [[ -z "$audio" && "$generate_bgm" != "true" ]]; then
    echo "Error: Provide --audio or --generate-bgm" >&2
    exit 1
  fi
  if [[ -z "$output" ]]; then
    echo "Error: --output / -o is required" >&2
    exit 1
  fi

  # These values are spliced into the ffmpeg filter string below, so only
  # plain non-negative decimals are accepted.
  local number_re='^([0-9]+(\.[0-9]*)?|\.[0-9]+)$'
  local opt_name
  for opt_name in bgm_volume fade_in fade_out; do
    if [[ ! "${!opt_name}" =~ $number_re ]]; then
      echo "Error: --${opt_name//_/-} must be a non-negative number: ${!opt_name}" >&2
      exit 1
    fi
  done

  local audio_path="$audio"

  if $generate_bgm; then
    if [[ -z "${MINIMAX_API_KEY:-}" ]]; then
      echo "Error: MINIMAX_API_KEY not set." >&2
      exit 1
    fi
    audio_path="${output%.*}_bgm.mp3"
    generate_music "$music_prompt" "$audio_path" "$instrumental" || exit 1
  fi

  if [[ ! -f "$audio_path" ]]; then
    echo "Error: Audio file not found: $audio_path" >&2
    exit 1
  fi

  local duration
  duration="$(get_video_duration "$video")" || duration=""
  if [[ ! "$duration" =~ $number_re ]]; then
    echo "Error: Could not determine video duration: $video" >&2
    exit 1
  fi
  echo "Video duration: $(printf '%.1f' "$duration")s"

  mkdir -p "$(dirname "$output")"

  local has_audio=false
  if video_has_audio "$video"; then
    has_audio=true
  fi

  # A validated non-negative decimal is greater than zero exactly when it
  # contains a non-zero digit, so no external calculator is needed.
  local bgm_filter="[1:a]volume=${bgm_volume}"
  if [[ "$fade_in" == *[1-9]* ]]; then
    bgm_filter+=",afade=t=in:d=${fade_in}"
  fi
  if [[ "$fade_out" == *[1-9]* ]]; then
    local fo_start
    # Start the fade-out so that it ends with the video, clamped to 0.
    fo_start="$(awk -v d="$duration" -v f="$fade_out" \
      'BEGIN { s = d - f; if (s < 0) s = 0; printf "%.3f", s }')"
    bgm_filter+=",afade=t=out:st=${fo_start}:d=${fade_out}"
  fi

  local audio_label
  if $has_audio && ! $replace_audio; then
    bgm_filter+="[bgm];[0:a][bgm]amix=inputs=2:duration=first:dropout_transition=2[aout]"
    audio_label="[aout]"
    echo "Merging video + audio (mixing with original, bgm_volume=${bgm_volume})..."
  else
    local mode="no original audio"
    if $replace_audio; then
      mode="replacing original"
    fi
    bgm_filter+="[bgm]"
    audio_label="[bgm]"
    echo "Merging video + audio (${mode}, bgm_volume=${bgm_volume})..."
  fi

  if ! ffmpeg -y \
    -i "$video" -i "$audio_path" \
    -filter_complex "$bgm_filter" \
    -map 0:v -map "$audio_label" \
    -c:v copy -c:a aac -shortest "$output" 2>/dev/null; then
    echo "Error: ffmpeg failed to write $output" >&2
    exit 1
  fi

  echo "Output saved: $output"
  echo "Done!"
}
|
||||
|
||||
main "$@"
|
||||
479
skills/minimax-multimodal-toolkit/scripts/video/generate_long_video.sh
Executable file
479
skills/minimax-multimodal-toolkit/scripts/video/generate_long_video.sh
Executable file
@@ -0,0 +1,479 @@
|
||||
#!/usr/bin/env bash
|
||||
# MiniMax Long Video Generation CLI (pure bash)
|
||||
#
|
||||
# Generates multi-segment videos by chaining scenes together.
|
||||
# Each segment's last frame becomes the next segment's first frame.
|
||||
# Optionally adds AI-generated background music.
|
||||
#
|
||||
# Usage:
|
||||
# bash scripts/video/generate_long_video.sh \
|
||||
# --scenes "A sunrise" "Birds flying" "A calm lake" \
|
||||
# --output output/long_video.mp4
|
||||
#
|
||||
# bash scripts/video/generate_long_video.sh \
|
||||
# --scenes "A robot waking up" "The robot walks outside" \
|
||||
# --music-prompt "cinematic orchestral" \
|
||||
# --output output/robot_story.mp4
|
||||
# Strict mode: abort on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Directory holding this script, and the toolkit root two levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# API endpoints; the host part can be overridden through MINIMAX_API_HOST.
API_BASE="${MINIMAX_API_HOST:-https://api.minimaxi.com}/v1"
MUSIC_API_URL="${API_BASE}/music_generation"

# Task polling: seconds between polls and overall time budget in seconds.
POLL_INTERVAL=10
MAX_WAIT_TIME=600

# Per-request curl timeout (seconds) and the number of consecutive failed
# polls tolerated before giving up.
REQUEST_TIMEOUT=60
MAX_CONSECUTIVE_FAILURES=5
|
||||
|
||||
# ============================================================================
|
||||
# Common functions
|
||||
# ============================================================================
|
||||
|
||||
# Load KEY=VALUE pairs from .env files into the environment.
#
# Reads "$PROJECT_ROOT/.env" first, then "$(pwd)/.env". A variable that is
# already set to a non-empty value is never overwritten, so the caller's
# environment wins over the project file, which wins over the cwd file.
#
# Parsing rules:
#   - everything from the first '#' on a line is dropped as a comment;
#   - lines without '=' are skipped;
#   - whitespace around key and value is trimmed (in pure bash, so values
#     containing quotes or apostrophes no longer break the parser);
#   - keys that are not valid shell identifiers are skipped;
#   - one pair of matching surrounding quotes is removed from the value.
#
# Globals:  PROJECT_ROOT (read); exports each variable it loads.
# Returns:  always 0. The previous version returned the status of its last
#           "[[ ... ]] && export" test, which aborted the script under
#           `set -e` whenever the last key in the file was already set.
load_env() {
  local env_file line key val
  for env_file in "$PROJECT_ROOT/.env" "$(pwd)/.env"; do
    [[ -f "$env_file" ]] || continue
    # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      line="${line%%#*}"
      [[ "$line" == *=* ]] || continue

      key="${line%%=*}"
      val="${line#*=}"

      # Trim leading/trailing whitespace (also strips a trailing CR).
      key="${key#"${key%%[![:space:]]*}"}"
      key="${key%"${key##*[![:space:]]}"}"
      val="${val#"${val%%[![:space:]]*}"}"
      val="${val%"${val##*[![:space:]]}"}"

      # ${!key} below is an error for anything that is not an identifier.
      [[ "$key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue

      if [[ ${#val} -ge 2 ]]; then
        case "$val" in
          \"*\" | \'*\') val="${val:1:${#val}-2}" ;;
        esac
      fi

      if [[ -z "${!key:-}" ]]; then
        export "$key=$val"
      fi
    done < "$env_file"
  done
  return 0
}
|
||||
|
||||
# Abort the script with exit status 1 unless MINIMAX_API_KEY is set to a
# non-empty value.
check_api_key() {
  [[ -n "${MINIMAX_API_KEY:-}" ]] && return 0
  echo "Error: MINIMAX_API_KEY not set." >&2
  exit 1
}
|
||||
|
||||
# Print a local image file as a single-line "data:<mime>;base64,<payload>" URL.
#
# Arguments: $1 - path to an existing file.
# Outputs:   the data URL on stdout (exactly one line).
# Exits:     with status 1 if the file does not exist.
#
# The MIME type comes from `file`; if that fails or prints nothing,
# "image/jpeg" is used.
image_to_data_url() {
  local path="$1" mime b64
  [[ -f "$path" ]] || { echo "Error: Image not found: $path" >&2; exit 1; }

  mime="$(file -b --mime-type "$path" 2>/dev/null)" || mime="image/jpeg"
  [[ -n "$mime" ]] || mime="image/jpeg"

  # GNU base64 wraps its output every 76 columns; strip the line breaks so
  # the data URL stays on one line on every platform.
  b64="$(base64 < "$path" | tr -d '\r\n')"

  printf 'data:%s;base64,%s\n' "$mime" "$b64"
}
|
||||
|
||||
# Turn an image reference into something the API accepts.
#
# Arguments: $1 - empty string, an http(s)/data URL, or a local file path.
# Outputs:   nothing for an empty argument; the URL unchanged for
#            http://, https:// and data: inputs; otherwise whatever
#            image_to_data_url prints for the path.
resolve_image() {
  local src="$1"

  if [[ -z "$src" ]]; then
    return 0
  fi

  if [[ "$src" == http://* || "$src" == https://* || "$src" == data:* ]]; then
    echo "$src"
  else
    image_to_data_url "$src"
  fi
}
|
||||
|
||||
# ============================================================================
|
||||
# Video API helpers (duplicated from generate_video.sh for standalone use)
|
||||
# ============================================================================
|
||||
|
||||
# Submit a video generation request and print the resulting task id.
#
# Arguments: $1 - JSON request body.
# Globals:   API_BASE, MINIMAX_API_KEY, REQUEST_TIMEOUT (read).
# Outputs:   the task id on stdout (empty if the response has none).
# Exits:     with status 1 on a transport failure, an HTTP status >= 400, or
#            a non-zero base_resp.status_code in the response.
#
# The body is streamed to curl on stdin (--data-binary @-) rather than passed
# as a command-line argument: image-to-video payloads embed a base64 frame
# and can exceed the operating system's limit on a single argument.
_create_task() {
  local payload="$1"
  local raw http_code response sc

  raw="$(printf '%s' "$payload" \
    | curl -s -w "\n%{http_code}" -X POST "${API_BASE}/video_generation" \
        -H "Authorization: Bearer ${MINIMAX_API_KEY}" \
        -H "Content-Type: application/json" \
        --max-time "$REQUEST_TIMEOUT" --data-binary @-)" || {
    echo "Error: request to ${API_BASE}/video_generation failed" >&2
    exit 1
  }

  # curl appends the status code on its own final line (see -w above).
  http_code="${raw##*$'\n'}"
  response="${raw%$'\n'*}"

  if [[ "$http_code" =~ ^[0-9]+$ ]] && (( http_code >= 400 )); then
    echo "Error: HTTP $http_code" >&2
    echo "$response" >&2
    exit 1
  fi

  # A body that is not JSON simply yields an empty status code here.
  sc="$(printf '%s\n' "$response" | jq -r '.base_resp.status_code // 0' 2>/dev/null)" || sc=""
  if [[ -n "$sc" && "$sc" != "0" ]]; then
    echo "Error: $(printf '%s\n' "$response" | jq '.base_resp')" >&2
    exit 1
  fi

  printf '%s\n' "$response" | jq -r '.task_id // empty'
}
|
||||
|
||||
# Poll a video generation task until it finishes.
#
# Arguments: $1 - task id.
# Globals:   API_BASE, MINIMAX_API_KEY, REQUEST_TIMEOUT, POLL_INTERVAL,
#            MAX_WAIT_TIME, MAX_CONSECUTIVE_FAILURES (read).
# Outputs:   the file id on stdout when the status is "Success"; progress
#            lines go to stderr.
# Exits:     with status 1 on timeout, on too many consecutive curl failures,
#            or when the task status is Fail/Failed/Error.
_poll_task() {
  local task_id="$1"
  local started failures=0
  started="$(date +%s)"

  while :; do
    local now=$(($(date +%s) - started))
    if [[ $now -gt $MAX_WAIT_TIME ]]; then
      echo "Error: Timeout" >&2
      exit 1
    fi

    local raw http_code response
    if ! raw="$(curl -s -w "\n%{http_code}" -G "${API_BASE}/query/video_generation" \
      -d "task_id=$task_id" -H "Authorization: Bearer ${MINIMAX_API_KEY}" \
      --max-time "$REQUEST_TIMEOUT" 2>/dev/null)"; then
      failures=$((failures+1))
      if [[ $failures -ge $MAX_CONSECUTIVE_FAILURES ]]; then
        echo "Error: Too many failures" >&2
        exit 1
      fi
      sleep "$POLL_INTERVAL"
      continue
    fi

    # Last line of curl's output is the HTTP status, the rest is the body.
    http_code="${raw##*$'\n'}"
    response="${raw%$'\n'*}"
    failures=0

    local status
    status="$(echo "$response" | jq -r '.status // "Unknown"')"
    echo "  [${now}s] Status: $status" >&2

    case "$status" in
      Success)
        echo "$response" | jq -r '.file_id // empty'
        return 0
        ;;
      Fail|Failed|Error)
        echo "Error: Task failed" >&2
        exit 1
        ;;
    esac

    sleep "$POLL_INTERVAL"
  done
}
|
||||
|
||||
# Resolve a file id to its download URL and save the file locally.
#
# Arguments: $1 - file id returned by the polling step
#            $2 - destination path (parent directories are created).
# Globals:   API_BASE, MINIMAX_API_KEY, REQUEST_TIMEOUT (read).
# Outputs:   a "Video saved" line on stderr.
# Exits:     with status 1 if the lookup fails, the response carries no
#            download_url, or the download fails or produces an empty file.
_download_video() {
  local file_id="$1" output_path="$2"
  local raw dl_url

  raw="$(curl -s -G "${API_BASE}/files/retrieve" -d "file_id=$file_id" \
    -H "Authorization: Bearer ${MINIMAX_API_KEY}" --max-time "$REQUEST_TIMEOUT")" || {
    echo "Error: Could not retrieve file info for $file_id" >&2
    exit 1
  }

  dl_url="$(printf '%s\n' "$raw" | jq -r '.file.download_url // empty')" || dl_url=""
  if [[ -z "$dl_url" ]]; then
    echo "Error: No download_url" >&2
    exit 1
  fi

  mkdir -p "$(dirname "$output_path")"

  # --fail: without it an HTTP error page would be saved as the "video" and
  # only surface later as an ffmpeg failure. --location follows redirects.
  if ! curl --fail --silent --show-error --location -o "$output_path" \
    --max-time $((REQUEST_TIMEOUT * 3)) "$dl_url"; then
    rm -f -- "$output_path"
    echo "Error: Download failed for file $file_id" >&2
    exit 1
  fi
  if [[ ! -s "$output_path" ]]; then
    echo "Error: Downloaded file is empty: $output_path" >&2
    exit 1
  fi

  echo "  Video saved: $output_path ($(wc -c < "$output_path" | tr -d ' ') bytes)" >&2
}
|
||||
|
||||
# ============================================================================
|
||||
# FFmpeg helpers
|
||||
# ============================================================================
|
||||
|
||||
# Print the container duration of a media file as reported by ffprobe
# (the raw `.format.duration` value from its JSON output).
#
# Arguments: $1 - media file path.
get_video_duration() {
  local media="$1"
  ffprobe -v error -show_entries format=duration -of json "$media" 2>/dev/null \
    | jq -r '.format.duration'
}
|
||||
|
||||
# Print the frame rate of the first video stream, rounded to the nearest
# integer.
#
# Arguments: $1 - video file path.
# Outputs:   an integer >= 1 on stdout. Falls back to 25 when ffprobe fails or
#            reports something unusable (empty output, "0/0", a zero rate).
#
# ffprobe reports r_frame_rate as "num/den" (e.g. "30000/1001"). The value is
# validated before any arithmetic: an arithmetic-expansion error (empty
# operand, division by zero) aborts the command outright, so a trailing
# "|| echo 25" cannot catch it.
get_video_fps() {
  local fps_str num den fps

  fps_str="$(ffprobe -v error -select_streams v:0 -show_entries stream=r_frame_rate \
    -of csv=p=0 "$1" 2>/dev/null)" || { echo 25; return 0; }

  # Keep the first line only and drop stray separators/whitespace.
  fps_str="${fps_str%%$'\n'*}"
  fps_str="${fps_str//$'\r'/}"
  fps_str="${fps_str//,/}"
  fps_str="${fps_str// /}"

  if [[ "$fps_str" =~ ^([0-9]+)/([0-9]+)$ ]]; then
    # 10# forces base 10 so a leading zero is not read as octal.
    num=$((10#${BASH_REMATCH[1]}))
    den=$((10#${BASH_REMATCH[2]}))
  elif [[ "$fps_str" =~ ^[0-9]+$ ]]; then
    num=$((10#$fps_str))
    den=1
  else
    echo 25
    return 0
  fi

  if (( den == 0 )); then
    echo 25
    return 0
  fi

  fps=$(( (num + den / 2) / den ))
  if (( fps < 1 )); then
    fps=25
  fi
  echo "$fps"
}
|
||||
|
||||
# Succeed (status 0) when ffprobe lists at least one audio stream in the
# file, fail (status 1) otherwise.
#
# Arguments: $1 - media file path.
video_has_audio() {
  local streams
  streams="$(ffprobe -v error -select_streams a -show_entries stream=codec_type -of csv=p=0 "$1" 2>/dev/null)"
  case "$streams" in
    *audio*) return 0 ;;
    *) return 1 ;;
  esac
}
|
||||
|
||||
# Save the final frame of a video as an image.
#
# Arguments: $1 - input video path
#            $2 - output image path (overwritten).
# Outputs:   a progress or warning line on stderr.
# Returns:   0 if a non-empty image was written, 1 otherwise.
#
# Two strategies are tried in order:
#   fast - seek to 40 ms before the end and grab one frame. This produces
#          nothing when the last frame starts more than 40 ms before the end
#          (any frame rate below 25 fps).
#   scan - decode the last second and keep overwriting the image
#          (-update 1), so the file ends up holding the final frame.
# Any stale image is removed before each attempt so that a leftover file from
# an earlier run cannot be mistaken for a fresh result.
extract_last_frame() {
  local video_path="$1" output_image="$2"
  local attempt

  for attempt in fast scan; do
    rm -f -- "$output_image"
    if [[ "$attempt" == fast ]]; then
      ffmpeg -y -sseof -0.04 -i "$video_path" -frames:v 1 -q:v 2 \
        "$output_image" 2>/dev/null || continue
    else
      ffmpeg -y -sseof -1 -i "$video_path" -update 1 -q:v 2 \
        "$output_image" 2>/dev/null || continue
    fi
    if [[ -s "$output_image" ]]; then
      echo "  Extracted last frame: $output_image" >&2
      return 0
    fi
  done

  echo "Warning: Could not extract last frame" >&2
  return 1
}
|
||||
|
||||
# Join video segments into one file, optionally cross-fading between them.
#
# Arguments: $1  - output path
#            $2  - crossfade duration in seconds (0 disables crossfading)
#            $3+ - segment paths, in playback order.
# Outputs:   progress lines on stderr.
# Returns:   0 on success; non-zero if no segments were given, a temp file
#            could not be created, or the final ffmpeg run failed.
#
# Strategy:
#   * one segment      -> plain copy;
#   * crossfade > 0    -> xfade (video) / acrossfade (audio) filter chain;
#   * crossfade is 0, or the filter run failed -> concat demuxer + re-encode.
# Audio is cross-faded only when every segment has an audio stream.
concatenate_videos() {
  local output_path="$1" crossfade="$2"
  shift 2
  local video_paths=("$@")
  local n=${#video_paths[@]}
  local vp i

  if [[ $n -eq 0 ]]; then
    echo "Error: No videos to concatenate" >&2
    return 1
  fi
  if [[ $n -eq 1 ]]; then
    cp -- "${video_paths[0]}" "$output_path" || return 1
    return 0
  fi

  local fps
  fps="$(get_video_fps "${video_paths[0]}")"

  local has_audio=true
  for vp in "${video_paths[@]}"; do
    video_has_audio "$vp" || { has_audio=false; break; }
  done

  # Absolute paths: the concat list file lives in the temp directory, and the
  # demuxer resolves relative entries against the list's own location.
  local abs_paths=()
  for vp in "${video_paths[@]}"; do
    abs_paths+=("$(cd "$(dirname "$vp")" && pwd)/$(basename "$vp")")
  done

  if [[ "$(echo "$crossfade > 0" | bc -l)" == "1" ]]; then
    local durations=() inputs=() offsets=()
    local cumulative=0 offset

    for vp in "${video_paths[@]}"; do
      durations+=("$(get_video_duration "$vp")")
    done
    for vp in "${abs_paths[@]}"; do
      inputs+=(-i "$vp")
    done

    # Transition k starts where the already-joined material ends minus the
    # overlap: offset_k = offset_(k-1) + duration_k - crossfade.
    for ((i = 0; i < n - 1; i++)); do
      offset="$(echo "$cumulative + ${durations[$i]} - $crossfade" | bc -l)"
      offsets+=("$offset")
      cumulative="$offset"
    done

    # Chain the filters pairwise: [0][1] -> x1, [x1][2] -> x2, ... -> out.
    local vf_parts=() af_parts=() out_v out_a
    if [[ $n -eq 2 ]]; then
      vf_parts+=("[0:v][1:v]xfade=transition=fade:duration=${crossfade}:offset=${offsets[0]}[vout]")
      if [[ "$has_audio" == true ]]; then
        af_parts+=("[0:a][1:a]acrossfade=d=${crossfade}:c1=tri:c2=tri[aout]")
      fi
    else
      vf_parts+=("[0:v][1:v]xfade=transition=fade:duration=${crossfade}:offset=${offsets[0]}[xv1]")
      if [[ "$has_audio" == true ]]; then
        af_parts+=("[0:a][1:a]acrossfade=d=${crossfade}:c1=tri:c2=tri[xa1]")
      fi
      for ((i = 2; i < n; i++)); do
        out_v="[xv${i}]"
        out_a="[xa${i}]"
        if [[ $i -eq $((n - 1)) ]]; then
          out_v="[vout]"
          out_a="[aout]"
        fi
        vf_parts+=("[xv$((i-1))][${i}:v]xfade=transition=fade:duration=${crossfade}:offset=${offsets[$((i-1))]}${out_v}")
        if [[ "$has_audio" == true ]]; then
          af_parts+=("[xa$((i-1))][${i}:a]acrossfade=d=${crossfade}:c1=tri:c2=tri${out_a}")
        fi
      done
    fi

    local all_parts=("${vf_parts[@]}")
    if [[ "$has_audio" == true ]]; then
      all_parts+=("${af_parts[@]}")
    fi
    local filter_complex
    filter_complex="$(IFS=';'; echo "${all_parts[*]}")"

    local cmd=(ffmpeg -y "${inputs[@]}" -filter_complex "$filter_complex" -map "[vout]")
    if [[ "$has_audio" == true ]]; then
      cmd+=(-map "[aout]")
    fi
    cmd+=(-c:v libx264 -preset medium -crf 18 -pix_fmt yuv420p -r "$fps")
    if [[ "$has_audio" == true ]]; then
      cmd+=(-c:a aac -b:a 192k)
    fi
    cmd+=("$output_path")

    if "${cmd[@]}" 2>/dev/null; then
      echo "Concatenated $n segments -> $output_path" >&2
      return 0
    fi
    echo "  Crossfade failed, falling back to re-encode concat..." >&2
  fi

  # Fallback: concat demuxer with re-encode.
  # The mktemp template ends in X's: BSD/macOS mktemp does not randomise a
  # template that carries a suffix after them.
  local concat_file
  concat_file="$(mktemp "${TMPDIR:-/tmp}/concat_XXXXXX")" || {
    echo "Error: Could not create temporary concat list" >&2
    return 1
  }

  # Inside the list's single-quoted paths a quote is written as '\''.
  local sq="'" esc="'\\''"
  for vp in "${abs_paths[@]}"; do
    printf "file '%s'\n" "${vp//$sq/$esc}" >> "$concat_file"
  done

  local rc=0
  ffmpeg -y -f concat -safe 0 -i "$concat_file" \
    -c:v libx264 -preset medium -crf 18 -pix_fmt yuv420p -r "$fps" \
    -c:a aac -b:a 192k "$output_path" 2>/dev/null || rc=$?

  # Remove the list on failure too; previously it leaked when ffmpeg failed.
  rm -f -- "$concat_file"

  if [[ $rc -ne 0 ]]; then
    echo "Error: ffmpeg concat failed (exit $rc)" >&2
    return "$rc"
  fi
  echo "Concatenated $n segments -> $output_path" >&2
}
|
||||
|
||||
# Replace a video's audio track with a background-music file.
#
# Arguments: $1 - input video
#            $2 - audio file used as the new audio track
#            $3 - output path (parent directories are created)
#            $4 - BGM volume factor        (default 0.3)
#            $5 - fade-in length, seconds  (default 0 = none)
#            $6 - fade-out length, seconds (default 0 = none).
# Outputs:   progress/diagnostic lines on stderr.
# Returns:   0 on success, 1 if ffmpeg fails.
#
# Only the video stream of $1 is kept (-map 0:v); the output is cut to the
# shorter of the two inputs (-shortest).
#
# The ffmpeg status is checked explicitly: callers invoke this function as
# `merge_video_audio ... || fallback`, which switches errexit off inside it,
# so a failed ffmpeg run used to be masked by the final status message.
merge_video_audio() {
  local video_path="$1" audio_path="$2" output_path="$3"
  local bgm_volume="${4:-0.3}" fade_in="${5:-0}" fade_out="${6:-0}"
  local duration af fo_start

  duration="$(get_video_duration "$video_path")" || duration=""

  af="[1:a]volume=${bgm_volume}"
  if [[ "$(echo "$fade_in > 0" | bc -l)" == "1" ]]; then
    af+=",afade=t=in:d=${fade_in}"
  fi
  if [[ "$(echo "$fade_out > 0" | bc -l)" == "1" ]]; then
    if [[ "$duration" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
      # The fade-out ends exactly at the end of the video, clamped at 0.
      fo_start="$(echo "$duration - $fade_out" | bc -l)"
      if [[ "$(echo "$fo_start < 0" | bc -l)" == "1" ]]; then
        fo_start=0
      fi
      af+=",afade=t=out:st=${fo_start}:d=${fade_out}"
    else
      # Without a usable duration the start time cannot be computed.
      echo "Warning: Unknown video duration, skipping fade-out" >&2
    fi
  fi
  af+="[bgm]"

  mkdir -p "$(dirname "$output_path")"
  if ! ffmpeg -y -i "$video_path" -i "$audio_path" \
    -filter_complex "$af" \
    -map 0:v -map "[bgm]" \
    -c:v copy -c:a aac -shortest "$output_path" 2>/dev/null; then
    echo "Error: ffmpeg failed to merge audio into $video_path" >&2
    return 1
  fi

  echo "Merged video+audio -> $output_path" >&2
}
|
||||
|
||||
# Generate instrumental background music through the music API and save it.
#
# Arguments: $1 - style prompt (a generic cinematic prompt is used if empty)
#            $2 - destination path for the audio file.
# Globals:   MUSIC_API_URL, MINIMAX_API_KEY (read).
# Outputs:   progress/diagnostic lines on stderr.
# Returns:   0 on success; 1 if the request fails, the API answers with an
#            HTTP status >= 400, the response has no audio URL, or the audio
#            download fails or is empty.
generate_music_instrumental() {
  local prompt="$1" output_path="$2"
  local payload raw http_code response audio_url

  # ". pure music, no lyrics" plus placeholder lyric tags ask the model for
  # an instrumental track.
  payload=$(jq -n \
    --arg p "${prompt:-cinematic background music, orchestral, ambient}. pure music, no lyrics" \
    '{model: "music-2.5", prompt: $p, lyrics: "[intro] [outro]", output_format: "url"}')

  echo "Generating instrumental music: $prompt" >&2

  raw="$(curl -s -w "\n%{http_code}" -X POST "$MUSIC_API_URL" \
    -H "Authorization: Bearer ${MINIMAX_API_KEY}" \
    -H "Content-Type: application/json" \
    --max-time 300 -d "$payload")" || {
    echo "Error: Music API request failed" >&2
    return 1
  }
  http_code="${raw##*$'\n'}"
  response="${raw%$'\n'*}"

  if [[ "$http_code" =~ ^[0-9]+$ ]] && (( http_code >= 400 )); then
    echo "Error: Music API HTTP $http_code" >&2
    return 1
  fi

  # The URL is looked up under several field names.
  audio_url="$(printf '%s\n' "$response" \
    | jq -r '.data.audio_url // .data.audio // .data.audio_file.download_url // empty')" || audio_url=""
  if [[ -z "$audio_url" ]]; then
    echo "Error: No audio URL in music response" >&2
    return 1
  fi

  mkdir -p "$(dirname "$output_path")"

  # --fail: an HTTP error must not be saved as if it were the audio file.
  if ! curl --fail --silent --show-error --location -o "$output_path" \
    --max-time 120 "$audio_url"; then
    rm -f -- "$output_path"
    echo "Error: Failed to download generated music" >&2
    return 1
  fi
  if [[ ! -s "$output_path" ]]; then
    echo "Error: Downloaded music file is empty" >&2
    return 1
  fi

  echo "  Music saved: $output_path" >&2
}
|
||||
|
||||
# ============================================================================
|
||||
# Main
|
||||
# ============================================================================
|
||||
|
||||
# Entry point: parse options, generate one video segment per scene (feeding
# each segment's last frame into the next one as its first frame),
# concatenate the segments and optionally add generated background music.
#
# Arguments: the script's command line (see the usage text below).
# Exits:     0 on success or --help; 1 on invalid usage or when no segment
#            could be generated. If a later segment fails, the segments
#            produced so far are still joined.
#
# Notes on the parsing and payload code:
#   * The --scenes list ends at the next option, including the short options
#     -o / -h, so "--scenes A B -o out.mp4" works as documented.
#   * Image data is handed to jq on stdin (-Rs), not as an argument: a base64
#     frame can exceed the operating system's limit on one argument.
main() {
  load_env
  check_api_key

  local scenes=() model="" segment_duration=10 resolution="768P"
  local first_frame="" subject_reference="" crossfade=0.5
  local music_prompt="" bgm_volume=0.3 fade_in=0 fade_out=0
  local output=""

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --scenes)
        shift
        while [[ $# -gt 0 && "$1" != --* && "$1" != "-o" && "$1" != "-h" ]]; do
          scenes+=("$1")
          shift
        done
        ;;
      --model) model="${2?--model requires a value}"; shift 2 ;;
      --segment-duration) segment_duration="${2?--segment-duration requires a value}"; shift 2 ;;
      --resolution) resolution="${2?--resolution requires a value}"; shift 2 ;;
      --first-frame) first_frame="${2?--first-frame requires a value}"; shift 2 ;;
      --subject-reference) subject_reference="${2?--subject-reference requires a value}"; shift 2 ;;
      --crossfade) crossfade="${2?--crossfade requires a value}"; shift 2 ;;
      --music-prompt) music_prompt="${2?--music-prompt requires a value}"; shift 2 ;;
      --bgm-volume) bgm_volume="${2?--bgm-volume requires a value}"; shift 2 ;;
      --fade-in) fade_in="${2?--fade-in requires a value}"; shift 2 ;;
      --fade-out) fade_out="${2?--fade-out requires a value}"; shift 2 ;;
      -o|--output) output="${2?--output requires a value}"; shift 2 ;;
      -h|--help)
        cat <<'USAGE'
MiniMax Long Video Generation CLI

Usage:
  generate_long_video.sh --scenes "scene1" "scene2" ... -o OUTPUT

Options:
  --scenes TEXT...          Scene prompts (2+ required)
  --model MODEL             Model name (default: auto)
  --segment-duration SECS   Duration per segment (default: 10)
  --resolution RES          Resolution: 768P, 1080P (default: 768P)
  --first-frame FILE        First frame for scene 1 (local file or URL)
  --subject-reference FILE  Subject reference image
  --crossfade SECS          Crossfade duration between scenes (default: 0.5)
  --music-prompt TEXT       Generate BGM with this prompt
  --bgm-volume FLOAT        BGM volume level (default: 0.3)
  --fade-in SECS            BGM fade-in duration
  --fade-out SECS           BGM fade-out duration
  -o, --output FILE         Output video file (required)

Examples:
  generate_long_video.sh --scenes "A sunrise" "Birds flying" "Sunset" -o long.mp4
  generate_long_video.sh --scenes "Scene 1" "Scene 2" --crossfade 1 --music-prompt "cinematic" -o movie.mp4
USAGE
        exit 0
        ;;
      *) echo "Unknown option: $1" >&2; exit 1 ;;
    esac
  done

  if [[ ${#scenes[@]} -eq 0 ]]; then
    echo "Error: --scenes is required" >&2; exit 1
  fi
  if [[ -z "$output" ]]; then
    echo "Error: --output / -o is required" >&2; exit 1
  fi
  # The value is spliced into the request as a JSON number below.
  if [[ ! "$segment_duration" =~ ^[0-9]+$ ]]; then
    echo "Error: --segment-duration must be a whole number of seconds" >&2; exit 1
  fi

  local output_dir
  output_dir="$(dirname "$output")"
  mkdir -p "$output_dir"
  local tmpdir="$output_dir/tmp"
  mkdir -p "$tmpdir"
  echo "Temp directory: $tmpdir"

  local segment_paths=()
  local current_first_frame="$first_frame"

  echo "=== Generating ${#scenes[@]} video segments ==="
  echo ""

  local i scene seg_output seg_mode seg_model payload
  local ff_url si_url task_id file_id last_frame_path
  for i in "${!scenes[@]}"; do
    scene="${scenes[$i]}"
    echo "--- Segment $((i+1))/${#scenes[@]} ---"
    echo "  Prompt: $scene"

    seg_output="$tmpdir/segment_$(printf '%03d' "$i").mp4"

    # Mode: image-to-video when a first frame is available, subject-reference
    # when only a reference image was given, text-to-video otherwise.
    seg_mode="t2v"
    if [[ -n "$current_first_frame" ]]; then
      seg_mode="i2v"
    elif [[ -n "$subject_reference" ]]; then
      seg_mode="ref"
    fi

    # Model: explicit --model wins, otherwise a per-mode default.
    seg_model="$model"
    if [[ -z "$seg_model" ]]; then
      case "$seg_mode" in
        t2v|i2v) seg_model="MiniMax-Hailuo-2.3" ;;
        ref) seg_model="S2V-01" ;;
      esac
    fi

    payload=$(jq -n \
      --arg m "$seg_model" \
      --arg p "$scene" \
      --argjson d "$segment_duration" \
      --arg r "$resolution" \
      '{model: $m, prompt: $p, duration: $d, resolution: $r}')

    if [[ "$seg_mode" == "i2v" ]]; then
      ff_url="$(resolve_image "$current_first_frame")"
      # -Rs reads all of stdin as one string ("."); the small base payload is
      # the only thing passed as an argument.
      payload=$(printf '%s' "$ff_url" \
        | jq -Rs --argjson base "$payload" \
            '$base + {first_frame_image: ., prompt_optimizer: false}')
    elif [[ "$seg_mode" == "ref" ]]; then
      si_url="$(resolve_image "$subject_reference")"
      payload=$(printf '%s' "$si_url" \
        | jq -Rs --argjson base "$payload" \
            '$base + {subject_reference: [{type: "character", image: [.]}]}')
    fi

    if task_id="$(_create_task "$payload")" && [[ -n "$task_id" ]]; then
      echo "  Task created: $task_id"
      if file_id="$(_poll_task "$task_id")" && [[ -n "$file_id" ]]; then
        _download_video "$file_id" "$seg_output"
        segment_paths+=("$seg_output")

        # Chain the scenes: the last frame of this segment seeds the next
        # one. If it cannot be extracted the next segment has no first frame.
        last_frame_path="$tmpdir/last_frame_$(printf '%03d' "$i").jpg"
        if extract_last_frame "$seg_output" "$last_frame_path"; then
          current_first_frame="$last_frame_path"
        else
          current_first_frame=""
        fi
      else
        echo "  Error: Polling failed for segment $((i+1))" >&2
        if [[ ${#segment_paths[@]} -eq 0 ]]; then exit 1; fi
        break
      fi
    else
      echo "  Error generating segment $((i+1))" >&2
      if [[ ${#segment_paths[@]} -eq 0 ]]; then exit 1; fi
      break
    fi
  done

  if [[ ${#segment_paths[@]} -eq 0 ]]; then
    echo "Error: No segments were generated." >&2; exit 1
  fi

  # When music is requested, concatenate into a temp file first so the final
  # output is only written once, with the music merged in.
  local final_video="$output"
  if [[ -n "$music_prompt" ]]; then
    final_video="$tmpdir/concatenated.mp4"
  fi

  if [[ ${#segment_paths[@]} -eq 1 ]]; then
    cp "${segment_paths[0]}" "$final_video"
  else
    concatenate_videos "$final_video" "$crossfade" "${segment_paths[@]}"
  fi

  # Background music is best-effort: on any failure the silent video is
  # delivered instead.
  if [[ -n "$music_prompt" ]]; then
    echo ""
    echo "--- Generating background music ---"
    local music_path="$tmpdir/bgm.mp3"
    if generate_music_instrumental "$music_prompt" "$music_path"; then
      if ! merge_video_audio "$final_video" "$music_path" "$output" "$bgm_volume" "$fade_in" "$fade_out"; then
        echo "Warning: Failed to add BGM, using video without music" >&2
        if [[ "$final_video" != "$output" ]]; then
          cp "$final_video" "$output"
        fi
      fi
    else
      echo "Warning: Failed to generate BGM" >&2
      if [[ "$final_video" != "$output" ]]; then
        cp "$final_video" "$output"
      fi
    fi
  fi

  echo ""
  echo "=== Done! Output: $output ==="
  echo "  Intermediate files in: $tmpdir"
  echo "  Delete with: rm -rf $tmpdir"
}
|
||||
|
||||
main "$@"
|
||||
216
skills/minimax-multimodal-toolkit/scripts/video/generate_template_video.sh
Executable file
216
skills/minimax-multimodal-toolkit/scripts/video/generate_template_video.sh
Executable file
@@ -0,0 +1,216 @@
|
||||
#!/usr/bin/env bash
|
||||
# MiniMax Template Video Generation CLI (pure bash)
|
||||
#
|
||||
# Usage:
|
||||
# bash scripts/video/generate_template_video.sh \
|
||||
# --template-id T00001 \
|
||||
# --media image1.jpg image2.jpg \
|
||||
# --text "Title" "Subtitle" \
|
||||
# -o output/template_video.mp4
|
||||
# Strict mode: abort on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Directory holding this script, and the toolkit root two levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# API endpoints; the host part can be overridden through MINIMAX_API_HOST.
API_BASE="${MINIMAX_API_HOST:-https://api.minimaxi.com}/v1"
TEMPLATE_URL="${API_BASE}/video_template_generation"
QUERY_URL="${API_BASE}/query/video_template_generation"

# Task polling: seconds between polls and overall time budget in seconds.
POLL_INTERVAL=10
MAX_WAIT_TIME=600

# Per-request curl timeout (seconds) and the number of consecutive failed
# polls tolerated before giving up.
REQUEST_TIMEOUT=60
MAX_CONSECUTIVE_FAILURES=5
|
||||
|
||||
# ============================================================================
|
||||
# Common functions
|
||||
# ============================================================================
|
||||
|
||||
# Load KEY=VALUE pairs from .env files into the environment.
#
# Reads "$PROJECT_ROOT/.env" first, then "$(pwd)/.env". A variable that is
# already set to a non-empty value is never overwritten, so the caller's
# environment wins over the project file, which wins over the cwd file.
#
# Parsing rules:
#   - everything from the first '#' on a line is dropped as a comment;
#   - lines without '=' are skipped;
#   - whitespace around key and value is trimmed (in pure bash, so values
#     containing quotes or apostrophes no longer break the parser);
#   - keys that are not valid shell identifiers are skipped;
#   - one pair of matching surrounding quotes is removed from the value.
#
# Globals:  PROJECT_ROOT (read); exports each variable it loads.
# Returns:  always 0. The previous version returned the status of its last
#           "[[ ... ]] && export" test, which aborted the script under
#           `set -e` whenever the last key in the file was already set.
load_env() {
  local env_file line key val
  for env_file in "$PROJECT_ROOT/.env" "$(pwd)/.env"; do
    [[ -f "$env_file" ]] || continue
    # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      line="${line%%#*}"
      [[ "$line" == *=* ]] || continue

      key="${line%%=*}"
      val="${line#*=}"

      # Trim leading/trailing whitespace (also strips a trailing CR).
      key="${key#"${key%%[![:space:]]*}"}"
      key="${key%"${key##*[![:space:]]}"}"
      val="${val#"${val%%[![:space:]]*}"}"
      val="${val%"${val##*[![:space:]]}"}"

      # ${!key} below is an error for anything that is not an identifier.
      [[ "$key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue

      if [[ ${#val} -ge 2 ]]; then
        case "$val" in
          \"*\" | \'*\') val="${val:1:${#val}-2}" ;;
        esac
      fi

      if [[ -z "${!key:-}" ]]; then
        export "$key=$val"
      fi
    done < "$env_file"
  done
  return 0
}
|
||||
|
||||
# Abort the script with exit status 1 unless MINIMAX_API_KEY is set to a
# non-empty value.
check_api_key() {
  [[ -n "${MINIMAX_API_KEY:-}" ]] && return 0
  echo "Error: MINIMAX_API_KEY not set." >&2
  exit 1
}
|
||||
|
||||
# Turn a media reference into something the API accepts.
#
# Arguments: $1 - an http(s)/data URL or a local file path.
# Outputs:   URLs are printed unchanged; a local file is printed as a
#            single-line "data:<mime>;base64,<payload>" URL.
# Exits:     with status 1 if a local path does not exist.
#
# The MIME type comes from `file`; if that fails or prints nothing,
# "application/octet-stream" is used.
resolve_media_input() {
  local value="$1" mime b64

  case "$value" in
    http://*|https://*|data:*)
      echo "$value"
      return 0
      ;;
  esac

  [[ -f "$value" ]] || { echo "Error: Media file not found: $value" >&2; exit 1; }

  mime="$(file -b --mime-type "$value" 2>/dev/null)" || mime="application/octet-stream"
  [[ -n "$mime" ]] || mime="application/octet-stream"

  # GNU base64 wraps its output every 76 columns; strip the line breaks so
  # the data URL stays on one line on every platform.
  b64="$(base64 < "$value" | tr -d '\r\n')"

  printf 'data:%s;base64,%s\n' "$mime" "$b64"
}
|
||||
|
||||
# ============================================================================
|
||||
# Main
|
||||
# ============================================================================
|
||||
|
||||
# Entry point: parse options, submit a template video task, poll it until it
# finishes and download the resulting video.
#
# Arguments: the script's command line (see the usage text below).
# Exits:     0 on success or --help; 1 on invalid usage, API errors, task
#            failure, timeout or a failed download.
#
# Notes on the parsing and payload code:
#   * The --media / --text lists end at the next option, including the short
#     options -o / -h, so "--text A B -o out.mp4" works as documented.
#   * Media data is handed to jq and curl on stdin, never as an argument: a
#     base64-encoded file can exceed the operating system's limit on one
#     argument.
main() {
  load_env
  check_api_key

  local template_id="" output=""
  local media_inputs=() text_inputs=()

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --template-id) template_id="${2?--template-id requires a value}"; shift 2 ;;
      --media)
        shift
        while [[ $# -gt 0 && "$1" != --* && "$1" != "-o" && "$1" != "-h" ]]; do
          media_inputs+=("$1")
          shift
        done
        ;;
      --text)
        shift
        while [[ $# -gt 0 && "$1" != --* && "$1" != "-o" && "$1" != "-h" ]]; do
          text_inputs+=("$1")
          shift
        done
        ;;
      -o|--output) output="${2?--output requires a value}"; shift 2 ;;
      -h|--help)
        cat <<'USAGE'
MiniMax Template Video Generation CLI

Usage:
  generate_template_video.sh --template-id ID [--media FILE...] [--text TEXT...] -o OUTPUT

Options:
  --template-id ID   Template ID (required)
  --media FILE...    Media inputs (local files or URLs)
  --text TEXT...     Text inputs for template slots
  -o, --output FILE  Output video file (required)

Examples:
  generate_template_video.sh --template-id T00001 --media image1.jpg image2.jpg --text "Title" "Subtitle" -o video.mp4
USAGE
        exit 0
        ;;
      *) echo "Unknown option: $1" >&2; exit 1 ;;
    esac
  done

  if [[ -z "$template_id" ]]; then
    echo "Error: --template-id is required" >&2; exit 1
  fi
  if [[ -z "$output" ]]; then
    echo "Error: --output / -o is required" >&2; exit 1
  fi

  # Build payload
  local payload i
  payload=$(jq -n --arg tid "$template_id" '{template_id: $tid}')

  # Add media inputs. Each item is encoded from stdin (-Rs reads all of
  # stdin as one string), collected one JSON object per line, then slurped
  # into an array; only the small base payload travels as an argument.
  if [[ ${#media_inputs[@]} -gt 0 ]]; then
    local resolved item media_items=""
    for i in "${!media_inputs[@]}"; do
      resolved="$(resolve_media_input "${media_inputs[$i]}")"
      item="$(printf '%s' "$resolved" | jq -Rsc '{value: .}')"
      media_items+="${item}"$'\n'
      echo "  Media [$i]: ${media_inputs[$i]}"
    done
    payload=$(printf '%s' "$media_items" \
      | jq -s --argjson base "$payload" '$base + {media_inputs: .}')
  fi

  # Add text inputs (short strings; the possibly large payload is on stdin).
  if [[ ${#text_inputs[@]} -gt 0 ]]; then
    local text_json="[]"
    for i in "${!text_inputs[@]}"; do
      text_json=$(printf '%s\n' "$text_json" | jq --arg t "${text_inputs[$i]}" '. + [{value: $t}]')
      echo "  Text [$i]: ${text_inputs[$i]}"
    done
    payload=$(printf '%s\n' "$payload" | jq --argjson ti "$text_json" '. + {text_inputs: $ti}')
  fi

  # Create task
  echo "Creating template video task (template: $template_id)..."
  local raw http_code response
  raw="$(printf '%s' "$payload" \
    | curl -s -w "\n%{http_code}" -X POST "$TEMPLATE_URL" \
        -H "Authorization: Bearer ${MINIMAX_API_KEY}" \
        -H "Content-Type: application/json" \
        --max-time "$REQUEST_TIMEOUT" --data-binary @-)" || {
    echo "Error: request to $TEMPLATE_URL failed" >&2
    exit 1
  }
  # curl appends the status code on its own final line (see -w above).
  http_code="${raw##*$'\n'}"
  response="${raw%$'\n'*}"

  if [[ "$http_code" =~ ^[0-9]+$ ]] && (( http_code >= 400 )); then
    echo "Error: HTTP $http_code" >&2
    echo "$response" >&2
    exit 1
  fi

  local sc
  sc="$(printf '%s\n' "$response" | jq -r '.base_resp.status_code // 0' 2>/dev/null)" || sc=""
  if [[ -n "$sc" && "$sc" != "0" ]]; then
    echo "Error: $(printf '%s\n' "$response" | jq '.base_resp')" >&2
    exit 1
  fi

  local task_id
  task_id="$(printf '%s\n' "$response" | jq -r '.task_id // empty')"
  if [[ -z "$task_id" ]]; then
    echo "Error: No task_id in response" >&2; exit 1
  fi
  echo "Task created: $task_id"

  # Poll task
  echo "Polling task $task_id..."
  local start_time cf=0 elapsed poll_raw poll_resp status
  local video_url=""
  start_time="$(date +%s)"

  while true; do
    elapsed=$(( $(date +%s) - start_time ))
    if [[ $elapsed -gt $MAX_WAIT_TIME ]]; then
      echo "Error: Timeout" >&2; exit 1
    fi

    if poll_raw="$(curl -s -w "\n%{http_code}" -G "$QUERY_URL" \
      -d "task_id=$task_id" \
      -H "Authorization: Bearer ${MINIMAX_API_KEY}" \
      --max-time "$REQUEST_TIMEOUT" 2>/dev/null)"; then
      # Drop the trailing status-code line; only the body is used.
      poll_resp="${poll_raw%$'\n'*}"
      cf=0
    else
      cf=$((cf+1))
      echo "  Poll error ($cf/$MAX_CONSECUTIVE_FAILURES)"
      if [[ $cf -ge $MAX_CONSECUTIVE_FAILURES ]]; then
        echo "Error: Too many failures" >&2; exit 1
      fi
      sleep "$POLL_INTERVAL"
      continue
    fi

    status="$(printf '%s\n' "$poll_resp" | jq -r '.status // "Unknown"')"
    echo "  [${elapsed}s] Status: $status"

    case "$status" in
      Success)
        video_url="$(printf '%s\n' "$poll_resp" | jq -r '.video_url // empty')"
        if [[ -z "$video_url" ]]; then
          echo "Error: No video_url in response" >&2; exit 1
        fi
        break
        ;;
      Fail|Failed|Error)
        echo "Error: Task failed: $(printf '%s\n' "$poll_resp" | jq -r '.base_resp.status_msg // "Unknown"')" >&2
        exit 1
        ;;
    esac

    sleep "$POLL_INTERVAL"
  done

  # Download video directly from video_url.
  # --fail: an HTTP error page must not be saved as if it were the video.
  echo "Downloading video..."
  mkdir -p "$(dirname "$output")"
  if ! curl --fail --silent --show-error --location -o "$output" \
    --max-time $((REQUEST_TIMEOUT * 3)) "$video_url"; then
    rm -f -- "$output"
    echo "Error: Download failed" >&2
    exit 1
  fi
  local size
  size="$(wc -c < "$output" | tr -d ' ')"
  echo "Video saved to: $output ($size bytes)"
  echo "Done!"
}
|
||||
|
||||
main "$@"
|
||||
329
skills/minimax-multimodal-toolkit/scripts/video/generate_video.sh
Executable file
329
skills/minimax-multimodal-toolkit/scripts/video/generate_video.sh
Executable file
@@ -0,0 +1,329 @@
|
||||
#!/usr/bin/env bash
|
||||
# MiniMax Video Generation CLI (pure bash)
|
||||
#
|
||||
# Usage:
|
||||
# bash scripts/video/generate_video.sh --mode t2v --prompt "A cat playing piano" -o output/cat.mp4
|
||||
# bash scripts/video/generate_video.sh --mode i2v --prompt "Gentle breeze" --first-frame image.jpg -o output/anim.mp4
|
||||
# bash scripts/video/generate_video.sh --mode sef --first-frame start.jpg --last-frame end.jpg -o output/sef.mp4
|
||||
# bash scripts/video/generate_video.sh --mode ref --prompt "Person dancing" --subject-image person.jpg -o output/ref.mp4
|
||||
# Strict mode: stop on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Directory holding this script, and the directory two levels above it, which
# load_env searches for a .env file.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# Base URL for every API call; the host part can be replaced through the
# MINIMAX_API_HOST environment variable.
API_BASE="${MINIMAX_API_HOST:-https://api.minimaxi.com}/v1"

# Seconds to sleep between two status polls.
POLL_INTERVAL=10
# Upper bound, in seconds, on how long a task is polled before giving up.
MAX_WAIT_TIME=600
# Value handed to curl --max-time for API requests.
REQUEST_TIMEOUT=60
# Number of back-to-back poll failures tolerated before aborting.
MAX_CONSECUTIVE_FAILURES=5
|
||||
|
||||
# ============================================================================
|
||||
# Common functions
|
||||
# ============================================================================
|
||||
|
||||
# Load KEY=VALUE pairs from .env files into the environment.
#
# Reads "$PROJECT_ROOT/.env" and then "$(pwd)/.env"; files that do not exist
# are skipped. A variable that already holds a non-empty value is never
# overwritten, so the real environment wins over the project file, which in
# turn wins over the file in the current directory.
#
# Accepted lines: KEY=VALUE, KEY="VALUE", KEY='VALUE', each optionally
# prefixed with "export". Everything from the first '#' on a line is dropped
# as a comment, so values cannot contain '#'. Lines without '=' and lines
# whose key is not a valid shell identifier are ignored.
#
# Globals:  PROJECT_ROOT (read)
# Returns:  always 0
load_env() {
  local env_file line key val
  for env_file in "$PROJECT_ROOT/.env" "$(pwd)/.env"; do
    [[ -f "$env_file" ]] || continue
    # "|| [[ -n $line ]]" keeps a final line that has no trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      line="${line%%#*}"
      # Trim with parameter expansion instead of xargs: xargs aborts on an
      # unbalanced quote (e.g. an apostrophe in a value) and collapses inner
      # whitespace.
      line="${line#"${line%%[![:space:]]*}"}"
      line="${line%"${line##*[![:space:]]}"}"
      if [[ "$line" == export[[:space:]]* ]]; then
        line="${line#export}"
        line="${line#"${line%%[![:space:]]*}"}"
      fi
      [[ "$line" == *=* ]] || continue

      key="${line%%=*}"
      val="${line#*=}"
      key="${key%"${key##*[![:space:]]}"}"
      val="${val#"${val%%[![:space:]]*}"}"

      # An invalid name would make the indirect expansion below a hard error.
      [[ "$key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue

      # Remove one pair of surrounding quotes.
      if [[ ${#val} -ge 2 ]]; then
        case "$val" in
          \"*\" | \'*\') val="${val:1:${#val}-2}" ;;
        esac
      fi

      if [[ -z "${!key:-}" ]]; then
        export "$key=$val"
      fi
    done < "$env_file"
  done
  # Explicit success: ending on a "[[ ... ]] && export" list would make the
  # function return 1 whenever the last key was already set, which aborts a
  # caller running under "set -e".
  return 0
}
|
||||
|
||||
# Guard: terminate the script with an error on stderr unless MINIMAX_API_KEY
# is set to a non-empty value.
check_api_key() {
  [[ -n "${MINIMAX_API_KEY:-}" ]] && return 0
  echo "Error: MINIMAX_API_KEY environment variable is not set." >&2
  exit 1
}
|
||||
|
||||
# Encode a local image file as a single-line "data:<mime>;base64,<payload>" URL.
#
# Arguments: $1 - path to the image file
# Outputs:   the data URL on stdout
# Exits:     with status 1 if the file is missing or cannot be encoded
image_to_data_url() {
  local path="$1"
  [[ -f "$path" ]] || { echo "Error: Image not found: $path" >&2; exit 1; }

  # Fall back to image/jpeg when `file` is unavailable or reports nothing.
  local mime
  mime="$(file -b --mime-type "$path" 2>/dev/null)" || mime="image/jpeg"
  [[ -n "$mime" ]] || mime="image/jpeg"

  # GNU base64 wraps its output at 76 columns; strip the line breaks so the
  # payload is one contiguous string.
  local b64
  b64="$(base64 < "$path" | tr -d '\r\n')" || b64=""
  if [[ -z "$b64" ]]; then
    echo "Error: Failed to base64-encode image: $path" >&2; exit 1
  fi

  printf 'data:%s;base64,%s\n' "$mime" "$b64"
}
|
||||
|
||||
# Turn an image reference into something that can be placed in the request.
#
# Arguments: $1 - empty string, an http(s) URL, a data: URL, or a local path
# Outputs:   nothing for an empty reference; URLs unchanged; for anything
#            else, whatever image_to_data_url prints for that path
resolve_image() {
  local ref="$1"

  if [[ -z "$ref" ]]; then
    return 0
  fi

  if [[ "$ref" == http://* || "$ref" == https://* || "$ref" == data:* ]]; then
    echo "$ref"
  else
    image_to_data_url "$ref"
  fi
}
|
||||
|
||||
# ============================================================================
|
||||
# Video generation functions
|
||||
# ============================================================================
|
||||
|
||||
# Submit a video generation request and print the new task id.
#
# Arguments: $1 - JSON request body
# Globals:   API_BASE, MINIMAX_API_KEY, REQUEST_TIMEOUT (read)
# Outputs:   task id on stdout; progress and errors on stderr
# Exits:     with status 1 on transport failure, HTTP status >= 400, a
#            non-JSON reply, a non-zero base_resp.status_code, or a reply
#            without task_id
create_task() {
  local payload="$1"
  echo "Creating video generation task..." >&2

  # The body is streamed through stdin (--data-binary @-) rather than passed
  # as a command-line argument: a payload carrying base64 image data can
  # exceed the operating system's per-argument size limit.
  local raw_output http_code response
  if ! raw_output="$(printf '%s' "$payload" | curl -sS -w "\n%{http_code}" \
      -X POST "${API_BASE}/video_generation" \
      -H "Authorization: Bearer ${MINIMAX_API_KEY}" \
      -H "Content-Type: application/json" \
      --max-time "$REQUEST_TIMEOUT" \
      --data-binary @-)"; then
    echo "Error: Request to ${API_BASE}/video_generation failed" >&2; exit 1
  fi
  # -w appended "\n<code>": the last line is the status, the rest is the body.
  http_code="${raw_output##*$'\n'}"
  response="${raw_output%$'\n'*}"

  if [[ "$http_code" =~ ^[0-9]+$ ]] && (( 10#$http_code >= 400 )); then
    echo "Error: API returned HTTP $http_code" >&2; echo "$response" >&2; exit 1
  fi

  if ! jq -e 'type == "object"' >/dev/null 2>&1 <<<"$response"; then
    echo "Error: API returned an unexpected response (HTTP $http_code)" >&2
    echo "$response" >&2
    exit 1
  fi

  local sc
  sc="$(jq -r '.base_resp.status_code // 0' <<<"$response")"
  if [[ "$sc" != "0" && -n "$sc" ]]; then
    echo "Error: API error: $(jq '.base_resp' <<<"$response")" >&2; exit 1
  fi

  local task_id
  task_id="$(jq -r '.task_id // empty' <<<"$response")"
  if [[ -z "$task_id" ]]; then
    echo "Error: No task_id in response" >&2; echo "$response" >&2; exit 1
  fi

  echo "Task created: $task_id" >&2
  echo "$task_id"
}
|
||||
|
||||
# Poll a generation task until it succeeds, fails, or times out.
#
# Arguments: $1 - task id returned by create_task
# Globals:   API_BASE, MINIMAX_API_KEY, REQUEST_TIMEOUT, POLL_INTERVAL,
#            MAX_WAIT_TIME, MAX_CONSECUTIVE_FAILURES (read)
# Outputs:   the file_id of the finished video on stdout; progress on stderr
# Exits:     with status 1 on timeout, on a Fail/Failed/Error task status, on
#            a success reply without file_id, or after
#            MAX_CONSECUTIVE_FAILURES polls in a row that failed (transport
#            error, HTTP status >= 400, or an unparsable/empty reply)
poll_task() {
  local task_id="$1"
  echo "Polling task $task_id..." >&2
  local start_time consecutive_failures=0
  start_time="$(date +%s)"

  local now elapsed raw_output http_code response status poll_error
  while true; do
    now="$(date +%s)"
    elapsed=$((now - start_time))
    if [[ $elapsed -gt $MAX_WAIT_TIME ]]; then
      echo "Error: Task $task_id timed out after ${MAX_WAIT_TIME}s" >&2; exit 1
    fi

    poll_error=""
    status=""
    if raw_output="$(curl -s -w "\n%{http_code}" \
        -G "${API_BASE}/query/video_generation" \
        -d "task_id=$task_id" \
        -H "Authorization: Bearer ${MINIMAX_API_KEY}" \
        --max-time "$REQUEST_TIMEOUT" 2>/dev/null)"; then
      # -w appended "\n<code>": last line is the status, the rest is the body.
      http_code="${raw_output##*$'\n'}"
      response="${raw_output%$'\n'*}"
      if [[ "$http_code" =~ ^[0-9]+$ ]] && (( 10#$http_code >= 400 )); then
        # Without this check an HTTP error body has no .status field and
        # would be polled as "Unknown" until the overall timeout.
        poll_error="HTTP $http_code"
      elif ! status="$(jq -r '.status // "Unknown"' <<<"$response" 2>/dev/null)" \
          || [[ -z "$status" ]]; then
        poll_error="unparsable response"
      fi
    else
      poll_error="request failed"
    fi

    if [[ -n "$poll_error" ]]; then
      consecutive_failures=$((consecutive_failures + 1))
      echo "  Poll error: $poll_error ($consecutive_failures/$MAX_CONSECUTIVE_FAILURES)" >&2
      if [[ $consecutive_failures -ge $MAX_CONSECUTIVE_FAILURES ]]; then
        echo "Error: Too many consecutive poll failures" >&2; exit 1
      fi
      sleep "$POLL_INTERVAL"; continue
    fi
    consecutive_failures=0

    echo "  [${elapsed}s] Status: $status" >&2

    if [[ "$status" == "Success" ]]; then
      local file_id
      file_id="$(jq -r '.file_id // empty' <<<"$response")"
      if [[ -z "$file_id" ]]; then
        echo "Error: Task succeeded but no file_id" >&2; exit 1
      fi
      echo "$file_id"
      return 0
    fi

    if [[ "$status" == "Fail" || "$status" == "Failed" || "$status" == "Error" ]]; then
      local err_msg
      err_msg="$(jq -r '.base_resp.status_msg // "Unknown error"' <<<"$response")"
      echo "Error: Task failed: $err_msg" >&2; exit 1
    fi

    sleep "$POLL_INTERVAL"
  done
}
|
||||
|
||||
# Look up the download URL of a generated file and save the video locally.
#
# Arguments: $1 - file id returned by poll_task
#            $2 - destination path (parent directories are created)
# Globals:   API_BASE, MINIMAX_API_KEY, REQUEST_TIMEOUT (read)
# Outputs:   progress and errors on stderr
# Exits:     with status 1 if the lookup fails, the reply has no
#            file.download_url, or the download fails or is empty
download_video() {
  local file_id="$1" output_path="$2"
  echo "Retrieving file $file_id..." >&2

  local raw_output http_code response
  if ! raw_output="$(curl -sS -w "\n%{http_code}" \
      -G "${API_BASE}/files/retrieve" \
      -d "file_id=$file_id" \
      -H "Authorization: Bearer ${MINIMAX_API_KEY}" \
      --max-time "$REQUEST_TIMEOUT")"; then
    echo "Error: Failed to retrieve file $file_id" >&2; exit 1
  fi
  # -w appended "\n<code>": the last line is the status, the rest is the body.
  http_code="${raw_output##*$'\n'}"
  response="${raw_output%$'\n'*}"

  if [[ "$http_code" =~ ^[0-9]+$ ]] && (( 10#$http_code >= 400 )); then
    echo "Error: API returned HTTP $http_code" >&2; echo "$response" >&2; exit 1
  fi

  local dl_url
  dl_url="$(jq -r '.file.download_url // empty' <<<"$response" 2>/dev/null)" || dl_url=""
  if [[ -z "$dl_url" ]]; then
    echo "Error: No download_url in file response" >&2; exit 1
  fi

  echo "Downloading video..." >&2
  mkdir -p "$(dirname "$output_path")"

  # Download next to the target and rename on success so a failed transfer
  # never leaves a broken file at the requested path. --fail makes curl
  # return non-zero on an HTTP error instead of saving the error page.
  local part_path="${output_path}.part"
  if ! curl -sS --fail --location -o "$part_path" \
      --max-time $((REQUEST_TIMEOUT * 3)) "$dl_url"; then
    rm -f -- "$part_path"
    echo "Error: Failed to download video" >&2; exit 1
  fi
  if [[ ! -s "$part_path" ]]; then
    rm -f -- "$part_path"
    echo "Error: Downloaded video is empty" >&2; exit 1
  fi
  mv -f -- "$part_path" "$output_path"

  local size
  size="$(wc -c < "$output_path" | tr -d ' ')"
  echo "Video saved to: $output_path ($size bytes)" >&2
}
|
||||
|
||||
# ============================================================================
|
||||
# Main
|
||||
# ============================================================================
|
||||
|
||||
# Print the command-line help text to stdout.
_generate_video_usage() {
  cat <<'USAGE'
MiniMax Video Generation CLI

Usage:
  generate_video.sh --mode MODE [options] -o OUTPUT

Modes:
  t2v   Text-to-video
  i2v   Image-to-video (requires --first-frame)
  sef   Start-end frame (requires --first-frame; --last-frame is optional)
  ref   Subject reference (requires --subject-image; --first-frame is optional)

Options:
  --mode MODE               Generation mode: t2v, i2v, sef, ref (required)
  --prompt TEXT             Text prompt describing the video
  --model MODEL             Model name (default: MiniMax-Hailuo-2.3 for t2v/i2v,
                            MiniMax-Hailuo-02 for sef, S2V-01 for ref)
  --duration SECONDS        Video duration in whole seconds (default: 10)
  --resolution RES          Output resolution (default: 768P)
  --first-frame FILE        First frame image (local file or URL)
  --last-frame FILE         Last frame image (local file or URL)
  --subject-image FILE      Subject reference image (local file or URL)
  --prompt-optimizer BOOL   Sent as prompt_optimizer (true/false)
  --fast-pretreatment BOOL  Sent as fast_pretreatment (true/false, i2v only)
  --callback-url URL        Sent as callback_url
  --aigc-watermark BOOL     Sent as aigc_watermark (true/false)
  -o, --output FILE         Output video file (required)
  -h, --help                Show this help and exit

Examples:
  generate_video.sh --mode t2v --prompt "A cat playing piano" -o cat.mp4
  generate_video.sh --mode i2v --prompt "Gentle breeze" --first-frame photo.jpg -o anim.mp4
  generate_video.sh --mode sef --first-frame start.jpg --last-frame end.jpg -o sef.mp4
  generate_video.sh --mode ref --prompt "Person dancing" --subject-image person.jpg -o ref.mp4
USAGE
}

# Print JSON true when $1 contains "true" (case-insensitive), else JSON false.
_generate_video_bool() {
  printf '%s\n' "$1" | tr '[:upper:]' '[:lower:]' | jq -R 'test("true")'
}

# Print PAYLOAD ($1, a JSON object) transformed by the jq FILTER ($3), in
# which $payload is the object and $value is TEXT ($2) as a JSON string.
# Both travel through pipes rather than jq's argv, because TEXT may be a
# base64 data URL larger than the per-argument size limit.
_generate_video_merge_text() {
  local payload="$1" text="$2" filter="$3"
  {
    printf '%s\n' "$payload"
    # The newline added by printf is removed again by rtrimstr.
    printf '%s\n' "$text" | jq -Rs 'rtrimstr("\n")'
  } | jq -s ".[0] as \$payload | .[1] as \$value | ${filter}"
}

# Entry point: parse the command line, build the request body, create the
# generation task, wait for it, and download the resulting video.
#
# Arguments: the script's command-line arguments (see --help)
# Globals:   API_BASE (re-derived after .env loading)
# Outputs:   "Mode:", "Model:" and "Done!" on stdout; diagnostics on stderr
# Exits:     0 after --help; 1 on invalid usage
main() {
  local mode="" prompt="" model="" duration=10 resolution="768P"
  local first_frame="" last_frame="" subject_image=""
  local prompt_optimizer="" fast_pretreatment="" callback_url="" aigc_watermark=""
  local output=""

  # Arguments are parsed before any credential check so that --help works
  # without an API key.
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        _generate_video_usage
        exit 0
        ;;
      --mode|--prompt|--model|--duration|--resolution|--first-frame|--last-frame|--subject-image|--prompt-optimizer|--fast-pretreatment|--callback-url|--aigc-watermark|-o|--output)
        if [[ $# -lt 2 ]]; then
          echo "Error: Option $1 requires a value" >&2; exit 1
        fi
        ;;
      *)
        echo "Unknown option: $1" >&2; exit 1
        ;;
    esac
    case "$1" in
      --mode) mode="$2" ;;
      --prompt) prompt="$2" ;;
      --model) model="$2" ;;
      --duration) duration="$2" ;;
      --resolution) resolution="$2" ;;
      --first-frame) first_frame="$2" ;;
      --last-frame) last_frame="$2" ;;
      --subject-image) subject_image="$2" ;;
      --prompt-optimizer) prompt_optimizer="$2" ;;
      --fast-pretreatment) fast_pretreatment="$2" ;;
      --callback-url) callback_url="$2" ;;
      --aigc-watermark) aigc_watermark="$2" ;;
      -o|--output) output="$2" ;;
    esac
    shift 2
  done

  load_env
  check_api_key
  # API_BASE was computed before .env files were read; derive it again so a
  # MINIMAX_API_HOST supplied by .env takes effect. Without that variable the
  # value is unchanged.
  API_BASE="${MINIMAX_API_HOST:-${API_BASE%/v1}}/v1"

  if [[ -z "$mode" ]]; then
    echo "Error: --mode is required (t2v, i2v, sef, ref)" >&2; exit 1
  fi
  case "$mode" in
    t2v|i2v|sef|ref) ;;
    *) echo "Error: Unknown mode: $mode" >&2; exit 1 ;;
  esac
  if [[ -z "$output" ]]; then
    echo "Error: --output / -o is required" >&2; exit 1
  fi
  if [[ ! "$duration" =~ ^[0-9]+$ ]]; then
    echo "Error: --duration must be a whole number of seconds (got: $duration)" >&2; exit 1
  fi
  duration=$((10#$duration))

  # Default model per mode
  if [[ -z "$model" ]]; then
    case "$mode" in
      t2v|i2v) model="MiniMax-Hailuo-2.3" ;;
      sef) model="MiniMax-Hailuo-02" ;;
      ref) model="S2V-01" ;;
    esac
  fi

  # Build payload
  local payload
  payload="$(jq -n --arg m "$model" '{model: $m}')"
  if [[ -n "$prompt" ]]; then
    payload="$(printf '%s\n' "$payload" | jq --arg p "$prompt" '. + {prompt: $p}')"
  fi
  payload="$(printf '%s\n' "$payload" | jq --argjson d "$duration" '. + {duration: $d}')"
  payload="$(printf '%s\n' "$payload" | jq --arg r "$resolution" '. + {resolution: $r}')"
  if [[ -n "$prompt_optimizer" ]]; then
    payload="$(printf '%s\n' "$payload" \
      | jq --argjson po "$(_generate_video_bool "$prompt_optimizer")" '. + {prompt_optimizer: $po}')"
  fi
  if [[ -n "$callback_url" ]]; then
    payload="$(printf '%s\n' "$payload" | jq --arg cu "$callback_url" '. + {callback_url: $cu}')"
  fi
  if [[ -n "$aigc_watermark" ]]; then
    payload="$(printf '%s\n' "$payload" \
      | jq --argjson aw "$(_generate_video_bool "$aigc_watermark")" '. + {aigc_watermark: $aw}')"
  fi

  local ff_url lf_url si_url
  case "$mode" in
    t2v) ;;
    i2v)
      if [[ -z "$first_frame" ]]; then
        echo "Error: --first-frame is required for i2v mode" >&2; exit 1
      fi
      ff_url="$(resolve_image "$first_frame")"
      payload="$(_generate_video_merge_text "$payload" "$ff_url" \
        '$payload + {first_frame_image: $value}')"
      if [[ -n "$fast_pretreatment" ]]; then
        payload="$(printf '%s\n' "$payload" \
          | jq --argjson fp "$(_generate_video_bool "$fast_pretreatment")" '. + {fast_pretreatment: $fp}')"
      fi
      ;;
    sef)
      if [[ -z "$first_frame" ]]; then
        echo "Error: --first-frame is required for sef mode" >&2; exit 1
      fi
      ff_url="$(resolve_image "$first_frame")"
      payload="$(_generate_video_merge_text "$payload" "$ff_url" \
        '$payload + {first_frame_image: $value}')"
      if [[ -n "$last_frame" ]]; then
        lf_url="$(resolve_image "$last_frame")"
        payload="$(_generate_video_merge_text "$payload" "$lf_url" \
          '$payload + {last_frame_image: $value}')"
      fi
      ;;
    ref)
      if [[ -z "$subject_image" ]]; then
        echo "Error: --subject-image is required for ref mode" >&2; exit 1
      fi
      si_url="$(resolve_image "$subject_image")"
      payload="$(_generate_video_merge_text "$payload" "$si_url" \
        '$payload + {subject_reference: [{type: "character", image: [$value]}]}')"
      if [[ -n "$first_frame" ]]; then
        ff_url="$(resolve_image "$first_frame")"
        payload="$(_generate_video_merge_text "$payload" "$ff_url" \
          '$payload + {first_frame_image: $value}')"
      fi
      ;;
  esac

  echo "Mode: $mode"
  echo "Model: $model"

  local task_id file_id
  task_id="$(create_task "$payload")"
  file_id="$(poll_task "$task_id")"
  download_video "$file_id" "$output"
  echo "Done!"
}
|
||||
|
||||
main "$@"
|
||||
Reference in New Issue
Block a user