feat: Import 35+ skills, merge duplicates, add openclaw installer
Major updates: - Added 35+ new skills from awesome-opencode-skills and antigravity repos - Merged SEO skills into seo-master - Merged architecture skills into architecture - Merged security skills into security-auditor and security-coder - Merged testing skills into testing-master and testing-patterns - Merged pentesting skills into pentesting - Renamed website-creator to thai-frontend-dev - Replaced skill-creator with github version - Removed Chutes references (use MiniMax API instead) - Added install-openclaw-skills.sh for cross-platform installation - Updated .env.example with MiniMax API credentials
This commit is contained in:
934
skills/minimax-multimodal-toolkit/scripts/tts/generate_voice.sh
Executable file
934
skills/minimax-multimodal-toolkit/scripts/tts/generate_voice.sh
Executable file
@@ -0,0 +1,934 @@
|
||||
#!/usr/bin/env bash
|
||||
# MiniMax Voice CLI — Unified TTS command-line interface (pure bash)
|
||||
#
|
||||
# Usage:
|
||||
# bash scripts/tts/generate_voice.sh tts "Hello world" -o hello.mp3
|
||||
# bash scripts/tts/generate_voice.sh clone my_voice.mp3 --voice-id my-custom-voice
|
||||
# bash scripts/tts/generate_voice.sh design "A gentle female voice" --voice-id designed-voice-1
|
||||
# bash scripts/tts/generate_voice.sh list-voices
|
||||
# bash scripts/tts/generate_voice.sh validate segments.json
|
||||
# bash scripts/tts/generate_voice.sh generate segments.json -o output.mp3
|
||||
# bash scripts/tts/generate_voice.sh merge file1.mp3 file2.mp3 -o combined.mp3
|
||||
# bash scripts/tts/generate_voice.sh convert input.wav -o output.mp3
|
||||
# bash scripts/tts/generate_voice.sh check-env
|
||||
# Strict mode: stop on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# ============================================================================
# Configuration
# ============================================================================
# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(
  cd "$(dirname "${BASH_SOURCE[0]}")" && pwd
)"
# Absolute path of the directory two levels above SCRIPT_DIR.
PROJECT_ROOT="$(
  cd "${SCRIPT_DIR}/../.." && pwd
)"
|
||||
|
||||
# ============================================================================
|
||||
# Common functions
|
||||
# ============================================================================
|
||||
|
||||
#######################################
# Load KEY=VALUE pairs from the first .env file that exists.
# Candidates are tried in order: "$PROJECT_ROOT/.env", then "$(pwd)/.env";
# only the first existing file is read. A variable that already has a
# non-empty value in the environment is never overwritten.
#
# Accepted line forms:
#   KEY=value             unquoted; everything from the first '#' is dropped
#   KEY="va#lue"          quoted; quotes removed, content kept verbatim
#   KEY='value'  # note   text after the closing quote is ignored
#   export KEY=value      optional "export" prefix
# Blank lines, comment lines, lines without '=' and lines whose key is not a
# valid shell identifier are skipped instead of aborting the script.
#
# Globals:   PROJECT_ROOT (read); every loaded variable is exported
# Arguments: none
# Returns:   0 always
#######################################
load_env() {
  local env_file line key val

  for env_file in "$PROJECT_ROOT/.env" "$(pwd)/.env"; do
    [[ -f "$env_file" ]] || continue

    # "|| [[ -n $line ]]" keeps a last line that has no trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      line="${line%$'\r'}"                       # tolerate CRLF files
      line="${line#"${line%%[![:space:]]*}"}"    # trim leading whitespace
      [[ -z "$line" || "$line" == '#'* ]] && continue

      # Accept the common "export KEY=value" spelling.
      if [[ "$line" =~ ^export[[:space:]]+(.*)$ ]]; then
        line="${BASH_REMATCH[1]}"
      fi
      [[ "$line" == *=* ]] || continue

      key="${line%%=*}"
      val="${line#*=}"
      key="${key%"${key##*[![:space:]]}"}"       # trim trailing whitespace
      val="${val#"${val%%[![:space:]]*}"}"       # trim leading whitespace

      # An invalid name would make the indirect expansion below abort.
      [[ "$key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue

      if [[ "$val" == \"*\"* ]]; then
        # Double-quoted: keep everything up to the closing quote verbatim.
        val="${val#\"}"
        val="${val%%\"*}"
      elif [[ "$val" == \'*\'* ]]; then
        # Single-quoted: same treatment.
        val="${val#\'}"
        val="${val%%\'*}"
      else
        # Unquoted: drop an inline comment, then trailing whitespace.
        val="${val%%#*}"
        val="${val%"${val##*[![:space:]]}"}"
      fi

      # Only set if not already in environment
      if [[ -z "${!key:-}" ]]; then
        export "$key=$val"
      fi
    done < "$env_file"

    return 0
  done

  return 0
}
|
||||
|
||||
#######################################
# Abort the script unless an API key is available.
# Globals:   MINIMAX_API_KEY (read)
# Outputs:   a two-line hint on stderr when the key is missing or empty
# Returns:   0 when the key is set; otherwise exits the shell with status 1
#######################################
check_api_key() {
  [[ -n "${MINIMAX_API_KEY:-}" ]] && return 0

  printf '%s\n' \
    "Error: MINIMAX_API_KEY environment variable is not set" \
    " export MINIMAX_API_KEY='your-key'" >&2
  exit 1
}
|
||||
|
||||
#######################################
# Create a directory (and any missing parents) if a path is given.
# Arguments: $1 - directory path; an empty or missing value is a no-op
# Returns:   0 when nothing had to be done, otherwise mkdir's status
#######################################
ensure_dir() {
  local dir="${1:-}"

  # An empty path is "nothing to do", not a failure. Returning non-zero here
  # would abort a caller that runs under `set -e`.
  if [[ -z "$dir" ]]; then
    return 0
  fi

  # "--" keeps a path that starts with '-' from being read as an option.
  mkdir -p -- "$dir"
}
|
||||
|
||||
# Base URL for every API call. MINIMAX_API_HOST overrides the default host; a
# trailing slash on it is dropped so the result never contains "//v1".
API_BASE="${MINIMAX_API_HOST:-https://api.minimaxi.com}"
API_BASE="${API_BASE%/}/v1"
|
||||
|
||||
#######################################
# Send a JSON request to the API and print the raw JSON response.
# Usage:     api_request METHOD ENDPOINT [JSON_BODY]
# Globals:   API_BASE, MINIMAX_API_KEY (read)
# Arguments: $1 - HTTP method
#            $2 - endpoint path relative to API_BASE (a leading '/' is ignored)
#            $3 - optional JSON body; when present it is sent as
#                 application/json
# Outputs:   response body on stdout; diagnostics on stderr
# Returns:   0 on success. Exits with status 1 when curl fails, when the HTTP
#            status is >= 400, or when .base_resp.status_code is non-zero.
#            Callers that must survive a failure run it in a subshell or a
#            command substitution.
#######################################
api_request() {
  local method="$1" endpoint="$2" body="${3:-}"
  local url="${API_BASE}/${endpoint#/}"

  # -sS hides the progress meter but keeps curl's own error message, so the
  # user can see why a request failed (DNS, TLS, timeout, ...).
  # The trailing -w line appends the HTTP status after the body.
  local curl_args=(
    -sS -w "\n%{http_code}"
    -X "$method"
    -H "Authorization: Bearer ${MINIMAX_API_KEY}"
    -H "Accept-Encoding: gzip, deflate"
    --compressed
    --max-time 120
  )
  if [[ -n "$body" ]]; then
    curl_args+=(-H "Content-Type: application/json" -d "$body")
  fi
  curl_args+=("$url")

  local output curl_status=0
  output="$(curl "${curl_args[@]}")" || curl_status=$?
  if [[ "$curl_status" -ne 0 ]]; then
    echo "Error: curl request failed (curl exit code $curl_status)" >&2
    exit 1
  fi

  # The last line is the status appended by -w; everything before is the body.
  local http_code="${output##*$'\n'}"
  local response="${output%$'\n'*}"

  if [[ "$http_code" -ge 400 ]] 2>/dev/null; then
    echo "Error: API returned HTTP $http_code" >&2
    echo "$response" >&2
    exit 1
  fi

  # Check API-level error. A body that is not JSON makes jq fail; that is
  # tolerated and the body is passed through unchanged.
  local status_code
  status_code="$(jq -r '.base_resp.status_code // 0' <<<"$response" 2>/dev/null)" || true
  if [[ -n "$status_code" && "$status_code" != "0" ]]; then
    local status_msg
    status_msg="$(jq -r '.base_resp.status_msg // "Unknown error"' <<<"$response")"
    echo "Error: API error [$status_code]: $status_msg" >&2
    exit 1
  fi

  printf '%s\n' "$response"
}
|
||||
|
||||
#######################################
# Upload a local file as multipart/form-data and print the JSON response.
# Usage:     api_upload ENDPOINT FILE_PATH PURPOSE
# Globals:   API_BASE, MINIMAX_API_KEY (read)
# Arguments: $1 - endpoint path relative to API_BASE (a leading '/' is ignored)
#            $2 - path of the file to send as the "file" form field
#            $3 - value of the "purpose" form field
# Outputs:   response body on stdout; diagnostics on stderr
# Returns:   0 on success. Exits with status 1 when curl fails, when the HTTP
#            status is >= 400, or when .base_resp.status_code is non-zero.
#######################################
api_upload() {
  local endpoint="$1" file_path="$2" purpose="$3"
  local url="${API_BASE}/${endpoint#/}"

  # curl -F treats ';' and ',' in a bare file name as field separators.
  # Wrapping the name in double quotes (with '\' and '"' backslash-escaped)
  # makes curl take the path verbatim.
  local bs='\' dq='"'
  local quoted_path=${file_path//"$bs"/"$bs$bs"}
  quoted_path=${quoted_path//"$dq"/"$bs$dq"}

  # -sS keeps curl's own error message visible. --form-string sends the
  # purpose literally (no '@' / '<' file interpretation).
  local output curl_status=0
  output="$(curl -sS -w "\n%{http_code}" \
    -X POST \
    -H "Authorization: Bearer ${MINIMAX_API_KEY}" \
    -H "Accept-Encoding: gzip, deflate" \
    --compressed \
    -F "file=@\"${quoted_path}\"" \
    --form-string "purpose=${purpose}" \
    --max-time 120 \
    "$url")" || curl_status=$?
  if [[ "$curl_status" -ne 0 ]]; then
    echo "Error: curl upload failed (curl exit code $curl_status)" >&2
    exit 1
  fi

  # The last line is the status appended by -w; everything before is the body.
  local http_code="${output##*$'\n'}"
  local response="${output%$'\n'*}"

  if [[ "$http_code" -ge 400 ]] 2>/dev/null; then
    echo "Error: API returned HTTP $http_code" >&2
    echo "$response" >&2
    exit 1
  fi

  # API-level error check; a non-JSON body is tolerated and passed through.
  local status_code
  status_code="$(jq -r '.base_resp.status_code // 0' <<<"$response" 2>/dev/null)" || true
  if [[ -n "$status_code" && "$status_code" != "0" ]]; then
    local status_msg
    status_msg="$(jq -r '.base_resp.status_msg // "Unknown error"' <<<"$response")"
    echo "Error: API error [$status_code]: $status_msg" >&2
    exit 1
  fi

  printf '%s\n' "$response"
}
|
||||
|
||||
#######################################
# Decode a hex string and write the resulting bytes to a file.
# Usage:     hex_to_file HEX_STRING OUTPUT_PATH
# Arguments: $1 - hex text, fed to `xxd -r -p`
#            $2 - destination file; its parent directory is created first
# Returns:   status of the xxd invocation
#######################################
hex_to_file() {
  local hex_payload="$1" target_path="$2"
  local target_dir

  target_dir="$(dirname "$target_path")"
  ensure_dir "$target_dir"

  xxd -r -p <<<"$hex_payload" > "$target_path"
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: tts
|
||||
# ============================================================================
|
||||
#######################################
# Subcommand "tts": synthesize one piece of text via the t2a_v2 endpoint.
# Usage:     cmd_tts TEXT [-v|--voice-id ID] [-o|--output FILE] [--model M]
#                    [--speed N] [--volume N] [--pitch N] [--emotion E]
#                    [--format F] [--sample-rate N] [--language-boost L]
# Arguments: TEXT may appear anywhere; the last bare argument wins. An
#            argument shaped like a flag (-x / --long-name) that is not a
#            known option is rejected instead of being taken as the text.
# Outputs:   progress on stdout; with --output the decoded audio is written
#            to FILE, otherwise only the size of the hex payload is reported
# Returns:   0 on success; exits with status 1 on usage or API errors
#######################################
cmd_tts() {
  local text="" voice_id="male-qn-qingse" output="" model="speech-2.8-hd"
  local speed=1.0 volume=1.0 pitch=0 emotion="" audio_format="mp3"
  local sample_rate=32000 language_boost=""
  local flag_re='^--?[A-Za-z][A-Za-z0-9-]*$'
  local number_re='^-?[0-9]+(\.[0-9]+)?$'

  # First positional arg is text
  if [[ $# -gt 0 && "$1" != -* ]]; then
    text="$1"
    shift
  fi

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -v|--voice-id|-o|--output|--model|--speed|--volume|--pitch|--emotion|--format|--sample-rate|--language-boost)
        if [[ $# -lt 2 ]]; then
          echo "Error: option $1 requires a value" >&2
          exit 1
        fi
        case "$1" in
          -v|--voice-id) voice_id="$2" ;;
          -o|--output) output="$2" ;;
          --model) model="$2" ;;
          --speed) speed="$2" ;;
          --volume) volume="$2" ;;
          --pitch) pitch="$2" ;;
          --emotion) emotion="$2" ;;
          --format) audio_format="$2" ;;
          --sample-rate) sample_rate="$2" ;;
          --language-boost) language_boost="$2" ;;
        esac
        shift 2
        ;;
      *)
        # A lone flag-shaped word is almost certainly a typo, not text.
        # Text that merely starts with '-' ("-5 degrees") is still accepted.
        if [[ "$1" =~ $flag_re ]]; then
          echo "Error: unknown option: $1" >&2
          exit 1
        fi
        text="$1"
        shift
        ;;
    esac
  done

  if [[ -z "$text" ]]; then
    echo "Error: text is required" >&2
    echo "Usage: $(basename "$0") tts \"Text to speak\" -o output.mp3" >&2
    exit 1
  fi

  # These values are injected as raw JSON numbers; check them up front so the
  # user gets a readable message instead of a jq parse error.
  local check
  for check in "--speed=$speed" "--volume=$volume" "--pitch=$pitch" \
    "--sample-rate=$sample_rate"; do
    if [[ ! "${check#*=}" =~ $number_re ]]; then
      echo "Error: ${check%%=*} must be a number, got '${check#*=}'" >&2
      exit 1
    fi
  done

  # Build voice_setting; emotion is added only when one was requested.
  local voice_setting
  voice_setting="$(jq -n \
    --arg vid "$voice_id" \
    --argjson spd "$speed" \
    --argjson vol "$volume" \
    --argjson pit "$pitch" \
    --arg emo "$emotion" \
    '{voice_id: $vid, speed: $spd, vol: $vol, pitch: $pit}
     + (if $emo == "" then {} else {emotion: $emo} end)')"

  # Build payload; language_boost is added only when one was requested.
  local payload
  payload="$(jq -n \
    --arg model "$model" \
    --arg text "$text" \
    --argjson vs "$voice_setting" \
    --arg fmt "$audio_format" \
    --argjson sr "$sample_rate" \
    --arg lb "$language_boost" \
    '{
      model: $model,
      text: $text,
      voice_setting: $vs,
      audio_setting: {sample_rate: $sr, bitrate: 128000, format: $fmt, channel: 1},
      stream: false,
      subtitle_enable: false,
      output_format: "hex"
    } + (if $lb == "" then {} else {language_boost: $lb} end)')"

  echo "Synthesizing: ${text:0:50}..."
  local response
  response="$(api_request POST t2a_v2 "$payload")"

  # Extract hex audio
  local audio_hex
  audio_hex="$(jq -r '.data.audio // .extra_info.audio // empty' <<<"$response")"

  if [[ -z "$audio_hex" ]]; then
    echo "Error: No audio data returned from API" >&2
    exit 1
  fi

  if [[ -n "$output" ]]; then
    hex_to_file "$audio_hex" "$output"
    echo "Done: $output"
  else
    echo "Generated ${#audio_hex} hex chars of audio"
  fi
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: clone
|
||||
# ============================================================================
|
||||
#######################################
# Subcommand "clone": upload an audio sample and register it as a voice.
# Usage:     cmd_clone AUDIO_FILE --voice-id ID [--preview TEXT]
#                      [--preview-output FILE]
# Arguments: AUDIO_FILE is the first bare argument; further bare arguments
#            are ignored. Unknown flags and options without a value are
#            rejected with an error.
# Outputs:   progress on stdout; with --preview a sample is synthesized with
#            the new voice and saved (default "<voice-id>_preview.mp3")
# Returns:   0 on success; exits with status 1 on usage or API errors
#######################################
cmd_clone() {
  local audio_file="" voice_id="" preview_text="" preview_output=""
  local flag_re='^--?[A-Za-z][A-Za-z0-9-]*$'

  # First positional arg is audio file
  if [[ $# -gt 0 && "$1" != -* ]]; then
    audio_file="$1"
    shift
  fi

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --voice-id|--preview|--preview-output)
        if [[ $# -lt 2 ]]; then
          echo "Error: option $1 requires a value" >&2
          exit 1
        fi
        case "$1" in
          --voice-id) voice_id="$2" ;;
          --preview) preview_text="$2" ;;
          --preview-output) preview_output="$2" ;;
        esac
        shift 2
        ;;
      *)
        # Reject mistyped flags instead of silently dropping them.
        if [[ "$1" =~ $flag_re ]]; then
          echo "Error: unknown option: $1" >&2
          exit 1
        fi
        if [[ -z "$audio_file" ]]; then
          audio_file="$1"
        fi
        shift
        ;;
    esac
  done

  if [[ -z "$audio_file" ]]; then
    echo "Error: audio file is required" >&2
    echo "Usage: $(basename "$0") clone audio.mp3 --voice-id my-voice" >&2
    exit 1
  fi
  if [[ ! -f "$audio_file" ]]; then
    echo "Error: Audio file not found: $audio_file" >&2
    exit 1
  fi
  if [[ -z "$voice_id" ]]; then
    echo "Error: --voice-id is required" >&2
    exit 1
  fi

  echo "Cloning voice from: $audio_file"
  echo "Voice ID: $voice_id"

  # Step 1: Upload audio
  local upload_response file_id
  upload_response="$(api_upload files/upload "$audio_file" voice_clone)"
  file_id="$(jq -r '.file.file_id // .file_id // empty' <<<"$upload_response")"

  if [[ -z "$file_id" ]]; then
    echo "Error: Upload succeeded but no file_id was returned" >&2
    exit 1
  fi

  # Step 2: Clone voice. file_id is passed as raw JSON, i.e. it is expected
  # to be numeric.
  local clone_payload
  clone_payload="$(jq -n \
    --arg vid "$voice_id" \
    --argjson fid "$file_id" \
    '{voice_id: $vid, file_id: $fid}')"

  api_request POST voice_clone "$clone_payload" > /dev/null
  echo "Voice cloned successfully: $voice_id"

  # Step 3: Preview if requested
  if [[ -n "$preview_text" ]]; then
    echo "Generating preview..."
    local pout="${preview_output:-${voice_id}_preview.mp3}"
    cmd_tts "$preview_text" -v "$voice_id" -o "$pout"
    echo "Preview saved to: $pout"
  fi
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: design
|
||||
# ============================================================================
|
||||
#######################################
# Subcommand "design": create a voice from a text description.
# Usage:     cmd_design DESCRIPTION [--voice-id ID] [--preview TEXT]
#                       [--preview-output FILE]
# Arguments: DESCRIPTION is the first bare argument; further bare arguments
#            are ignored. Unknown flags and options without a value are
#            rejected with an error.
# Outputs:   progress on stdout; when the response carries .trial_audio it is
#            decoded and saved (default "<voice-id>_preview.mp3")
# Returns:   0 on success; exits with status 1 on usage or API errors
#######################################
cmd_design() {
  local description="" voice_id="" preview_text="" preview_output=""
  local flag_re='^--?[A-Za-z][A-Za-z0-9-]*$'

  if [[ $# -gt 0 && "$1" != -* ]]; then
    description="$1"
    shift
  fi

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --voice-id|--preview|--preview-output)
        if [[ $# -lt 2 ]]; then
          echo "Error: option $1 requires a value" >&2
          exit 1
        fi
        case "$1" in
          --voice-id) voice_id="$2" ;;
          --preview) preview_text="$2" ;;
          --preview-output) preview_output="$2" ;;
        esac
        shift 2
        ;;
      *)
        # Reject mistyped flags instead of silently dropping them.
        if [[ "$1" =~ $flag_re ]]; then
          echo "Error: unknown option: $1" >&2
          exit 1
        fi
        if [[ -z "$description" ]]; then
          description="$1"
        fi
        shift
        ;;
    esac
  done

  if [[ -z "$description" ]]; then
    echo "Error: description is required" >&2
    # Plain quotes around $0: escaped quotes inside $(...) would be passed to
    # basename literally and show up in the printed script name.
    echo "Usage: $(basename "$0") design \"A warm female voice\" --voice-id narrator" >&2
    exit 1
  fi

  local ptext="${preview_text:-This is a preview of the designed voice.}"

  echo "Designing voice from: \"$description\""
  if [[ -n "$voice_id" ]]; then
    echo "Voice ID: $voice_id"
  fi

  # voice_id is sent only when the caller chose one.
  local payload
  payload="$(jq -n \
    --arg prompt "$description" \
    --arg pt "$ptext" \
    --arg vid "$voice_id" \
    '{prompt: $prompt, preview_text: $pt}
     + (if $vid == "" then {} else {voice_id: $vid} end)')"

  local response
  response="$(api_request POST voice_design "$payload")"

  # Prefer the requested id; otherwise use the one in the response.
  local actual_voice_id="$voice_id"
  if [[ -z "$actual_voice_id" ]]; then
    actual_voice_id="$(jq -r '.voice_id // "unknown"' <<<"$response")"
  fi
  echo "Voice designed: $actual_voice_id"

  local trial_audio
  trial_audio="$(jq -r '.trial_audio // empty' <<<"$response")"
  if [[ -n "$trial_audio" ]]; then
    local pout="${preview_output:-${actual_voice_id}_preview.mp3}"
    hex_to_file "$trial_audio" "$pout"
    echo "Preview saved to: $pout"
  fi
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: list-voices
|
||||
# ============================================================================
|
||||
#######################################
# Subcommand "list-voices": print system voices and custom voices.
# Queries the voice/list endpoint three times (voice_type "system",
# "voice_cloning", "voice_generation"). A failed query is reported as
# "could not fetch" / "none found" rather than aborting.
# Outputs:   at most the first 10 system voices plus a count of the rest,
#            then every cloned and designed voice id
# Returns:   0
#######################################
cmd_list_voices() {
  local listing total

  echo "=== System Voices ==="
  listing="$(api_request POST voice/list '{"voice_type":"system"}' 2>/dev/null)" || true

  if [[ -z "$listing" ]]; then
    echo " (Could not fetch system voices)"
  else
    total="$(echo "$listing" | jq '.voice_list | length')" 2>/dev/null || total=0
    if [[ "$total" -gt 0 ]]; then
      echo "$listing" | jq -r '.voice_list[:10][] | " \(.voice_id): \(.name // "N/A")"'
      if [[ "$total" -gt 10 ]]; then
        echo " ... and $((total - 10)) more"
      fi
    else
      echo " (None found)"
    fi
  fi

  echo ""
  echo "=== Custom Voices ==="

  # Both custom lists are fetched before anything is printed.
  local cloned designed
  cloned="$(api_request POST voice/list '{"voice_type":"voice_cloning"}' 2>/dev/null)" || true
  designed="$(api_request POST voice/list '{"voice_type":"voice_generation"}' 2>/dev/null)" || true

  local found_custom=false label payload
  for label in Cloned Designed; do
    if [[ "$label" == "Cloned" ]]; then
      payload="$cloned"
    else
      payload="$designed"
    fi
    [[ -n "$payload" ]] || continue

    total="$(echo "$payload" | jq '.voice_list | length')" 2>/dev/null || total=0
    if [[ "$total" -gt 0 ]]; then
      found_custom=true
      echo "$label ($total):"
      echo "$payload" | jq -r '.voice_list[] | " \(.voice_id)"'
    fi
  done

  if [[ "$found_custom" != true ]]; then
    echo " (None found)"
  fi
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: validate
|
||||
# ============================================================================
|
||||
#######################################
# Subcommand "validate": check a segments JSON file without calling the API.
# The file is either a JSON array of segments or an object with a "segments"
# key. Every segment needs a non-empty "text" and "voice_id"; "emotion", when
# present, must be exactly one of the listed emotions.
# Usage:     cmd_validate SEGMENTS_FILE [--model M] [-v|--verbose]
#                         [--strict] [--validate-voices]
#            --strict and --validate-voices are accepted but have no effect.
# Outputs:   report on stdout; file-level errors on stderr
# Returns:   0 when every segment is valid, 1 otherwise. Exits with status 1
#            when the file is missing, is not valid JSON, or has no segments.
#######################################
cmd_validate() {
  local segments_file="" model="speech-2.8-hd" verbose=false

  if [[ $# -gt 0 && "$1" != -* ]]; then
    segments_file="$1"
    shift
  fi

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --model)
        if [[ $# -lt 2 ]]; then
          echo "Error: option $1 requires a value" >&2
          exit 1
        fi
        model="$2"
        shift 2
        ;;
      --strict) shift ;;           # Accepted for compatibility; no effect here
      -v|--verbose) verbose=true; shift ;;
      --validate-voices) shift ;;  # Not implemented in bash version
      *)
        if [[ -z "$segments_file" ]]; then
          segments_file="$1"
        fi
        shift
        ;;
    esac
  done

  if [[ -z "$segments_file" || ! -f "$segments_file" ]]; then
    echo "Error: Segments file not found: ${segments_file:-<none>}" >&2
    exit 1
  fi

  echo "Validating: $segments_file"
  echo "Model: $model"

  local -a emotion_list=(happy sad angry fearful disgusted surprised calm fluent whisper)
  local valid_emotions="happy sad angry fearful disgusted surprised calm fluent whisper"
  echo "Valid emotions: $valid_emotions"
  echo ""

  # Parse JSON
  local segments count
  segments="$(jq -r 'if type == "array" then . elif type == "object" and has("segments") then .segments else empty end' "$segments_file" 2>/dev/null)" || {
    echo "Error: Invalid JSON in $segments_file" >&2
    exit 1
  }

  if [[ -z "$segments" || "$segments" == "null" ]]; then
    echo "Error: No segments found in file" >&2
    exit 1
  fi

  count="$(jq 'length' <<<"$segments")"
  local errors=0
  local i text voice_id emotion known emotion_ok

  for ((i = 0; i < count; i++)); do
    text="$(jq -r --argjson i "$i" '.[$i].text // ""' <<<"$segments")"
    voice_id="$(jq -r --argjson i "$i" '.[$i].voice_id // ""' <<<"$segments")"
    emotion="$(jq -r --argjson i "$i" '.[$i].emotion // ""' <<<"$segments")"

    if [[ -z "$text" ]]; then
      echo " - Segment $i: 'text' is required and must not be empty"
      errors=$((errors + 1))
    fi
    if [[ -z "$voice_id" ]]; then
      echo " - Segment $i: 'voice_id' is required"
      errors=$((errors + 1))
    fi
    if [[ -n "$emotion" ]]; then
      # Exact string comparison: the value must BE one of the emotions. A
      # regex or substring match would accept values such as "." or
      # "happy sad".
      emotion_ok=false
      for known in "${emotion_list[@]}"; do
        if [[ "$emotion" == "$known" ]]; then
          emotion_ok=true
          break
        fi
      done
      if [[ "$emotion_ok" != true ]]; then
        echo " - Segment $i: invalid emotion '$emotion'"
        errors=$((errors + 1))
      fi
    fi
  done

  if [[ $errors -ne 0 ]]; then
    echo "Validation failed ($errors errors)"
    return 1
  fi

  echo "Validation passed: $count segments"
  if $verbose; then
    echo ""
    echo "=== Segment Summary ==="
    local elabel
    for ((i = 0; i < count; i++)); do
      text="$(jq -r --argjson i "$i" '.[$i].text // ""' <<<"$segments")"
      voice_id="$(jq -r --argjson i "$i" '.[$i].voice_id // ""' <<<"$segments")"
      emotion="$(jq -r --argjson i "$i" '.[$i].emotion // ""' <<<"$segments")"
      elabel="${emotion:-AUTO}"
      printf " %d: [%-10s] voice=%-20s \"%s\"\n" "$i" "${elabel^^}" "${voice_id:0:20}" "${text:0:40}"
    done
  fi
  return 0
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: generate (multi-segment pipeline)
|
||||
# ============================================================================
|
||||
#######################################
# Subcommand "generate": synthesize every segment of a segments JSON file and
# merge the results into one audio file.
# Usage:     cmd_generate SEGMENTS_FILE -o|--output FILE [--model M]
#                         [--crossfade MS] [--no-normalize] [--temp-dir DIR]
#                         [--continue-on-error]
# Behaviour: the file is validated with cmd_validate first. Segment audio is
#            written to DIR (default "<output dir>/tmp") and is left there
#            for inspection. Without --continue-on-error the first failing
#            segment aborts the run with status 1 and nothing is merged; with
#            it, failed segments are skipped and the rest is merged.
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 on success; exits with status 1 on any error
#######################################
cmd_generate() {
  local segments_file="" output="" model="speech-2.8-hd" crossfade=200
  local no_normalize=false temp_dir="" continue_on_error=false

  if [[ $# -gt 0 && "$1" != -* ]]; then
    segments_file="$1"
    shift
  fi

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -o|--output|--model|--crossfade|--temp-dir)
        if [[ $# -lt 2 ]]; then
          echo "Error: option $1 requires a value" >&2
          exit 1
        fi
        case "$1" in
          -o|--output) output="$2" ;;
          --model) model="$2" ;;
          --crossfade) crossfade="$2" ;;
          --temp-dir) temp_dir="$2" ;;
        esac
        shift 2
        ;;
      --no-normalize) no_normalize=true; shift ;;
      --continue-on-error) continue_on_error=true; shift ;;
      *)
        if [[ -z "$segments_file" ]]; then
          segments_file="$1"
        fi
        shift
        ;;
    esac
  done

  if [[ -z "$segments_file" || ! -f "$segments_file" ]]; then
    echo "Error: Segments file not found: ${segments_file:-<none>}" >&2
    exit 1
  fi
  if [[ -z "$output" ]]; then
    echo "Error: -o/--output is required" >&2
    exit 1
  fi

  # Validate first. Without this a segment lacking "text" or "voice_id"
  # would be sent to the API as the literal string "null".
  echo "Validating segments file..."
  if ! cmd_validate "$segments_file" --model "$model"; then
    echo "Error: segments file failed validation: $segments_file" >&2
    exit 1
  fi

  local segments count
  segments="$(jq -r 'if type == "array" then . elif type == "object" and has("segments") then .segments else empty end' "$segments_file")"
  count="$(jq 'length' <<<"$segments")"

  if [[ "$count" -eq 0 ]]; then
    echo "Error: No segments found" >&2
    exit 1
  fi
  echo "Found $count valid segments"
  echo ""

  # Setup temp dir: "<absolute output dir>/tmp", or "./tmp" when the output
  # directory does not exist yet.
  if [[ -z "$temp_dir" ]]; then
    local out_dir
    out_dir="$(cd "$(dirname "$output")" 2>/dev/null && pwd)" || out_dir="."
    temp_dir="$out_dir/tmp"
  fi
  mkdir -p "$temp_dir"
  echo "Temp directory: $temp_dir"

  # Generate each segment
  local succeeded=0 failed=0
  local segment_files=()
  local i text voice_id emotion speed vol pitch
  local seg_name seg_output voice_setting payload response audio_hex

  for ((i = 0; i < count; i++)); do
    text="$(jq -r --argjson i "$i" '.[$i].text' <<<"$segments")"
    voice_id="$(jq -r --argjson i "$i" '.[$i].voice_id' <<<"$segments")"
    emotion="$(jq -r --argjson i "$i" '.[$i].emotion // ""' <<<"$segments")"
    speed="$(jq -r --argjson i "$i" '.[$i].speed // 1.0' <<<"$segments")"
    vol="$(jq -r --argjson i "$i" '.[$i].volume // 1.0' <<<"$segments")"
    pitch="$(jq -r --argjson i "$i" '.[$i].pitch // 0' <<<"$segments")"

    printf " Generating segment %d/%d: %s...\n" "$((i + 1))" "$count" "${text:0:40}"

    printf -v seg_name 'segment_%04d.mp3' "$i"
    seg_output="$temp_dir/$seg_name"

    # Build voice_setting; emotion is added only when the segment has one.
    voice_setting="$(jq -n \
      --arg vid "$voice_id" \
      --argjson spd "$speed" \
      --argjson vol "$vol" \
      --argjson pit "$pitch" \
      --arg emo "$emotion" \
      '{voice_id: $vid, speed: $spd, vol: $vol, pitch: $pit}
       + (if $emo == "" then {} else {emotion: $emo} end)')"

    payload="$(jq -n \
      --arg model "$model" \
      --arg text "$text" \
      --argjson vs "$voice_setting" \
      '{
        model: $model,
        text: $text,
        voice_setting: $vs,
        audio_setting: {sample_rate: 32000, bitrate: 128000, format: "mp3", channel: 1},
        stream: false,
        output_format: "hex"
      }')"

    # api_request exits on failure; the command substitution confines that
    # to a subshell, and 2>&1 captures its error text for the report below.
    if response="$(api_request POST t2a_v2 "$payload" 2>&1)"; then
      audio_hex="$(jq -r '.data.audio // .extra_info.audio // empty' <<<"$response")"
      if [[ -n "$audio_hex" ]]; then
        hex_to_file "$audio_hex" "$seg_output"
        segment_files+=("$seg_output")
        succeeded=$((succeeded + 1))
        echo " ✓ Saved: $seg_output"
        continue
      fi
      failed=$((failed + 1))
      echo " ✗ Error: No audio data in response"
    else
      failed=$((failed + 1))
      echo " ✗ Error: $response"
    fi

    if ! $continue_on_error; then
      break
    fi
  done

  if [[ ${#segment_files[@]} -eq 0 ]]; then
    echo "Error: No segments were generated successfully" >&2
    exit 1
  fi

  # Stop-on-error mode must not hand back a silently truncated file.
  if [[ "$failed" -gt 0 ]] && ! $continue_on_error; then
    echo "Error: segment generation failed after $succeeded/$count segments; nothing was merged" >&2
    echo " Use --continue-on-error to skip failed segments" >&2
    echo " Intermediate files in: $temp_dir" >&2
    exit 1
  fi

  # Merge segments
  ensure_dir "$(dirname "$output")"

  if [[ ${#segment_files[@]} -eq 1 ]]; then
    cp "${segment_files[0]}" "$output"
  else
    _merge_audio_files "$output" "$crossfade" "$no_normalize" "${segment_files[@]}"
  fi

  echo ""
  echo "Audio saved to: $output"
  echo " Generated: $succeeded/$count segments"
  echo ""
  echo " Intermediate files in: $temp_dir"
  echo " Delete with: rm -rf $temp_dir"
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: merge
|
||||
# ============================================================================
|
||||
#######################################
# Subcommand "merge": join two or more audio files into one.
# Usage:     cmd_merge FILE1 FILE2 [FILE...] -o|--output FILE
#                      [--crossfade MS] [--no-normalize] [--format F]
# Arguments: every argument that is not a known option is an input file.
#            --format is accepted, but its value is not used by this
#            function.
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 on success; exits with status 1 on usage errors
#######################################
cmd_merge() {
  # shellcheck disable=SC2034  # format is parsed for compatibility only
  local output="" format="mp3" crossfade=300 normalize=true
  local input_files=()
  local f

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -o|--output|--format|--crossfade)
        # Without this check a trailing option dies with "unbound variable".
        if [[ $# -lt 2 ]]; then
          echo "Error: option $1 requires a value" >&2
          exit 1
        fi
        case "$1" in
          -o|--output) output="$2" ;;
          --format) format="$2" ;;
          --crossfade) crossfade="$2" ;;
        esac
        shift 2
        ;;
      --no-normalize) normalize=false; shift ;;
      *) input_files+=("$1"); shift ;;
    esac
  done

  if [[ ${#input_files[@]} -lt 2 ]]; then
    echo "Error: At least 2 input files required" >&2
    exit 1
  fi
  if [[ -z "$output" ]]; then
    echo "Error: -o/--output is required" >&2
    exit 1
  fi

  for f in "${input_files[@]}"; do
    if [[ ! -f "$f" ]]; then
      echo "Error: File not found: $f" >&2
      exit 1
    fi
  done

  echo "Merging ${#input_files[@]} files..."

  # _merge_audio_files takes the inverted flag ("no normalize") as a string.
  local no_norm="false"
  if [[ "$normalize" != true ]]; then
    no_norm="true"
  fi
  _merge_audio_files "$output" "$crossfade" "$no_norm" "${input_files[@]}"
  echo "Merged audio saved to: $output"
}
|
||||
|
||||
#######################################
# Merge audio files with ffmpeg.
# Usage:     _merge_audio_files OUTPUT CROSSFADE_MS NO_NORMALIZE FILE1 FILE2 ...
# Arguments: $1 - output file (its parent directory is created)
#            $2 - crossfade length in milliseconds; 0 disables crossfading
#            $3 - "true" to skip loudness normalization
#            $4… - input files, merged in the given order
# Behaviour: with a positive crossfade and at least two inputs, the inputs
#            are chained through ffmpeg's acrossfade filter. If that fails,
#            or crossfading is off, the files are joined with the concat
#            demuxer instead. Unless NO_NORMALIZE is "true" the result is
#            passed through loudnorm.
# Returns:   0 on success, non-zero when the arguments are invalid or ffmpeg
#            fails. Temporary files are removed on every path.
#######################################
_merge_audio_files() {
  local output="$1" crossfade_ms="$2" no_normalize="$3"
  shift 3
  local files=("$@")
  local n=${#files[@]}
  local i f

  if [[ ! "$crossfade_ms" =~ ^[0-9]+$ ]]; then
    echo "Error: crossfade must be a non-negative integer (milliseconds), got '$crossfade_ms'" >&2
    return 1
  fi
  if [[ "$n" -eq 0 ]]; then
    echo "Error: no input files to merge" >&2
    return 1
  fi
  crossfade_ms=$((10#$crossfade_ms))   # force base 10 ("010" is not octal)

  ensure_dir "$(dirname "$output")"

  if [[ "$crossfade_ms" -gt 0 && "$n" -ge 2 ]]; then
    # Use acrossfade filter for crossfade between segments.
    # Milliseconds -> seconds with integer arithmetic only.
    local crossfade_sec
    printf -v crossfade_sec '%d.%03d' "$((crossfade_ms / 1000))" "$((crossfade_ms % 1000))"

    local inputs=()
    local filter_parts=()

    # Bring every input to the same sample rate, format and layout.
    for ((i = 0; i < n; i++)); do
      inputs+=(-i "${files[$i]}")
      filter_parts+=("[${i}:a]aresample=32000,aformat=sample_fmts=fltp:channel_layouts=mono[s${i}]")
    done

    # Build acrossfade chain: [s0][s1]->[m1], [m1][s2]->[m2], ...; the last
    # link is labelled [merged].
    local prev="[s0]" label
    for ((i = 1; i < n; i++)); do
      if [[ "$i" -eq $((n - 1)) ]]; then
        label="[merged]"
      else
        label="[m${i}]"
      fi
      filter_parts+=("${prev}[s${i}]acrossfade=d=${crossfade_sec}${label}")
      prev="$label"
    done

    local final_filter="[merged]aformat=sample_fmts=fltp"
    if [[ "$no_normalize" != "true" ]]; then
      final_filter+=",loudnorm=I=-16:TP=-1.5:LRA=11"
    fi
    final_filter+="[final]"
    filter_parts+=("$final_filter")

    local filter_complex
    filter_complex="$(IFS=';'; printf '%s' "${filter_parts[*]}")"

    if ffmpeg -y "${inputs[@]}" \
      -filter_complex "$filter_complex" \
      -map "[final]" \
      -ar 32000 -ac 1 -acodec libmp3lame \
      "$output" 2>/dev/null; then
      return 0
    fi
    echo " Crossfade merge failed, falling back to concat demuxer..." >&2
  fi

  # Fallback: concat demuxer (no crossfade). All scratch files live in one
  # private directory so a single rm cleans up, whatever happens.
  local work_dir
  work_dir="$(mktemp -d "${TMPDIR:-/tmp}/gv_merge.XXXXXX")" || {
    echo "Error: could not create a temporary directory" >&2
    return 1
  }
  local concat_file="$work_dir/concat.txt"
  local tmp_concat="$work_dir/concat_out.mp3"

  # In the concat list a single quote inside a quoted path is written '\''.
  local sq="'" esc="'\\''"
  local abs_path escaped
  for f in "${files[@]}"; do
    if ! abs_path="$(cd "$(dirname "$f")" && pwd)/$(basename "$f")"; then
      echo "Error: cannot resolve path: $f" >&2
      rm -rf -- "$work_dir"
      return 1
    fi
    escaped=${abs_path//"$sq"/"$esc"}
    printf "file '%s'\n" "$escaped" >> "$concat_file"
  done

  local status=0
  if [[ "$no_normalize" != "true" ]]; then
    # Stream-copy into one file first, then normalize while re-encoding.
    ffmpeg -y -f concat -safe 0 -i "$concat_file" -c copy "$tmp_concat" 2>/dev/null \
      && ffmpeg -y -i "$tmp_concat" -af "loudnorm=I=-16:TP=-1.5:LRA=11" -acodec libmp3lame "$output" 2>/dev/null \
      || status=$?
  else
    ffmpeg -y -f concat -safe 0 -i "$concat_file" -c copy "$output" 2>/dev/null || status=$?
  fi

  rm -rf -- "$work_dir"

  if [[ "$status" -ne 0 ]]; then
    echo "Error: ffmpeg could not merge the audio files" >&2
  fi
  return "$status"
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: convert
|
||||
# ============================================================================
|
||||
#######################################
# Subcommand "convert": re-encode an audio file with ffmpeg.
# Usage:     cmd_convert INPUT -o|--output FILE [--format F]
#                        [--sample-rate HZ] [--bitrate B] [--channels N]
# Format:    --format wins when given. Otherwise the format is taken from the
#            output extension when it is one of mp3, wav, flac, ogg, aac or
#            m4a, and falls back to mp3 for anything else. An explicit
#            --format outside that list selects ffmpeg's "copy" codec.
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 on success; exits with status 1 on usage or ffmpeg errors
#######################################
cmd_convert() {
  local input_file="" output="" format="" sample_rate="" bitrate="" channels=""
  local flag_re='^--?[A-Za-z][A-Za-z0-9-]*$'

  if [[ $# -gt 0 && "$1" != -* ]]; then
    input_file="$1"
    shift
  fi

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -o|--output|--format|--sample-rate|--bitrate|--channels)
        if [[ $# -lt 2 ]]; then
          echo "Error: option $1 requires a value" >&2
          exit 1
        fi
        case "$1" in
          -o|--output) output="$2" ;;
          --format) format="$2" ;;
          --sample-rate) sample_rate="$2" ;;
          --bitrate) bitrate="$2" ;;
          --channels) channels="$2" ;;
        esac
        shift 2
        ;;
      *)
        # Reject mistyped flags instead of silently dropping them.
        if [[ "$1" =~ $flag_re ]]; then
          echo "Error: unknown option: $1" >&2
          exit 1
        fi
        if [[ -z "$input_file" ]]; then
          input_file="$1"
        fi
        shift
        ;;
    esac
  done

  if [[ -z "$input_file" || ! -f "$input_file" ]]; then
    echo "Error: Input file not found: ${input_file:-<none>}" >&2
    exit 1
  fi
  if [[ -z "$output" ]]; then
    echo "Error: -o/--output is required" >&2
    exit 1
  fi

  # Without --format, follow the output extension; a fixed "mp3" default
  # would put MP3 data into e.g. a .wav file.
  if [[ -z "$format" ]]; then
    local out_name="${output##*/}" ext=""
    if [[ "$out_name" == *.* ]]; then
      ext="${out_name##*.}"
      ext="${ext,,}"
    fi
    case "$ext" in
      mp3|wav|flac|ogg|aac|m4a) format="$ext" ;;
      *) format="mp3" ;;
    esac
  fi

  ensure_dir "$(dirname "$output")"

  # Determine codec
  local codec
  case "$format" in
    mp3) codec="libmp3lame" ;;
    wav) codec="pcm_s16le" ;;
    flac) codec="flac" ;;
    ogg) codec="libvorbis" ;;
    aac|m4a) codec="aac" ;;
    *) codec="copy" ;;
  esac

  local args=(-y -i "$input_file" -acodec "$codec")
  if [[ -n "$sample_rate" ]]; then args+=(-ar "$sample_rate"); fi
  if [[ -n "$channels" ]]; then args+=(-ac "$channels"); fi
  if [[ -n "$bitrate" ]]; then args+=(-b:a "$bitrate"); fi
  args+=("$output")

  echo "Converting $input_file to $format..."
  # ffmpeg's own output is hidden, so report the failure explicitly.
  if ! ffmpeg "${args[@]}" 2>/dev/null; then
    echo "Error: ffmpeg failed to convert $input_file" >&2
    exit 1
  fi
  echo "Converted audio saved to: $output"
}
|
||||
|
||||
# ============================================================================
|
||||
# Subcommand: check-env
|
||||
# ============================================================================
|
||||
#######################################
# Subcommand "check-env": run check_environment.sh from the directory above
# SCRIPT_DIR, forwarding all arguments.
# Globals:   SCRIPT_DIR (read)
# Arguments: passed through unchanged to check_environment.sh
# Returns:   the helper's exit status; exits with status 1 when the helper
#            script does not exist
#######################################
cmd_check_env() {
  local helper="${SCRIPT_DIR}/../check_environment.sh"

  if [[ ! -f "$helper" ]]; then
    echo "check_environment.sh not found" >&2
    exit 1
  fi

  bash "$helper" "$@"
}
|
||||
|
||||
# ============================================================================
|
||||
# Main dispatcher
|
||||
# ============================================================================
|
||||
#######################################
# Print the help text: command list and example invocations.
# Entries are indented and the command table is column-aligned so the list
# can be scanned at a glance.
# Outputs:   help text on stdout (callers redirect it when reporting errors)
# Returns:   0
#######################################
usage() {
  cat <<'EOF'
MiniMax Voice CLI — Unified TTS interface

Usage:
  generate_voice.sh <command> [options]

Commands:
  tts           Basic text-to-speech
  clone         Clone voice from audio sample
  design        Design voice from description
  list-voices   List available voices
  validate      Validate segments.json file
  generate      Generate audio from segments.json
  merge         Merge multiple audio files
  convert       Convert audio format
  check-env     Check environment setup

Examples:
  generate_voice.sh tts "Hello world" -o hello.mp3
  generate_voice.sh tts "你好" -v female-shaonv -o hello_cn.mp3
  generate_voice.sh clone my_voice.mp3 --voice-id my-custom-voice
  generate_voice.sh design "A warm female voice" --voice-id narrator-1
  generate_voice.sh list-voices
  generate_voice.sh validate segments.json --verbose
  generate_voice.sh generate segments.json -o output.mp3
  generate_voice.sh merge part1.mp3 part2.mp3 -o combined.mp3
  generate_voice.sh convert input.wav -o output.mp3
  generate_voice.sh check-env --test-api
EOF
}
|
||||
|
||||
#######################################
# Entry point: load .env, then dispatch to the requested subcommand.
# Globals:   API_BASE (written), MINIMAX_API_HOST (read)
# Arguments: $1 - command name; the remaining arguments go to the subcommand.
#            With no arguments the help text is printed.
# Returns:   the subcommand's status; exits with status 1 on an unknown
#            command and with status 0 when called without arguments
#######################################
main() {
  load_env

  # API_BASE is first computed when the file is read, i.e. before load_env
  # has run, so a MINIMAX_API_HOST that comes from .env would be ignored.
  # Recompute it now that the environment is complete; a trailing slash on
  # the host is dropped.
  local api_host="${MINIMAX_API_HOST:-https://api.minimaxi.com}"
  API_BASE="${api_host%/}/v1"

  if [[ $# -eq 0 ]]; then
    usage
    exit 0
  fi

  local command="$1"
  shift

  # Subcommand functions are named cmd_<command> with '-' mapped to '_'.
  case "$command" in
    tts|clone|design|list-voices|generate)
      # These commands call the API and therefore need the key.
      check_api_key
      "cmd_${command//-/_}" "$@"
      ;;
    validate|merge|convert|check-env)
      "cmd_${command//-/_}" "$@"
      ;;
    -h|--help|help)
      usage
      ;;
    *)
      echo "Unknown command: $command" >&2
      usage >&2
      exit 1
      ;;
  esac
}
|
||||
|
||||
# Script entry point: hand every command-line argument to main unchanged.
main "$@"
|
||||
Reference in New Issue
Block a user