Save local changes (GSC/Bing integrations) before merging PR #354

This commit is contained in:
ajaysi
2026-02-13 13:11:27 +05:30
parent 43e66835ac
commit 08a1f4a1d8
144 changed files with 8310 additions and 2748 deletions

View File

@@ -324,6 +324,39 @@ class WaveSpeedClient:
timeout=timeout,
)
def voice_design(
self,
text: str,
voice_description: str,
language: str = "auto",
timeout: int = 180,
) -> bytes:
return self.speech.voice_design(
text=text,
voice_description=voice_description,
language=language,
timeout=timeout,
)
def cosyvoice_voice_clone(
self,
audio_bytes: bytes,
text: str,
*,
model: str = "wavespeed-ai/cosyvoice-tts/voice-clone",
audio_mime_type: str = "audio/wav",
reference_text: Optional[str] = None,
timeout: int = 180,
) -> bytes:
return self.speech.cosyvoice_voice_clone(
audio_bytes=audio_bytes,
text=text,
model=model,
audio_mime_type=audio_mime_type,
reference_text=reference_text,
timeout=timeout,
)
def generate_text_video(
self,
prompt: str,

View File

@@ -146,14 +146,44 @@ class PromptGenerator:
if isinstance(first_output, str):
if first_output.startswith("http://") or first_output.startswith("https://"):
logger.info(f"[WaveSpeed] Fetching optimized prompt from URL: {first_output}")
url_response = requests.get(first_output, timeout=timeout)
if url_response.status_code == 200:
return url_response.text.strip()
else:
logger.error(f"[WaveSpeed] Failed to fetch prompt from URL: {url_response.status_code}")
# Use stream=True to avoid downloading large files into memory
try:
with requests.get(first_output, timeout=timeout, stream=True) as url_response:
if url_response.status_code == 200:
# Check Content-Length if available
content_length = url_response.headers.get("Content-Length")
if content_length and int(content_length) > 1024 * 1024: # 1MB limit for prompts
logger.error(f"[WaveSpeed] Optimized prompt URL content too large: {content_length} bytes")
raise HTTPException(
status_code=502,
detail="WaveSpeed prompt optimizer returned a file that is too large",
)
# Read content with limit
content = ""
for chunk in url_response.iter_content(chunk_size=8192, decode_unicode=True):
if chunk:
content += chunk
if len(content) > 1024 * 1024: # Hard limit 1MB
logger.error("[WaveSpeed] Optimized prompt URL content exceeded 1MB limit during download")
raise HTTPException(
status_code=502,
detail="WaveSpeed prompt optimizer returned a file that is too large",
)
return content.strip()
else:
logger.error(f"[WaveSpeed] Failed to fetch prompt from URL: {url_response.status_code}")
raise HTTPException(
status_code=502,
detail="Failed to fetch optimized prompt from WaveSpeed URL",
)
except requests.RequestException as e:
logger.error(f"[WaveSpeed] Error fetching prompt from URL: {str(e)}")
raise HTTPException(
status_code=502,
detail="Failed to fetch optimized prompt from WaveSpeed URL",
detail=f"Error fetching optimized prompt: {str(e)}",
)
else:
# It's already the text

View File

@@ -181,6 +181,102 @@ class SpeechGenerator:
audio_url = self._extract_audio_url(outputs)
return self._download_audio(audio_url, timeout)
def voice_design(
self,
text: str,
voice_description: str,
language: str = "auto",
timeout: int = 180,
) -> bytes:
"""
Generate speech using Qwen3 Voice Design (text + voice description).
"""
url = f"{self.base_url}/wavespeed-ai/qwen3-tts/voice-design"
payload = {
"text": text,
"voice_description": voice_description,
"language": language
}
logger.info(f"[WaveSpeed] Voice design via {url}")
try:
response = requests.post(
url,
headers=self._get_headers(),
json=payload,
timeout=(30, 90),
)
except requests_exceptions.Timeout as e:
raise HTTPException(status_code=504, detail={"error": "WaveSpeed Voice Design timed out", "message": str(e)})
except (requests_exceptions.ConnectionError, requests_exceptions.ConnectTimeout) as e:
raise HTTPException(status_code=504, detail={"error": "WaveSpeed Voice Design connection failed", "message": str(e)})
if response.status_code != 200:
raise HTTPException(
status_code=response.status_code,
detail={"error": "WaveSpeed Voice Design failed", "message": response.text}
)
try:
data = response.json()
# The API is async and returns a task ID or direct output depending on implementation.
# Based on user input, it returns a "data" object with "id" and we poll.
# BUT wait, the Python example provided by user shows:
# response = requests.post(url, ...)
# if response.status_code == 200: result = response.json()["data"] ...
# Then it polls /api/v3/predictions/{request_id}/result
# Let's handle the async polling logic here or in the caller.
# The user's Python example is very clear. It's an async task.
if "data" in data and "id" in data["data"]:
request_id = data["data"]["id"]
return self._poll_prediction_result(request_id, timeout=timeout)
# Fallback if it returns direct output (unlikely based on docs)
if "data" in data and "outputs" in data["data"] and data["data"]["outputs"]:
return self._download_audio(data["data"]["outputs"][0]["url"], timeout) # Assuming structure
raise ValueError(f"Unexpected response format: {data}")
except Exception as e:
logger.error(f"[WaveSpeed] Error parsing Voice Design response: {e}")
raise HTTPException(status_code=500, detail={"error": "Failed to parse Voice Design response", "message": str(e)})
def _poll_prediction_result(self, request_id: str, timeout: int = 180) -> bytes:
import time
url = f"https://api.wavespeed.ai/api/v3/predictions/{request_id}/result"
start_time = time.time()
while time.time() - start_time < timeout:
try:
response = requests.get(url, headers=self._get_headers(), timeout=10)
if response.status_code == 200:
result = response.json().get("data", {})
status = result.get("status")
if status == "completed":
if result.get("outputs") and len(result["outputs"]) > 0:
audio_url = result["outputs"][0] # It's a URL string in the array
return self._download_audio(audio_url, timeout)
else:
raise ValueError("Completed task has no output URLs")
elif status == "failed":
raise ValueError(f"Task failed: {result.get('error')}")
# If processing/created, continue polling
time.sleep(1)
else:
logger.warning(f"Polling error {response.status_code}: {response.text}")
time.sleep(1)
except Exception as e:
logger.error(f"Polling exception: {e}")
time.sleep(1)
raise HTTPException(status_code=504, detail="Voice Design generation timed out")
def voice_clone(
self,
audio_bytes: bytes,
@@ -320,6 +416,70 @@ class SpeechGenerator:
audio_url = self._extract_audio_url(outputs)
return self._download_audio(audio_url, timeout)
def cosyvoice_voice_clone(
self,
audio_bytes: bytes,
text: str,
*,
model: str = "wavespeed-ai/cosyvoice-tts/voice-clone",
audio_mime_type: str = "audio/wav",
reference_text: Optional[str] = None,
timeout: int = 180,
) -> bytes:
url = f"{self.base_url}/{model}"
audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
mime = audio_mime_type or "audio/wav"
audio_data_url = f"data:{mime};base64,{audio_b64}"
payload = {
"audio": audio_data_url,
"text": text,
}
if reference_text:
payload["reference_text"] = reference_text
logger.info(f"[WaveSpeed] CosyVoice voice clone via {url}")
try:
response = requests.post(
url,
headers=self._get_headers(),
json=payload,
timeout=(30, 90),
)
except requests_exceptions.Timeout as e:
raise HTTPException(status_code=504, detail={"error": "WaveSpeed CosyVoice voice clone timed out", "message": str(e)})
except (requests_exceptions.ConnectionError, requests_exceptions.ConnectTimeout) as e:
raise HTTPException(status_code=504, detail={"error": "WaveSpeed CosyVoice voice clone connection failed", "message": str(e)})
if response.status_code != 200:
raise HTTPException(
status_code=502,
detail={
"error": "WaveSpeed CosyVoice voice clone failed",
"status_code": response.status_code,
"response": response.text,
},
)
response_json = response.json()
data = response_json.get("data") or response_json
outputs = data.get("outputs") or []
status = data.get("status")
prediction_id = data.get("id")
if not outputs and prediction_id and status in {"created", "processing"}:
result = self.polling.poll_until_complete(prediction_id, timeout_seconds=timeout, interval_seconds=0.8)
outputs = result.get("outputs") or []
if not outputs:
raise HTTPException(status_code=502, detail="WaveSpeed CosyVoice voice clone returned no outputs")
audio_url = self._extract_audio_url(outputs)
return self._download_audio(audio_url, timeout)
def _extract_audio_url(self, outputs: list) -> str:
"""Extract audio URL from outputs."""
if not isinstance(outputs, list) or len(outputs) == 0: