SEO Dashboard Fixes and content planning refactoring
This commit is contained in:
209
backend/services/blog_writer/circuit_breaker.py
Normal file
209
backend/services/blog_writer/circuit_breaker.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""
|
||||
Circuit Breaker Pattern for Blog Writer API Calls
|
||||
|
||||
Implements circuit breaker pattern to prevent cascading failures when external APIs
|
||||
are experiencing issues. Tracks failure rates and automatically disables calls when
|
||||
threshold is exceeded, with auto-recovery after cooldown period.
|
||||
"""
|
||||
|
||||
import time
|
||||
import asyncio
|
||||
from typing import Callable, Any, Optional, Dict
|
||||
from enum import Enum
|
||||
from dataclasses import dataclass
|
||||
from loguru import logger
|
||||
|
||||
from .exceptions import CircuitBreakerOpenException
|
||||
|
||||
|
||||
class CircuitState(Enum):
    """Circuit breaker states."""

    CLOSED = "closed"  # Normal operation
    OPEN = "open"  # Circuit is open, calls are blocked
    HALF_OPEN = "half_open"  # Testing if service is back
|
||||
|
||||
|
||||
@dataclass
class CircuitBreakerConfig:
    """Configuration for circuit breaker."""

    failure_threshold: int = 5  # Number of failures before opening
    recovery_timeout: int = 60  # Seconds to wait before trying again
    success_threshold: int = 3  # Successes needed to close from half-open
    timeout: int = 30  # Timeout for individual calls
    max_failures_per_minute: int = 10  # Max failures per minute before opening
|
||||
|
||||
|
||||
class CircuitBreaker:
    """Circuit breaker implementation for API calls.

    The breaker starts CLOSED. It opens when ``failure_count`` reaches
    ``config.failure_threshold`` or when the number of failures recorded in
    the last 60 seconds reaches ``config.max_failures_per_minute``. After
    ``config.recovery_timeout`` seconds it moves to HALF_OPEN and lets calls
    through again; ``config.success_threshold`` successes close it, while any
    failure re-opens it.
    """

    def __init__(self, name: str, config: Optional[CircuitBreakerConfig] = None):
        self.name = name
        self.config = config or CircuitBreakerConfig()
        self.state = CircuitState.CLOSED
        self.failure_count = 0
        self.success_count = 0
        self.last_failure_time = 0
        self.last_success_time = 0
        self.failure_times = []  # Track failure times for rate limiting
        self._lock = asyncio.Lock()

    async def call(self, func: Callable, *args, **kwargs) -> Any:
        """
        Execute function with circuit breaker protection.

        Args:
            func: Async callable to execute; ``func(*args, **kwargs)`` must
                return an awaitable.
            *args: Function arguments
            **kwargs: Function keyword arguments

        Returns:
            Function result

        Raises:
            CircuitBreakerOpenException: If circuit is open
            asyncio.TimeoutError: If the call exceeds ``config.timeout`` seconds
            Exception: Whatever ``func`` raises is recorded as a failure and
                re-raised unchanged.
        """
        async with self._lock:
            # Check if circuit should be opened due to rate limiting
            await self._check_rate_limit()

            # Check circuit state
            if self.state == CircuitState.OPEN:
                if self._should_attempt_reset():
                    self.state = CircuitState.HALF_OPEN
                    self.success_count = 0
                    logger.info(f"Circuit breaker {self.name} transitioning to HALF_OPEN")
                else:
                    retry_after = int(
                        self.config.recovery_timeout - (time.time() - self.last_failure_time)
                    )
                    raise CircuitBreakerOpenException(
                        f"Circuit breaker {self.name} is OPEN",
                        retry_after=max(0, retry_after),
                        context={"circuit_name": self.name, "state": self.state.value},
                    )

        # The guarded call runs outside the lock: _record_success and
        # _record_failure acquire the same asyncio.Lock, which is not
        # reentrant, and holding it here would also serialize every call.
        try:
            # Execute the function with timeout
            result = await asyncio.wait_for(
                func(*args, **kwargs),
                timeout=self.config.timeout,
            )

            # Record success
            await self._record_success()
            return result

        except asyncio.TimeoutError:
            await self._record_failure("timeout")
            raise
        except Exception as e:
            await self._record_failure(str(e))
            raise

    async def _check_rate_limit(self):
        """Prune the failure window and trip the breaker if it is too full.

        Expected to be called with ``self._lock`` held.
        """
        current_time = time.time()

        # Remove failures older than 1 minute
        self.failure_times = [
            failure_time
            for failure_time in self.failure_times
            if current_time - failure_time < 60
        ]

        # Only trip from CLOSED. Re-tripping while OPEN would reset
        # last_failure_time on every blocked call and keep pushing recovery
        # back; re-tripping in HALF_OPEN would abort the recovery probe on
        # failures that were already counted.
        if (
            self.state == CircuitState.CLOSED
            and len(self.failure_times) >= self.config.max_failures_per_minute
        ):
            self.state = CircuitState.OPEN
            self.last_failure_time = current_time
            logger.warning(f"Circuit breaker {self.name} opened due to rate limit: {len(self.failure_times)} failures in last minute")

    def _should_attempt_reset(self) -> bool:
        """Check if enough time has passed to attempt reset."""
        return time.time() - self.last_failure_time >= self.config.recovery_timeout

    async def _record_success(self):
        """Record a successful call."""
        async with self._lock:
            self.last_success_time = time.time()

            if self.state == CircuitState.HALF_OPEN:
                self.success_count += 1
                if self.success_count >= self.config.success_threshold:
                    self.state = CircuitState.CLOSED
                    self.failure_count = 0
                    # Drop the stale failure window so the rate-limit check
                    # does not immediately re-open a freshly recovered circuit.
                    self.failure_times = []
                    logger.info(f"Circuit breaker {self.name} closed after {self.success_count} successes")
            elif self.state == CircuitState.CLOSED:
                # Reset failure count on success
                self.failure_count = 0

    async def _record_failure(self, error: str):
        """Record a failed call."""
        async with self._lock:
            current_time = time.time()
            self.failure_count += 1
            self.last_failure_time = current_time
            self.failure_times.append(current_time)

            logger.warning(f"Circuit breaker {self.name} recorded failure #{self.failure_count}: {error}")

            if self.state == CircuitState.HALF_OPEN:
                # A failed probe means the service has not recovered; go
                # straight back to OPEN regardless of the failure counter.
                self.state = CircuitState.OPEN
                logger.error(f"Circuit breaker {self.name} re-opened after failure in HALF_OPEN")
            elif self.failure_count >= self.config.failure_threshold:
                # Open circuit if threshold exceeded
                self.state = CircuitState.OPEN
                logger.error(f"Circuit breaker {self.name} opened after {self.failure_count} failures")

    def get_state(self) -> Dict[str, Any]:
        """Get current circuit breaker state."""
        now = time.time()
        return {
            "name": self.name,
            "state": self.state.value,
            "failure_count": self.failure_count,
            "success_count": self.success_count,
            "last_failure_time": self.last_failure_time,
            "last_success_time": self.last_success_time,
            "failures_in_last_minute": sum(1 for t in self.failure_times if now - t < 60),
        }
|
||||
|
||||
|
||||
class CircuitBreakerManager:
    """Manages multiple circuit breakers."""

    def __init__(self):
        self._breakers: Dict[str, CircuitBreaker] = {}

    def get_breaker(self, name: str, config: Optional[CircuitBreakerConfig] = None) -> CircuitBreaker:
        """Get or create a circuit breaker.

        ``config`` is only used when the breaker is first created; a breaker
        that already exists under ``name`` is returned as-is.
        """
        if name not in self._breakers:
            self._breakers[name] = CircuitBreaker(name, config)
        return self._breakers[name]

    def get_all_states(self) -> Dict[str, Dict[str, Any]]:
        """Get states of all circuit breakers."""
        return {name: breaker.get_state() for name, breaker in self._breakers.items()}

    def reset_breaker(self, name: str):
        """Reset a circuit breaker to closed state.

        Unknown names are ignored.
        """
        breaker = self._breakers.get(name)
        if breaker is None:
            return

        breaker.state = CircuitState.CLOSED
        breaker.failure_count = 0
        breaker.success_count = 0
        # Clear the per-minute failure window too; otherwise the breaker's
        # rate-limit check can re-open it on the very next call.
        breaker.failure_times = []
        logger.info(f"Circuit breaker {name} manually reset")


# Global circuit breaker manager
circuit_breaker_manager = CircuitBreakerManager()
|
||||
|
||||
|
||||
def circuit_breaker(name: str, config: Optional[CircuitBreakerConfig] = None):
    """
    Decorator to add circuit breaker protection to async functions.

    The breaker is looked up (or created) in the global
    ``circuit_breaker_manager`` on every call, so all functions decorated
    with the same ``name`` share one breaker.

    Args:
        name: Circuit breaker name
        config: Circuit breaker configuration
    """
    # Local import keeps this block self-contained.
    from functools import wraps

    def decorator(func: Callable) -> Callable:
        # wraps() preserves the wrapped function's __name__, __doc__ and
        # signature metadata for logging and introspection.
        @wraps(func)
        async def wrapper(*args, **kwargs):
            breaker = circuit_breaker_manager.get_breaker(name, config)
            return await breaker.call(func, *args, **kwargs)

        return wrapper

    return decorator
|
||||
536
backend/services/blog_writer/database_task_manager.py
Normal file
536
backend/services/blog_writer/database_task_manager.py
Normal file
@@ -0,0 +1,536 @@
|
||||
"""
|
||||
Database-Backed Task Manager for Blog Writer
|
||||
|
||||
Replaces in-memory task storage with persistent database storage for
|
||||
reliability, recovery, and analytics.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import uuid
|
||||
import json
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict, List, Optional
|
||||
from loguru import logger
|
||||
|
||||
from services.blog_writer.logger_config import blog_writer_logger, log_function_call
|
||||
from models.blog_models import (
|
||||
BlogResearchRequest,
|
||||
BlogOutlineRequest,
|
||||
MediumBlogGenerateRequest,
|
||||
MediumBlogGenerateResult,
|
||||
)
|
||||
from services.blog_writer.blog_service import BlogWriterService
|
||||
|
||||
|
||||
class DatabaseTaskManager:
    """Database-backed task manager for blog writer operations.

    Task rows live in ``blog_writer_tasks``, progress messages in
    ``blog_writer_task_progress`` and metrics in ``blog_writer_task_metrics``.
    Queries use ``$N`` positional placeholders and are issued through the
    ``execute`` / ``fetch`` / ``fetchrow`` / ``fetchval`` methods of the
    connection object passed to the constructor.
    """

    def __init__(self, db_connection):
        self.db = db_connection
        self.service = BlogWriterService()
        self._cleanup_task = None
        # Strong references to fire-and-forget tasks. The event loop only
        # keeps weak references, so an unreferenced task can be
        # garbage-collected before it finishes.
        self._background_tasks = set()
        self._start_cleanup_task()

    def _start_cleanup_task(self):
        """Start background task to clean up old completed tasks.

        Uses ``asyncio.create_task``, so the manager has to be constructed
        while an event loop is running.
        """
        async def cleanup_loop():
            while True:
                try:
                    await self.cleanup_old_tasks()
                    await asyncio.sleep(3600)  # Run every hour
                except Exception as e:
                    logger.error(f"Error in cleanup task: {e}")
                    await asyncio.sleep(300)  # Wait 5 minutes on error

        self._cleanup_task = asyncio.create_task(cleanup_loop())

    def _spawn_background(self, coro):
        """Schedule ``coro`` as a task and hold a reference until it is done."""
        task = asyncio.create_task(coro)
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)
        return task

    @log_function_call("create_task")
    async def create_task(
        self,
        user_id: str,
        task_type: str,
        request_data: Dict[str, Any],
        correlation_id: Optional[str] = None,
        operation: Optional[str] = None,
        priority: int = 0,
        max_retries: int = 3,
        metadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """Create a new task in the database.

        The row is inserted with status ``'pending'``. ``request_data`` and
        ``metadata`` are stored as JSON text. A correlation id is generated
        when none is supplied.

        Returns:
            The generated task id (a UUID4 string).
        """
        task_id = str(uuid.uuid4())
        correlation_id = correlation_id or str(uuid.uuid4())

        query = """
            INSERT INTO blog_writer_tasks
            (id, user_id, task_type, status, request_data, correlation_id, operation, priority, max_retries, metadata)
            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
        """

        await self.db.execute(
            query,
            task_id,
            user_id,
            task_type,
            'pending',
            json.dumps(request_data),
            correlation_id,
            operation,
            priority,
            max_retries,
            json.dumps(metadata or {})
        )

        blog_writer_logger.log_operation_start(
            "task_created",
            task_id=task_id,
            task_type=task_type,
            user_id=user_id,
            correlation_id=correlation_id
        )

        return task_id

    @log_function_call("get_task_status")
    async def get_task_status(self, task_id: str) -> Optional[Dict[str, Any]]:
        """Get the status of a task.

        Returns:
            ``None`` when no task has this id; otherwise a dict with the task
            columns, the decoded ``result`` / ``error`` / ``metadata`` JSON
            and up to the 10 most recent progress messages (newest first).
        """
        query = """
            SELECT
            id, user_id, task_type, status, request_data, result_data, error_data,
            created_at, updated_at, completed_at, correlation_id, operation,
            retry_count, max_retries, priority, metadata
            FROM blog_writer_tasks
            WHERE id = $1
        """

        row = await self.db.fetchrow(query, task_id)
        if not row:
            return None

        # Get progress messages
        progress_query = """
            SELECT timestamp, message, percentage, progress_type, metadata
            FROM blog_writer_task_progress
            WHERE task_id = $1
            ORDER BY timestamp DESC
            LIMIT 10
        """

        progress_rows = await self.db.fetch(progress_query, task_id)
        # NOTE(review): progress metadata is returned as stored (not passed
        # through json.loads like the task-level metadata below) — confirm
        # which form callers expect.
        progress_messages = [
            {
                "timestamp": progress_row["timestamp"].isoformat(),
                "message": progress_row["message"],
                "percentage": float(progress_row["percentage"]),
                "progress_type": progress_row["progress_type"],
                "metadata": progress_row["metadata"] or {}
            }
            for progress_row in progress_rows
        ]

        return {
            "task_id": row["id"],
            "user_id": row["user_id"],
            "task_type": row["task_type"],
            "status": row["status"],
            "created_at": row["created_at"].isoformat(),
            "updated_at": row["updated_at"].isoformat(),
            "completed_at": row["completed_at"].isoformat() if row["completed_at"] else None,
            "correlation_id": row["correlation_id"],
            "operation": row["operation"],
            "retry_count": row["retry_count"],
            "max_retries": row["max_retries"],
            "priority": row["priority"],
            "progress_messages": progress_messages,
            "result": json.loads(row["result_data"]) if row["result_data"] else None,
            "error": json.loads(row["error_data"]) if row["error_data"] else None,
            "metadata": json.loads(row["metadata"]) if row["metadata"] else {}
        }

    @log_function_call("update_task_status")
    async def update_task_status(
        self,
        task_id: str,
        status: str,
        result_data: Optional[Dict[str, Any]] = None,
        error_data: Optional[Dict[str, Any]] = None,
        completed_at: Optional[datetime] = None
    ):
        """Update task status and data.

        ``result_data`` and ``error_data`` are overwritten on every call
        (with NULL when the argument is empty). When ``completed_at`` is not
        given it defaults to the current time for the terminal statuses
        ``completed``, ``failed`` and ``cancelled``, and to NULL otherwise.
        """
        query = """
            UPDATE blog_writer_tasks
            SET status = $2, result_data = $3, error_data = $4, completed_at = $5, updated_at = NOW()
            WHERE id = $1
        """

        is_terminal = status in ['completed', 'failed', 'cancelled']

        await self.db.execute(
            query,
            task_id,
            status,
            json.dumps(result_data) if result_data else None,
            json.dumps(error_data) if error_data else None,
            completed_at or (datetime.now() if is_terminal else None)
        )

        blog_writer_logger.log_operation_end(
            "task_status_updated",
            0,
            success=status in ['completed', 'cancelled'],
            task_id=task_id,
            status=status
        )

    @log_function_call("update_progress")
    async def update_progress(
        self,
        task_id: str,
        message: str,
        percentage: Optional[float] = None,
        progress_type: str = "info",
        metadata: Optional[Dict[str, Any]] = None
    ):
        """Update task progress.

        Inserts a progress row (a missing percentage is stored as 0.0) and
        promotes the task from ``pending`` to ``running`` if it was pending.
        """
        # Insert progress record
        progress_query = """
            INSERT INTO blog_writer_task_progress
            (task_id, message, percentage, progress_type, metadata)
            VALUES ($1, $2, $3, $4, $5)
        """

        await self.db.execute(
            progress_query,
            task_id,
            message,
            percentage or 0.0,
            progress_type,
            json.dumps(metadata or {})
        )

        # Update task status to running if it was pending
        status_query = """
            UPDATE blog_writer_tasks
            SET status = 'running', updated_at = NOW()
            WHERE id = $1 AND status = 'pending'
        """

        await self.db.execute(status_query, task_id)

        logger.info(f"Progress update for task {task_id}: {message}")

    @log_function_call("record_metrics")
    async def record_metrics(
        self,
        task_id: str,
        operation: str,
        duration_ms: int,
        token_usage: Optional[Dict[str, int]] = None,
        api_calls: int = 0,
        cache_hits: int = 0,
        cache_misses: int = 0,
        error_count: int = 0,
        metadata: Optional[Dict[str, Any]] = None
    ):
        """Record performance metrics for a task."""
        query = """
            INSERT INTO blog_writer_task_metrics
            (task_id, operation, duration_ms, token_usage, api_calls, cache_hits, cache_misses, error_count, metadata)
            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
        """

        await self.db.execute(
            query,
            task_id,
            operation,
            duration_ms,
            json.dumps(token_usage) if token_usage else None,
            api_calls,
            cache_hits,
            cache_misses,
            error_count,
            json.dumps(metadata or {})
        )

        blog_writer_logger.log_performance(
            f"task_metrics_{operation}",
            duration_ms,
            "ms",
            task_id=task_id,
            operation=operation,
            api_calls=api_calls,
            cache_hits=cache_hits,
            cache_misses=cache_misses
        )

    @log_function_call("increment_retry_count")
    async def increment_retry_count(self, task_id: str) -> int:
        """Increment retry count and return new count.

        Returns 0 when the query yields no value.
        """
        query = """
            UPDATE blog_writer_tasks
            SET retry_count = retry_count + 1, updated_at = NOW()
            WHERE id = $1
            RETURNING retry_count
        """

        result = await self.db.fetchval(query, task_id)
        return result or 0

    @log_function_call("cleanup_old_tasks")
    async def cleanup_old_tasks(self, days: int = 7) -> int:
        """Clean up old completed tasks.

        Deletes tasks in a terminal status (``completed``, ``failed``,
        ``cancelled``) created more than ``days`` days ago.

        Returns:
            Number of deleted rows, parsed from the last word of the status
            string returned by ``execute``.
        """
        # The interval is passed as a bound parameter instead of being
        # formatted into the SQL text.
        query = """
            DELETE FROM blog_writer_tasks
            WHERE status IN ('completed', 'failed', 'cancelled')
            AND created_at < NOW() - make_interval(days => $1)
        """

        result = await self.db.execute(query, int(days))
        deleted_count = int(result.split()[-1]) if result else 0

        if deleted_count > 0:
            logger.info(f"Cleaned up {deleted_count} old blog writer tasks")

        return deleted_count

    @log_function_call("get_user_tasks")
    async def get_user_tasks(
        self,
        user_id: str,
        limit: int = 50,
        offset: int = 0,
        status_filter: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """Get tasks for a specific user.

        Results are ordered newest first and paginated with ``limit`` /
        ``offset``; ``status_filter`` optionally restricts them to one status.
        """
        query = """
            SELECT
            id, task_type, status, created_at, updated_at, completed_at,
            operation, retry_count, max_retries, priority
            FROM blog_writer_tasks
            WHERE user_id = $1
        """

        # Placeholder numbers follow the position of each value in `params`.
        params = [user_id]

        if status_filter:
            params.append(status_filter)
            query += f" AND status = ${len(params)}"

        params.extend([limit, offset])
        query += f" ORDER BY created_at DESC LIMIT ${len(params) - 1} OFFSET ${len(params)}"

        rows = await self.db.fetch(query, *params)

        return [
            {
                "task_id": row["id"],
                "task_type": row["task_type"],
                "status": row["status"],
                "created_at": row["created_at"].isoformat(),
                "updated_at": row["updated_at"].isoformat(),
                "completed_at": row["completed_at"].isoformat() if row["completed_at"] else None,
                "operation": row["operation"],
                "retry_count": row["retry_count"],
                "max_retries": row["max_retries"],
                "priority": row["priority"]
            }
            for row in rows
        ]

    @log_function_call("get_task_analytics")
    async def get_task_analytics(self, days: int = 7) -> Dict[str, Any]:
        """Get task analytics for monitoring.

        Aggregates tasks created in the last ``days`` days by task type and
        status.

        Returns:
            A dict with ``summary`` totals, ``by_task_type`` (count and
            average duration per status) and ``by_status`` counts.
        """
        # The interval is passed as a bound parameter instead of being
        # formatted into the SQL text.
        query = """
            SELECT
            task_type,
            status,
            COUNT(*) as task_count,
            AVG(EXTRACT(EPOCH FROM (COALESCE(completed_at, NOW()) - created_at))) as avg_duration_seconds,
            COUNT(CASE WHEN status = 'completed' THEN 1 END) as completed_count,
            COUNT(CASE WHEN status = 'failed' THEN 1 END) as failed_count,
            COUNT(CASE WHEN status = 'running' THEN 1 END) as running_count
            FROM blog_writer_tasks
            WHERE created_at >= NOW() - make_interval(days => $1)
            GROUP BY task_type, status
            ORDER BY task_type, status
        """

        rows = await self.db.fetch(query, int(days))

        analytics = {
            "summary": {
                "total_tasks": sum(row["task_count"] for row in rows),
                "completed_tasks": sum(row["completed_count"] for row in rows),
                "failed_tasks": sum(row["failed_count"] for row in rows),
                "running_tasks": sum(row["running_count"] for row in rows)
            },
            "by_task_type": {},
            "by_status": {}
        }

        for row in rows:
            task_type = row["task_type"]
            status = row["status"]

            per_type = analytics["by_task_type"].setdefault(task_type, {})
            per_type[status] = {
                "count": row["task_count"],
                "avg_duration_seconds": float(row["avg_duration_seconds"]) if row["avg_duration_seconds"] else 0
            }

            analytics["by_status"][status] = analytics["by_status"].get(status, 0) + row["task_count"]

        return analytics

    # Task execution methods (same as original but with database persistence)
    async def start_research_task(self, request: BlogResearchRequest, user_id: str) -> str:
        """Start a research operation and return a task ID."""
        task_id = await self.create_task(
            user_id=user_id,
            task_type="research",
            request_data=request.dict(),
            operation="research_operation"
        )

        # Start the research operation in the background
        self._spawn_background(self._run_research_task(task_id, request))

        return task_id

    async def start_outline_task(self, request: BlogOutlineRequest, user_id: str) -> str:
        """Start an outline generation operation and return a task ID."""
        task_id = await self.create_task(
            user_id=user_id,
            task_type="outline",
            request_data=request.dict(),
            operation="outline_generation"
        )

        # Start the outline generation operation in the background
        self._spawn_background(self._run_outline_generation_task(task_id, request))

        return task_id

    async def start_medium_generation_task(self, request: MediumBlogGenerateRequest, user_id: str) -> str:
        """Start a medium blog generation task."""
        task_id = await self.create_task(
            user_id=user_id,
            task_type="medium_generation",
            request_data=request.dict(),
            operation="medium_blog_generation"
        )

        self._spawn_background(self._run_medium_generation_task(task_id, request))
        return task_id

    async def _record_task_failure(self, task_id: str, progress_message: str, error: Exception, operation: str):
        """Log a background-task failure and persist it on the task."""
        # Log first so the error is still reported if the DB writes below fail.
        blog_writer_logger.log_error(error, operation, context={"task_id": task_id})
        await self.update_progress(task_id, progress_message, 100, "error")
        await self.update_task_status(
            task_id,
            "failed",
            error_data={"error_message": str(error), "error_type": type(error).__name__}
        )

    async def _run_research_task(self, task_id: str, request: BlogResearchRequest):
        """Background task to run research and update status with progress messages."""
        try:
            await self.update_progress(task_id, "🔍 Starting research operation...", 0)

            # Run the actual research with progress updates
            result = await self.service.research_with_progress(request, task_id)

            # Check if research failed gracefully
            if not result.success:
                await self.update_progress(
                    task_id,
                    f"❌ Research failed: {result.error_message or 'Unknown error'}",
                    100,
                    "error"
                )
                await self.update_task_status(
                    task_id,
                    "failed",
                    error_data={
                        "error_message": result.error_message,
                        "retry_suggested": result.retry_suggested,
                        "error_code": result.error_code,
                        "actionable_steps": result.actionable_steps
                    }
                )
            else:
                await self.update_progress(
                    task_id,
                    f"✅ Research completed successfully! Found {len(result.sources)} sources and {len(result.search_queries or [])} search queries.",
                    100,
                    "success"
                )
                await self.update_task_status(
                    task_id,
                    "completed",
                    result_data=result.dict()
                )

        except Exception as e:
            await self._record_task_failure(
                task_id,
                f"❌ Research failed with error: {str(e)}",
                e,
                "research_task"
            )

    async def _run_outline_generation_task(self, task_id: str, request: BlogOutlineRequest):
        """Background task to run outline generation and update status with progress messages."""
        try:
            await self.update_progress(task_id, "🧩 Starting outline generation...", 0)

            # Run the actual outline generation with progress updates
            result = await self.service.generate_outline_with_progress(request, task_id)

            await self.update_progress(
                task_id,
                f"✅ Outline generated successfully! Created {len(result.outline)} sections with {len(result.title_options)} title options.",
                100,
                "success"
            )
            await self.update_task_status(task_id, "completed", result_data=result.dict())

        except Exception as e:
            await self._record_task_failure(
                task_id,
                f"❌ Outline generation failed: {str(e)}",
                e,
                "outline_generation_task"
            )

    async def _run_medium_generation_task(self, task_id: str, request: MediumBlogGenerateRequest):
        """Background task to generate a medium blog using a single structured JSON call."""
        try:
            await self.update_progress(task_id, "📦 Packaging outline and metadata...", 0)

            # Basic guard: respect global target words
            total_target = int(request.globalTargetWords or 1000)
            if total_target > 1000:
                raise ValueError("Global target words exceed 1000; medium generation not allowed")

            result: MediumBlogGenerateResult = await self.service.generate_medium_blog_with_progress(
                request,
                task_id,
            )

            if not result or not getattr(result, "sections", None):
                raise ValueError("Empty generation result from model")

            # Check if result came from cache
            cache_hit = getattr(result, 'cache_hit', False)
            if cache_hit:
                await self.update_progress(task_id, "⚡ Found cached content - loading instantly!", 100, "success")
            else:
                await self.update_progress(task_id, "🤖 Generated fresh content with AI...", 100, "success")

            await self.update_task_status(task_id, "completed", result_data=result.dict())

        except Exception as e:
            await self._record_task_failure(
                task_id,
                f"❌ Medium generation failed: {str(e)}",
                e,
                "medium_generation_task"
            )
|
||||
285
backend/services/blog_writer/exceptions.py
Normal file
285
backend/services/blog_writer/exceptions.py
Normal file
@@ -0,0 +1,285 @@
|
||||
"""
|
||||
Blog Writer Exception Hierarchy
|
||||
|
||||
Defines custom exception classes for different failure modes in the AI Blog Writer.
|
||||
Each exception includes error_code, user_message, retry_suggested, and actionable_steps.
|
||||
"""
|
||||
|
||||
from typing import List, Optional, Dict, Any
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class ErrorCategory(Enum):
    """Categories for error classification."""

    TRANSIENT = "transient"  # Temporary issues, retry recommended
    PERMANENT = "permanent"  # Permanent issues, no retry
    USER_ERROR = "user_error"  # User input issues, fix input
    API_ERROR = "api_error"  # External API issues
    VALIDATION_ERROR = "validation_error"  # Data validation issues
    SYSTEM_ERROR = "system_error"  # Internal system issues
|
||||
|
||||
|
||||
class BlogWriterException(Exception):
    """Base exception for all Blog Writer errors.

    Args:
        message: Internal error message, passed to ``Exception``.
        error_code: Machine-readable error identifier.
        user_message: Text suitable for showing to the end user.
        retry_suggested: Whether retrying the operation is recommended.
        actionable_steps: Suggested next steps; defaults to an empty list.
        error_category: Classification of the failure.
        context: Extra diagnostic data; defaults to an empty dict.
    """

    def __init__(
        self,
        message: str,
        error_code: str,
        user_message: str,
        retry_suggested: bool = False,
        actionable_steps: Optional[List[str]] = None,
        error_category: ErrorCategory = ErrorCategory.SYSTEM_ERROR,
        context: Optional[Dict[str, Any]] = None
    ):
        super().__init__(message)
        self.error_code = error_code
        self.user_message = user_message
        self.retry_suggested = retry_suggested
        self.actionable_steps = actionable_steps or []
        self.error_category = error_category
        self.context = context or {}

    def to_dict(self) -> Dict[str, Any]:
        """Convert exception to dictionary for API responses.

        The internal ``message`` is not included; only the user-facing
        fields are.
        """
        return {
            "error_code": self.error_code,
            "user_message": self.user_message,
            "retry_suggested": self.retry_suggested,
            "actionable_steps": self.actionable_steps,
            "error_category": self.error_category.value,
            "context": self.context
        }
|
||||
|
||||
|
||||
class ResearchFailedException(BlogWriterException):
    """Raised when research operation fails.

    Uses error code ``RESEARCH_FAILED``, category ``API_ERROR`` and a fixed
    list of actionable steps; retry is suggested by default.
    """

    def __init__(
        self,
        message: str,
        user_message: str = "Research failed. Please try again with different keywords or check your internet connection.",
        retry_suggested: bool = True,
        context: Optional[Dict[str, Any]] = None
    ):
        super().__init__(
            message=message,
            error_code="RESEARCH_FAILED",
            user_message=user_message,
            retry_suggested=retry_suggested,
            actionable_steps=[
                "Try with different keywords",
                "Check your internet connection",
                "Wait a few minutes and try again",
                "Contact support if the issue persists"
            ],
            error_category=ErrorCategory.API_ERROR,
            context=context
        )
|
||||
|
||||
|
||||
class OutlineGenerationException(BlogWriterException):
    """Raised when outline generation fails.

    Uses error code ``OUTLINE_GENERATION_FAILED``, category ``API_ERROR``
    and a fixed list of actionable steps; retry is suggested by default.
    """

    def __init__(
        self,
        message: str,
        user_message: str = "Outline generation failed. Please try again or adjust your research data.",
        retry_suggested: bool = True,
        context: Optional[Dict[str, Any]] = None
    ):
        super().__init__(
            message=message,
            error_code="OUTLINE_GENERATION_FAILED",
            user_message=user_message,
            retry_suggested=retry_suggested,
            actionable_steps=[
                "Try generating outline again",
                "Check if research data is complete",
                "Try with different research keywords",
                "Contact support if the issue persists"
            ],
            error_category=ErrorCategory.API_ERROR,
            context=context
        )
|
||||
|
||||
|
||||
class ContentGenerationException(BlogWriterException):
    """Raised when content generation fails.

    Uses error code ``CONTENT_GENERATION_FAILED``, category ``API_ERROR``
    and a fixed list of actionable steps; retry is suggested by default.
    """

    def __init__(
        self,
        message: str,
        user_message: str = "Content generation failed. Please try again or adjust your outline.",
        retry_suggested: bool = True,
        context: Optional[Dict[str, Any]] = None
    ):
        super().__init__(
            message=message,
            error_code="CONTENT_GENERATION_FAILED",
            user_message=user_message,
            retry_suggested=retry_suggested,
            actionable_steps=[
                "Try generating content again",
                "Check if outline is complete",
                "Try with a shorter outline",
                "Contact support if the issue persists"
            ],
            error_category=ErrorCategory.API_ERROR,
            context=context
        )
|
||||
|
||||
|
||||
class SEOAnalysisException(BlogWriterException):
    """Raised when SEO analysis fails.

    Uses error code ``SEO_ANALYSIS_FAILED``, category ``API_ERROR`` and a
    fixed list of actionable steps; retry is suggested by default.
    """

    def __init__(
        self,
        message: str,
        user_message: str = "SEO analysis failed. Content was generated but SEO optimization is unavailable.",
        retry_suggested: bool = True,
        context: Optional[Dict[str, Any]] = None
    ):
        super().__init__(
            message=message,
            error_code="SEO_ANALYSIS_FAILED",
            user_message=user_message,
            retry_suggested=retry_suggested,
            actionable_steps=[
                "Try SEO analysis again",
                "Continue without SEO optimization",
                "Contact support if the issue persists"
            ],
            error_category=ErrorCategory.API_ERROR,
            context=context
        )
|
||||
|
||||
|
||||
class APIRateLimitException(BlogWriterException):
    """Raised when API rate limit is exceeded.

    Args:
        message: Technical description of the failure.
        retry_after: Suggested wait in seconds. When falsy (``None`` or 0)
            the user message falls back to "a few minutes" and the first
            actionable step suggests 60 seconds.
        context: Optional extra data passed through to the base class.

    Attributes:
        retry_after: The value passed in, kept so handlers can read it.
    """

    def __init__(
        self,
        message: str,
        retry_after: Optional[int] = None,
        context: Optional[Dict[str, Any]] = None
    ):
        retry_message = f"Rate limit exceeded. Please wait {retry_after} seconds before trying again." if retry_after else "Rate limit exceeded. Please wait a few minutes before trying again."

        # Handlers in this code base look the wait time up via
        # ``e.context.get('retry_after', 60)``, so publish it there. A copy is
        # built so the caller's dict is never mutated.
        if retry_after:
            context = {**(context or {}), "retry_after": retry_after}

        super().__init__(
            message=message,
            error_code="API_RATE_LIMIT",
            user_message=retry_message,
            retry_suggested=True,
            actionable_steps=[
                f"Wait {retry_after or 60} seconds before trying again",
                "Reduce the frequency of requests",
                "Try again during off-peak hours",
                "Contact support if you need higher limits"
            ],
            error_category=ErrorCategory.API_ERROR,
            context=context
        )
        # Set after the base initializer, mirroring PartialSuccessException.
        self.retry_after = retry_after
|
||||
|
||||
|
||||
class APITimeoutException(BlogWriterException):
    """Error type for an API request that timed out.

    Forwards error code ``API_TIMEOUT`` and the ``TRANSIENT`` category to
    ``BlogWriterException``; a retry is always suggested.

    Args:
        message: Technical description of the failure.
        timeout_seconds: Duration quoted in the user-facing message.
        context: Optional extra data passed through to the base class.
    """

    def __init__(
        self,
        message: str,
        timeout_seconds: int = 60,
        context: Optional[Dict[str, Any]] = None
    ):
        shown_to_user = f"Request timed out after {timeout_seconds} seconds. Please try again."
        recovery_steps = [
            "Try again with a shorter request",
            "Check your internet connection",
            "Try again during off-peak hours",
            "Contact support if the issue persists",
        ]
        super().__init__(
            message=message,
            error_code="API_TIMEOUT",
            user_message=shown_to_user,
            retry_suggested=True,
            actionable_steps=recovery_steps,
            error_category=ErrorCategory.TRANSIENT,
            context=context,
        )
|
||||
|
||||
|
||||
class ValidationException(BlogWriterException):
    """Error type for input that fails validation.

    Forwards error code ``VALIDATION_ERROR`` and the ``USER_ERROR`` category
    to ``BlogWriterException``; a retry is never suggested.

    Args:
        message: Technical description of the failure.
        field: Name of the offending field, quoted in the first recovery step.
        user_message: Text suitable for showing to the end user.
        context: Optional extra data passed through to the base class.
    """

    def __init__(
        self,
        message: str,
        field: str,
        user_message: str = "Invalid input provided. Please check your data and try again.",
        context: Optional[Dict[str, Any]] = None
    ):
        recovery_steps = [
            f"Check the {field} field",
            "Ensure all required fields are filled",
            "Verify data format is correct",
            "Contact support if you need help",
        ]
        super().__init__(
            message=message,
            error_code="VALIDATION_ERROR",
            user_message=user_message,
            retry_suggested=False,
            actionable_steps=recovery_steps,
            error_category=ErrorCategory.USER_ERROR,
            context=context,
        )
|
||||
|
||||
|
||||
class CircuitBreakerOpenException(BlogWriterException):
    """Error type signalling that the circuit breaker is open.

    Forwards error code ``CIRCUIT_BREAKER_OPEN`` and the ``TRANSIENT``
    category to ``BlogWriterException``; a retry is always suggested.

    Args:
        message: Technical description of the failure.
        retry_after: Seconds to wait, quoted in the user message and in the
            first recovery step.
        context: Optional extra data passed through to the base class.
    """

    def __init__(
        self,
        message: str,
        retry_after: int,
        context: Optional[Dict[str, Any]] = None
    ):
        shown_to_user = f"Service temporarily unavailable. Please wait {retry_after} seconds before trying again."
        recovery_steps = [
            f"Wait {retry_after} seconds before trying again",
            "Try again during off-peak hours",
            "Contact support if the issue persists",
        ]
        super().__init__(
            message=message,
            error_code="CIRCUIT_BREAKER_OPEN",
            user_message=shown_to_user,
            retry_suggested=True,
            actionable_steps=recovery_steps,
            error_category=ErrorCategory.TRANSIENT,
            context=context,
        )
|
||||
|
||||
|
||||
class PartialSuccessException(BlogWriterException):
    """Error type for an operation that only partly completed.

    Forwards error code ``PARTIAL_SUCCESS`` and the ``TRANSIENT`` category to
    ``BlogWriterException`` and keeps the partial output on the instance.

    Args:
        message: Technical description of the failure.
        partial_results: Whatever was produced before the failure.
        failed_operations: Names of the operations that did not complete.
        user_message: Text suitable for showing to the end user.
        context: Optional extra data passed through to the base class.

    Attributes:
        partial_results: The ``partial_results`` argument, unchanged.
        failed_operations: The ``failed_operations`` argument, unchanged.
    """

    def __init__(
        self,
        message: str,
        partial_results: Dict[str, Any],
        failed_operations: List[str],
        user_message: str = "Operation partially completed. Some sections were generated successfully.",
        context: Optional[Dict[str, Any]] = None
    ):
        recovery_steps = [
            "Review the generated content",
            "Retry failed sections individually",
            "Contact support if you need help with failed sections",
        ]
        super().__init__(
            message=message,
            error_code="PARTIAL_SUCCESS",
            user_message=user_message,
            retry_suggested=True,
            actionable_steps=recovery_steps,
            error_category=ErrorCategory.TRANSIENT,
            context=context,
        )
        # Assigned only after the base initializer has run, as before.
        self.partial_results = partial_results
        self.failed_operations = failed_operations
|
||||
293
backend/services/blog_writer/logger_config.py
Normal file
293
backend/services/blog_writer/logger_config.py
Normal file
@@ -0,0 +1,293 @@
|
||||
"""
|
||||
Structured Logging Configuration for Blog Writer
|
||||
|
||||
Configures structured JSON logging with correlation IDs, context tracking,
|
||||
and performance metrics for the AI Blog Writer system.
|
||||
"""
|
||||
|
||||
import json
|
||||
import uuid
|
||||
import time
|
||||
import sys
|
||||
from typing import Dict, Any, Optional
|
||||
from contextvars import ContextVar
|
||||
from loguru import logger
|
||||
from datetime import datetime
|
||||
|
||||
# Request-scoped tracking state. Each variable defaults to the empty string,
# which the rest of this module treats as "not set".
correlation_id: ContextVar[str] = ContextVar("correlation_id", default="")
user_id: ContextVar[str] = ContextVar("user_id", default="")
task_id: ContextVar[str] = ContextVar("task_id", default="")
operation: ContextVar[str] = ContextVar("operation", default="")
|
||||
|
||||
|
||||
class BlogWriterLogger:
    """Structured-logging facade for the Blog Writer services.

    Wraps the module-level loguru ``logger`` with helpers that attach a
    structured payload (event type, timings, identifiers) to each record, and
    manages the request-scoped context variables declared in this module.

    Every helper stores its payload under the ``"extra"`` key of the record's
    ``extra`` dict via ``logger.bind(extra=...)``. Binding (rather than passing
    ``extra=`` as a keyword to ``logger.info``) matters: loguru applies
    ``str.format`` to the message whenever a logging call receives keyword
    arguments, so a message containing ``{`` or ``}`` (for example a
    stringified dict inside an error) would raise from within the logger.
    """

    def __init__(self):
        self._setup_logger()

    def _setup_logger(self):
        """Obtain the shared service logger named ``"blog_writer"``.

        NOTE(review): ``__init__`` ignores the returned logger and nothing in
        this class registers ``_json_formatter`` with a sink; presumably
        ``get_service_logger`` performs the sink configuration -- confirm.
        """
        # Kept function-local exactly as in the original code.
        from utils.logger_utils import get_service_logger
        return get_service_logger("blog_writer")

    def _json_formatter(self, record):
        """Serialize a loguru record to a JSON string.

        Args:
            record: A loguru record dict.

        Returns:
            A JSON document holding the core record fields, the current values
            of the module's context variables, exception details when present,
            and every key of ``record["extra"]`` merged in at top level.
        """
        import traceback

        # Build structured log entry (context variables are read at format time).
        log_entry = {
            "timestamp": datetime.fromtimestamp(record["time"].timestamp()).isoformat(),
            "level": record["level"].name,
            "logger": record["name"],
            "function": record["function"],
            "line": record["line"],
            "message": record["message"],
            "correlation_id": correlation_id.get(''),
            "user_id": user_id.get(''),
            "task_id": task_id.get(''),
            "operation": operation.get(''),
            "module": record["module"],
            "process_id": record["process"].id,
            "thread_id": record["thread"].id
        }

        # Add exception info if present.
        exc = record["exception"]
        if exc:
            log_entry["exception"] = {
                # ``type``/``traceback`` can be None when exception capture was
                # requested but no exception was active.
                "type": exc.type.__name__ if exc.type else None,
                "value": str(exc.value),
                # Render the frames as text; stringifying the raw traceback
                # object would only yield its repr.
                "traceback": "".join(traceback.format_tb(exc.traceback)) if exc.traceback else None
            }

        # Add extra fields from record. Keys that collide with the core
        # fields above overwrite them.
        if record["extra"]:
            log_entry.update(record["extra"])

        return json.dumps(log_entry, default=str)

    def set_context(
        self,
        correlation_id_val: Optional[str] = None,
        user_id_val: Optional[str] = None,
        task_id_val: Optional[str] = None,
        operation_val: Optional[str] = None
    ):
        """Set context variables for the current request.

        Only truthy arguments are applied; ``None`` or ``''`` leaves the
        corresponding variable untouched (use ``clear_context`` to reset).
        """
        if correlation_id_val:
            correlation_id.set(correlation_id_val)
        if user_id_val:
            user_id.set(user_id_val)
        if task_id_val:
            task_id.set(task_id_val)
        if operation_val:
            operation.set(operation_val)

    def clear_context(self):
        """Reset all four context variables to the empty string."""
        correlation_id.set('')
        user_id.set('')
        task_id.set('')
        operation.set('')

    def generate_correlation_id(self) -> str:
        """Return a new random UUID4 rendered as a string."""
        return str(uuid.uuid4())

    def log_operation_start(
        self,
        operation_name: str,
        **kwargs
    ):
        """Log the start of an operation at INFO level.

        Args:
            operation_name: Name used in the message and the payload.
            **kwargs: Extra payload fields; they override the default
                ``operation`` / ``event_type`` keys on collision.
        """
        logger.bind(
            extra={
                "operation": operation_name,
                "event_type": "operation_start",
                **kwargs
            }
        ).info(f"Starting {operation_name}")

    def log_operation_end(
        self,
        operation_name: str,
        duration_ms: float,
        success: bool = True,
        **kwargs
    ):
        """Log the end of an operation, with its duration, at INFO level.

        Args:
            operation_name: Name used in the message and the payload.
            duration_ms: Elapsed time in milliseconds.
            success: Outcome flag stored in the payload.
            **kwargs: Extra payload fields; they override defaults on collision.
        """
        logger.bind(
            extra={
                "operation": operation_name,
                "event_type": "operation_end",
                "duration_ms": duration_ms,
                "success": success,
                **kwargs
            }
        ).info(f"Completed {operation_name} in {duration_ms:.2f}ms")

    def log_api_call(
        self,
        api_name: str,
        endpoint: str,
        duration_ms: float,
        status_code: Optional[int] = None,
        token_usage: Optional[Dict[str, int]] = None,
        **kwargs
    ):
        """Log one external API call, with its metrics, at INFO level.

        Args:
            api_name: Name of the API, used in the message and the payload.
            endpoint: Endpoint or method that was invoked.
            duration_ms: Elapsed time in milliseconds.
            status_code: Response status, if known.
            token_usage: Token accounting, if known.
            **kwargs: Extra payload fields; they override defaults on collision.
        """
        logger.bind(
            extra={
                "event_type": "api_call",
                "api_name": api_name,
                "endpoint": endpoint,
                "duration_ms": duration_ms,
                "status_code": status_code,
                "token_usage": token_usage,
                **kwargs
            }
        ).info(f"API call to {api_name}")

    def log_error(
        self,
        error: Exception,
        operation: str,
        context: Optional[Dict[str, Any]] = None
    ):
        """Log an error, including its traceback, at ERROR level.

        Args:
            error: The exception being reported.
            operation: Name of the operation that failed.
            context: Optional extra data stored under ``"context"``.
        """
        # ``opt(exception=...)`` is loguru's way to attach exception details;
        # the stdlib-style ``exc_info=True`` keyword has no such effect there.
        logger.opt(exception=error).bind(
            extra={
                "event_type": "error",
                "operation": operation,
                "error_type": type(error).__name__,
                "error_message": str(error),
                "context": context or {}
            }
        ).error(f"Error in {operation}: {str(error)}")

    def log_performance(
        self,
        metric_name: str,
        value: float,
        unit: str = "ms",
        **kwargs
    ):
        """Log a single performance metric at INFO level.

        Args:
            metric_name: Name of the metric.
            value: Measured value.
            unit: Unit label for ``value``.
            **kwargs: Extra payload fields; they override defaults on collision.
        """
        logger.bind(
            extra={
                "event_type": "performance",
                "metric_name": metric_name,
                "value": value,
                "unit": unit,
                **kwargs
            }
        ).info(f"Performance metric: {metric_name} = {value} {unit}")
|
||||
|
||||
|
||||
# Global logger instance
|
||||
# Created at import time; importers of this module share this one object.
blog_writer_logger = BlogWriterLogger()
|
||||
|
||||
|
||||
def get_logger(name: str = "blog_writer"):
    """Return the shared loguru logger with ``name`` bound as a context field.

    Args:
        name: Value stored under the ``name`` key of each record's extras.

    Returns:
        The logger object produced by ``logger.bind``.
    """
    named_logger = logger.bind(name=name)
    return named_logger
|
||||
|
||||
|
||||
def log_function_call(func_name: str, **kwargs):
    """Build a decorator that logs the start, end and failure of each call.

    Works for both plain and ``async def`` functions. The returned wrapper
    keeps the decorated function's ``__name__``, ``__doc__`` and other
    metadata.

    Args:
        func_name: Operation name reported to ``blog_writer_logger``.
        **kwargs: Extra fields added to the "operation start" log entry.

    Returns:
        A decorator. The wrapper it produces returns whatever the wrapped
        function returns, and re-raises any exception after logging it.
    """
    # Local imports: these names are used only by this factory.
    import functools
    import inspect

    def decorator(func):
        def _log_start():
            """Emit the start entry and return the correlation id in effect."""
            correlation_id_val = correlation_id.get('')
            blog_writer_logger.log_operation_start(
                func_name,
                function=func.__name__,
                correlation_id=correlation_id_val,
                **kwargs
            )
            return correlation_id_val

        def _log_success(start_time, correlation_id_val):
            """Emit the end entry with the elapsed time in milliseconds."""
            blog_writer_logger.log_operation_end(
                func_name,
                (time.perf_counter() - start_time) * 1000,
                success=True,
                function=func.__name__,
                correlation_id=correlation_id_val
            )

        def _log_failure(error, start_time, correlation_id_val):
            """Emit the error entry with the elapsed time in milliseconds."""
            blog_writer_logger.log_error(
                error,
                func_name,
                context={
                    "function": func.__name__,
                    "duration_ms": (time.perf_counter() - start_time) * 1000,
                    "correlation_id": correlation_id_val
                }
            )

        @functools.wraps(func)
        async def async_wrapper(*args, **func_kwargs):
            # perf_counter is monotonic, so durations cannot be skewed by
            # wall-clock adjustments.
            start_time = time.perf_counter()
            correlation_id_val = _log_start()
            try:
                result = await func(*args, **func_kwargs)
                _log_success(start_time, correlation_id_val)
                return result
            except Exception as e:
                _log_failure(e, start_time, correlation_id_val)
                raise

        @functools.wraps(func)
        def sync_wrapper(*args, **func_kwargs):
            start_time = time.perf_counter()
            correlation_id_val = _log_start()
            try:
                result = func(*args, **func_kwargs)
                _log_success(start_time, correlation_id_val)
                return result
            except Exception as e:
                _log_failure(e, start_time, correlation_id_val)
                raise

        # Return appropriate wrapper based on function type
        if inspect.iscoroutinefunction(func):
            return async_wrapper
        return sync_wrapper

    return decorator
|
||||
@@ -16,6 +16,7 @@ from models.blog_models import (
|
||||
GroundingSupport,
|
||||
Citation,
|
||||
)
|
||||
from services.blog_writer.logger_config import blog_writer_logger, log_function_call
|
||||
|
||||
from .keyword_analyzer import KeywordAnalyzer
|
||||
from .competitor_analyzer import CompetitorAnalyzer
|
||||
@@ -32,6 +33,7 @@ class ResearchService:
|
||||
self.content_angle_generator = ContentAngleGenerator()
|
||||
self.data_filter = ResearchDataFilter()
|
||||
|
||||
@log_function_call("research_operation")
|
||||
async def research(self, request: BlogResearchRequest) -> BlogResearchResponse:
|
||||
"""
|
||||
Stage 1: Research & Strategy (AI Orchestration)
|
||||
@@ -47,6 +49,16 @@ class ResearchService:
|
||||
industry = request.industry or (request.persona.industry if request.persona and request.persona.industry else "General")
|
||||
target_audience = getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'
|
||||
|
||||
# Log research parameters
|
||||
blog_writer_logger.log_operation_start(
|
||||
"research",
|
||||
topic=topic,
|
||||
industry=industry,
|
||||
target_audience=target_audience,
|
||||
keywords=request.keywords,
|
||||
keyword_count=len(request.keywords)
|
||||
)
|
||||
|
||||
# Check cache first for exact keyword match
|
||||
cached_result = research_cache.get_cached_result(
|
||||
keywords=request.keywords,
|
||||
@@ -56,10 +68,12 @@ class ResearchService:
|
||||
|
||||
if cached_result:
|
||||
logger.info(f"Returning cached research result for keywords: {request.keywords}")
|
||||
blog_writer_logger.log_operation_end("research", 0, success=True, cache_hit=True)
|
||||
return BlogResearchResponse(**cached_result)
|
||||
|
||||
# Cache miss - proceed with API call
|
||||
logger.info(f"Cache miss - making API call for keywords: {request.keywords}")
|
||||
blog_writer_logger.log_operation_start("gemini_api_call", api_name="gemini_grounded", operation="research")
|
||||
gemini = GeminiGroundedProvider()
|
||||
|
||||
# Single comprehensive research prompt - Gemini handles Google Search automatically
|
||||
@@ -82,11 +96,23 @@ class ResearchService:
|
||||
"""
|
||||
|
||||
# Single Gemini call with native Google Search grounding - no fallbacks
|
||||
import time
|
||||
api_start_time = time.time()
|
||||
gemini_result = await gemini.generate_grounded_content(
|
||||
prompt=research_prompt,
|
||||
content_type="research",
|
||||
max_tokens=2000
|
||||
)
|
||||
api_duration_ms = (time.time() - api_start_time) * 1000
|
||||
|
||||
# Log API call performance
|
||||
blog_writer_logger.log_api_call(
|
||||
"gemini_grounded",
|
||||
"generate_grounded_content",
|
||||
api_duration_ms,
|
||||
token_usage=gemini_result.get("token_usage", {}),
|
||||
content_length=len(gemini_result.get("content", ""))
|
||||
)
|
||||
|
||||
# Extract sources from grounding metadata
|
||||
sources = self._extract_sources_from_grounding(gemini_result)
|
||||
@@ -105,6 +131,17 @@ class ResearchService:
|
||||
suggested_angles = self.content_angle_generator.generate(content, topic, industry)
|
||||
|
||||
logger.info(f"Research completed successfully with {len(sources)} sources and {len(search_queries)} search queries")
|
||||
|
||||
# Log analysis results
|
||||
blog_writer_logger.log_performance(
|
||||
"research_analysis",
|
||||
len(content),
|
||||
"characters",
|
||||
sources_count=len(sources),
|
||||
search_queries_count=len(search_queries),
|
||||
keyword_analysis_keys=len(keyword_analysis),
|
||||
suggested_angles_count=len(suggested_angles)
|
||||
)
|
||||
|
||||
# Create the response
|
||||
response = BlogResearchResponse(
|
||||
@@ -146,7 +183,47 @@ class ResearchService:
|
||||
error_message = str(e)
|
||||
logger.error(f"Research failed: {error_message}")
|
||||
|
||||
# Return a graceful failure response instead of raising
|
||||
# Log error with full context
|
||||
blog_writer_logger.log_error(
|
||||
e,
|
||||
"research",
|
||||
context={
|
||||
"topic": topic,
|
||||
"keywords": request.keywords,
|
||||
"industry": industry,
|
||||
"target_audience": target_audience
|
||||
}
|
||||
)
|
||||
|
||||
# Import custom exceptions for better error handling
|
||||
from services.blog_writer.exceptions import (
|
||||
ResearchFailedException,
|
||||
APIRateLimitException,
|
||||
APITimeoutException,
|
||||
ValidationException
|
||||
)
|
||||
|
||||
# Determine if this is a retryable error
|
||||
retry_suggested = True
|
||||
user_message = "Research failed. Please try again with different keywords or check your internet connection."
|
||||
|
||||
if isinstance(e, APIRateLimitException):
|
||||
retry_suggested = True
|
||||
user_message = f"Rate limit exceeded. Please wait {e.context.get('retry_after', 60)} seconds before trying again."
|
||||
elif isinstance(e, APITimeoutException):
|
||||
retry_suggested = True
|
||||
user_message = "Research request timed out. Please try again with a shorter query or check your internet connection."
|
||||
elif isinstance(e, ValidationException):
|
||||
retry_suggested = False
|
||||
user_message = "Invalid research request. Please check your input parameters and try again."
|
||||
elif "401" in error_message or "403" in error_message:
|
||||
retry_suggested = False
|
||||
user_message = "Authentication failed. Please check your API credentials."
|
||||
elif "400" in error_message:
|
||||
retry_suggested = False
|
||||
user_message = "Invalid request. Please check your input parameters."
|
||||
|
||||
# Return a graceful failure response with enhanced error information
|
||||
return BlogResearchResponse(
|
||||
success=False,
|
||||
sources=[],
|
||||
@@ -155,9 +232,18 @@ class ResearchService:
|
||||
suggested_angles=[],
|
||||
search_widget="",
|
||||
search_queries=[],
|
||||
error_message=error_message
|
||||
error_message=user_message,
|
||||
retry_suggested=retry_suggested,
|
||||
error_code=getattr(e, 'error_code', 'RESEARCH_FAILED'),
|
||||
actionable_steps=getattr(e, 'actionable_steps', [
|
||||
"Try with different keywords",
|
||||
"Check your internet connection",
|
||||
"Wait a few minutes and try again",
|
||||
"Contact support if the issue persists"
|
||||
])
|
||||
)
|
||||
|
||||
@log_function_call("research_with_progress")
|
||||
async def research_with_progress(self, request: BlogResearchRequest, task_id: str) -> BlogResearchResponse:
|
||||
"""
|
||||
Research method with progress updates for real-time feedback.
|
||||
@@ -291,7 +377,47 @@ class ResearchService:
|
||||
error_message = str(e)
|
||||
logger.error(f"Research failed: {error_message}")
|
||||
|
||||
# Return a graceful failure response instead of raising
|
||||
# Log error with full context
|
||||
blog_writer_logger.log_error(
|
||||
e,
|
||||
"research",
|
||||
context={
|
||||
"topic": topic,
|
||||
"keywords": request.keywords,
|
||||
"industry": industry,
|
||||
"target_audience": target_audience
|
||||
}
|
||||
)
|
||||
|
||||
# Import custom exceptions for better error handling
|
||||
from services.blog_writer.exceptions import (
|
||||
ResearchFailedException,
|
||||
APIRateLimitException,
|
||||
APITimeoutException,
|
||||
ValidationException
|
||||
)
|
||||
|
||||
# Determine if this is a retryable error
|
||||
retry_suggested = True
|
||||
user_message = "Research failed. Please try again with different keywords or check your internet connection."
|
||||
|
||||
if isinstance(e, APIRateLimitException):
|
||||
retry_suggested = True
|
||||
user_message = f"Rate limit exceeded. Please wait {e.context.get('retry_after', 60)} seconds before trying again."
|
||||
elif isinstance(e, APITimeoutException):
|
||||
retry_suggested = True
|
||||
user_message = "Research request timed out. Please try again with a shorter query or check your internet connection."
|
||||
elif isinstance(e, ValidationException):
|
||||
retry_suggested = False
|
||||
user_message = "Invalid research request. Please check your input parameters and try again."
|
||||
elif "401" in error_message or "403" in error_message:
|
||||
retry_suggested = False
|
||||
user_message = "Authentication failed. Please check your API credentials."
|
||||
elif "400" in error_message:
|
||||
retry_suggested = False
|
||||
user_message = "Invalid request. Please check your input parameters."
|
||||
|
||||
# Return a graceful failure response with enhanced error information
|
||||
return BlogResearchResponse(
|
||||
success=False,
|
||||
sources=[],
|
||||
@@ -300,7 +426,15 @@ class ResearchService:
|
||||
suggested_angles=[],
|
||||
search_widget="",
|
||||
search_queries=[],
|
||||
error_message=error_message
|
||||
error_message=user_message,
|
||||
retry_suggested=retry_suggested,
|
||||
error_code=getattr(e, 'error_code', 'RESEARCH_FAILED'),
|
||||
actionable_steps=getattr(e, 'actionable_steps', [
|
||||
"Try with different keywords",
|
||||
"Check your internet connection",
|
||||
"Wait a few minutes and try again",
|
||||
"Contact support if the issue persists"
|
||||
])
|
||||
)
|
||||
|
||||
def _extract_sources_from_grounding(self, gemini_result: Dict[str, Any]) -> List[ResearchSource]:
|
||||
|
||||
223
backend/services/blog_writer/retry_utils.py
Normal file
223
backend/services/blog_writer/retry_utils.py
Normal file
@@ -0,0 +1,223 @@
|
||||
"""
|
||||
Enhanced Retry Utilities for Blog Writer
|
||||
|
||||
Provides advanced retry logic with exponential backoff, jitter, retry budgets,
|
||||
and specific error code handling for different types of API failures.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import random
|
||||
import time
|
||||
from typing import Callable, Any, Optional, Dict, List
|
||||
from dataclasses import dataclass
|
||||
from loguru import logger
|
||||
|
||||
from .exceptions import APIRateLimitException, APITimeoutException
|
||||
|
||||
|
||||
@dataclass
class RetryConfig:
    """Tunable parameters for the retry helpers in this module.

    When ``retryable_errors`` is left as ``None`` a default set of message
    patterns is installed after initialization.
    """
    max_attempts: int = 3
    base_delay: float = 1.0
    max_delay: float = 60.0
    exponential_base: float = 2.0
    jitter: bool = True
    max_total_time: float = 300.0  # 5 minutes max total time
    retryable_errors: Optional[List[str]] = None

    def __post_init__(self):
        # An explicit list (even an empty one) is respected as given.
        if self.retryable_errors is not None:
            return

        server_errors = ["503", "502", "504"]
        rate_limit = ["429"]
        timeouts = ["timeout", "timed out"]
        connectivity = ["connection", "network"]
        load = ["overloaded", "busy"]
        self.retryable_errors = server_errors + rate_limit + timeouts + connectivity + load
|
||||
|
||||
|
||||
class RetryBudget:
    """Tracks retry budget to prevent excessive retries.

    The budget is a wall-clock allowance measured from construction time.

    Attributes:
        max_total_time: Total allowance in seconds.
        start_time: ``time.time()`` value captured at construction.
        used_time: Elapsed seconds as of the most recent query.
    """

    def __init__(self, max_total_time: float):
        self.max_total_time = max_total_time
        self.start_time = time.time()
        self.used_time = 0.0

    def _refresh(self) -> float:
        """Recompute, store and return the elapsed time since construction."""
        self.used_time = time.time() - self.start_time
        return self.used_time

    def can_retry(self) -> bool:
        """Check if we can still retry within budget."""
        return self._refresh() < self.max_total_time

    def remaining_time(self) -> float:
        """Get remaining time in budget, never below zero.

        The elapsed time is re-measured on every call, so the answer is
        current even if ``can_retry`` has not been called recently.
        """
        return max(0, self.max_total_time - self._refresh())
|
||||
|
||||
|
||||
def is_retryable_error(error: Exception, retryable_errors: List[str]) -> bool:
    """Report whether the error's text contains any retryable pattern.

    Matching is a case-insensitive substring test against ``str(error)``.

    Args:
        error: The exception to classify.
        retryable_errors: Patterns that mark an error as retryable.

    Returns:
        ``True`` on the first pattern found in the message, else ``False``.
    """
    haystack = str(error).lower()
    for needle in retryable_errors:
        if needle.lower() in haystack:
            return True
    return False
|
||||
|
||||
|
||||
def calculate_delay(attempt: int, config: RetryConfig) -> float:
    """Compute how long to wait before the next retry.

    The delay is ``base_delay * exponential_base ** attempt`` capped at
    ``max_delay``; when ``config.jitter`` is set, a random offset of up to
    10% of that value is added or subtracted.

    Args:
        attempt: Zero-based index of the attempt that just failed.
        config: Retry settings supplying the backoff parameters.

    Returns:
        The wait in seconds, never negative.
    """
    backoff = min(
        config.base_delay * (config.exponential_base ** attempt),
        config.max_delay,
    )

    if not config.jitter:
        return max(0, backoff)

    # Spread simultaneous retries out so clients do not all fire together.
    spread = backoff * 0.1  # 10% jitter
    return max(0, backoff + random.uniform(-spread, spread))
|
||||
|
||||
|
||||
async def retry_with_backoff(
    func: Callable,
    config: Optional[RetryConfig] = None,
    operation_name: str = "operation",
    context: Optional[Dict[str, Any]] = None
) -> Any:
    """
    Retry a function with enhanced backoff and budget management.

    Retrying stops when the attempt limit is reached, the error does not
    match ``config.retryable_errors``, or the time budget cannot cover the
    next delay.

    Args:
        func: Zero-argument async callable to invoke
        config: Retry configuration (defaults to ``RetryConfig()``)
        operation_name: Name of operation for logging
        context: Additional context attached to the wrapped exceptions

    Returns:
        Function result

    Raises:
        APIRateLimitException: The last error mentioned "429" or "rate limit".
        APITimeoutException: The last error mentioned "timeout".
        Exception: Otherwise the last exception itself, or a generic one when
            no attempt was ever made.
    """
    config = config or RetryConfig()
    budget = RetryBudget(config.max_total_time)
    last_exception = None
    # Fallback for the ``retry_after`` hint below: the loop can exit before
    # any backoff delay has been computed (single attempt, non-retryable
    # error), and the name must still be bound.
    delay = config.base_delay

    for attempt in range(config.max_attempts):
        try:
            # Check if we're still within budget
            if not budget.can_retry():
                logger.warning(f"Retry budget exceeded for {operation_name} after {budget.used_time:.2f}s")
                break

            # Execute the function
            result = await func()
            logger.info(f"{operation_name} succeeded on attempt {attempt + 1}")
            return result

        except Exception as e:
            last_exception = e

            # Check if this is the last attempt
            if attempt == config.max_attempts - 1:
                logger.error(f"{operation_name} failed after {config.max_attempts} attempts: {str(e)}")
                break

            # Check if error is retryable
            if not is_retryable_error(e, config.retryable_errors):
                logger.warning(f"{operation_name} failed with non-retryable error: {str(e)}")
                break

            # Calculate delay and wait
            delay = calculate_delay(attempt, config)
            # Re-measure elapsed time first so the time spent inside
            # ``func()`` counts against the budget.
            budget.can_retry()
            remaining_time = budget.remaining_time()

            # Don't wait longer than remaining budget
            if delay > remaining_time:
                logger.warning(f"Delay {delay:.2f}s exceeds remaining budget {remaining_time:.2f}s for {operation_name}")
                break

            logger.warning(
                f"{operation_name} attempt {attempt + 1} failed: {str(e)}. "
                f"Retrying in {delay:.2f}s (attempt {attempt + 2}/{config.max_attempts})"
            )

            await asyncio.sleep(delay)

    # If we get here, all retries failed
    if last_exception is not None:
        # Translate well-known failure modes into domain exceptions, keeping
        # the original error as the cause.
        error_str = str(last_exception)
        if "429" in error_str or "rate limit" in error_str.lower():
            raise APIRateLimitException(
                f"Rate limit exceeded after {config.max_attempts} attempts",
                retry_after=int(delay * 2),  # Suggest waiting longer
                context=context
            ) from last_exception
        elif "timeout" in error_str.lower():
            raise APITimeoutException(
                f"Request timed out after {config.max_attempts} attempts",
                timeout_seconds=int(config.max_total_time),
                context=context
            ) from last_exception

        raise last_exception

    # Reached only when no attempt ran (zero attempts or no budget).
    raise Exception(f"{operation_name} failed after {config.max_attempts} attempts")
|
||||
|
||||
|
||||
def retry_decorator(
    config: Optional[RetryConfig] = None,
    operation_name: Optional[str] = None
):
    """
    Decorator to add retry logic to async functions.

    The wrapper keeps the decorated function's ``__name__``, ``__doc__`` and
    other metadata.

    Args:
        config: Retry configuration passed to ``retry_with_backoff``
        operation_name: Name of operation for logging; defaults to the
            decorated function's ``__name__``

    Returns:
        A decorator producing an async wrapper that awaits
        ``retry_with_backoff`` around each call.
    """
    # Local import: the name is used only by this factory.
    import functools

    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            op_name = operation_name or func.__name__
            return await retry_with_backoff(
                # A fresh call (hence a fresh awaitable) per attempt.
                lambda: func(*args, **kwargs),
                config=config,
                operation_name=op_name
            )
        return wrapper
    return decorator
|
||||
|
||||
|
||||
# Ready-made retry settings, one per Blog Writer operation type.

# Research: three attempts within a three-minute budget.
RESEARCH_RETRY_CONFIG = RetryConfig(
    retryable_errors=["503", "429", "timeout", "overloaded", "connection"],
    max_total_time=180.0,  # 3 minutes for research
    max_delay=30.0,
    base_delay=2.0,
    max_attempts=3,
)

# Outline: two attempts within a two-minute budget.
OUTLINE_RETRY_CONFIG = RetryConfig(
    retryable_errors=["503", "429", "timeout", "overloaded"],
    max_total_time=120.0,  # 2 minutes for outline
    max_delay=20.0,
    base_delay=1.5,
    max_attempts=2,
)

# Content: three attempts within a ninety-second budget.
CONTENT_RETRY_CONFIG = RetryConfig(
    retryable_errors=["503", "429", "timeout", "overloaded"],
    max_total_time=90.0,  # 1.5 minutes for content
    max_delay=15.0,
    base_delay=1.0,
    max_attempts=3,
)

# SEO: two attempts within a one-minute budget.
SEO_RETRY_CONFIG = RetryConfig(
    retryable_errors=["503", "429", "timeout"],
    max_total_time=60.0,  # 1 minute for SEO
    max_delay=10.0,
    base_delay=1.0,
    max_attempts=2,
)
|
||||
Reference in New Issue
Block a user