SEO Dashboard Fixes and content planning refactoring

This commit is contained in:
ajaysi
2025-10-29 17:10:48 +05:30
parent 5866f49325
commit 4431cd9848
92 changed files with 7046 additions and 1940 deletions

View File

@@ -0,0 +1,209 @@
"""
Circuit Breaker Pattern for Blog Writer API Calls
Implements circuit breaker pattern to prevent cascading failures when external APIs
are experiencing issues. Tracks failure rates and automatically disables calls when
threshold is exceeded, with auto-recovery after cooldown period.
"""
import time
import asyncio
from typing import Callable, Any, Optional, Dict
from enum import Enum
from dataclasses import dataclass
from loguru import logger
from .exceptions import CircuitBreakerOpenException
class CircuitState(Enum):
    """States of the circuit breaker state machine."""

    # Normal operation
    CLOSED = "closed"
    # Circuit is open, calls are blocked
    OPEN = "open"
    # Testing if service is back
    HALF_OPEN = "half_open"
@dataclass
class CircuitBreakerConfig:
    """Thresholds and timings that drive a circuit breaker."""

    # Number of failures before opening
    failure_threshold: int = 5
    # Seconds to wait before trying again
    recovery_timeout: int = 60
    # Successes needed to close from half-open
    success_threshold: int = 3
    # Timeout for individual calls
    timeout: int = 30
    # Max failures per minute before opening
    max_failures_per_minute: int = 10
class CircuitBreaker:
    """Circuit breaker implementation for API calls.

    State machine:
      * CLOSED    - calls pass through; failures are counted.
      * OPEN      - calls are rejected with ``CircuitBreakerOpenException``
                    until ``config.recovery_timeout`` seconds have passed
                    since ``last_failure_time``.
      * HALF_OPEN - calls pass through as recovery probes;
                    ``config.success_threshold`` successes close the circuit
                    and any failure re-opens it.
    """

    def __init__(self, name: str, config: Optional[CircuitBreakerConfig] = None):
        """Create a breaker in the CLOSED state.

        Args:
            name: Identifier used in log messages and in ``get_state``.
            config: Thresholds and timings; ``CircuitBreakerConfig()`` when omitted.
        """
        self.name = name
        self.config = config or CircuitBreakerConfig()
        self.state = CircuitState.CLOSED
        self.failure_count = 0
        self.success_count = 0
        self.last_failure_time = 0
        self.last_success_time = 0
        self.failure_times = []  # Track failure times for rate limiting
        self._lock = asyncio.Lock()

    async def call(self, func: Callable, *args, **kwargs) -> Any:
        """
        Execute function with circuit breaker protection.

        Args:
            func: Callable returning an awaitable; it is awaited with a
                timeout of ``config.timeout`` seconds
            *args: Function arguments
            **kwargs: Function keyword arguments

        Returns:
            Function result

        Raises:
            CircuitBreakerOpenException: If circuit is open
            asyncio.TimeoutError: If the call exceeds ``config.timeout``
            Exception: Whatever ``func`` raises (re-raised after being recorded)
        """
        async with self._lock:
            # Check if circuit should be opened due to rate limiting
            await self._check_rate_limit()

            # Check circuit state
            if self.state == CircuitState.OPEN:
                if not self._should_attempt_reset():
                    retry_after = int(
                        self.config.recovery_timeout - (time.time() - self.last_failure_time)
                    )
                    raise CircuitBreakerOpenException(
                        f"Circuit breaker {self.name} is OPEN",
                        retry_after=max(0, retry_after),
                        context={"circuit_name": self.name, "state": self.state.value}
                    )
                self.state = CircuitState.HALF_OPEN
                self.success_count = 0
                logger.info(f"Circuit breaker {self.name} transitioning to HALF_OPEN")

        # The lock is released before the protected call: the _record_* helpers
        # re-acquire it and asyncio.Lock is not reentrant.
        try:
            # Execute the function with timeout
            result = await asyncio.wait_for(
                func(*args, **kwargs),
                timeout=self.config.timeout
            )
            # Record success
            await self._record_success()
            return result
        except asyncio.TimeoutError:
            await self._record_failure("timeout")
            raise
        except Exception as e:
            await self._record_failure(str(e))
            raise

    async def _check_rate_limit(self):
        """Prune the 60-second failure window and trip a CLOSED circuit if it is full.

        Expected to be called with ``self._lock`` held.
        """
        current_time = time.time()

        # Remove failures older than 1 minute
        self.failure_times = [
            failure_time for failure_time in self.failure_times
            if current_time - failure_time < 60
        ]

        # Only a CLOSED circuit may be tripped here. Re-opening an OPEN circuit
        # would push last_failure_time forward on every rejected call (delaying
        # recovery for as long as callers keep trying), and re-opening a
        # HALF_OPEN circuit would cancel the recovery probe before it runs.
        if self.state != CircuitState.CLOSED:
            return

        # Check if we've exceeded the rate limit
        if len(self.failure_times) >= self.config.max_failures_per_minute:
            self.state = CircuitState.OPEN
            self.last_failure_time = current_time
            logger.warning(f"Circuit breaker {self.name} opened due to rate limit: {len(self.failure_times)} failures in last minute")

    def _should_attempt_reset(self) -> bool:
        """Check if enough time has passed to attempt reset."""
        return time.time() - self.last_failure_time >= self.config.recovery_timeout

    async def _record_success(self):
        """Record a successful call and close the circuit once enough probes succeed."""
        async with self._lock:
            self.last_success_time = time.time()

            if self.state == CircuitState.HALF_OPEN:
                self.success_count += 1
                if self.success_count >= self.config.success_threshold:
                    self.state = CircuitState.CLOSED
                    self.failure_count = 0
                    # Drop failures from before the recovery so they cannot
                    # immediately re-trip the freshly closed circuit.
                    self.failure_times = []
                    logger.info(f"Circuit breaker {self.name} closed after {self.success_count} successes")
            elif self.state == CircuitState.CLOSED:
                # Reset failure count on success
                self.failure_count = 0

    async def _record_failure(self, error: str):
        """Record a failed call and open the circuit when warranted.

        Args:
            error: Short description of the failure, used in the log message.
        """
        async with self._lock:
            current_time = time.time()
            self.failure_count += 1
            self.last_failure_time = current_time
            self.failure_times.append(current_time)

            logger.warning(f"Circuit breaker {self.name} recorded failure #{self.failure_count}: {error}")

            if self.state == CircuitState.HALF_OPEN:
                # A failed recovery probe re-opens the circuit regardless of
                # the accumulated failure count.
                self.state = CircuitState.OPEN
                logger.error(f"Circuit breaker {self.name} re-opened after failed recovery probe")
            elif self.failure_count >= self.config.failure_threshold:
                # Open circuit if threshold exceeded
                self.state = CircuitState.OPEN
                logger.error(f"Circuit breaker {self.name} opened after {self.failure_count} failures")

    def get_state(self) -> Dict[str, Any]:
        """Get current circuit breaker state as a plain dictionary."""
        now = time.time()  # one reading for the whole window computation
        return {
            "name": self.name,
            "state": self.state.value,
            "failure_count": self.failure_count,
            "success_count": self.success_count,
            "last_failure_time": self.last_failure_time,
            "last_success_time": self.last_success_time,
            "failures_in_last_minute": sum(
                1 for t in self.failure_times if now - t < 60
            )
        }
class CircuitBreakerManager:
    """Manages multiple circuit breakers, keyed by name."""

    def __init__(self):
        self._breakers: Dict[str, CircuitBreaker] = {}

    def get_breaker(self, name: str, config: Optional[CircuitBreakerConfig] = None) -> CircuitBreaker:
        """Get or create a circuit breaker.

        Args:
            name: Registry key of the breaker.
            config: Used only when the breaker is created; ignored when a
                breaker with this name already exists.
        """
        if name not in self._breakers:
            self._breakers[name] = CircuitBreaker(name, config)
        return self._breakers[name]

    def get_all_states(self) -> Dict[str, Dict[str, Any]]:
        """Get states of all circuit breakers, keyed by breaker name."""
        return {name: breaker.get_state() for name, breaker in self._breakers.items()}

    def reset_breaker(self, name: str):
        """Reset a circuit breaker to closed state; unknown names are ignored."""
        breaker = self._breakers.get(name)
        if breaker is None:
            return
        breaker.state = CircuitState.CLOSED
        breaker.failure_count = 0
        breaker.success_count = 0
        # Also forget the recent-failure window; otherwise the rate-limit
        # check on the very next call can re-open the circuit right away.
        breaker.failure_times = []
        logger.info(f"Circuit breaker {name} manually reset")
# Global circuit breaker manager: the module-level registry that the
# ``circuit_breaker`` decorator looks breakers up in by name.
circuit_breaker_manager = CircuitBreakerManager()
def circuit_breaker(name: str, config: Optional[CircuitBreakerConfig] = None):
    """
    Decorator to add circuit breaker protection to async functions.

    The breaker is looked up (or created) in ``circuit_breaker_manager`` on
    every call, so all functions decorated with the same ``name`` share one
    breaker.

    Args:
        name: Circuit breaker name
        config: Circuit breaker configuration

    Returns:
        A decorator that wraps an async function so each call goes through
        ``CircuitBreaker.call``.
    """
    # Function-scope import so the module's import block stays untouched.
    from functools import wraps

    def decorator(func: Callable) -> Callable:
        # wraps() keeps the decorated function's __name__, __doc__ and
        # signature metadata, which logging and introspection rely on.
        @wraps(func)
        async def wrapper(*args, **kwargs):
            breaker = circuit_breaker_manager.get_breaker(name, config)
            return await breaker.call(func, *args, **kwargs)
        return wrapper
    return decorator

View File

@@ -0,0 +1,536 @@
"""
Database-Backed Task Manager for Blog Writer
Replaces in-memory task storage with persistent database storage for
reliability, recovery, and analytics.
"""
import asyncio
import uuid
import json
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
from loguru import logger
from services.blog_writer.logger_config import blog_writer_logger, log_function_call
from models.blog_models import (
BlogResearchRequest,
BlogOutlineRequest,
MediumBlogGenerateRequest,
MediumBlogGenerateResult,
)
from services.blog_writer.blog_service import BlogWriterService
class DatabaseTaskManager:
    """Database-backed task manager for blog writer operations.

    Tasks, their progress messages and their metrics are persisted through
    ``db_connection``, which must provide awaitable ``execute``, ``fetch``,
    ``fetchrow`` and ``fetchval`` methods that take a query using ``$n``
    placeholders followed by its positional arguments.
    """

    def __init__(self, db_connection):
        self.db = db_connection
        self.service = BlogWriterService()
        self._cleanup_task = None
        # Strong references to in-flight background tasks. The event loop only
        # keeps weak references, so an unreferenced task may be garbage
        # collected before it finishes.
        self._background_tasks = set()
        self._start_cleanup_task()

    def _start_cleanup_task(self):
        """Start background task to clean up old completed tasks.

        When no event loop is running (e.g. the manager is constructed at
        import time) the cleanup loop is not started and a warning is logged.
        """
        async def cleanup_loop():
            while True:
                try:
                    await self.cleanup_old_tasks()
                    await asyncio.sleep(3600)  # Run every hour
                except Exception as e:
                    logger.error(f"Error in cleanup task: {e}")
                    await asyncio.sleep(300)  # Wait 5 minutes on error

        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            logger.warning("No running event loop; blog writer task cleanup loop not started")
            return
        self._cleanup_task = loop.create_task(cleanup_loop())

    def _spawn_background(self, coro):
        """Schedule ``coro`` as a task and hold a reference until it completes."""
        task = asyncio.create_task(coro)
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)
        return task

    async def _record_task_failure(self, task_id: str, error: Exception, log_operation: str, progress_message: str):
        """Log a background-task failure and persist it on the task row.

        The error is logged first so it is never lost, then the progress
        message and 'failed' status are written. A failure of those writes is
        logged rather than raised, because nothing awaits the background task.
        """
        blog_writer_logger.log_error(error, log_operation, context={"task_id": task_id})
        try:
            await self.update_progress(task_id, progress_message, 100, "error")
            await self.update_task_status(
                task_id,
                "failed",
                error_data={"error_message": str(error), "error_type": type(error).__name__}
            )
        except Exception as persist_error:
            logger.error(f"Could not persist failure of task {task_id}: {persist_error}")

    @log_function_call("create_task")
    async def create_task(
        self,
        user_id: str,
        task_type: str,
        request_data: Dict[str, Any],
        correlation_id: Optional[str] = None,
        operation: Optional[str] = None,
        priority: int = 0,
        max_retries: int = 3,
        metadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """Create a new task in the database.

        Inserts a row with status 'pending'; ``request_data`` and ``metadata``
        are stored as JSON text. A correlation id is generated when none is
        given.

        Returns:
            The generated task id (UUID4 string).
        """
        task_id = str(uuid.uuid4())
        correlation_id = correlation_id or str(uuid.uuid4())

        query = """
            INSERT INTO blog_writer_tasks
            (id, user_id, task_type, status, request_data, correlation_id, operation, priority, max_retries, metadata)
            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
        """
        await self.db.execute(
            query,
            task_id,
            user_id,
            task_type,
            'pending',
            json.dumps(request_data),
            correlation_id,
            operation,
            priority,
            max_retries,
            json.dumps(metadata or {})
        )

        blog_writer_logger.log_operation_start(
            "task_created",
            task_id=task_id,
            task_type=task_type,
            user_id=user_id,
            correlation_id=correlation_id
        )
        return task_id

    @log_function_call("get_task_status")
    async def get_task_status(self, task_id: str) -> Optional[Dict[str, Any]]:
        """Get the status of a task.

        Returns:
            A dictionary describing the task together with its 10 most recent
            progress messages (newest first), or ``None`` if no such task exists.
        """
        query = """
            SELECT
                id, user_id, task_type, status, request_data, result_data, error_data,
                created_at, updated_at, completed_at, correlation_id, operation,
                retry_count, max_retries, priority, metadata
            FROM blog_writer_tasks
            WHERE id = $1
        """
        row = await self.db.fetchrow(query, task_id)
        if not row:
            return None

        # Get progress messages
        progress_query = """
            SELECT timestamp, message, percentage, progress_type, metadata
            FROM blog_writer_task_progress
            WHERE task_id = $1
            ORDER BY timestamp DESC
            LIMIT 10
        """
        progress_rows = await self.db.fetch(progress_query, task_id)
        # NOTE(review): progress metadata is returned as stored, whereas the
        # task-level metadata below is JSON-decoded - confirm this is intended.
        progress_messages = [
            {
                "timestamp": entry["timestamp"].isoformat(),
                "message": entry["message"],
                "percentage": float(entry["percentage"]),
                "progress_type": entry["progress_type"],
                "metadata": entry["metadata"] or {}
            }
            for entry in progress_rows
        ]

        return {
            "task_id": row["id"],
            "user_id": row["user_id"],
            "task_type": row["task_type"],
            "status": row["status"],
            "created_at": row["created_at"].isoformat(),
            "updated_at": row["updated_at"].isoformat(),
            "completed_at": row["completed_at"].isoformat() if row["completed_at"] else None,
            "correlation_id": row["correlation_id"],
            "operation": row["operation"],
            "retry_count": row["retry_count"],
            "max_retries": row["max_retries"],
            "priority": row["priority"],
            "progress_messages": progress_messages,
            "result": json.loads(row["result_data"]) if row["result_data"] else None,
            "error": json.loads(row["error_data"]) if row["error_data"] else None,
            "metadata": json.loads(row["metadata"]) if row["metadata"] else {}
        }

    @log_function_call("update_task_status")
    async def update_task_status(
        self,
        task_id: str,
        status: str,
        result_data: Optional[Dict[str, Any]] = None,
        error_data: Optional[Dict[str, Any]] = None,
        completed_at: Optional[datetime] = None
    ):
        """Update task status and data.

        ``result_data`` and ``error_data`` overwrite the stored values (NULL
        when omitted). ``completed_at`` defaults to the current time for the
        terminal statuses 'completed', 'failed' and 'cancelled'.
        """
        is_terminal = status in ['completed', 'failed', 'cancelled']
        query = """
            UPDATE blog_writer_tasks
            SET status = $2, result_data = $3, error_data = $4, completed_at = $5, updated_at = NOW()
            WHERE id = $1
        """
        await self.db.execute(
            query,
            task_id,
            status,
            json.dumps(result_data) if result_data else None,
            json.dumps(error_data) if error_data else None,
            completed_at or (datetime.now() if is_terminal else None)
        )

        blog_writer_logger.log_operation_end(
            "task_status_updated",
            0,
            success=status in ['completed', 'cancelled'],
            task_id=task_id,
            status=status
        )

    @log_function_call("update_progress")
    async def update_progress(
        self,
        task_id: str,
        message: str,
        percentage: Optional[float] = None,
        progress_type: str = "info",
        metadata: Optional[Dict[str, Any]] = None
    ):
        """Update task progress.

        Appends a progress record (percentage stored as 0.0 when omitted) and
        moves the task from 'pending' to 'running' if it has not started yet.
        """
        # Insert progress record
        progress_query = """
            INSERT INTO blog_writer_task_progress
            (task_id, message, percentage, progress_type, metadata)
            VALUES ($1, $2, $3, $4, $5)
        """
        await self.db.execute(
            progress_query,
            task_id,
            message,
            percentage or 0.0,
            progress_type,
            json.dumps(metadata or {})
        )

        # Update task status to running if it was pending
        status_query = """
            UPDATE blog_writer_tasks
            SET status = 'running', updated_at = NOW()
            WHERE id = $1 AND status = 'pending'
        """
        await self.db.execute(status_query, task_id)
        logger.info(f"Progress update for task {task_id}: {message}")

    @log_function_call("record_metrics")
    async def record_metrics(
        self,
        task_id: str,
        operation: str,
        duration_ms: int,
        token_usage: Optional[Dict[str, int]] = None,
        api_calls: int = 0,
        cache_hits: int = 0,
        cache_misses: int = 0,
        error_count: int = 0,
        metadata: Optional[Dict[str, Any]] = None
    ):
        """Record performance metrics for a task and emit a performance log entry."""
        query = """
            INSERT INTO blog_writer_task_metrics
            (task_id, operation, duration_ms, token_usage, api_calls, cache_hits, cache_misses, error_count, metadata)
            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
        """
        await self.db.execute(
            query,
            task_id,
            operation,
            duration_ms,
            json.dumps(token_usage) if token_usage else None,
            api_calls,
            cache_hits,
            cache_misses,
            error_count,
            json.dumps(metadata or {})
        )

        blog_writer_logger.log_performance(
            f"task_metrics_{operation}",
            duration_ms,
            "ms",
            task_id=task_id,
            operation=operation,
            api_calls=api_calls,
            cache_hits=cache_hits,
            cache_misses=cache_misses
        )

    @log_function_call("increment_retry_count")
    async def increment_retry_count(self, task_id: str) -> int:
        """Increment retry count and return new count (0 if no row was updated)."""
        query = """
            UPDATE blog_writer_tasks
            SET retry_count = retry_count + 1, updated_at = NOW()
            WHERE id = $1
            RETURNING retry_count
        """
        result = await self.db.fetchval(query, task_id)
        return result or 0

    @log_function_call("cleanup_old_tasks")
    async def cleanup_old_tasks(self, days: int = 7) -> int:
        """Clean up old completed tasks.

        Deletes tasks in a terminal status that were created more than
        ``days`` days ago.

        Returns:
            Number of deleted rows, parsed from the driver's status string.
        """
        # ``days`` is bound as a parameter rather than interpolated into the
        # SQL text, matching the ``$n`` style used by every other query here.
        query = """
            DELETE FROM blog_writer_tasks
            WHERE status IN ('completed', 'failed', 'cancelled')
            AND created_at < NOW() - ($1::int * INTERVAL '1 day')
        """
        result = await self.db.execute(query, int(days))
        # presumably a status string such as "DELETE 5" - verify against the driver
        deleted_count = int(result.split()[-1]) if result else 0

        if deleted_count > 0:
            logger.info(f"Cleaned up {deleted_count} old blog writer tasks")
        return deleted_count

    @log_function_call("get_user_tasks")
    async def get_user_tasks(
        self,
        user_id: str,
        limit: int = 50,
        offset: int = 0,
        status_filter: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """Get tasks for a specific user, newest first, optionally filtered by status."""
        query = """
            SELECT
                id, task_type, status, created_at, updated_at, completed_at,
                operation, retry_count, max_retries, priority
            FROM blog_writer_tasks
            WHERE user_id = $1
        """
        params = [user_id]
        param_count = 1

        if status_filter:
            param_count += 1
            query += f" AND status = ${param_count}"
            params.append(status_filter)

        query += f" ORDER BY created_at DESC LIMIT ${param_count + 1} OFFSET ${param_count + 2}"
        params.extend([limit, offset])

        rows = await self.db.fetch(query, *params)
        return [
            {
                "task_id": row["id"],
                "task_type": row["task_type"],
                "status": row["status"],
                "created_at": row["created_at"].isoformat(),
                "updated_at": row["updated_at"].isoformat(),
                "completed_at": row["completed_at"].isoformat() if row["completed_at"] else None,
                "operation": row["operation"],
                "retry_count": row["retry_count"],
                "max_retries": row["max_retries"],
                "priority": row["priority"]
            }
            for row in rows
        ]

    @log_function_call("get_task_analytics")
    async def get_task_analytics(self, days: int = 7) -> Dict[str, Any]:
        """Get task analytics for monitoring.

        Aggregates tasks created within the last ``days`` days into an overall
        summary, a per-task-type/per-status breakdown and per-status totals.
        """
        # ``days`` is bound as a parameter rather than interpolated into the SQL text.
        query = """
            SELECT
                task_type,
                status,
                COUNT(*) as task_count,
                AVG(EXTRACT(EPOCH FROM (COALESCE(completed_at, NOW()) - created_at))) as avg_duration_seconds,
                COUNT(CASE WHEN status = 'completed' THEN 1 END) as completed_count,
                COUNT(CASE WHEN status = 'failed' THEN 1 END) as failed_count,
                COUNT(CASE WHEN status = 'running' THEN 1 END) as running_count
            FROM blog_writer_tasks
            WHERE created_at >= NOW() - ($1::int * INTERVAL '1 day')
            GROUP BY task_type, status
            ORDER BY task_type, status
        """
        rows = await self.db.fetch(query, int(days))

        analytics = {
            "summary": {
                "total_tasks": sum(row["task_count"] for row in rows),
                "completed_tasks": sum(row["completed_count"] for row in rows),
                "failed_tasks": sum(row["failed_count"] for row in rows),
                "running_tasks": sum(row["running_count"] for row in rows)
            },
            "by_task_type": {},
            "by_status": {}
        }

        for row in rows:
            task_type = row["task_type"]
            status = row["status"]

            per_type = analytics["by_task_type"].setdefault(task_type, {})
            per_type[status] = {
                "count": row["task_count"],
                "avg_duration_seconds": float(row["avg_duration_seconds"]) if row["avg_duration_seconds"] else 0
            }

            analytics["by_status"][status] = analytics["by_status"].get(status, 0) + row["task_count"]

        return analytics

    # Task execution methods (same as original but with database persistence)
    async def start_research_task(self, request: BlogResearchRequest, user_id: str) -> str:
        """Start a research operation and return a task ID."""
        task_id = await self.create_task(
            user_id=user_id,
            task_type="research",
            request_data=request.dict(),
            operation="research_operation"
        )
        # Start the research operation in the background
        self._spawn_background(self._run_research_task(task_id, request))
        return task_id

    async def start_outline_task(self, request: BlogOutlineRequest, user_id: str) -> str:
        """Start an outline generation operation and return a task ID."""
        task_id = await self.create_task(
            user_id=user_id,
            task_type="outline",
            request_data=request.dict(),
            operation="outline_generation"
        )
        # Start the outline generation operation in the background
        self._spawn_background(self._run_outline_generation_task(task_id, request))
        return task_id

    async def start_medium_generation_task(self, request: MediumBlogGenerateRequest, user_id: str) -> str:
        """Start a medium blog generation task and return a task ID."""
        task_id = await self.create_task(
            user_id=user_id,
            task_type="medium_generation",
            request_data=request.dict(),
            operation="medium_blog_generation"
        )
        self._spawn_background(self._run_medium_generation_task(task_id, request))
        return task_id

    async def _run_research_task(self, task_id: str, request: BlogResearchRequest):
        """Background task to run research and update status with progress messages."""
        try:
            await self.update_progress(task_id, "🔍 Starting research operation...", 0)

            # Run the actual research with progress updates
            result = await self.service.research_with_progress(request, task_id)

            # Check if research failed gracefully
            if not result.success:
                await self.update_progress(
                    task_id,
                    f"❌ Research failed: {result.error_message or 'Unknown error'}",
                    100,
                    "error"
                )
                await self.update_task_status(
                    task_id,
                    "failed",
                    error_data={
                        "error_message": result.error_message,
                        "retry_suggested": result.retry_suggested,
                        "error_code": result.error_code,
                        "actionable_steps": result.actionable_steps
                    }
                )
            else:
                await self.update_progress(
                    task_id,
                    f"✅ Research completed successfully! Found {len(result.sources)} sources and {len(result.search_queries or [])} search queries.",
                    100,
                    "success"
                )
                await self.update_task_status(
                    task_id,
                    "completed",
                    result_data=result.dict()
                )
        except Exception as e:
            await self._record_task_failure(
                task_id, e, "research_task", f"❌ Research failed with error: {str(e)}"
            )

    async def _run_outline_generation_task(self, task_id: str, request: BlogOutlineRequest):
        """Background task to run outline generation and update status with progress messages."""
        try:
            await self.update_progress(task_id, "🧩 Starting outline generation...", 0)

            # Run the actual outline generation with progress updates
            result = await self.service.generate_outline_with_progress(request, task_id)

            await self.update_progress(
                task_id,
                f"✅ Outline generated successfully! Created {len(result.outline)} sections with {len(result.title_options)} title options.",
                100,
                "success"
            )
            await self.update_task_status(task_id, "completed", result_data=result.dict())
        except Exception as e:
            await self._record_task_failure(
                task_id, e, "outline_generation_task", f"❌ Outline generation failed: {str(e)}"
            )

    async def _run_medium_generation_task(self, task_id: str, request: MediumBlogGenerateRequest):
        """Background task to generate a medium blog using a single structured JSON call."""
        try:
            await self.update_progress(task_id, "📦 Packaging outline and metadata...", 0)

            # Basic guard: respect global target words
            total_target = int(request.globalTargetWords or 1000)
            if total_target > 1000:
                raise ValueError("Global target words exceed 1000; medium generation not allowed")

            result: MediumBlogGenerateResult = await self.service.generate_medium_blog_with_progress(
                request,
                task_id,
            )

            if not result or not getattr(result, "sections", None):
                raise ValueError("Empty generation result from model")

            # Check if result came from cache
            cache_hit = getattr(result, 'cache_hit', False)
            if cache_hit:
                await self.update_progress(task_id, "⚡ Found cached content - loading instantly!", 100, "success")
            else:
                await self.update_progress(task_id, "🤖 Generated fresh content with AI...", 100, "success")

            await self.update_task_status(task_id, "completed", result_data=result.dict())
        except Exception as e:
            await self._record_task_failure(
                task_id, e, "medium_generation_task", f"❌ Medium generation failed: {str(e)}"
            )

View File

@@ -0,0 +1,285 @@
"""
Blog Writer Exception Hierarchy
Defines custom exception classes for different failure modes in the AI Blog Writer.
Each exception includes error_code, user_message, retry_suggested, and actionable_steps.
"""
from typing import List, Optional, Dict, Any
from enum import Enum
class ErrorCategory(Enum):
    """Classification buckets attached to Blog Writer errors."""

    # Temporary issues, retry recommended
    TRANSIENT = "transient"
    # Permanent issues, no retry
    PERMANENT = "permanent"
    # User input issues, fix input
    USER_ERROR = "user_error"
    # External API issues
    API_ERROR = "api_error"
    # Data validation issues
    VALIDATION_ERROR = "validation_error"
    # Internal system issues
    SYSTEM_ERROR = "system_error"
class BlogWriterException(Exception):
    """Root of the Blog Writer exception hierarchy.

    Carries a machine-readable ``error_code``, a ``user_message`` meant for
    display, a retry hint, suggested recovery steps, an ``ErrorCategory`` and
    a free-form ``context`` dictionary.
    """

    def __init__(
        self,
        message: str,
        error_code: str,
        user_message: str,
        retry_suggested: bool = False,
        actionable_steps: Optional[List[str]] = None,
        error_category: ErrorCategory = ErrorCategory.SYSTEM_ERROR,
        context: Optional[Dict[str, Any]] = None
    ):
        # ``message`` becomes the standard exception text (``str(exc)``).
        super().__init__(message)
        self.error_code = error_code
        self.error_category = error_category
        self.user_message = user_message
        self.retry_suggested = retry_suggested
        # Falsy values are normalized to fresh empty containers.
        self.actionable_steps = actionable_steps if actionable_steps else []
        self.context = context if context else {}

    def to_dict(self) -> Dict[str, Any]:
        """Return the user-facing fields as a dictionary for API responses."""
        payload = {
            attr: getattr(self, attr)
            for attr in ("error_code", "user_message", "retry_suggested", "actionable_steps")
        }
        payload["error_category"] = self.error_category.value
        payload["context"] = self.context
        return payload
class ResearchFailedException(BlogWriterException):
    """Error for a failed research operation (error code ``RESEARCH_FAILED``)."""

    def __init__(
        self,
        message: str,
        user_message: str = "Research failed. Please try again with different keywords or check your internet connection.",
        retry_suggested: bool = True,
        context: Optional[Dict[str, Any]] = None
    ):
        # Fixed recovery hints attached to every research failure.
        recovery_steps = [
            "Try with different keywords",
            "Check your internet connection",
            "Wait a few minutes and try again",
            "Contact support if the issue persists",
        ]
        super().__init__(
            message=message,
            user_message=user_message,
            retry_suggested=retry_suggested,
            context=context,
            error_code="RESEARCH_FAILED",
            error_category=ErrorCategory.API_ERROR,
            actionable_steps=recovery_steps,
        )
class OutlineGenerationException(BlogWriterException):
    """Error for a failed outline generation (error code ``OUTLINE_GENERATION_FAILED``)."""

    def __init__(
        self,
        message: str,
        user_message: str = "Outline generation failed. Please try again or adjust your research data.",
        retry_suggested: bool = True,
        context: Optional[Dict[str, Any]] = None
    ):
        # Fixed recovery hints attached to every outline failure.
        recovery_steps = [
            "Try generating outline again",
            "Check if research data is complete",
            "Try with different research keywords",
            "Contact support if the issue persists",
        ]
        super().__init__(
            message=message,
            user_message=user_message,
            retry_suggested=retry_suggested,
            context=context,
            error_code="OUTLINE_GENERATION_FAILED",
            error_category=ErrorCategory.API_ERROR,
            actionable_steps=recovery_steps,
        )
class ContentGenerationException(BlogWriterException):
    """Error for a failed content generation (error code ``CONTENT_GENERATION_FAILED``)."""

    def __init__(
        self,
        message: str,
        user_message: str = "Content generation failed. Please try again or adjust your outline.",
        retry_suggested: bool = True,
        context: Optional[Dict[str, Any]] = None
    ):
        # Fixed recovery hints attached to every content failure.
        recovery_steps = [
            "Try generating content again",
            "Check if outline is complete",
            "Try with a shorter outline",
            "Contact support if the issue persists",
        ]
        super().__init__(
            message=message,
            user_message=user_message,
            retry_suggested=retry_suggested,
            context=context,
            error_code="CONTENT_GENERATION_FAILED",
            error_category=ErrorCategory.API_ERROR,
            actionable_steps=recovery_steps,
        )
class SEOAnalysisException(BlogWriterException):
    """Error for a failed SEO analysis (error code ``SEO_ANALYSIS_FAILED``)."""

    def __init__(
        self,
        message: str,
        user_message: str = "SEO analysis failed. Content was generated but SEO optimization is unavailable.",
        retry_suggested: bool = True,
        context: Optional[Dict[str, Any]] = None
    ):
        # Fixed recovery hints attached to every SEO analysis failure.
        recovery_steps = [
            "Try SEO analysis again",
            "Continue without SEO optimization",
            "Contact support if the issue persists",
        ]
        super().__init__(
            message=message,
            user_message=user_message,
            retry_suggested=retry_suggested,
            context=context,
            error_code="SEO_ANALYSIS_FAILED",
            error_category=ErrorCategory.API_ERROR,
            actionable_steps=recovery_steps,
        )
class APIRateLimitException(BlogWriterException):
    """Raised when API rate limit is exceeded.

    Attributes:
        retry_after: Seconds the caller was told to wait, or ``None`` when
            unknown. Kept on the instance so handlers can read it directly
            instead of parsing it back out of the message.
    """

    def __init__(
        self,
        message: str,
        retry_after: Optional[int] = None,
        context: Optional[Dict[str, Any]] = None
    ):
        """
        Args:
            message: Internal error description.
            retry_after: Seconds to wait before retrying; a falsy value makes
                the user message fall back to generic wording and the first
                actionable step fall back to 60 seconds.
            context: Extra diagnostic data.
        """
        retry_message = f"Rate limit exceeded. Please wait {retry_after} seconds before trying again." if retry_after else "Rate limit exceeded. Please wait a few minutes before trying again."
        super().__init__(
            message=message,
            error_code="API_RATE_LIMIT",
            user_message=retry_message,
            retry_suggested=True,
            actionable_steps=[
                f"Wait {retry_after or 60} seconds before trying again",
                "Reduce the frequency of requests",
                "Try again during off-peak hours",
                "Contact support if you need higher limits"
            ],
            error_category=ErrorCategory.API_ERROR,
            context=context
        )
        self.retry_after = retry_after
class APITimeoutException(BlogWriterException):
    """Raised when API request times out.

    Attributes:
        timeout_seconds: The timeout that was exceeded, kept on the instance
            so handlers can read it without parsing the user message.
    """

    def __init__(
        self,
        message: str,
        timeout_seconds: int = 60,
        context: Optional[Dict[str, Any]] = None
    ):
        """
        Args:
            message: Internal error description.
            timeout_seconds: Timeout reported in the user message.
            context: Extra diagnostic data.
        """
        super().__init__(
            message=message,
            error_code="API_TIMEOUT",
            user_message=f"Request timed out after {timeout_seconds} seconds. Please try again.",
            retry_suggested=True,
            actionable_steps=[
                "Try again with a shorter request",
                "Check your internet connection",
                "Try again during off-peak hours",
                "Contact support if the issue persists"
            ],
            error_category=ErrorCategory.TRANSIENT,
            context=context
        )
        self.timeout_seconds = timeout_seconds
class ValidationException(BlogWriterException):
    """Raised when input validation fails.

    Attributes:
        field: Name of the offending input field, kept on the instance so
            handlers can highlight it without parsing the actionable steps.
    """

    def __init__(
        self,
        message: str,
        field: str,
        user_message: str = "Invalid input provided. Please check your data and try again.",
        context: Optional[Dict[str, Any]] = None
    ):
        """
        Args:
            message: Internal error description.
            field: Name of the field that failed validation.
            user_message: Text suitable for showing to the user.
            context: Extra diagnostic data.
        """
        super().__init__(
            message=message,
            error_code="VALIDATION_ERROR",
            user_message=user_message,
            retry_suggested=False,
            actionable_steps=[
                f"Check the {field} field",
                "Ensure all required fields are filled",
                "Verify data format is correct",
                "Contact support if you need help"
            ],
            error_category=ErrorCategory.USER_ERROR,
            context=context
        )
        self.field = field
class CircuitBreakerOpenException(BlogWriterException):
    """Raised when circuit breaker is open.

    Attributes:
        retry_after: Seconds until another attempt may be made, kept on the
            instance so handlers can read it (e.g. to schedule a retry)
            without parsing the user message.
    """

    def __init__(
        self,
        message: str,
        retry_after: int,
        context: Optional[Dict[str, Any]] = None
    ):
        """
        Args:
            message: Internal error description.
            retry_after: Seconds the caller should wait before retrying.
            context: Extra diagnostic data.
        """
        super().__init__(
            message=message,
            error_code="CIRCUIT_BREAKER_OPEN",
            user_message=f"Service temporarily unavailable. Please wait {retry_after} seconds before trying again.",
            retry_suggested=True,
            actionable_steps=[
                f"Wait {retry_after} seconds before trying again",
                "Try again during off-peak hours",
                "Contact support if the issue persists"
            ],
            error_category=ErrorCategory.TRANSIENT,
            context=context
        )
        self.retry_after = retry_after
class PartialSuccessException(BlogWriterException):
    """Error for an operation that only partly succeeded (error code ``PARTIAL_SUCCESS``).

    The instance additionally exposes ``partial_results`` and
    ``failed_operations`` exactly as passed in.
    """

    def __init__(
        self,
        message: str,
        partial_results: Dict[str, Any],
        failed_operations: List[str],
        user_message: str = "Operation partially completed. Some sections were generated successfully.",
        context: Optional[Dict[str, Any]] = None
    ):
        # Fixed recovery hints attached to every partial success.
        recovery_steps = [
            "Review the generated content",
            "Retry failed sections individually",
            "Contact support if you need help with failed sections",
        ]
        super().__init__(
            message=message,
            user_message=user_message,
            context=context,
            error_code="PARTIAL_SUCCESS",
            error_category=ErrorCategory.TRANSIENT,
            retry_suggested=True,
            actionable_steps=recovery_steps,
        )
        self.failed_operations = failed_operations
        self.partial_results = partial_results

View File

@@ -0,0 +1,293 @@
"""
Structured Logging Configuration for Blog Writer
Configures structured JSON logging with correlation IDs, context tracking,
and performance metrics for the AI Blog Writer system.
"""
import json
import uuid
import time
import sys
from typing import Dict, Any, Optional
from contextvars import ContextVar
from loguru import logger
from datetime import datetime
# Context variables for request tracking. Each defaults to the empty string;
# they are written by BlogWriterLogger.set_context / clear_context and read
# by BlogWriterLogger._json_formatter when a log entry is built.
correlation_id: ContextVar[str] = ContextVar('correlation_id', default='')
user_id: ContextVar[str] = ContextVar('user_id', default='')
task_id: ContextVar[str] = ContextVar('task_id', default='')
operation: ContextVar[str] = ContextVar('operation', default='')
class BlogWriterLogger:
"""Enhanced logger for Blog Writer with structured logging and context tracking."""
def __init__(self):
self._setup_logger()
def _setup_logger(self):
"""Configure loguru with structured JSON output."""
from utils.logger_utils import get_service_logger
return get_service_logger("blog_writer")
def _json_formatter(self, record):
"""Format log record as structured JSON."""
# Extract context variables
correlation_id_val = correlation_id.get('')
user_id_val = user_id.get('')
task_id_val = task_id.get('')
operation_val = operation.get('')
# Build structured log entry
log_entry = {
"timestamp": datetime.fromtimestamp(record["time"].timestamp()).isoformat(),
"level": record["level"].name,
"logger": record["name"],
"function": record["function"],
"line": record["line"],
"message": record["message"],
"correlation_id": correlation_id_val,
"user_id": user_id_val,
"task_id": task_id_val,
"operation": operation_val,
"module": record["module"],
"process_id": record["process"].id,
"thread_id": record["thread"].id
}
# Add exception info if present
if record["exception"]:
log_entry["exception"] = {
"type": record["exception"].type.__name__,
"value": str(record["exception"].value),
"traceback": record["exception"].traceback
}
# Add extra fields from record
if record["extra"]:
log_entry.update(record["extra"])
return json.dumps(log_entry, default=str)
def set_context(
self,
correlation_id_val: Optional[str] = None,
user_id_val: Optional[str] = None,
task_id_val: Optional[str] = None,
operation_val: Optional[str] = None
):
"""Set context variables for the current request."""
if correlation_id_val:
correlation_id.set(correlation_id_val)
if user_id_val:
user_id.set(user_id_val)
if task_id_val:
task_id.set(task_id_val)
if operation_val:
operation.set(operation_val)
def clear_context(self):
"""Clear all context variables."""
correlation_id.set('')
user_id.set('')
task_id.set('')
operation.set('')
def generate_correlation_id(self) -> str:
"""Generate a new correlation ID."""
return str(uuid.uuid4())
def log_operation_start(
self,
operation_name: str,
**kwargs
):
"""Log the start of an operation with context."""
logger.info(
f"Starting {operation_name}",
extra={
"operation": operation_name,
"event_type": "operation_start",
**kwargs
}
)
def log_operation_end(
self,
operation_name: str,
duration_ms: float,
success: bool = True,
**kwargs
):
"""Log the end of an operation with performance metrics."""
logger.info(
f"Completed {operation_name} in {duration_ms:.2f}ms",
extra={
"operation": operation_name,
"event_type": "operation_end",
"duration_ms": duration_ms,
"success": success,
**kwargs
}
)
def log_api_call(
    self,
    api_name: str,
    endpoint: str,
    duration_ms: float,
    status_code: Optional[int] = None,
    token_usage: Optional[Dict[str, int]] = None,
    **kwargs
):
    """Emit an info-level entry describing one external API call.

    Args:
        api_name: Name of the API that was called.
        endpoint: Endpoint or method that was invoked.
        duration_ms: Elapsed time of the call in milliseconds.
        status_code: Status code of the response, when known.
        token_usage: Token accounting for the call, when available.
        **kwargs: Additional fields merged into the structured payload
            (they override the built-in keys on collision).
    """
    fields = {
        "event_type": "api_call",
        "api_name": api_name,
        "endpoint": endpoint,
        "duration_ms": duration_ms,
        "status_code": status_code,
        "token_usage": token_usage,
    }
    fields.update(kwargs)
    logger.info(f"API call to {api_name}", extra=fields)
def log_error(
    self,
    error: Exception,
    operation: str,
    context: Optional[Dict[str, Any]] = None
):
    """Log an error together with its traceback and structured context.

    Args:
        error: The exception being reported.
        operation: Name of the operation that failed.
        context: Optional key/value pairs describing the failure.
    """
    error_text = str(error)
    fields = {
        "event_type": "error",
        "operation": operation,
        "error_type": type(error).__name__,
        "error_message": error_text,
        "context": context or {}
    }
    # loguru applies str.format() to the message whenever keyword arguments
    # are passed to the logging call, so an error text containing "{" or "}"
    # (JSON API error bodies, dict reprs) would raise inside the logger and
    # mask the real failure. Binding the payload keeps it under the same
    # "extra" key without triggering that formatting step.
    # ``exc_info=True`` is a stdlib-logging option that loguru does not
    # interpret; opt(exception=...) is what attaches the traceback.
    logger.opt(exception=error).bind(extra=fields).error(
        f"Error in {operation}: {error_text}"
    )
def log_performance(
    self,
    metric_name: str,
    value: float,
    unit: str = "ms",
    **kwargs
):
    """Emit an info-level entry recording a single performance metric.

    Args:
        metric_name: Name of the metric.
        value: Measured value.
        unit: Unit the value is expressed in.
        **kwargs: Additional fields merged into the structured payload
            (they override the built-in keys on collision).
    """
    fields = {
        "event_type": "performance",
        "metric_name": metric_name,
        "value": value,
        "unit": unit,
    }
    fields.update(kwargs)
    message = f"Performance metric: {metric_name} = {value} {unit}"
    logger.info(message, extra=fields)
# Global logger instance: a single module-level BlogWriterLogger, created at
# import time and used by the log_function_call decorator in this module.
blog_writer_logger = BlogWriterLogger()
def get_logger(name: str = "blog_writer"):
    """Return the shared logger with ``name`` bound as a context field."""
    bound_logger = logger.bind(name=name)
    return bound_logger
def log_function_call(func_name: str, **kwargs):
    """Decorator factory that logs the start, completion and failure of a call.

    Works for both coroutine functions and plain functions; the matching
    wrapper is chosen once, at decoration time. The wrapper preserves the
    decorated function's metadata (name, docstring, signature).

    Args:
        func_name: Operation name used in the emitted log entries.
        **kwargs: Extra fields attached to the "operation start" entry.

    Returns:
        A decorator. The wrapped callable returns the original result and
        re-raises any exception after logging it.
    """
    import asyncio
    import functools

    def decorator(func):
        def log_start():
            """Emit the start entry and return (start_time, correlation id)."""
            start_time = time.time()
            correlation_id_val = correlation_id.get('')
            blog_writer_logger.log_operation_start(
                func_name,
                function=func.__name__,
                correlation_id=correlation_id_val,
                **kwargs
            )
            return start_time, correlation_id_val

        def log_success(start_time, correlation_id_val):
            """Emit the completion entry with the elapsed time."""
            duration_ms = (time.time() - start_time) * 1000
            blog_writer_logger.log_operation_end(
                func_name,
                duration_ms,
                success=True,
                function=func.__name__,
                correlation_id=correlation_id_val
            )

        def log_failure(error, start_time, correlation_id_val):
            """Emit the error entry with the elapsed time."""
            duration_ms = (time.time() - start_time) * 1000
            blog_writer_logger.log_error(
                error,
                func_name,
                context={
                    "function": func.__name__,
                    "duration_ms": duration_ms,
                    "correlation_id": correlation_id_val
                }
            )

        # functools.wraps keeps __name__/__doc__/__wrapped__ intact so that
        # introspection of the decorated function keeps working.
        @functools.wraps(func)
        async def async_wrapper(*args, **func_kwargs):
            start_time, correlation_id_val = log_start()
            try:
                result = await func(*args, **func_kwargs)
                log_success(start_time, correlation_id_val)
                return result
            except Exception as e:
                log_failure(e, start_time, correlation_id_val)
                raise

        @functools.wraps(func)
        def sync_wrapper(*args, **func_kwargs):
            start_time, correlation_id_val = log_start()
            try:
                result = func(*args, **func_kwargs)
                log_success(start_time, correlation_id_val)
                return result
            except Exception as e:
                log_failure(e, start_time, correlation_id_val)
                raise

        # Return appropriate wrapper based on function type
        if asyncio.iscoroutinefunction(func):
            return async_wrapper
        return sync_wrapper

    return decorator

View File

@@ -16,6 +16,7 @@ from models.blog_models import (
GroundingSupport,
Citation,
)
from services.blog_writer.logger_config import blog_writer_logger, log_function_call
from .keyword_analyzer import KeywordAnalyzer
from .competitor_analyzer import CompetitorAnalyzer
@@ -32,6 +33,7 @@ class ResearchService:
self.content_angle_generator = ContentAngleGenerator()
self.data_filter = ResearchDataFilter()
@log_function_call("research_operation")
async def research(self, request: BlogResearchRequest) -> BlogResearchResponse:
"""
Stage 1: Research & Strategy (AI Orchestration)
@@ -47,6 +49,16 @@ class ResearchService:
industry = request.industry or (request.persona.industry if request.persona and request.persona.industry else "General")
target_audience = getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'
# Log research parameters
blog_writer_logger.log_operation_start(
"research",
topic=topic,
industry=industry,
target_audience=target_audience,
keywords=request.keywords,
keyword_count=len(request.keywords)
)
# Check cache first for exact keyword match
cached_result = research_cache.get_cached_result(
keywords=request.keywords,
@@ -56,10 +68,12 @@ class ResearchService:
if cached_result:
logger.info(f"Returning cached research result for keywords: {request.keywords}")
blog_writer_logger.log_operation_end("research", 0, success=True, cache_hit=True)
return BlogResearchResponse(**cached_result)
# Cache miss - proceed with API call
logger.info(f"Cache miss - making API call for keywords: {request.keywords}")
blog_writer_logger.log_operation_start("gemini_api_call", api_name="gemini_grounded", operation="research")
gemini = GeminiGroundedProvider()
# Single comprehensive research prompt - Gemini handles Google Search automatically
@@ -82,11 +96,23 @@ class ResearchService:
"""
# Single Gemini call with native Google Search grounding - no fallbacks
import time
api_start_time = time.time()
gemini_result = await gemini.generate_grounded_content(
prompt=research_prompt,
content_type="research",
max_tokens=2000
)
api_duration_ms = (time.time() - api_start_time) * 1000
# Log API call performance
blog_writer_logger.log_api_call(
"gemini_grounded",
"generate_grounded_content",
api_duration_ms,
token_usage=gemini_result.get("token_usage", {}),
content_length=len(gemini_result.get("content", ""))
)
# Extract sources from grounding metadata
sources = self._extract_sources_from_grounding(gemini_result)
@@ -105,6 +131,17 @@ class ResearchService:
suggested_angles = self.content_angle_generator.generate(content, topic, industry)
logger.info(f"Research completed successfully with {len(sources)} sources and {len(search_queries)} search queries")
# Log analysis results
blog_writer_logger.log_performance(
"research_analysis",
len(content),
"characters",
sources_count=len(sources),
search_queries_count=len(search_queries),
keyword_analysis_keys=len(keyword_analysis),
suggested_angles_count=len(suggested_angles)
)
# Create the response
response = BlogResearchResponse(
@@ -146,7 +183,47 @@ class ResearchService:
error_message = str(e)
logger.error(f"Research failed: {error_message}")
# Return a graceful failure response instead of raising
# Log error with full context
blog_writer_logger.log_error(
e,
"research",
context={
"topic": topic,
"keywords": request.keywords,
"industry": industry,
"target_audience": target_audience
}
)
# Import custom exceptions for better error handling
from services.blog_writer.exceptions import (
ResearchFailedException,
APIRateLimitException,
APITimeoutException,
ValidationException
)
# Determine if this is a retryable error
retry_suggested = True
user_message = "Research failed. Please try again with different keywords or check your internet connection."
if isinstance(e, APIRateLimitException):
retry_suggested = True
user_message = f"Rate limit exceeded. Please wait {e.context.get('retry_after', 60)} seconds before trying again."
elif isinstance(e, APITimeoutException):
retry_suggested = True
user_message = "Research request timed out. Please try again with a shorter query or check your internet connection."
elif isinstance(e, ValidationException):
retry_suggested = False
user_message = "Invalid research request. Please check your input parameters and try again."
elif "401" in error_message or "403" in error_message:
retry_suggested = False
user_message = "Authentication failed. Please check your API credentials."
elif "400" in error_message:
retry_suggested = False
user_message = "Invalid request. Please check your input parameters."
# Return a graceful failure response with enhanced error information
return BlogResearchResponse(
success=False,
sources=[],
@@ -155,9 +232,18 @@ class ResearchService:
suggested_angles=[],
search_widget="",
search_queries=[],
error_message=error_message
error_message=user_message,
retry_suggested=retry_suggested,
error_code=getattr(e, 'error_code', 'RESEARCH_FAILED'),
actionable_steps=getattr(e, 'actionable_steps', [
"Try with different keywords",
"Check your internet connection",
"Wait a few minutes and try again",
"Contact support if the issue persists"
])
)
@log_function_call("research_with_progress")
async def research_with_progress(self, request: BlogResearchRequest, task_id: str) -> BlogResearchResponse:
"""
Research method with progress updates for real-time feedback.
@@ -291,7 +377,47 @@ class ResearchService:
error_message = str(e)
logger.error(f"Research failed: {error_message}")
# Return a graceful failure response instead of raising
# Log error with full context
blog_writer_logger.log_error(
e,
"research",
context={
"topic": topic,
"keywords": request.keywords,
"industry": industry,
"target_audience": target_audience
}
)
# Import custom exceptions for better error handling
from services.blog_writer.exceptions import (
ResearchFailedException,
APIRateLimitException,
APITimeoutException,
ValidationException
)
# Determine if this is a retryable error
retry_suggested = True
user_message = "Research failed. Please try again with different keywords or check your internet connection."
if isinstance(e, APIRateLimitException):
retry_suggested = True
user_message = f"Rate limit exceeded. Please wait {e.context.get('retry_after', 60)} seconds before trying again."
elif isinstance(e, APITimeoutException):
retry_suggested = True
user_message = "Research request timed out. Please try again with a shorter query or check your internet connection."
elif isinstance(e, ValidationException):
retry_suggested = False
user_message = "Invalid research request. Please check your input parameters and try again."
elif "401" in error_message or "403" in error_message:
retry_suggested = False
user_message = "Authentication failed. Please check your API credentials."
elif "400" in error_message:
retry_suggested = False
user_message = "Invalid request. Please check your input parameters."
# Return a graceful failure response with enhanced error information
return BlogResearchResponse(
success=False,
sources=[],
@@ -300,7 +426,15 @@ class ResearchService:
suggested_angles=[],
search_widget="",
search_queries=[],
error_message=error_message
error_message=user_message,
retry_suggested=retry_suggested,
error_code=getattr(e, 'error_code', 'RESEARCH_FAILED'),
actionable_steps=getattr(e, 'actionable_steps', [
"Try with different keywords",
"Check your internet connection",
"Wait a few minutes and try again",
"Contact support if the issue persists"
])
)
def _extract_sources_from_grounding(self, gemini_result: Dict[str, Any]) -> List[ResearchSource]:

View File

@@ -0,0 +1,223 @@
"""
Enhanced Retry Utilities for Blog Writer
Provides advanced retry logic with exponential backoff, jitter, retry budgets,
and specific error code handling for different types of API failures.
"""
import asyncio
import random
import time
from typing import Callable, Any, Optional, Dict, List
from dataclasses import dataclass
from loguru import logger
from .exceptions import APIRateLimitException, APITimeoutException
@dataclass
class RetryConfig:
    """Tunable parameters that control retry behaviour."""

    # Maximum number of times the operation is attempted.
    max_attempts: int = 3
    # Delay in seconds before the first retry.
    base_delay: float = 1.0
    # Cap in seconds applied to the exponential delay.
    max_delay: float = 60.0
    # Multiplier applied per attempt when growing the delay.
    exponential_base: float = 2.0
    # Whether a random offset is added to each delay.
    jitter: bool = True
    max_total_time: float = 300.0  # 5 minutes max total time
    # Substrings (matched case-insensitively against the error text) that mark
    # an error as retryable; None selects the defaults set in __post_init__.
    retryable_errors: Optional[List[str]] = None

    def __post_init__(self):
        """Fill in the default retryable patterns when none were supplied."""
        if self.retryable_errors is not None:
            return
        self.retryable_errors = [
            "503", "502", "504",  # Server errors
            "429",  # Rate limit
            "timeout", "timed out",
            "connection", "network",
            "overloaded", "busy"
        ]
class RetryBudget:
    """Tracks elapsed time against a fixed total-time budget for retries.

    The clock starts when the instance is created. ``used_time`` holds the
    elapsed seconds as of the most recent ``can_retry()`` or
    ``remaining_time()`` call.
    """

    def __init__(self, max_total_time: float):
        """Start the budget clock.

        Args:
            max_total_time: Total number of seconds the retries may consume.
        """
        self.max_total_time = max_total_time
        # A monotonic clock is used so that wall-clock adjustments cannot
        # shrink or extend the budget.
        self.start_time = time.monotonic()
        self.used_time = 0.0

    def _refresh_used_time(self) -> float:
        """Recompute, store and return the elapsed time in seconds."""
        self.used_time = time.monotonic() - self.start_time
        return self.used_time

    def can_retry(self) -> bool:
        """Check if we can still retry within budget."""
        return self._refresh_used_time() < self.max_total_time

    def remaining_time(self) -> float:
        """Get remaining time in budget (never negative).

        The elapsed time is refreshed first, so time spent since the last
        ``can_retry()`` call (e.g. inside a failed attempt) is accounted for.
        """
        return max(0.0, self.max_total_time - self._refresh_used_time())
def is_retryable_error(error: Exception, retryable_errors: List[str]) -> bool:
    """Report whether the error text contains any retryable pattern.

    Matching is a case-insensitive substring test of each pattern against
    ``str(error)``.
    """
    message = str(error).lower()
    for pattern in retryable_errors:
        if pattern.lower() in message:
            return True
    return False
def calculate_delay(attempt: int, config: RetryConfig) -> float:
    """Calculate delay for retry attempt with exponential backoff and jitter.

    Args:
        attempt: Zero-based index of the attempt that just failed.
        config: Retry configuration supplying ``base_delay``,
            ``exponential_base``, ``max_delay`` and ``jitter``.

    Returns:
        Delay in seconds, always within ``[0, config.max_delay]``.
    """
    # Exponential backoff. Very large attempt numbers overflow float
    # exponentiation; such a delay is above any cap anyway.
    try:
        delay = config.base_delay * (config.exponential_base ** attempt)
    except OverflowError:
        delay = config.max_delay

    # Cap at max delay
    delay = min(delay, config.max_delay)

    # Add jitter to prevent thundering herd
    if config.jitter:
        jitter_range = delay * 0.1  # 10% jitter
        delay += random.uniform(-jitter_range, jitter_range)

    # Clamp again: positive jitter must not push the delay past the cap.
    return min(max(0, delay), config.max_delay)
async def retry_with_backoff(
    func: Callable,
    config: Optional[RetryConfig] = None,
    operation_name: str = "operation",
    context: Optional[Dict[str, Any]] = None
) -> Any:
    """
    Retry a function with enhanced backoff and budget management.

    Args:
        func: Zero-argument async callable to retry
        config: Retry configuration (defaults to ``RetryConfig()``)
        operation_name: Name of operation for logging
        context: Additional context passed to the raised API exceptions

    Returns:
        Function result

    Raises:
        APIRateLimitException: If the last error looks like a rate limit.
        APITimeoutException: If the last error looks like a timeout.
        Exception: The last exception otherwise, or a generic ``Exception``
            when no attempt was made at all.
    """
    config = config or RetryConfig()
    budget = RetryBudget(config.max_total_time)
    last_exception = None
    # Pre-seed the delay so the rate-limit hint below is defined even when the
    # loop ends before any backoff delay has been computed (single attempt,
    # non-retryable error, exhausted budget).
    delay = config.base_delay

    for attempt in range(config.max_attempts):
        # Check if we're still within budget
        if not budget.can_retry():
            logger.warning(f"Retry budget exceeded for {operation_name} after {budget.used_time:.2f}s")
            break

        try:
            # Execute the function
            result = await func()
        except Exception as e:
            last_exception = e

            # Check if this is the last attempt
            if attempt == config.max_attempts - 1:
                logger.error(f"{operation_name} failed after {config.max_attempts} attempts: {str(e)}")
                break

            # Check if error is retryable
            if not is_retryable_error(e, config.retryable_errors):
                logger.warning(f"{operation_name} failed with non-retryable error: {str(e)}")
                break

            # can_retry() refreshes the budget's elapsed time, so the
            # comparison below accounts for the time the failed attempt took.
            if not budget.can_retry():
                logger.warning(f"Retry budget exceeded for {operation_name} after {budget.used_time:.2f}s")
                break

            # Calculate delay and wait
            delay = calculate_delay(attempt, config)
            remaining_time = budget.remaining_time()

            # Don't wait longer than remaining budget
            if delay > remaining_time:
                logger.warning(f"Delay {delay:.2f}s exceeds remaining budget {remaining_time:.2f}s for {operation_name}")
                break

            logger.warning(
                f"{operation_name} attempt {attempt + 1} failed: {str(e)}. "
                f"Retrying in {delay:.2f}s (attempt {attempt + 2}/{config.max_attempts})"
            )
            await asyncio.sleep(delay)
        else:
            logger.info(f"{operation_name} succeeded on attempt {attempt + 1}")
            return result

    # If we get here, all retries failed
    if last_exception is None:
        raise Exception(f"{operation_name} failed after {config.max_attempts} attempts")

    # Enhance exception with retry context, keeping the original as __cause__
    error_str = str(last_exception)
    error_lower = error_str.lower()
    if "429" in error_str or "rate limit" in error_lower:
        raise APIRateLimitException(
            f"Rate limit exceeded after {config.max_attempts} attempts",
            retry_after=int(delay * 2),  # Suggest waiting longer
            context=context
        ) from last_exception
    if "timeout" in error_lower or "timed out" in error_lower:
        raise APITimeoutException(
            f"Request timed out after {config.max_attempts} attempts",
            timeout_seconds=int(config.max_total_time),
            context=context
        ) from last_exception

    raise last_exception
def retry_decorator(
    config: Optional[RetryConfig] = None,
    operation_name: Optional[str] = None
):
    """
    Decorator to add retry logic to async functions.

    The returned wrapper preserves the decorated function's metadata
    (name, docstring, signature).

    Args:
        config: Retry configuration
        operation_name: Name of operation for logging; defaults to the
            decorated function's ``__name__``
    """
    import functools

    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            op_name = operation_name or func.__name__
            # The lambda defers the call so each retry creates a new coroutine.
            return await retry_with_backoff(
                lambda: func(*args, **kwargs),
                config=config,
                operation_name=op_name
            )
        return wrapper
    return decorator
# Predefined retry configurations for different operation types.
# Each preset overrides the RetryConfig defaults for attempts, base/max delay
# and total time budget, and supplies its own explicit list of retryable
# error patterns instead of the default list.

# Research: 3 attempts, 2s base delay capped at 30s.
RESEARCH_RETRY_CONFIG = RetryConfig(
    max_attempts=3,
    base_delay=2.0,
    max_delay=30.0,
    max_total_time=180.0,  # 3 minutes for research
    retryable_errors=["503", "429", "timeout", "overloaded", "connection"]
)

# Outline: 2 attempts, 1.5s base delay capped at 20s.
OUTLINE_RETRY_CONFIG = RetryConfig(
    max_attempts=2,
    base_delay=1.5,
    max_delay=20.0,
    max_total_time=120.0,  # 2 minutes for outline
    retryable_errors=["503", "429", "timeout", "overloaded"]
)

# Content: 3 attempts, 1s base delay capped at 15s.
CONTENT_RETRY_CONFIG = RetryConfig(
    max_attempts=3,
    base_delay=1.0,
    max_delay=15.0,
    max_total_time=90.0,  # 1.5 minutes for content
    retryable_errors=["503", "429", "timeout", "overloaded"]
)

# SEO: 2 attempts, 1s base delay capped at 10s.
SEO_RETRY_CONFIG = RetryConfig(
    max_attempts=2,
    base_delay=1.0,
    max_delay=10.0,
    max_total_time=60.0,  # 1 minute for SEO
    retryable_errors=["503", "429", "timeout"]
)