Compare commits
21 Commits
alert-auto
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
19b4ac53fc | ||
|
|
ce9bf293ed | ||
|
|
d90d441019 | ||
|
|
63a0df2536 | ||
|
|
e54aaa7a3e | ||
|
|
b894bc0abb | ||
|
|
70542b32fc | ||
|
|
9a3d704c5c | ||
|
|
8699ffc27d | ||
|
|
259194c289 | ||
|
|
2f93ae4891 | ||
|
|
bf22a3d318 | ||
|
|
2a879a6e24 | ||
|
|
7749b4db0e | ||
|
|
cbace3b752 | ||
|
|
98d4ac6dbd | ||
|
|
55b7209554 | ||
|
|
57e46a20f8 | ||
|
|
ec2f9151b8 | ||
|
|
40516e5c79 | ||
|
|
923fa671fe |
68
.dockerignore
Normal file
68
.dockerignore
Normal file
@@ -0,0 +1,68 @@
|
||||
# NOTE: .dockerignore patterns are matched relative to the build-context root.
# A bare pattern such as `.env` or `*.pyc` only matches at the top level; a
# leading `**/` is required to match at any depth.

# Git
.git
.gitignore

# Node modules (rebuilt inside Docker)
frontend/node_modules

# Python cache (at any depth — backend/ is copied wholesale by the Dockerfile)
**/__pycache__
**/*.pyc
**/*.pyo
*.pyd
.Python
*.so
*.egg
*.egg-info
dist
build

# Virtual envs
.venv
venv/
ENV/

# IDE
.idea/
.vscode/
*.swp
*.swo

# OS
.DS_Store
Thumbs.db

# Docs & markdown (not needed in container)
docs/
docs-site/
*.md

# GitHub meta
.github/

# Frontend build is copied separately via --from
# so exclude the local build dir to keep context small
frontend/build/
frontend/.env
frontend/.env.local
frontend/.env.production

# Env files (may hold secrets). The `**/` forms also exclude nested files such
# as backend/.env, which the bare root-level patterns do not match.
.env
.env.*
**/.env
**/.env.*
# Explicit allow-list entry for the env template shipped with the backend
!backend/env_template.txt

# Test files
**/test/
**/tests/
*.test.py
*.spec.py

# Logs
*.log
logs/

# Temp
tmp/
temp/
*.tmp
|
||||
72
Dockerfile
Normal file
72
Dockerfile
Normal file
@@ -0,0 +1,72 @@
|
||||
# ============================================================
# ALwrity Dockerfile — for EasyPanel deployment
# ============================================================
# Stage 1: Build frontend
FROM node:20-alpine AS frontend-builder

WORKDIR /app/frontend

# Copy package manifests first so the dependency layer is cached independently
# of source changes (the trailing * makes package-lock.json optional)
COPY frontend/package.json frontend/package-lock.json* ./

# Install deps (--legacy-peer-deps needed for react-scripts 5)
RUN npm install --legacy-peer-deps

# Copy frontend source
COPY frontend/ ./

# Build static assets
RUN npm run build

# ============================================================
# Stage 2: Python backend
FROM python:3.11-slim AS backend

ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
# Port the server binds to; override at runtime (e.g. -e PORT=9000).
# The bind address and the health check below both read this value.
ENV PORT=8000

WORKDIR /app

# Install build deps for some Python packages
# (curl is also needed at runtime by the HEALTHCHECK below)
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    libpq-dev \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first (for caching)
COPY backend/requirements.txt .

# Install Python deps
RUN pip install --no-cache-dir -r requirements.txt

# Copy backend source
COPY backend/ ./backend/

# Copy frontend build artifacts from Stage 1
COPY --from=frontend-builder /app/frontend/build ./frontend/build

# Create workspace directories (created by start_alwrity_backend.py but ensure they exist)
RUN mkdir -p /app/lib/workspace/alwrity_content \
    /app/lib/workspace/alwrity_web_research \
    /app/lib/workspace/alwrity_prompts \
    /app/lib/workspace/alwrity_config

# Expose the default port (documentation only; matches the PORT default above)
EXPOSE 8000

# Health check (shell form, so ${PORT} is expanded inside the container)
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
    CMD curl -f http://localhost:${PORT}/health || exit 1

# Run with gunicorn + uvicorn workers (recommended for production).
# There is NO fallback to plain uvicorn: gunicorn and uvicorn must both be
# listed in backend/requirements.txt or the container will fail to start.
# `exec` replaces the wrapping shell so gunicorn runs as PID 1 and receives
# SIGTERM directly on `docker stop`.
# NOTE(review): backend modules appear to use top-level imports such as
# `services.*` and `api.*`; confirm that backend/app.py makes /app/backend
# importable when loaded as `backend.app` — otherwise PYTHONPATH must be set.
CMD exec python -m gunicorn backend.app:app \
    --worker-class uvicorn.workers.UvicornWorker \
    --bind 0.0.0.0:${PORT} \
    --workers 2 \
    --timeout 120 \
    --access-logfile - \
    --error-logfile - \
    --log-level info
|
||||
@@ -58,6 +58,21 @@ FEATURE_GROUPS: Dict[str, FeatureGroup] = {
|
||||
"api.blog_writer.seo_analysis:router",
|
||||
),
|
||||
),
|
||||
"backlinking": FeatureGroup(
|
||||
features=("backlinking",),
|
||||
routers=("routers.backlink_outreach:router",),
|
||||
),
|
||||
"linkedin": FeatureGroup(
|
||||
features=("linkedin",),
|
||||
routers=(
|
||||
"routers.linkedin:router",
|
||||
"api.linkedin_image_generation:router",
|
||||
),
|
||||
),
|
||||
"facebook": FeatureGroup(
|
||||
features=("facebook",),
|
||||
routers=("api.facebook_writer.routers:facebook_router",),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -67,5 +82,8 @@ PROFILE_GROUP_MAP: Dict[str, Tuple[str, ...]] = {
|
||||
"podcast": ("core", "podcast"),
|
||||
"youtube": ("core", "youtube"),
|
||||
"blog_writer": ("core", "blog_writer"),
|
||||
"backlinking": ("core", "backlinking"),
|
||||
"linkedin": ("core", "linkedin"),
|
||||
"facebook": ("core", "facebook"),
|
||||
"planning": ("core", "content_planning"),
|
||||
}
|
||||
|
||||
@@ -67,6 +67,7 @@ OPTIONAL_ROUTER_REGISTRY = [
|
||||
{"name": "oauth_token_monitoring", "module": "api.oauth_token_monitoring_routes", "attr": "router", "features": {"all", "core"}},
|
||||
{"name": "agents", "module": "api.agents_api", "attr": "router", "features": {"all"}},
|
||||
{"name": "today_workflow", "module": "api.today_workflow", "attr": "router", "features": {"all"}},
|
||||
{"name": "backlink_outreach", "module": "routers.backlink_outreach", "attr": "router", "features": {"all", "backlinking"}},
|
||||
]
|
||||
|
||||
OPTIONAL_MODULE_MATRIX = {
|
||||
|
||||
@@ -66,10 +66,12 @@ class RecommendationItem(BaseModel):
|
||||
|
||||
class SEOApplyRecommendationsRequest(BaseModel):
|
||||
title: str = Field(..., description="Current blog title")
|
||||
introduction: str | None = Field(default=None, description="Current blog introduction text")
|
||||
sections: List[Dict[str, Any]] = Field(..., description="Array of sections with id, heading, content")
|
||||
outline: List[Dict[str, Any]] = Field(default_factory=list, description="Outline structure for context")
|
||||
research: Dict[str, Any] = Field(default_factory=dict, description="Research data used for the blog")
|
||||
recommendations: List[RecommendationItem] = Field(..., description="Actionable recommendations to apply")
|
||||
competitive_advantage: str | None = Field(default=None, description="Selected competitive advantage for emphasis")
|
||||
persona: Dict[str, Any] = Field(default_factory=dict, description="Persona settings if available")
|
||||
tone: str | None = Field(default=None, description="Desired tone override")
|
||||
audience: str | None = Field(default=None, description="Target audience override")
|
||||
@@ -122,7 +124,7 @@ async def section_originality_tools(
|
||||
raise HTTPException(status_code=401, detail="User ID not found in authentication token")
|
||||
|
||||
from services.intelligence.sif_integration import SIFIntegrationService
|
||||
from services.intelligence.sif_agents import ContentGuardianAgent
|
||||
from services.intelligence.agents.specialized import ContentGuardianAgent
|
||||
|
||||
sif_service = SIFIntegrationService(user_id)
|
||||
intelligence = sif_service.intelligence_service
|
||||
@@ -687,9 +689,11 @@ async def get_section_continuity(section_id: str) -> Dict[str, Any]:
|
||||
|
||||
|
||||
@router.post("/flow-analysis/basic")
|
||||
async def analyze_flow_basic(request: Dict[str, Any]) -> Dict[str, Any]:
|
||||
async def analyze_flow_basic(request: Dict[str, Any], current_user: Dict[str, Any] = Depends(get_current_user)) -> Dict[str, Any]:
|
||||
"""Analyze flow metrics for entire blog using single AI call (cost-effective)."""
|
||||
try:
|
||||
user_id = str(current_user.get('id', '')) if current_user else None
|
||||
request['user_id'] = user_id
|
||||
result = await service.analyze_flow_basic(request)
|
||||
return result
|
||||
except Exception as e:
|
||||
@@ -698,9 +702,11 @@ async def analyze_flow_basic(request: Dict[str, Any]) -> Dict[str, Any]:
|
||||
|
||||
|
||||
@router.post("/flow-analysis/advanced")
|
||||
async def analyze_flow_advanced(request: Dict[str, Any]) -> Dict[str, Any]:
|
||||
async def analyze_flow_advanced(request: Dict[str, Any], current_user: Dict[str, Any] = Depends(get_current_user)) -> Dict[str, Any]:
|
||||
"""Analyze flow metrics for each section individually (detailed but expensive)."""
|
||||
try:
|
||||
user_id = str(current_user.get('id', '')) if current_user else None
|
||||
request['user_id'] = user_id
|
||||
result = await service.analyze_flow_advanced(request)
|
||||
return result
|
||||
except Exception as e:
|
||||
@@ -807,9 +813,12 @@ async def seo_metadata(
|
||||
|
||||
|
||||
# Publishing Endpoints
|
||||
# NOTE: Real publishing bypasses this stub. Frontend calls platform-specific
|
||||
# endpoints directly: /api/wix/publish and /api/wordpress/publish.
|
||||
# This endpoint is kept as a placeholder for the future unified publish flow.
|
||||
@router.post("/publish", response_model=BlogPublishResponse)
|
||||
async def publish(request: BlogPublishRequest) -> BlogPublishResponse:
|
||||
"""Publish the blog post to the specified platform."""
|
||||
"""Publish the blog post to the specified platform. [STUB - see note above]"""
|
||||
try:
|
||||
return await service.publish(request)
|
||||
except Exception as e:
|
||||
@@ -1208,6 +1217,9 @@ async def generate_introductions(
|
||||
class SaveCompleteBlogAssetRequest(BaseModel):
|
||||
title: str
|
||||
content: str
|
||||
platform: Optional[str] = None
|
||||
post_url: Optional[str] = None
|
||||
post_id: Optional[str] = None
|
||||
seo_title: Optional[str] = None
|
||||
meta_description: Optional[str] = None
|
||||
focus_keyword: Optional[str] = None
|
||||
@@ -1232,21 +1244,29 @@ async def save_complete_blog_asset(
|
||||
|
||||
full_content = f"# {request.title}\n\n{request.content}"
|
||||
|
||||
asset_id = save_and_track_text_content(
|
||||
db=db,
|
||||
user_id=user_id,
|
||||
content=full_content,
|
||||
source_module="blog_writer",
|
||||
title=f"Published Blog: {request.title[:60]}",
|
||||
description=request.meta_description or f"Complete published blog post: {request.title}",
|
||||
prompt=f"SEO Title: {request.seo_title or request.title}\nFocus Keyword: {request.focus_keyword or ''}",
|
||||
tags=["blog", "published"] + [t for t in (request.tags or []) if t],
|
||||
asset_metadata = {
|
||||
"status": "published",
|
||||
"focus_keyword": request.focus_keyword,
|
||||
"categories": request.categories,
|
||||
"word_count": len(full_content.split()),
|
||||
},
|
||||
}
|
||||
if request.platform:
|
||||
asset_metadata["platform"] = request.platform
|
||||
if request.post_url:
|
||||
asset_metadata["post_url"] = request.post_url
|
||||
if request.post_id:
|
||||
asset_metadata["post_id"] = request.post_id
|
||||
|
||||
asset_id = save_and_track_text_content(
|
||||
db=db,
|
||||
user_id=user_id,
|
||||
content=full_content,
|
||||
source_module="blog_writer",
|
||||
title=request.title[:100],
|
||||
description=request.meta_description or f"Complete published blog post: {request.title}",
|
||||
prompt=f"SEO Title: {request.seo_title or request.title}\nFocus Keyword: {request.focus_keyword or ''}",
|
||||
tags=["blog", "published"] + [t for t in (request.tags or []) if t],
|
||||
asset_metadata=asset_metadata,
|
||||
subdirectory="published",
|
||||
file_extension=".md"
|
||||
)
|
||||
@@ -1265,6 +1285,57 @@ async def save_complete_blog_asset(
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/publish-history")
async def get_publish_history(
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
    limit: int = 50,
    offset: int = 0,
) -> Dict[str, Any]:
    """Get publish history for the current user from the asset library.

    Queries blog-writer assets tagged ``"published"``, newest first, and maps
    each one to a flat history entry built from the asset's metadata.

    Args:
        current_user: Authenticated user payload; its ``id`` key is required.
        db: Database session used to build the ``ContentAssetService``.
        limit: Maximum number of entries to return (negative values become 0).
        offset: Number of entries to skip (negative values become 0).

    Returns:
        ``{"success": True, "entries": [...], "total": <int>}``.

    Raises:
        HTTPException: 401 when the user or user id is missing, 500 on any
            unexpected failure.
    """
    try:
        if not current_user:
            raise HTTPException(status_code=401, detail="Authentication required")

        # Convert to str only after the None check: str(None) is the truthy
        # string "None" and would otherwise slip past the emptiness test.
        raw_user_id = current_user.get('id')
        user_id = '' if raw_user_id is None else str(raw_user_id)
        if not user_id:
            raise HTTPException(status_code=401, detail="Invalid user ID in authentication token")

        # Negative paging values are meaningless; never forward them to the query.
        limit = max(limit, 0)
        offset = max(offset, 0)

        svc = ContentAssetService(db)
        assets, total = svc.get_user_assets(
            user_id=user_id,
            tags=["published"],
            source_module=AssetSource.BLOG_WRITER,
            sort_by="created_at",
            sort_order="desc",
            limit=limit,
            offset=offset,
        )

        entries = []
        for asset in assets:
            # asset_metadata may be NULL for assets saved without metadata.
            meta = asset.asset_metadata or {}
            entries.append({
                "asset_id": asset.id,
                "title": asset.title,
                "platform": meta.get("platform", "unknown"),
                "post_url": meta.get("post_url"),
                "post_id": meta.get("post_id"),
                "word_count": meta.get("word_count", 0),
                "focus_keyword": meta.get("focus_keyword"),
                "categories": meta.get("categories", []),
                "published_at": asset.created_at.isoformat() if asset.created_at else None,
            })

        return {"success": True, "entries": entries, "total": total}

    except HTTPException:
        # Preserve deliberate 4xx responses instead of wrapping them in a 500.
        raise
    except Exception as e:
        logger.error(f"Failed to get publish history: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
# ---------------------------------------
|
||||
# Blog Asset API (phase-by-phase saving via ContentAsset)
|
||||
# ---------------------------------------
|
||||
@@ -1412,7 +1483,11 @@ async def update_blog_asset(
|
||||
if val is not None:
|
||||
meta[field] = val
|
||||
|
||||
if meta.get("selected_title"):
|
||||
# Prefer seo_title from publish_data, then selected_title, then topic, then existing title
|
||||
publish_data = meta.get("publish_data") or {}
|
||||
if isinstance(publish_data, dict) and publish_data.get("seo_title"):
|
||||
new_title = publish_data["seo_title"]
|
||||
elif meta.get("selected_title"):
|
||||
new_title = meta["selected_title"]
|
||||
elif meta.get("topic"):
|
||||
new_title = meta["topic"]
|
||||
|
||||
@@ -28,6 +28,8 @@ class SEOAnalysisRequest(BaseModel):
|
||||
blog_content: str
|
||||
blog_title: Optional[str] = None
|
||||
research_data: Dict[str, Any]
|
||||
outline: Optional[List[Dict[str, Any]]] = None
|
||||
competitive_advantage: Optional[str] = None
|
||||
user_id: Optional[str] = None
|
||||
session_id: Optional[str] = None
|
||||
|
||||
@@ -109,7 +111,9 @@ async def analyze_blog_seo(
|
||||
blog_content=request.blog_content,
|
||||
research_data=request.research_data,
|
||||
blog_title=request.blog_title,
|
||||
user_id=user_id
|
||||
user_id=user_id,
|
||||
outline=request.outline,
|
||||
competitive_advantage=request.competitive_advantage,
|
||||
)
|
||||
|
||||
# Check for errors
|
||||
|
||||
@@ -344,6 +344,43 @@ async def update_asset(
|
||||
raise HTTPException(status_code=500, detail=f"Error updating asset: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/{asset_id}/content")
async def get_asset_content(
    asset_id: int,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
):
    """Serve the raw text content of a text asset by reading its file from disk.

    Args:
        asset_id: Identifier of the asset to read.
        db: Database session used to build the ``ContentAssetService``.
        current_user: Authenticated user payload; ``user_id`` is preferred,
            falling back to ``id``.

    Returns:
        ``{"success": True, "content": <file text decoded as UTF-8>}``.

    Raises:
        HTTPException: 401 without a user id; 404 when the asset, its recorded
            path, or the file itself is missing; 400 when the asset is not a
            text asset; 500 on any other failure (e.g. undecodable content).
    """
    try:
        user_id = current_user.get("user_id") or current_user.get("id")
        if not user_id:
            raise HTTPException(status_code=401, detail="User ID not found")

        service = ContentAssetService(db)
        # The lookup receives the user id together with the asset id.
        asset = service.get_asset_by_id(asset_id, user_id)
        if not asset:
            raise HTTPException(status_code=404, detail="Asset not found")

        if asset.asset_type != AssetType.TEXT:
            raise HTTPException(status_code=400, detail="Asset is not a text file")

        if not asset.file_path:
            raise HTTPException(status_code=404, detail="Asset file path not recorded")

        from pathlib import Path
        file_path = Path(asset.file_path)
        # is_file() rather than exists(): a directory at this path cannot be
        # read as text and should be reported as missing, not as a 500.
        if not file_path.is_file():
            raise HTTPException(status_code=404, detail="Asset file not found on disk")

        content = file_path.read_text(encoding="utf-8")
        return {"success": True, "content": content}

    except HTTPException:
        # Preserve deliberate 4xx responses instead of wrapping them in a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error reading asset content: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/statistics", response_model=Dict[str, Any])
|
||||
async def get_statistics(
|
||||
db: Session = Depends(get_db),
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
import os
|
||||
from fastapi import APIRouter, HTTPException, UploadFile, File, Depends
|
||||
from fastapi.responses import FileResponse
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Optional, Dict, Any
|
||||
import json
|
||||
import base64
|
||||
|
||||
# Import our LinkedIn image generation services
|
||||
from services.linkedin.image_generation import LinkedInImageGenerator, LinkedInImageStorage
|
||||
@@ -51,6 +53,23 @@ class ImageGenerationResponse(BaseModel):
|
||||
aspect_ratio: Optional[str] = None
|
||||
error: Optional[str] = None
|
||||
|
||||
class ImageEditRequest(BaseModel):
    """Request body for the ``/edit-image`` endpoint.

    The image to edit is supplied either by reference (``image_id``) or inline
    (``image_base64``); the endpoint checks ``image_id`` first and rejects the
    request when neither is provided.
    """
    # Base64-encoded bytes of the image to edit; used only when image_id is not set.
    image_base64: Optional[str] = None
    # ID of a previously stored image; when set, the image is fetched from storage.
    image_id: Optional[str] = None
    # Natural-language description of the desired edit; the endpoint rejects blank prompts.
    prompt: str
    # Free-form context; the endpoint reads the keys
    # 'style', 'content_type', 'topic' and 'industry' from it for stored metadata.
    content_context: Dict[str, Any]
|
||||
|
||||
class ImageEditResponse(BaseModel):
    """Response body for the ``/edit-image`` endpoint.

    On failure only ``success`` (False) and ``error`` are populated by the
    endpoint; the remaining fields keep their ``None`` defaults.
    """
    # True when the edit succeeded.
    success: bool
    # Base64-encoded bytes of the edited image.
    image_data: Optional[str] = None
    # ID of the stored edited image; None when storing it did not succeed.
    image_id: Optional[str] = None
    # Values copied from the image generator's result when it provides them.
    image_url: Optional[str] = None
    width: Optional[int] = None
    height: Optional[int] = None
    provider: Optional[str] = None
    model: Optional[str] = None
    # Human-readable failure reason; set when success is False.
    error: Optional[str] = None
|
||||
|
||||
@router.post("/generate-image-prompts", response_model=List[ImagePromptResponse])
|
||||
async def generate_image_prompts(request: ImagePromptRequest):
|
||||
"""
|
||||
@@ -89,7 +108,8 @@ async def generate_linkedin_image(
|
||||
# Use our LinkedIn image generator service
|
||||
image_result = await image_generator.generate_image(
|
||||
prompt=request.prompt,
|
||||
content_context=request.content_context
|
||||
content_context=request.content_context,
|
||||
user_id=user_id
|
||||
)
|
||||
|
||||
if image_result and image_result.get('success'):
|
||||
@@ -131,6 +151,99 @@ async def generate_linkedin_image(
|
||||
error=f"Failed to generate image: {str(e)}"
|
||||
)
|
||||
|
||||
@router.post("/edit-image", response_model=ImageEditResponse)
async def edit_linkedin_image(
    request: ImageEditRequest,
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    Edit a LinkedIn-optimized image using natural language.

    The source image is taken from storage when ``request.image_id`` is set,
    otherwise it is decoded from ``request.image_base64``. The edited image is
    returned base64-encoded and is also stored on a best-effort basis: a
    storage failure is logged and leaves ``image_id`` as None, but does not
    fail the request.

    Raises:
        HTTPException: 401 without a user id; 400 for a blank prompt, missing
            image input, or undecodable base64; 404 when ``image_id`` cannot
            be retrieved.

    Any other failure is reported as ``ImageEditResponse(success=False, ...)``.
    """
    try:
        user_id = current_user.get("id")
        if not user_id:
            raise HTTPException(status_code=401, detail="Authentication required")

        if not request.prompt or not request.prompt.strip():
            raise HTTPException(status_code=400, detail="Prompt is required for image editing")

        logger.info(f"Editing LinkedIn image with prompt: {request.prompt[:100]}... for user {user_id}")

        # Get input image bytes — from image_id (fetch from storage) or image_base64 (direct decode)
        if request.image_id:
            stored = await image_storage.retrieve_image(request.image_id, user_id)
            if not stored or not stored.get('success'):
                raise HTTPException(status_code=404, detail=f"Image not found: {request.image_id}")
            input_image_bytes = stored['image_data']
            logger.info(f"Fetched image {request.image_id} from storage ({len(input_image_bytes)} bytes)")
        elif request.image_base64:
            try:
                input_image_bytes = base64.b64decode(request.image_base64)
            except ValueError as decode_error:
                # binascii.Error is a ValueError subclass; malformed client
                # input is a 400, not a generic edit failure.
                raise HTTPException(
                    status_code=400,
                    detail="image_base64 is not valid base64 data"
                ) from decode_error
        else:
            raise HTTPException(status_code=400, detail="Either image_id or image_base64 is required")

        # Use LinkedIn image generator with common editing infrastructure
        image_result = await image_generator.edit_image(
            input_image_bytes=input_image_bytes,
            edit_prompt=request.prompt,
            content_context=request.content_context,
            user_id=user_id,
        )

        if not image_result or not image_result.get('success'):
            # image_result may be None; never call .get() on it unguarded.
            error_msg = (image_result or {}).get('error', 'Unknown error during image editing')
            logger.error(f"Image editing failed: {error_msg}")
            return ImageEditResponse(
                success=False,
                error=error_msg
            )

        image_b64 = base64.b64encode(image_result['image_data']).decode("utf-8")

        # Store the edited image — log but don't fail if storage has issues.
        # Both an exception and a None/unsuccessful result count as "not stored".
        new_image_id = None
        try:
            stored_result = await image_storage.store_image(
                image_data=image_result['image_data'],
                metadata={
                    'prompt': request.prompt,
                    'style': request.content_context.get('style', 'Edited'),
                    'content_type': request.content_context.get('content_type'),
                    'topic': request.content_context.get('topic'),
                    'industry': request.content_context.get('industry'),
                    'is_edit': True,
                    'original_prompt': request.prompt,
                    'source_image_id': request.image_id,
                },
                user_id=user_id
            )
        except Exception as storage_error:
            logger.warning(f"Edited image not stored: {storage_error}")
        else:
            if stored_result and stored_result.get('success'):
                new_image_id = stored_result.get('image_id')
                logger.info(f"Edited image stored with ID: {new_image_id}")
            else:
                reason = (stored_result or {}).get('error', 'unknown reason')
                logger.warning(f"Edited image not stored: {reason}")

        return ImageEditResponse(
            success=True,
            image_data=image_b64,
            image_id=new_image_id,
            image_url=image_result.get('image_url'),
            width=image_result.get('width'),
            height=image_result.get('height'),
            provider=image_result.get('provider'),
            model=image_result.get('model'),
        )

    except HTTPException:
        # Preserve deliberate 4xx responses.
        raise
    except Exception as e:
        logger.error(f"Error editing LinkedIn image: {str(e)}", exc_info=True)
        return ImageEditResponse(
            success=False,
            error=f"Failed to edit image: {str(e)}"
        )
|
||||
|
||||
|
||||
@router.get("/image-status/{image_id}")
|
||||
async def get_image_status(
|
||||
image_id: str,
|
||||
@@ -169,42 +282,23 @@ async def get_generated_image(
|
||||
current_user: Dict[str, Any] = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
Retrieve a generated image by ID
|
||||
Retrieve a generated image by ID.
|
||||
Returns the image file directly as a PNG response.
|
||||
"""
|
||||
try:
|
||||
user_id = current_user.get("id")
|
||||
image_result = await image_storage.retrieve_image(image_id, user_id)
|
||||
|
||||
if image_result.get('success') and 'image_data' in image_result:
|
||||
# Return as streaming response or raw bytes depending on frontend needs
|
||||
# For now returning the structure as before but image_data is bytes
|
||||
# Ideally this should be a Response object with image/png content type
|
||||
# But keeping consistency with existing return type structure for now if it was returning dict
|
||||
# Wait, retrieve_image returns dict with 'image_data' as bytes.
|
||||
# The original code returned: {"success": True, "image_data": image_data}
|
||||
# FastAPI handles bytes in JSON? No, it will fail serialization.
|
||||
# The previous implementation of retrieve_image (lines 190-195) returned bytes in a dict.
|
||||
# Unless FastAPI response model handles it, this might have been broken or handled specially.
|
||||
# Let's check imports.
|
||||
# It uses APIRouter.
|
||||
# If I return a dict with bytes, json serialization fails.
|
||||
# Maybe the original code expected base64 or it was just broken?
|
||||
# Or maybe image_data was not bytes?
|
||||
# In retrieve_image: with open(..., 'rb') as f: image_data = f.read() -> bytes.
|
||||
# So returning it in a dict will definitely fail JSON serialization.
|
||||
# I should probably return a Response or FileResponse, or base64 encode it.
|
||||
# But for now, I will just match the signature and pass user_id.
|
||||
# If it was broken before, I'm not fixing that unless asked, but I suspect it might be base64 in usage?
|
||||
# Let's look at `generate_linkedin_image` which returns `ImageGenerationResponse` with `image_url`.
|
||||
# `get_generated_image` returns a dict.
|
||||
# I will stick to passing user_id.
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"image_data": image_result['image_data'] # This might need base64 encoding if it's for JSON
|
||||
}
|
||||
if image_result.get('success') and image_result.get('image_path'):
|
||||
return FileResponse(
|
||||
path=image_result['image_path'],
|
||||
media_type="image/png",
|
||||
filename=f"{image_id}.png"
|
||||
)
|
||||
else:
|
||||
raise HTTPException(status_code=404, detail="Image not found")
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error retrieving image: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to retrieve image: {str(e)}")
|
||||
@@ -232,25 +326,42 @@ async def delete_generated_image(
|
||||
@router.get("/image-generation-health")
|
||||
async def health_check():
|
||||
"""
|
||||
Health check for image generation services
|
||||
Lightweight health check for image generation services.
|
||||
Verifies configuration and service availability without making API calls.
|
||||
"""
|
||||
try:
|
||||
# Test basic service functionality
|
||||
test_prompts = await prompt_generator.generate_three_prompts({
|
||||
'content_type': 'post',
|
||||
'topic': 'Test',
|
||||
'industry': 'Technology',
|
||||
'content': 'Test content for health check'
|
||||
})
|
||||
services = {}
|
||||
all_healthy = True
|
||||
|
||||
# Check API key configuration (no actual API call)
|
||||
image_api_key = api_key_manager.get_api_key("image_generation") or os.getenv("WAVESPEED_API_KEY") or os.getenv("HF_TOKEN")
|
||||
services["image_api_key_configured"] = bool(image_api_key)
|
||||
|
||||
# Check storage accessibility
|
||||
stats = await image_storage.get_storage_stats()
|
||||
storage_ok = stats.get('success', False)
|
||||
services["image_storage"] = "operational" if storage_ok else "unavailable"
|
||||
if storage_ok:
|
||||
services["storage_stats"] = {
|
||||
"total_images": stats.get('total_files', 0),
|
||||
"total_size_gb": stats.get('total_size_gb', 0),
|
||||
"limit_gb": stats.get('storage_limit_gb', 0),
|
||||
}
|
||||
|
||||
# Check prompt generator initialization
|
||||
prompt_ok = prompt_generator is not None and hasattr(prompt_generator, 'generate_three_prompts')
|
||||
services["prompt_generator"] = "operational" if prompt_ok else "unavailable"
|
||||
|
||||
# Check image generator initialization
|
||||
gen_ok = image_generator is not None and hasattr(image_generator, 'generate_image')
|
||||
services["image_generator"] = "operational" if gen_ok else "unavailable"
|
||||
|
||||
if not all(v == "operational" or v is True for v in services.values()):
|
||||
all_healthy = False
|
||||
|
||||
return {
|
||||
"status": "healthy",
|
||||
"services": {
|
||||
"prompt_generator": "operational",
|
||||
"image_generator": "operational",
|
||||
"image_storage": "operational"
|
||||
},
|
||||
"test_prompts_generated": len(test_prompts)
|
||||
"status": "healthy" if all_healthy else "degraded",
|
||||
"services": services
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Health check failed: {str(e)}")
|
||||
|
||||
@@ -1,10 +1,17 @@
|
||||
"""
|
||||
Onboarding Completion Service
|
||||
Handles the complex logic for completing the onboarding process.
|
||||
|
||||
Phase 1 fixes applied:
|
||||
- Single DB session with proper context manager (no SessionLocal bypass)
|
||||
- timezone-aware datetimes (datetime.now(timezone.utc))
|
||||
- Transactional task creation with partial failure reporting
|
||||
- Business-without-website users: SIF + Market Trends tasks created without website_url
|
||||
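- Idempotent task creation: upsert pattern (query-then-update-or-insert) for all tasks (note: not atomic — concurrent callers can still both insert unless a unique constraint prevents it)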
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime, timedelta, timezone
|
||||
import os
|
||||
from urllib.parse import urlparse
|
||||
from fastapi import HTTPException
|
||||
@@ -15,12 +22,13 @@ from services.database import get_session_for_user
|
||||
from services.persona_analysis_service import PersonaAnalysisService
|
||||
from services.research.research_persona_scheduler import schedule_research_persona_generation
|
||||
from services.persona.facebook.facebook_persona_scheduler import schedule_facebook_persona_generation
|
||||
from services.agent_activity_service import build_agent_event_payload
|
||||
|
||||
|
||||
class OnboardingCompletionService:
|
||||
"""Service for handling onboarding completion logic."""
|
||||
|
||||
def __init__(self):
|
||||
# Pre-requisite steps; step 6 is the finalization itself
|
||||
self.required_steps = [1, 2, 3, 4, 5]
|
||||
|
||||
def _normalize_competitor_analysis_for_deep_task(self, competitors: Any) -> List[Dict[str, Any]]:
|
||||
@@ -101,14 +109,30 @@ class OnboardingCompletionService:
|
||||
domain = domain[4:]
|
||||
return domain
|
||||
|
||||
@staticmethod
def _upsert_task(db, model_cls, user_id: str, filters: dict, defaults: dict):
    """Update the first row matching ``filters``, or stage a new one.

    This is a query-then-write upsert. It keeps repeated calls from creating
    duplicate rows, but it is NOT atomic: two concurrent callers can both see
    "no row" and both insert unless the table has a unique constraint.

    Args:
        db: Session-like object providing ``query()`` and ``add()``.
        model_cls: Mapped class to query and, if needed, instantiate.
        user_id: Not used by this helper; kept in the signature for callers.
        filters: Column/value pairs identifying the row (passed to ``filter_by``).
        defaults: Column/value pairs to set on the existing or new row.

    Returns:
        The updated existing row or the newly created row. Nothing is
        committed here; the caller owns the transaction.
    """
    existing = db.query(model_cls).filter_by(**filters).first()
    if existing:
        for key, value in defaults.items():
            setattr(existing, key, value)
        db.add(existing)
        return existing

    # Merge into one mapping so a key present in both dicts does not raise
    # TypeError (duplicate keyword argument); defaults win, matching the
    # update branch above.
    row = model_cls(**{**filters, **defaults})
    db.add(row)
    return row
|
||||
|
||||
async def complete_onboarding(self, current_user: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Complete the onboarding process with full validation."""
|
||||
"""Complete the onboarding process with full validation and task scheduling."""
|
||||
scheduled_tasks: List[str] = []
|
||||
failed_tasks: List[Dict[str, str]] = []
|
||||
|
||||
try:
|
||||
from services.onboarding.progress_service import OnboardingProgressService
|
||||
user_id = str(current_user.get('id'))
|
||||
progress_service = OnboardingProgressService()
|
||||
|
||||
# Strict DB-only validation now that step persistence is solid
|
||||
missing_steps = await self._validate_required_steps_database(user_id)
|
||||
if missing_steps:
|
||||
missing_steps_str = ", ".join(missing_steps)
|
||||
@@ -117,88 +141,79 @@ class OnboardingCompletionService:
|
||||
detail=f"Cannot complete onboarding. The following steps must be completed first: {missing_steps_str}"
|
||||
)
|
||||
|
||||
# Require API keys in DB for completion
|
||||
await self._validate_api_keys(user_id)
|
||||
|
||||
# Generate writing persona from onboarding data only if not already present
|
||||
persona_generated = await self._generate_persona_from_onboarding(user_id)
|
||||
|
||||
# Complete the onboarding process in database
|
||||
success = progress_service.complete_onboarding(user_id)
|
||||
if not success:
|
||||
raise HTTPException(status_code=500, detail="Failed to mark onboarding as complete")
|
||||
|
||||
# Schedule research persona generation 20 minutes after onboarding completion
|
||||
# ── APScheduler one-shot tasks (non-blocking) ───────────────────
|
||||
try:
|
||||
schedule_research_persona_generation(user_id, delay_minutes=20)
|
||||
logger.info(f"Scheduled research persona generation for user {user_id} (20 minutes after onboarding)")
|
||||
scheduled_tasks.append("research_persona")
|
||||
logger.info(f"Scheduled research persona generation for user {user_id} (20 min delay)")
|
||||
except Exception as e:
|
||||
# Non-critical: log but don't fail onboarding completion
|
||||
failed_tasks.append({"task": "research_persona", "error": str(e)})
|
||||
logger.warning(f"Failed to schedule research persona generation for user {user_id}: {e}")
|
||||
|
||||
# Schedule Facebook persona generation 20 minutes after onboarding completion
|
||||
try:
|
||||
schedule_facebook_persona_generation(user_id, delay_minutes=20)
|
||||
logger.info(f"Scheduled Facebook persona generation for user {user_id} (20 minutes after onboarding)")
|
||||
scheduled_tasks.append("facebook_persona")
|
||||
logger.info(f"Scheduled Facebook persona generation for user {user_id} (20 min delay)")
|
||||
except Exception as e:
|
||||
# Non-critical: log but don't fail onboarding completion
|
||||
failed_tasks.append({"task": "facebook_persona", "error": str(e)})
|
||||
logger.warning(f"Failed to schedule Facebook persona generation for user {user_id}: {e}")
|
||||
|
||||
# Create OAuth token monitoring tasks for connected platforms
|
||||
try:
|
||||
from services.progressive_setup_service import ProgressiveSetupService
|
||||
|
||||
# ── Local DB tasks — single session, proper context manager ──────
|
||||
db = get_session_for_user(user_id)
|
||||
try:
|
||||
# Initialize user environment (create workspace, setup features)
|
||||
# Progressive setup (workspace, features)
|
||||
try:
|
||||
from services.progressive_setup_service import ProgressiveSetupService
|
||||
setup_service = ProgressiveSetupService(db)
|
||||
setup_service.initialize_user_environment(user_id)
|
||||
logger.info(f"Initialized user environment for {user_id} on onboarding completion")
|
||||
logger.info(f"Initialized user environment for {user_id}")
|
||||
except Exception as e:
|
||||
failed_tasks.append({"task": "progressive_setup", "error": str(e)})
|
||||
logger.warning(f"Failed to initialize user environment for {user_id}: {e}")
|
||||
|
||||
# OAuth token monitoring
|
||||
try:
|
||||
from services.oauth_token_monitoring_service import create_oauth_monitoring_tasks
|
||||
monitoring_tasks = create_oauth_monitoring_tasks(user_id, db)
|
||||
logger.info(
|
||||
f"Created {len(monitoring_tasks)} OAuth token monitoring tasks for user {user_id} "
|
||||
f"on onboarding completion"
|
||||
)
|
||||
finally:
|
||||
db.close()
|
||||
scheduled_tasks.append("oauth_monitoring")
|
||||
logger.info(f"Created {len(monitoring_tasks)} OAuth monitoring tasks for user {user_id}")
|
||||
except Exception as e:
|
||||
# Non-critical: log but don't fail onboarding completion
|
||||
logger.warning(f"Failed to create OAuth token monitoring tasks for user {user_id}: {e}")
|
||||
failed_tasks.append({"task": "oauth_monitoring", "error": str(e)})
|
||||
logger.warning(f"Failed to create OAuth monitoring tasks for user {user_id}: {e}")
|
||||
|
||||
# Schedule website analysis task creation 5 minutes after onboarding completion
|
||||
# Website analysis monitoring (APScheduler one-shot, 5 min delay)
|
||||
try:
|
||||
from services.website_analysis_monitoring_service import schedule_website_analysis_task_creation
|
||||
schedule_website_analysis_task_creation(user_id=user_id, delay_minutes=5)
|
||||
logger.info(
|
||||
f"Scheduled website analysis task creation for user {user_id} "
|
||||
f"(5 minutes after onboarding completion)"
|
||||
)
|
||||
scheduled_tasks.append("website_analysis")
|
||||
logger.info(f"Scheduled website analysis task for user {user_id} (5 min delay)")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to schedule website analysis task creation for user {user_id}: {e}")
|
||||
failed_tasks.append({"task": "website_analysis", "error": str(e)})
|
||||
logger.warning(f"Failed to schedule website analysis task for user {user_id}: {e}")
|
||||
|
||||
# ── DB-backed scheduled tasks (single transaction) ───────────
|
||||
now = datetime.now(timezone.utc)
|
||||
next_execution = now + timedelta(minutes=5)
|
||||
|
||||
|
||||
# Schedule onboarding full-site SEO audit (non-blocking) ~10 minutes after completion
|
||||
try:
|
||||
from services.database import SessionLocal
|
||||
from models.website_analysis_monitoring_models import (
|
||||
OnboardingFullWebsiteAnalysisTask,
|
||||
DeepCompetitorAnalysisTask,
|
||||
SIFIndexingTask,
|
||||
MarketTrendsTask
|
||||
)
|
||||
from api.content_planning.services.content_strategy.onboarding import OnboardingDataIntegrationService
|
||||
|
||||
db = SessionLocal()
|
||||
try:
|
||||
integration_service = OnboardingDataIntegrationService()
|
||||
integrated_data = integration_service.get_integrated_data_sync(user_id, db)
|
||||
website_analysis = integrated_data.get('website_analysis', {}) if integrated_data else {}
|
||||
website_url = website_analysis.get('website_url')
|
||||
website_analysis = integrated_data.get('website_analysis', {}) if isinstance(integrated_data, dict) else {}
|
||||
website_url = (website_analysis.get('website_url') or '').strip() or None
|
||||
|
||||
if not website_url:
|
||||
try:
|
||||
@@ -209,161 +224,128 @@ class OnboardingCompletionService:
|
||||
WebsiteAnalysis.session_id == session_id_int
|
||||
).order_by(WebsiteAnalysis.created_at.desc()).first()
|
||||
if analysis and analysis.website_url:
|
||||
website_url = analysis.website_url
|
||||
website_url = analysis.website_url.strip() or None
|
||||
except Exception:
|
||||
website_url = None
|
||||
|
||||
# --- Tasks that require website_url ---
|
||||
if website_url:
|
||||
# 1. Schedule Full Site SEO Audit
|
||||
next_execution = datetime.utcnow() + timedelta(minutes=5)
|
||||
existing = db.query(OnboardingFullWebsiteAnalysisTask).filter(
|
||||
OnboardingFullWebsiteAnalysisTask.user_id == user_id,
|
||||
OnboardingFullWebsiteAnalysisTask.website_url == website_url
|
||||
).first()
|
||||
|
||||
payload = {
|
||||
# 1. Full-Site SEO Audit
|
||||
try:
|
||||
payload_audit = {
|
||||
'website_url': website_url,
|
||||
'max_urls': 500,
|
||||
'created_from': 'onboarding_completion'
|
||||
}
|
||||
|
||||
if existing:
|
||||
existing.status = 'active'
|
||||
existing.next_execution = next_execution
|
||||
existing.payload = payload
|
||||
db.add(existing)
|
||||
else:
|
||||
db.add(OnboardingFullWebsiteAnalysisTask(
|
||||
self._upsert_task(
|
||||
db, OnboardingFullWebsiteAnalysisTask,
|
||||
user_id=user_id,
|
||||
website_url=website_url,
|
||||
status='active',
|
||||
next_execution=next_execution,
|
||||
payload=payload
|
||||
))
|
||||
|
||||
# 2. Schedule SIF Indexing Task (Metadata + Content)
|
||||
# Runs 5 mins after onboarding, then recurring every 48h
|
||||
existing_sif = db.query(SIFIndexingTask).filter(
|
||||
SIFIndexingTask.user_id == user_id,
|
||||
SIFIndexingTask.website_url == website_url
|
||||
).first()
|
||||
filters={"user_id": user_id, "website_url": website_url},
|
||||
defaults={
|
||||
"status": "active",
|
||||
"next_execution": next_execution,
|
||||
"payload": payload_audit,
|
||||
}
|
||||
)
|
||||
scheduled_tasks.append("full_site_seo_audit")
|
||||
logger.info(f"Scheduled full-site SEO audit for user {user_id} ({website_url})")
|
||||
except Exception as e:
|
||||
failed_tasks.append({"task": "full_site_seo_audit", "error": str(e)})
|
||||
logger.warning(f"Failed to schedule full-site SEO audit for user {user_id}: {e}")
|
||||
|
||||
# 2. SIF Indexing (with website_url)
|
||||
try:
|
||||
payload_sif = {
|
||||
'website_url': website_url,
|
||||
'mode': 'initial_indexing',
|
||||
'created_from': 'onboarding_completion'
|
||||
}
|
||||
|
||||
if existing_sif:
|
||||
existing_sif.status = 'active'
|
||||
existing_sif.next_execution = next_execution
|
||||
existing_sif.frequency_hours = 48
|
||||
existing_sif.payload = payload_sif
|
||||
db.add(existing_sif)
|
||||
else:
|
||||
db.add(SIFIndexingTask(
|
||||
self._upsert_task(
|
||||
db, SIFIndexingTask,
|
||||
user_id=user_id,
|
||||
website_url=website_url,
|
||||
status='active',
|
||||
next_execution=next_execution,
|
||||
frequency_hours=48,
|
||||
payload=payload_sif
|
||||
))
|
||||
|
||||
logger.info(
|
||||
f"Scheduled SIF indexing task for user {user_id} "
|
||||
f"({website_url}) at {next_execution.isoformat()}"
|
||||
filters={"user_id": user_id, "website_url": website_url},
|
||||
defaults={
|
||||
"status": "active",
|
||||
"next_execution": next_execution,
|
||||
"frequency_hours": 48,
|
||||
"payload": payload_sif,
|
||||
}
|
||||
)
|
||||
scheduled_tasks.append("sif_indexing")
|
||||
logger.info(f"Scheduled SIF indexing for user {user_id} ({website_url})")
|
||||
except Exception as e:
|
||||
failed_tasks.append({"task": "sif_indexing", "error": str(e)})
|
||||
logger.warning(f"Failed to schedule SIF indexing for user {user_id}: {e}")
|
||||
|
||||
# 3. Schedule Market Trends Task (Google Trends) every 72h
|
||||
existing_trends = db.query(MarketTrendsTask).filter(
|
||||
MarketTrendsTask.user_id == user_id,
|
||||
MarketTrendsTask.website_url == website_url
|
||||
).first()
|
||||
|
||||
# 3. Market Trends (with website_url)
|
||||
try:
|
||||
payload_trends = {
|
||||
"website_url": website_url,
|
||||
"geo": "US",
|
||||
"timeframe": "today 12-m",
|
||||
"created_from": "onboarding_completion"
|
||||
}
|
||||
|
||||
if existing_trends:
|
||||
existing_trends.status = "active"
|
||||
existing_trends.next_execution = next_execution
|
||||
existing_trends.frequency_hours = 72
|
||||
existing_trends.payload = payload_trends
|
||||
db.add(existing_trends)
|
||||
else:
|
||||
db.add(MarketTrendsTask(
|
||||
self._upsert_task(
|
||||
db, MarketTrendsTask,
|
||||
user_id=user_id,
|
||||
website_url=website_url,
|
||||
status="active",
|
||||
next_execution=next_execution,
|
||||
frequency_hours=72,
|
||||
payload=payload_trends
|
||||
))
|
||||
|
||||
db.commit()
|
||||
logger.info(
|
||||
f"Scheduled onboarding full-site SEO audit for user {user_id} "
|
||||
f"({website_url}) at {next_execution.isoformat()}"
|
||||
filters={"user_id": user_id, "website_url": website_url},
|
||||
defaults={
|
||||
"status": "active",
|
||||
"next_execution": next_execution,
|
||||
"frequency_hours": 72,
|
||||
"payload": payload_trends,
|
||||
}
|
||||
)
|
||||
scheduled_tasks.append("market_trends")
|
||||
logger.info(f"Scheduled market trends for user {user_id} ({website_url})")
|
||||
except Exception as e:
|
||||
failed_tasks.append({"task": "market_trends", "error": str(e)})
|
||||
logger.warning(f"Failed to schedule market trends for user {user_id}: {e}")
|
||||
|
||||
# 4. Deep Competitor Analysis
|
||||
try:
|
||||
research_prefs = integrated_data.get("research_preferences", {}) if isinstance(integrated_data, dict) else {}
|
||||
research_competitors = research_prefs.get("competitors") if isinstance(research_prefs, dict) else None
|
||||
|
||||
competitor_analysis = integrated_data.get("competitor_analysis") if isinstance(integrated_data, dict) else None
|
||||
normalized_fallback_competitors = self._normalize_competitor_analysis_for_deep_task(competitor_analysis)
|
||||
normalized_fallback = self._normalize_competitor_analysis_for_deep_task(competitor_analysis)
|
||||
|
||||
selected_source = "research_preferences"
|
||||
competitors = research_competitors
|
||||
if not isinstance(competitors, list) or len(competitors) == 0:
|
||||
competitors = normalized_fallback_competitors
|
||||
competitors = normalized_fallback
|
||||
selected_source = "competitor_analysis"
|
||||
|
||||
logger.info(
|
||||
f"Deep competitor analysis source stats for user {user_id}: "
|
||||
f"Deep competitor analysis sources for user {user_id}: "
|
||||
f"research_preferences={len(research_competitors) if isinstance(research_competitors, list) else 0}, "
|
||||
f"competitor_analysis={len(normalized_fallback_competitors)}"
|
||||
f"competitor_analysis={len(normalized_fallback)}"
|
||||
)
|
||||
|
||||
if isinstance(competitors, list) and len(competitors) > 0:
|
||||
existing_deep = db.query(DeepCompetitorAnalysisTask).filter(
|
||||
DeepCompetitorAnalysisTask.user_id == user_id,
|
||||
DeepCompetitorAnalysisTask.website_url == website_url
|
||||
).first()
|
||||
|
||||
payload_deep = {
|
||||
"website_url": website_url,
|
||||
"competitors": competitors,
|
||||
"max_competitors": 25,
|
||||
"max_competitors": min(len(competitors), 10),
|
||||
"crawl_concurrency": 4,
|
||||
"mode": "strategic_insights", # Enable recurring weekly strategic insights
|
||||
"mode": "strategic_insights",
|
||||
"baseline_updated_at": website_analysis.get("updated_at") if isinstance(website_analysis, dict) else None,
|
||||
"created_from": "onboarding_completion"
|
||||
}
|
||||
|
||||
if existing_deep:
|
||||
existing_deep.status = "active"
|
||||
existing_deep.next_execution = next_execution
|
||||
existing_deep.payload = payload_deep
|
||||
db.add(existing_deep)
|
||||
else:
|
||||
db.add(DeepCompetitorAnalysisTask(
|
||||
self._upsert_task(
|
||||
db, DeepCompetitorAnalysisTask,
|
||||
user_id=user_id,
|
||||
website_url=website_url,
|
||||
status="active",
|
||||
next_execution=next_execution,
|
||||
payload=payload_deep
|
||||
))
|
||||
|
||||
db.commit()
|
||||
filters={"user_id": user_id, "website_url": website_url},
|
||||
defaults={
|
||||
"status": "active",
|
||||
"next_execution": next_execution,
|
||||
"payload": payload_deep,
|
||||
}
|
||||
)
|
||||
scheduled_tasks.append("deep_competitor_analysis")
|
||||
logger.info(
|
||||
f"Scheduled deep competitor analysis for user {user_id} "
|
||||
f"({website_url}) at {next_execution.isoformat()} with {len(competitors)} competitors "
|
||||
f"from source={selected_source}"
|
||||
f"({website_url}) with {len(competitors)} competitors from source={selected_source}"
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
@@ -371,22 +353,102 @@ class OnboardingCompletionService:
|
||||
f"no competitors available from research_preferences or competitor_analysis"
|
||||
)
|
||||
except Exception as e:
|
||||
failed_tasks.append({"task": "deep_competitor_analysis", "error": str(e)})
|
||||
logger.warning(f"Failed to schedule deep competitor analysis for user {user_id}: {e}")
|
||||
|
||||
else:
|
||||
# --- No website URL: still schedule SIF + Market Trends (business-without-website) ---
|
||||
logger.warning(
|
||||
f"Could not schedule onboarding full-site SEO audit for user {user_id}: "
|
||||
f"website_url missing"
|
||||
f"No website_url for user {user_id}: scheduling SIF indexing and Market Trends without website URL, "
|
||||
f"skipping SEO audit and deep competitor analysis"
|
||||
)
|
||||
|
||||
try:
|
||||
payload_sif_no_url = {
|
||||
'mode': 'initial_indexing',
|
||||
'created_from': 'onboarding_completion_no_website'
|
||||
}
|
||||
self._upsert_task(
|
||||
db, SIFIndexingTask,
|
||||
user_id=user_id,
|
||||
filters={"user_id": user_id, "website_url": None},
|
||||
defaults={
|
||||
"status": "active",
|
||||
"next_execution": next_execution,
|
||||
"frequency_hours": 48,
|
||||
"payload": payload_sif_no_url,
|
||||
}
|
||||
)
|
||||
scheduled_tasks.append("sif_indexing_no_url")
|
||||
logger.info(f"Scheduled SIF indexing (no website) for user {user_id}")
|
||||
except Exception as e:
|
||||
failed_tasks.append({"task": "sif_indexing_no_url", "error": str(e)})
|
||||
logger.warning(f"Failed to schedule SIF indexing (no website) for user {user_id}: {e}")
|
||||
|
||||
try:
|
||||
payload_trends_no_url = {
|
||||
"geo": "US",
|
||||
"timeframe": "today 12-m",
|
||||
"created_from": "onboarding_completion_no_website"
|
||||
}
|
||||
self._upsert_task(
|
||||
db, MarketTrendsTask,
|
||||
user_id=user_id,
|
||||
filters={"user_id": user_id, "website_url": None},
|
||||
defaults={
|
||||
"status": "active",
|
||||
"next_execution": next_execution,
|
||||
"frequency_hours": 72,
|
||||
"payload": payload_trends_no_url,
|
||||
}
|
||||
)
|
||||
scheduled_tasks.append("market_trends_no_url")
|
||||
logger.info(f"Scheduled market trends (no website) for user {user_id}")
|
||||
except Exception as e:
|
||||
failed_tasks.append({"task": "market_trends_no_url", "error": str(e)})
|
||||
logger.warning(f"Failed to schedule market trends (no website) for user {user_id}: {e}")
|
||||
|
||||
db.commit()
|
||||
except Exception as e:
|
||||
db.rollback()
|
||||
failed_tasks.append({"task": "db_scheduled_tasks", "error": str(e)})
|
||||
logger.error(f"Failed to create DB tasks for user {user_id}: {e}")
|
||||
finally:
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to schedule onboarding full-site SEO audit for user {user_id}: {e}")
|
||||
|
||||
try:
|
||||
from services.agent_activity_service import AgentActivityService
|
||||
activity_db = get_session_for_user(user_id)
|
||||
activity_svc = AgentActivityService(activity_db, user_id)
|
||||
task_summary = ", ".join(scheduled_tasks) if scheduled_tasks else "none"
|
||||
fail_summary = ", ".join(t.get("task", "?") for t in failed_tasks) if failed_tasks else "none"
|
||||
activity_svc.log_event(
|
||||
event_type="onboarding_completed",
|
||||
severity="info",
|
||||
message=f"Onboarding completed. Scheduled: {task_summary}. Failed: {fail_summary}.",
|
||||
payload=build_agent_event_payload(
|
||||
phase="onboarding",
|
||||
step="completion",
|
||||
progress_percent=100.0,
|
||||
output_summary=f"Scheduled {len(scheduled_tasks)} task(s)",
|
||||
metadata={
|
||||
"scheduled_tasks": scheduled_tasks,
|
||||
"failed_tasks": failed_tasks if failed_tasks else [],
|
||||
"persona_generated": persona_generated,
|
||||
},
|
||||
),
|
||||
)
|
||||
activity_db.close()
|
||||
except Exception as act_err:
|
||||
logger.warning(f"Failed to log onboarding_completed event for user {user_id}: {act_err}")
|
||||
|
||||
return {
|
||||
"message": "Onboarding completed successfully",
|
||||
"completed_at": datetime.now().isoformat(),
|
||||
"completed_at": datetime.now(timezone.utc).isoformat(),
|
||||
"completion_percentage": 100.0,
|
||||
"persona_generated": persona_generated
|
||||
"persona_generated": persona_generated,
|
||||
"scheduled_tasks": scheduled_tasks,
|
||||
"failed_tasks": failed_tasks if failed_tasks else None,
|
||||
}
|
||||
|
||||
except HTTPException:
|
||||
@@ -400,12 +462,12 @@ class OnboardingCompletionService:
|
||||
missing_steps = []
|
||||
try:
|
||||
db = get_session_for_user(user_id)
|
||||
try:
|
||||
integration_service = OnboardingDataIntegrationService()
|
||||
|
||||
logger.info(f"Validating steps for user {user_id}")
|
||||
|
||||
integrated_data = await integration_service.process_onboarding_data(user_id, db)
|
||||
db.close()
|
||||
|
||||
from services.onboarding.progress_service import OnboardingProgressService
|
||||
progress_service = OnboardingProgressService()
|
||||
@@ -417,7 +479,6 @@ class OnboardingCompletionService:
|
||||
|
||||
if step_num == 1:
|
||||
api_keys_data = integrated_data.get('api_keys_data', {})
|
||||
logger.info(f"Step 1 - API Keys: {api_keys_data}")
|
||||
step_completed = bool(
|
||||
api_keys_data.get('openai_api_key') or
|
||||
api_keys_data.get('anthropic_api_key') or
|
||||
@@ -433,48 +494,40 @@ class OnboardingCompletionService:
|
||||
)
|
||||
if has_global_providers:
|
||||
step_completed = True
|
||||
logger.info(f"Step 1 completed: {step_completed}")
|
||||
elif step_num == 2:
|
||||
website = integrated_data.get('website_analysis', {})
|
||||
logger.info(f"Step 2 - Website Analysis: {website}")
|
||||
step_completed = bool(website and (website.get('website_url') or website.get('writing_style')))
|
||||
logger.info(f"Step 2 completed: {step_completed}")
|
||||
elif step_num == 3:
|
||||
research = integrated_data.get('research_preferences', {})
|
||||
logger.info(f"Step 3 - Research Preferences: {research}")
|
||||
step_completed = bool(research and (research.get('research_depth') or research.get('content_types')))
|
||||
logger.info(f"Step 3 completed: {step_completed}")
|
||||
elif step_num == 4:
|
||||
persona = integrated_data.get('persona_data', {})
|
||||
logger.info(f"Step 4 - Persona Data: {persona}")
|
||||
step_completed = bool(persona and (persona.get('corePersona') or persona.get('platformPersonas')))
|
||||
if not step_completed:
|
||||
website = integrated_data.get('website_analysis', {})
|
||||
research = integrated_data.get('research_preferences', {})
|
||||
basic_ready = bool(
|
||||
website and (website.get('website_url') or website.get('writing_style'))
|
||||
) and bool(research)
|
||||
if basic_ready:
|
||||
step_completed = True
|
||||
logger.info(f"Step 4 completed: {step_completed}")
|
||||
logger.warning(
|
||||
f"Step 4 incomplete for user {user_id}: no persona data found. "
|
||||
f"Step will be auto-passed only if user has explicitly reached step 4."
|
||||
)
|
||||
elif step_num == 5:
|
||||
step_completed = True
|
||||
logger.info(f"Step 5 completed: {step_completed}")
|
||||
integrations_complete = bool(integrated_data.get('integrations'))
|
||||
step_completed = integrations_complete or True
|
||||
if step_completed and not integrations_complete:
|
||||
logger.info(f"Step 5 auto-passed for user {user_id}: integrations are optional")
|
||||
|
||||
if not step_completed and current_step >= step_num:
|
||||
step_completed = True
|
||||
logger.info(
|
||||
f"Step {step_num} marked completed based on progress service (current_step={current_step})"
|
||||
)
|
||||
|
||||
if not step_completed:
|
||||
missing_steps.append(f"Step {step_num}")
|
||||
|
||||
logger.info(f"Missing steps: {missing_steps}")
|
||||
logger.info(f"Missing steps for user {user_id}: {missing_steps}")
|
||||
return missing_steps
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error validating required steps: {e}")
|
||||
logger.error(f"Error validating required steps for user {user_id}: {e}")
|
||||
return ["Validation error"]
|
||||
|
||||
async def _validate_api_keys(self, user_id: str):
|
||||
@@ -505,9 +558,7 @@ class OnboardingCompletionService:
|
||||
os.getenv("GEMINI_API_KEY")
|
||||
)
|
||||
|
||||
has_keys = has_user_keys or has_env_keys
|
||||
|
||||
if not has_keys:
|
||||
if not (has_user_keys or has_env_keys):
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Cannot complete onboarding. At least one AI provider API key must be configured in your account."
|
||||
@@ -521,8 +572,9 @@ class OnboardingCompletionService:
|
||||
)
|
||||
|
||||
async def _generate_persona_from_onboarding(self, user_id: str) -> bool:
|
||||
"""Generate writing persona from onboarding data."""
|
||||
"""Generate writing persona from onboarding data (fire-and-forget with timeout)."""
|
||||
try:
|
||||
import asyncio
|
||||
persona_service = PersonaAnalysisService()
|
||||
|
||||
try:
|
||||
@@ -531,17 +583,27 @@ class OnboardingCompletionService:
|
||||
logger.info("Persona already exists for user %s; skipping regeneration during completion", user_id)
|
||||
return False
|
||||
except Exception:
|
||||
# Non-fatal; proceed to attempt generation
|
||||
pass
|
||||
|
||||
persona_result = persona_service.generate_persona_from_onboarding(user_id)
|
||||
try:
|
||||
persona_result = await asyncio.wait_for(
|
||||
asyncio.get_event_loop().run_in_executor(
|
||||
None,
|
||||
persona_service.generate_persona_from_onboarding,
|
||||
user_id
|
||||
),
|
||||
timeout=30.0
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"Persona generation timed out (30s) for user {user_id}; will be generated by scheduled task")
|
||||
return False
|
||||
|
||||
if "error" not in persona_result:
|
||||
logger.info(f"✅ Writing persona generated during onboarding completion: {persona_result.get('persona_id')}")
|
||||
logger.info(f"Writing persona generated during onboarding completion: {persona_result.get('persona_id')}")
|
||||
return True
|
||||
else:
|
||||
logger.warning(f"⚠️ Persona generation failed during onboarding: {persona_result['error']}")
|
||||
logger.warning(f"Persona generation failed during onboarding: {persona_result['error']}")
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ Non-critical error generating persona during onboarding: {str(e)}")
|
||||
logger.warning(f"Non-critical error generating persona during onboarding: {str(e)}")
|
||||
return False
|
||||
@@ -50,22 +50,40 @@ class OnboardingControlService:
|
||||
db.close()
|
||||
|
||||
async def reset_onboarding(self, current_user: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Reset the onboarding progress for a specific user."""
|
||||
"""Reset the onboarding progress for a specific user and cancel scheduled tasks."""
|
||||
try:
|
||||
from services.onboarding.progress_service import OnboardingProgressService
|
||||
user_id = str(current_user.get('clerk_user_id') or current_user.get('id'))
|
||||
progress_service = OnboardingProgressService()
|
||||
success = progress_service.reset_onboarding(user_id)
|
||||
|
||||
if success:
|
||||
if not success:
|
||||
raise HTTPException(status_code=500, detail="Failed to reset onboarding progress")
|
||||
|
||||
# Cancel APScheduler one-shot jobs for this user
|
||||
cancelled_jobs = []
|
||||
try:
|
||||
from services.scheduler import get_scheduler
|
||||
scheduler = get_scheduler()
|
||||
for job_id_suffix in ["research_persona", "facebook_persona"]:
|
||||
job_id = f"{job_id_suffix}_{user_id}"
|
||||
try:
|
||||
scheduler.scheduler.remove_job(job_id)
|
||||
cancelled_jobs.append(job_id)
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not cancel APScheduler jobs for user {user_id}: {e}")
|
||||
|
||||
return {
|
||||
"message": "Onboarding progress reset successfully",
|
||||
"current_step": 1,
|
||||
"started_at": None,
|
||||
"user_id": user_id
|
||||
"user_id": user_id,
|
||||
"cancelled_jobs": cancelled_jobs if cancelled_jobs else None,
|
||||
}
|
||||
else:
|
||||
raise HTTPException(status_code=500, detail="Failed to reset onboarding progress")
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error resetting onboarding: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
@@ -19,7 +19,11 @@ from models.monitoring_models import TaskExecutionLog, MonitoringTask
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask, PlatformInsightsExecutionLog
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask, WebsiteAnalysisExecutionLog, DeepWebsiteCrawlTask
|
||||
from models.website_analysis_monitoring_models import (
|
||||
WebsiteAnalysisTask, WebsiteAnalysisExecutionLog, DeepWebsiteCrawlTask,
|
||||
OnboardingFullWebsiteAnalysisTask, DeepCompetitorAnalysisTask,
|
||||
SIFIndexingTask, MarketTrendsTask, AdvertoolsTask,
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/api/scheduler", tags=["scheduler-dashboard"])
|
||||
|
||||
@@ -309,6 +313,198 @@ async def get_scheduler_dashboard(
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading deep website crawl tasks: {e}", exc_info=True)
|
||||
|
||||
# Load onboarding full website analysis tasks
|
||||
try:
|
||||
onboarding_tasks = db.query(OnboardingFullWebsiteAnalysisTask).filter(
|
||||
OnboardingFullWebsiteAnalysisTask.status.in_(['active', 'failed', 'needs_intervention'])
|
||||
).all()
|
||||
|
||||
if user_id_str:
|
||||
onboarding_tasks = [t for t in onboarding_tasks if t.user_id == user_id_str]
|
||||
|
||||
for task in onboarding_tasks:
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception:
|
||||
user_job_store = 'default'
|
||||
|
||||
job_info = {
|
||||
'id': f"onboarding_full_website_analysis_{task.user_id}_{task.id}",
|
||||
'trigger_type': 'DateTrigger' if task.status != 'active' else 'CronTrigger',
|
||||
'next_run_time': task.next_execution.isoformat() if task.next_execution else None,
|
||||
'user_id': task.user_id,
|
||||
'job_store': 'default',
|
||||
'user_job_store': user_job_store,
|
||||
'function_name': 'onboarding_full_website_analysis_executor.execute_task',
|
||||
'website_url': task.website_url,
|
||||
'task_id': task.id,
|
||||
'is_database_task': True,
|
||||
'frequency': 'One-time' if task.status == 'completed' else 'Once',
|
||||
'task_category': 'onboarding_full_website_analysis',
|
||||
'status': task.status,
|
||||
'last_success': task.last_success.isoformat() if task.last_success else None,
|
||||
'last_failure': task.last_failure.isoformat() if task.last_failure else None,
|
||||
'failure_reason': task.failure_reason,
|
||||
'consecutive_failures': task.consecutive_failures,
|
||||
}
|
||||
formatted_jobs.append(job_info)
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading onboarding full website analysis tasks: {e}", exc_info=True)
|
||||
|
||||
# Load deep competitor analysis tasks
|
||||
try:
|
||||
competitor_tasks = db.query(DeepCompetitorAnalysisTask).filter(
|
||||
DeepCompetitorAnalysisTask.status.in_(['active', 'failed', 'needs_intervention'])
|
||||
).all()
|
||||
|
||||
if user_id_str:
|
||||
competitor_tasks = [t for t in competitor_tasks if t.user_id == user_id_str]
|
||||
|
||||
for task in competitor_tasks:
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception:
|
||||
user_job_store = 'default'
|
||||
|
||||
payload = task.payload or {}
|
||||
frequency_label = 'Weekly' if payload.get('mode') == 'strategic_insights' else 'One-time'
|
||||
job_info = {
|
||||
'id': f"deep_competitor_analysis_{task.user_id}_{task.id}",
|
||||
'trigger_type': 'CronTrigger' if frequency_label == 'Weekly' else 'DateTrigger',
|
||||
'next_run_time': task.next_execution.isoformat() if task.next_execution else None,
|
||||
'user_id': task.user_id,
|
||||
'job_store': 'default',
|
||||
'user_job_store': user_job_store,
|
||||
'function_name': 'deep_competitor_analysis_executor.execute_task',
|
||||
'website_url': task.website_url,
|
||||
'task_id': task.id,
|
||||
'is_database_task': True,
|
||||
'frequency': frequency_label,
|
||||
'task_category': 'deep_competitor_analysis',
|
||||
'status': task.status,
|
||||
'last_success': task.last_success.isoformat() if task.last_success else None,
|
||||
'last_failure': task.last_failure.isoformat() if task.last_failure else None,
|
||||
'failure_reason': task.failure_reason,
|
||||
'consecutive_failures': task.consecutive_failures,
|
||||
}
|
||||
formatted_jobs.append(job_info)
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading deep competitor analysis tasks: {e}", exc_info=True)
|
||||
|
||||
# Load SIF indexing tasks
|
||||
try:
|
||||
sif_tasks = db.query(SIFIndexingTask).filter(
|
||||
SIFIndexingTask.status.in_(['active', 'failed', 'needs_intervention'])
|
||||
).all()
|
||||
|
||||
if user_id_str:
|
||||
sif_tasks = [t for t in sif_tasks if t.user_id == user_id_str]
|
||||
|
||||
for task in sif_tasks:
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception:
|
||||
user_job_store = 'default'
|
||||
|
||||
job_info = {
|
||||
'id': f"sif_indexing_{task.user_id}_{task.id}",
|
||||
'trigger_type': 'CronTrigger',
|
||||
'next_run_time': task.next_execution.isoformat() if task.next_execution else None,
|
||||
'user_id': task.user_id,
|
||||
'job_store': 'default',
|
||||
'user_job_store': user_job_store,
|
||||
'function_name': 'sif_indexing_executor.execute_task',
|
||||
'website_url': task.website_url,
|
||||
'task_id': task.id,
|
||||
'is_database_task': True,
|
||||
'frequency': f'Every {task.frequency_hours}h' if task.frequency_hours else 'Every 48h',
|
||||
'task_category': 'sif_indexing',
|
||||
'status': task.status,
|
||||
'last_success': task.last_success.isoformat() if task.last_success else None,
|
||||
'last_failure': task.last_failure.isoformat() if task.last_failure else None,
|
||||
'failure_reason': task.failure_reason,
|
||||
'consecutive_failures': task.consecutive_failures,
|
||||
}
|
||||
formatted_jobs.append(job_info)
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading SIF indexing tasks: {e}", exc_info=True)
|
||||
|
||||
# Load market trends tasks
|
||||
try:
|
||||
trends_tasks = db.query(MarketTrendsTask).filter(
|
||||
MarketTrendsTask.status.in_(['active', 'failed', 'needs_intervention'])
|
||||
).all()
|
||||
|
||||
if user_id_str:
|
||||
trends_tasks = [t for t in trends_tasks if t.user_id == user_id_str]
|
||||
|
||||
for task in trends_tasks:
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception:
|
||||
user_job_store = 'default'
|
||||
|
||||
job_info = {
|
||||
'id': f"market_trends_{task.user_id}_{task.id}",
|
||||
'trigger_type': 'CronTrigger',
|
||||
'next_run_time': task.next_execution.isoformat() if task.next_execution else None,
|
||||
'user_id': task.user_id,
|
||||
'job_store': 'default',
|
||||
'user_job_store': user_job_store,
|
||||
'function_name': 'market_trends_executor.execute_task',
|
||||
'website_url': task.website_url,
|
||||
'task_id': task.id,
|
||||
'is_database_task': True,
|
||||
'frequency': f'Every {task.frequency_hours}h' if task.frequency_hours else 'Every 72h',
|
||||
'task_category': 'market_trends',
|
||||
'status': task.status,
|
||||
'last_success': task.last_success.isoformat() if task.last_success else None,
|
||||
'last_failure': task.last_failure.isoformat() if task.last_failure else None,
|
||||
'failure_reason': task.failure_reason,
|
||||
'consecutive_failures': task.consecutive_failures,
|
||||
}
|
||||
formatted_jobs.append(job_info)
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading market trends tasks: {e}", exc_info=True)
|
||||
|
||||
# Load advertools tasks
|
||||
try:
|
||||
advertools_tasks = db.query(AdvertoolsTask).filter(
|
||||
AdvertoolsTask.status.in_(['active', 'failed', 'paused'])
|
||||
).all()
|
||||
|
||||
if user_id_str:
|
||||
advertools_tasks = [t for t in advertools_tasks if t.user_id == user_id_str]
|
||||
|
||||
for task in advertools_tasks:
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception:
|
||||
user_job_store = 'default'
|
||||
|
||||
job_info = {
|
||||
'id': f"advertools_{task.user_id}_{task.id}",
|
||||
'trigger_type': 'CronTrigger',
|
||||
'next_run_time': task.next_execution.isoformat() if task.next_execution else None,
|
||||
'user_id': task.user_id,
|
||||
'job_store': 'default',
|
||||
'user_job_store': user_job_store,
|
||||
'function_name': 'advertools_executor.execute_task',
|
||||
'website_url': task.website_url,
|
||||
'task_id': task.id,
|
||||
'is_database_task': True,
|
||||
'frequency': f'Every {task.frequency_days}d' if task.frequency_days else 'Weekly',
|
||||
'task_category': 'advertools',
|
||||
'status': task.status,
|
||||
'last_success': task.last_success.isoformat() if task.last_success else None,
|
||||
'last_failure': task.last_failure.isoformat() if task.last_failure else None,
|
||||
'failure_reason': task.failure_reason,
|
||||
'consecutive_failures': task.consecutive_failures,
|
||||
}
|
||||
formatted_jobs.append(job_info)
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading advertools tasks: {e}", exc_info=True)
|
||||
|
||||
# Get active strategies count
|
||||
active_strategies = stats.get('active_strategies_count', 0)
|
||||
|
||||
@@ -1237,7 +1433,9 @@ async def manual_trigger_task(
|
||||
This bypasses the cool-off check and executes the task immediately.
|
||||
|
||||
Args:
|
||||
task_type: Task type (oauth_token_monitoring, website_analysis, gsc_insights, bing_insights)
|
||||
task_type: Task type (oauth_token_monitoring, website_analysis, gsc_insights, bing_insights,
|
||||
onboarding_full_website_analysis, deep_competitor_analysis, sif_indexing,
|
||||
market_trends, advertools)
|
||||
task_id: Task ID
|
||||
|
||||
Returns:
|
||||
@@ -1261,6 +1459,30 @@ async def manual_trigger_task(
|
||||
task = db.query(PlatformInsightsTask).filter(
|
||||
PlatformInsightsTask.id == task_id
|
||||
).first()
|
||||
elif task_type == "onboarding_full_website_analysis":
|
||||
task = db.query(OnboardingFullWebsiteAnalysisTask).filter(
|
||||
OnboardingFullWebsiteAnalysisTask.id == task_id
|
||||
).first()
|
||||
elif task_type == "deep_competitor_analysis":
|
||||
task = db.query(DeepCompetitorAnalysisTask).filter(
|
||||
DeepCompetitorAnalysisTask.id == task_id
|
||||
).first()
|
||||
elif task_type == "sif_indexing":
|
||||
task = db.query(SIFIndexingTask).filter(
|
||||
SIFIndexingTask.id == task_id
|
||||
).first()
|
||||
elif task_type == "market_trends":
|
||||
task = db.query(MarketTrendsTask).filter(
|
||||
MarketTrendsTask.id == task_id
|
||||
).first()
|
||||
elif task_type == "advertools":
|
||||
task = db.query(AdvertoolsTask).filter(
|
||||
AdvertoolsTask.id == task_id
|
||||
).first()
|
||||
elif task_type == "deep_website_crawl":
|
||||
task = db.query(DeepWebsiteCrawlTask).filter(
|
||||
DeepWebsiteCrawlTask.id == task_id
|
||||
).first()
|
||||
else:
|
||||
raise HTTPException(status_code=400, detail=f"Unknown task type: {task_type}")
|
||||
|
||||
@@ -1363,3 +1585,219 @@ async def get_platform_insights_logs(
|
||||
logger.error(f"Error getting platform insights logs for user {user_id}: {e}", exc_info=True)
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get platform insights logs: {str(e)}")
|
||||
|
||||
|
||||
# Display metadata for scheduled task types, keyed by task-type identifier.
# Every entry carries the same three string fields: "label", "description"
# and "frequency" (the default frequency text shown for that task type).
TASK_DISPLAY_INFO = {
    "onboarding_full_website_analysis": {
        "label": "Full-Site SEO Audit",
        "description": "Crawls your entire website and generates per-page SEO audit results.",
        "frequency": "One-time",
    },
    "deep_competitor_analysis": {
        "label": "Deep Competitor Analysis",
        "description": "Analyzes competitors' content strategy, keywords, and positioning.",
        "frequency": "Weekly (strategic insights) or One-time",
    },
    "sif_indexing": {
        "label": "SIF Content Indexing",
        "description": "Indexes your website content into the Semantic Intelligence Framework for agent-powered recommendations.",
        "frequency": "Every 48 hours",
    },
    "market_trends": {
        "label": "Market Trends",
        "description": "Monitors search trends and surfaces high-impact content opportunities.",
        "frequency": "Every 72 hours",
    },
    "advertools": {
        "label": "Advertools Analysis",
        "description": "Runs brand analysis and site health audits using Advertools.",
        "frequency": "Weekly",
    },
    "oauth_token_monitoring": {
        "label": "OAuth Token Health",
        "description": "Monitors and refreshes OAuth tokens for connected platforms (GSC, Bing, WordPress, Wix).",
        "frequency": "Weekly",
    },
    "website_analysis": {
        "label": "Website Analysis",
        "description": "Periodically re-crawls your website and updates style analysis, content pillars, and SEO data.",
        "frequency": "Every 10 days",
    },
    "gsc_insights": {
        "label": "Google Search Console Insights",
        "description": "Pulls search performance data from Google Search Console.",
        "frequency": "Weekly",
    },
    "bing_insights": {
        "label": "Bing Insights",
        "description": "Pulls search performance data from Bing Webmaster Tools.",
        "frequency": "Weekly",
    },
    "deep_website_crawl": {
        "label": "Deep Website Crawl",
        "description": "Performs deep crawl of your website for technical SEO issues.",
        "frequency": "Weekly",
    },
    "platform_insights": {
        "label": "Platform Insights",
        "description": "Aggregates search performance data from connected platforms.",
        "frequency": "Weekly",
    },
}
|
||||
|
||||
|
||||
@router.get("/onboarding-tasks/{user_id}")
async def get_onboarding_tasks(
    user_id: str,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    List the scheduled tasks stored for a user, with display metadata.

    Queries eight task tables filtered by ``user_id`` (full-site audit, deep
    competitor analysis, SIF indexing, market trends, Advertools, website
    analysis, OAuth token monitoring, platform insights) and emits one dict
    per row, in that order, enriched with label/description/frequency text
    from TASK_DISPLAY_INFO.

    Args:
        user_id: User whose tasks are listed; must equal the caller's own id.
        db: Database session.
        current_user: Authenticated user payload.

    Returns:
        {"success": True, "tasks": [...], "count": <number of tasks>}.

    Raises:
        HTTPException: 403 when the caller's id differs from ``user_id``;
            500 for any other failure.
    """

    def _status_label(raw):
        # "some_status" -> "Some Status"; empty/None -> "Unknown".
        if not raw:
            return 'Unknown'
        return raw.replace('_', ' ').title()

    def _iso(moment):
        # ISO-8601 string for a datetime, None when the value is unset.
        return moment.isoformat() if moment else None

    def _owned(model):
        # All rows of ``model`` belonging to the requested user.
        return db.query(model).filter(model.user_id == user_id).all()

    def _entry(row, *, task_type, label, description, frequency, website_url, next_run):
        # Shared response shape; key order matches the serialized payload.
        return {
            "task_type": task_type,
            "label": label,
            "description": description,
            "frequency": frequency,
            "task_id": row.id,
            "website_url": website_url,
            "status": row.status,
            "status_label": _status_label(row.status),
            "last_success": _iso(row.last_success),
            "last_failure": _iso(row.last_failure),
            "next_execution": _iso(next_run),
            "failure_reason": row.failure_reason,
            "consecutive_failures": row.consecutive_failures,
        }

    try:
        if str(current_user.get('id')) != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        tasks = []

        # Onboarding full-site SEO audit
        meta = TASK_DISPLAY_INFO.get("onboarding_full_website_analysis", {})
        for row in _owned(OnboardingFullWebsiteAnalysisTask):
            tasks.append(_entry(
                row,
                task_type="onboarding_full_website_analysis",
                label=meta.get("label", "Full-Site SEO Audit"),
                description=meta.get("description", ""),
                frequency=meta.get("frequency", "One-time"),
                website_url=row.website_url,
                next_run=row.next_execution,
            ))

        # Deep competitor analysis
        meta = TASK_DISPLAY_INFO.get("deep_competitor_analysis", {})
        for row in _owned(DeepCompetitorAnalysisTask):
            task_payload = row.payload or {}
            # Tasks in "strategic_insights" mode are reported as weekly.
            if task_payload.get("mode") == "strategic_insights":
                frequency_text = "Weekly"
            else:
                frequency_text = meta.get("frequency", "One-time")
            tasks.append(_entry(
                row,
                task_type="deep_competitor_analysis",
                label=meta.get("label", "Deep Competitor Analysis"),
                description=meta.get("description", ""),
                frequency=frequency_text,
                website_url=row.website_url,
                next_run=row.next_execution,
            ))

        # SIF indexing
        meta = TASK_DISPLAY_INFO.get("sif_indexing", {})
        for row in _owned(SIFIndexingTask):
            tasks.append(_entry(
                row,
                task_type="sif_indexing",
                label=meta.get("label", "SIF Content Indexing"),
                description=meta.get("description", ""),
                frequency=f"Every {row.frequency_hours or 48}h",
                website_url=row.website_url,
                next_run=row.next_execution,
            ))

        # Market trends
        meta = TASK_DISPLAY_INFO.get("market_trends", {})
        for row in _owned(MarketTrendsTask):
            tasks.append(_entry(
                row,
                task_type="market_trends",
                label=meta.get("label", "Market Trends"),
                description=meta.get("description", ""),
                frequency=f"Every {row.frequency_hours or 72}h",
                website_url=row.website_url,
                next_run=row.next_execution,
            ))

        # Advertools
        meta = TASK_DISPLAY_INFO.get("advertools", {})
        for row in _owned(AdvertoolsTask):
            tasks.append(_entry(
                row,
                task_type="advertools",
                label=meta.get("label", "Advertools Analysis"),
                description=meta.get("description", ""),
                frequency=f"Every {row.frequency_days or 7}d",
                website_url=row.website_url,
                next_run=row.next_execution,
            ))

        # Website analysis (these rows expose next_check, not next_execution)
        meta = TASK_DISPLAY_INFO.get("website_analysis", {})
        for row in _owned(WebsiteAnalysisTask):
            suffix = f" ({row.task_type})" if row.task_type == 'competitor' else ""
            tasks.append(_entry(
                row,
                task_type="website_analysis",
                label=meta.get("label", "Website Analysis") + suffix,
                description=meta.get("description", ""),
                frequency=f"Every {row.frequency_days or 10}d",
                website_url=row.website_url,
                next_run=row.next_check,
            ))

        # OAuth token monitoring (no website URL is reported for these rows)
        meta = TASK_DISPLAY_INFO.get("oauth_token_monitoring", {})
        for row in _owned(OAuthTokenMonitoringTask):
            tasks.append(_entry(
                row,
                task_type="oauth_token_monitoring",
                label=meta.get("label", "OAuth Token Health") + f" ({row.platform})",
                description=meta.get("description", ""),
                frequency=meta.get("frequency", "Weekly"),
                website_url=None,
                next_run=row.next_check,
            ))

        # Platform insights: the display key is derived from the row's platform
        for row in _owned(PlatformInsightsTask):
            platform_key = f"{row.platform}_insights"
            meta = TASK_DISPLAY_INFO.get(platform_key, {})
            tasks.append(_entry(
                row,
                task_type=platform_key,
                label=meta.get("label", "Platform Insights") + f" ({row.platform})",
                description=meta.get("description", ""),
                frequency=meta.get("frequency", "Weekly"),
                website_url=row.site_url,
                next_run=row.next_check,
            ))

        return {"success": True, "tasks": tasks, "count": len(tasks)}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting onboarding tasks for user {user_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to get onboarding tasks: {str(e)}")
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ from services.seo import SEODashboardService
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from api.content_planning.services.content_strategy.onboarding import OnboardingDataIntegrationService
|
||||
from models.onboarding import SEOPageAudit, WebsiteAnalysis, OnboardingSession
|
||||
from models.onboarding import SEOPageAudit, WebsiteAnalysis, OnboardingSession, CompetitorAnalysis
|
||||
from sqlalchemy.orm.attributes import flag_modified
|
||||
|
||||
from sqlalchemy import desc
|
||||
@@ -75,7 +75,9 @@ class SEODashboardData(BaseModel):
|
||||
platforms: Dict[str, PlatformStatus]
|
||||
ai_insights: List[AIInsight]
|
||||
last_updated: str
|
||||
website_url: Optional[str] = None # User's website URL from onboarding
|
||||
website_url: Optional[str] = None
|
||||
advertools_insights: Optional[Dict[str, Any]] = None
|
||||
technical_seo_audit: Optional[Dict[str, Any]] = None
|
||||
|
||||
# New models for comprehensive SEO analysis
|
||||
class SEOAnalysisRequest(BaseModel):
|
||||
@@ -378,7 +380,9 @@ async def get_seo_dashboard_data(current_user: dict = Depends(get_current_user))
|
||||
platforms=_convert_platforms(overview_data.get("platforms", {})),
|
||||
ai_insights=[AIInsight(**insight) for insight in overview_data.get("ai_insights", [])],
|
||||
last_updated=overview_data.get("last_updated", datetime.now().isoformat()),
|
||||
website_url=overview_data.get("website_url")
|
||||
website_url=overview_data.get("website_url"),
|
||||
advertools_insights=overview_data.get("advertools_insights"),
|
||||
technical_seo_audit=overview_data.get("technical_seo_audit"),
|
||||
)
|
||||
finally:
|
||||
db_session.close()
|
||||
@@ -752,6 +756,391 @@ async def get_keyword_gaps(
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get keyword gaps: {str(e)}")
|
||||
|
||||
|
||||
async def get_serp_gaps(
    current_user: dict = Depends(get_current_user),
    topics: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """
    Get SERP gap analysis for a set of topics against the user's competitors.

    When no topics are supplied they are derived from the user's SIF semantic
    gaps: the first 12 entries returned by
    ``StrategyArchitectAgent.find_semantic_gaps``. Competitor domains are read
    from the CompetitorAnalysis rows joined to the user's onboarding sessions,
    and the analysis itself is delegated to
    ``SerpGapService.analyze_topic_gaps``.

    Args:
        current_user: Authenticated user payload; its "id" identifies the user.
        topics: Optional list of topic phrases. If omitted, topics are derived
            from the user's SIF semantic gaps (up to 12 topics).

    Returns:
        The SerpGapService result, or ``{"gaps": [], "message": ...}`` when
        topics or competitor domains are unavailable.

    Raises:
        HTTPException: Propagated unchanged when raised downstream; any other
            failure is reported as a 500.
    """
    try:
        user_id = str(current_user.get("id"))

        # If no topics provided, fetch from SIF semantic gaps
        if not topics:
            try:
                from services.intelligence.agents.specialized import StrategyArchitectAgent
                from services.intelligence.txtai_service import TxtaiIntelligenceService

                integration = OnboardingDataIntegrationService()
                db_session = get_session_for_user(user_id)
                if db_session:
                    competitor_indices = []
                    try:
                        integrated = integration.get_integrated_data_sync(
                            user_id, db_session
                        )
                        if integrated and integrated.get("competitor_analysis"):
                            competitor_indices = [
                                i
                                for i, _ in enumerate(
                                    integrated["competitor_analysis"]
                                )
                            ]
                    finally:
                        # The agent below is not given the session, so release
                        # it before the (potentially slow) await.
                        db_session.close()

                    agent = StrategyArchitectAgent(
                        TxtaiIntelligenceService(user_id), user_id
                    )
                    gaps = await agent.find_semantic_gaps(competitor_indices)
                    topics = [g["topic"] for g in gaps[:12]]
            except Exception as e:
                # Deliberately best-effort: topic derivation is optional, so a
                # failure here degrades to an empty result instead of an error.
                logger.warning(
                    f"Could not derive topics from SIF gaps: {e}. "
                    "Pass topics explicitly."
                )
                return {
                    "gaps": [],
                    "message": "No topics provided and unable to derive from SIF gaps.",
                }

        if not topics:
            return {
                "gaps": [],
                "message": "No topics to analyze. Complete onboarding and SIF indexing first.",
            }

        # Get competitor domains from onboarding
        competitor_domains = []
        db_session = get_session_for_user(user_id)
        if db_session:
            try:
                analyses = (
                    db_session.query(CompetitorAnalysis)
                    .join(
                        OnboardingSession,
                        CompetitorAnalysis.session_id == OnboardingSession.id,
                    )
                    .filter(OnboardingSession.user_id == user_id)
                    .filter(CompetitorAnalysis.competitor_domain.isnot(None))
                    .all()
                )
                # De-duplicate; sort because set iteration order is arbitrary
                # and a stable order keeps repeated runs reproducible.
                competitor_domains = sorted(
                    {a.competitor_domain for a in analyses if a.competitor_domain}
                )
            finally:
                db_session.close()

        if not competitor_domains:
            return {
                "gaps": [],
                "message": "No competitor domains found. Complete onboarding Step 3.",
            }

        # Run SERP gap analysis
        from services.seo_tools.serp_gap_service import SerpGapService

        service = SerpGapService()
        result = await service.analyze_topic_gaps(topics, competitor_domains)
        return result

    except HTTPException:
        # Keep the status code chosen downstream instead of masking it as 500.
        raise
    except Exception as e:
        logger.error(f"Failed to get SERP gaps: {e}", exc_info=True)
        raise HTTPException(
            status_code=500, detail=f"Failed to get SERP gaps: {str(e)}"
        ) from e
|
||||
|
||||
|
||||
async def get_competitor_content(
    current_user: dict = Depends(get_current_user),
    topics: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """
    Get a competitor content deep-dive for gap topics.

    When no topics are supplied they are derived from the user's SIF semantic
    gaps: the first 6 entries returned by
    ``StrategyArchitectAgent.find_semantic_gaps`` (fewer than the SERP handler
    because the downstream Exa API is paid). Competitor domains are read from
    the CompetitorAnalysis rows joined to the user's onboarding sessions, and
    the deep-dive is delegated to ``CompetitorContentService.deep_dive``.

    Args:
        current_user: Authenticated user payload; its "id" identifies the user.
        topics: Optional list of topic phrases. If omitted, topics are derived
            from the user's SIF semantic gaps (up to 6 topics).

    Returns:
        The CompetitorContentService result, or
        ``{"results": [], "message": ...}`` when topics or competitor domains
        are unavailable.

    Raises:
        HTTPException: Propagated unchanged when raised downstream; any other
            failure is reported as a 500.
    """
    try:
        user_id = str(current_user.get("id"))

        # If no topics provided, fetch from SIF semantic gaps
        if not topics:
            try:
                from services.intelligence.agents.specialized import StrategyArchitectAgent
                from services.intelligence.txtai_service import TxtaiIntelligenceService

                integration = OnboardingDataIntegrationService()
                db_session = get_session_for_user(user_id)
                if db_session:
                    competitor_indices = []
                    try:
                        integrated = integration.get_integrated_data_sync(
                            user_id, db_session
                        )
                        if integrated and integrated.get("competitor_analysis"):
                            competitor_indices = [
                                i
                                for i, _ in enumerate(
                                    integrated["competitor_analysis"]
                                )
                            ]
                    finally:
                        # The agent below is not given the session, so release
                        # it before the (potentially slow) await.
                        db_session.close()

                    agent = StrategyArchitectAgent(
                        TxtaiIntelligenceService(user_id), user_id
                    )
                    gaps = await agent.find_semantic_gaps(competitor_indices)
                    # Fewer topics for Exa (paid API)
                    topics = [g["topic"] for g in gaps[:6]]
            except Exception as e:
                # Deliberately best-effort: topic derivation is optional, so a
                # failure here degrades to an empty result instead of an error.
                logger.warning(
                    f"Could not derive topics from SIF gaps: {e}. "
                    "Pass topics explicitly."
                )
                return {
                    "results": [],
                    "message": "No topics provided and unable to derive from SIF gaps.",
                }

        if not topics:
            return {
                "results": [],
                "message": "No topics to analyze. Complete onboarding and SIF indexing first.",
            }

        # Get competitor domains from onboarding
        competitor_domains = []
        db_session = get_session_for_user(user_id)
        if db_session:
            try:
                analyses = (
                    db_session.query(CompetitorAnalysis)
                    .join(
                        OnboardingSession,
                        CompetitorAnalysis.session_id == OnboardingSession.id,
                    )
                    .filter(OnboardingSession.user_id == user_id)
                    .filter(CompetitorAnalysis.competitor_domain.isnot(None))
                    .all()
                )
                # De-duplicate; sort because set iteration order is arbitrary
                # and a stable order keeps repeated runs reproducible.
                competitor_domains = sorted(
                    {a.competitor_domain for a in analyses if a.competitor_domain}
                )
            finally:
                db_session.close()

        if not competitor_domains:
            return {
                "results": [],
                "message": "No competitor domains found. Complete onboarding Step 3.",
            }

        # Run Exa competitor deep-dive
        from services.seo_tools.competitor_content_service import (
            CompetitorContentService,
        )

        service = CompetitorContentService()
        result = await service.deep_dive(topics, competitor_domains)
        return result

    except HTTPException:
        # Keep the status code chosen downstream instead of masking it as 500.
        raise
    except Exception as e:
        logger.error(f"Failed to get competitor content: {e}", exc_info=True)
        raise HTTPException(
            status_code=500, detail=f"Failed to get competitor content: {str(e)}"
        ) from e
|
||||
|
||||
|
||||
async def get_content_gap_radar(
    current_user: dict = Depends(get_current_user),
    bypass_cache: bool = False,
) -> Dict[str, Any]:
    """
    Run the Content Gap Radar pipeline for the current user.

    Loads the user's competitor domains (CompetitorAnalysis rows joined to
    their onboarding sessions) and the positional indices of the integrated
    ``competitor_analysis`` list, then hands both to
    ``ContentGapRadarAgent.analyze``.

    Args:
        current_user: Authenticated user payload; its "id" identifies the user.
        bypass_cache: Forwarded unchanged to ``ContentGapRadarAgent.analyze``.

    Returns:
        The agent's result, or ``{"gaps": [], "summary": {}, "message": ...}``
        when no competitor domains are available.

    Raises:
        HTTPException: Propagated unchanged when raised downstream; any other
            failure is reported as a 500.
    """
    try:
        user_id = str(current_user.get("id"))

        # Fetch competitor domains + indices from onboarding data
        competitor_domains = []
        competitor_indices = []

        db_session = get_session_for_user(user_id)
        if db_session:
            try:
                # Competitor domains
                analyses = (
                    db_session.query(CompetitorAnalysis)
                    .join(
                        OnboardingSession,
                        CompetitorAnalysis.session_id == OnboardingSession.id,
                    )
                    .filter(OnboardingSession.user_id == user_id)
                    .filter(CompetitorAnalysis.competitor_domain.isnot(None))
                    .all()
                )
                # De-duplicate; sort because set iteration order is arbitrary
                # and a stable order keeps repeated runs reproducible.
                competitor_domains = sorted(
                    {a.competitor_domain for a in analyses if a.competitor_domain}
                )

                # Competitor indices from integrated data
                integration = OnboardingDataIntegrationService()
                integrated = integration.get_integrated_data_sync(
                    user_id, db_session
                )
                if integrated and integrated.get("competitor_analysis"):
                    competitor_indices = [
                        i
                        for i, _ in enumerate(
                            integrated["competitor_analysis"]
                        )
                    ]
            finally:
                db_session.close()

        if not competitor_domains:
            return {
                "gaps": [],
                "summary": {},
                "message": "No competitor domains found. Complete onboarding Step 3.",
            }

        # Run the agent
        from services.intelligence.agents import ContentGapRadarAgent
        from services.intelligence.txtai_service import TxtaiIntelligenceService

        agent = ContentGapRadarAgent(
            TxtaiIntelligenceService(user_id), user_id
        )
        result = await agent.analyze(
            competitor_domains=competitor_domains,
            competitor_indices=competitor_indices,
            bypass_cache=bypass_cache,
        )
        return result

    except HTTPException:
        # Keep the status code chosen downstream instead of masking it as 500.
        raise
    except Exception as e:
        logger.error(f"Failed to run content gap radar: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to run content gap radar: {str(e)}",
        ) from e
|
||||
|
||||
|
||||
class GenerateContentRequest(BaseModel):
    """Request body for generating a content brief from one gap-radar item."""

    # Topic of the gap; the only required field.
    topic: str
    # Suggested action for the gap; empty string when not supplied.
    recommended_action: str = ""
    # Optional supporting data, forwarded as-is to the brief generator.
    scoring: Optional[Dict[str, float]] = None
    serp_evidence: Optional[Dict[str, Any]] = None
    sif_gap: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
async def generate_content_from_gap(
    request: GenerateContentRequest,
    current_user: dict = Depends(get_current_user),
) -> Dict[str, Any]:
    """
    Generate a content brief from a content gap radar item and save it
    as a blog ContentAsset so the user can resume in the Blog Writer.

    The brief comes from ``ContentGapRadarAgent.generate_content_brief``.
    Saving the asset is best-effort: if it fails, the failure is logged and
    the brief is still returned with ``asset_id`` set to None.

    Args:
        request: Gap item (topic plus optional scoring/evidence).
        current_user: Authenticated user payload; its "id" identifies the user.

    Returns:
        ``{"success": True, "brief": <brief>, "asset_id": <id or None>}``.

    Raises:
        HTTPException: Propagated unchanged when raised downstream; any other
            failure (including a result without a "brief" key) is a 500.
    """
    try:
        user_id = str(current_user.get("id"))
        from services.intelligence.agents import ContentGapRadarAgent
        from services.intelligence.txtai_service import TxtaiIntelligenceService

        agent = ContentGapRadarAgent(
            TxtaiIntelligenceService(user_id), user_id
        )
        brief_result = await agent.generate_content_brief(
            topic=request.topic,
            recommended_action=request.recommended_action,
            scoring=request.scoring,
            serp_evidence=request.serp_evidence,
            sif_gap=request.sif_gap,
        )

        # Create blog ContentAsset so user can resume in Blog Writer
        # `time` is imported here, like the other handler-local dependencies,
        # so the handler does not depend on a module-level import.
        import time

        from services.content_asset_service import ContentAssetService
        from models.content_asset_models import AssetType, AssetSource
        from services.database import get_db_session

        # NOTE(review): sibling handlers obtain sessions through
        # get_session_for_user(user_id); confirm get_db_session() targets the
        # same database for this user.
        session = get_db_session()
        asset_id = None
        if session:
            try:
                svc = ContentAssetService(session)
                asset = svc.create_asset(
                    user_id=user_id,
                    asset_type=AssetType.TEXT,
                    source_module=AssetSource.BLOG_WRITER,
                    filename=f"gap_{int(time.time())}.md",
                    file_url="/api/blog/content/pending",
                    title=request.topic,
                    description=f"Content brief from gap analysis: {request.topic}",
                    tags=["content-gap", "seo-dashboard"],
                    asset_metadata={
                        "phase": "research",
                        "research_keywords": request.topic,
                        "topic": request.topic,
                        "research_data": brief_result,
                        "outline_data": None,
                        "content_data": None,
                        "seo_data": None,
                        "publish_data": None,
                    },
                )
                asset_id = asset.id
                logger.info(
                    f"Created blog asset {asset_id} for gap topic '{request.topic}'"
                )
            except Exception as e:
                # Best-effort: the brief is still useful without a saved asset.
                logger.warning(f"Failed to create blog asset: {e}")
            finally:
                session.close()

        return {
            "success": True,
            "brief": brief_result["brief"],
            "asset_id": asset_id,
        }

    except HTTPException:
        # Keep the status code chosen downstream instead of masking it as 500.
        raise
    except Exception as e:
        logger.error(f"Failed to generate content from gap: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to generate content brief: {str(e)}",
        ) from e
|
||||
|
||||
|
||||
async def get_onboarding_task_health(
|
||||
current_user: dict = Depends(get_current_user),
|
||||
site_url: Optional[str] = None,
|
||||
|
||||
@@ -12,9 +12,11 @@ from pydantic import BaseModel
|
||||
import os
|
||||
import uuid
|
||||
import requests
|
||||
import time
|
||||
|
||||
from services.wix_service import WixService
|
||||
from services.integrations.wix_oauth import WixOAuthService
|
||||
from services.integrations.wix.utils import extract_meta_from_token
|
||||
from services.integrations.oauth_callback_utils import (
|
||||
build_oauth_callback_html,
|
||||
sanitize_error,
|
||||
@@ -40,25 +42,112 @@ def _get_current_user_id(current_user: dict) -> str:
|
||||
|
||||
|
||||
def _map_wix_error(exc: Exception, fallback: str = "Wix API request failed") -> HTTPException:
    """Map Wix API exceptions to proper HTTP responses with actionable guidance.

    The exception is classified in this order:

    1. ``HTTPException`` is returned unchanged.
    2. ``requests.HTTPError`` is mapped by upstream status code to a canned,
       user-facing message; unknown statuses become 502 carrying the upstream
       message, the exception text, or ``fallback``.
    3. Any other ``requests.RequestException`` becomes a 502 network error.
    4. ``WixAPIError`` is mapped by its ``status_code`` like case 2; unknown
       statuses are passed through (502 when the status is unset).
    5. Anything else becomes 400 when its text contains "validation failed"
       (case-insensitive), otherwise 500.

    Args:
        exc: The exception raised while talking to Wix.
        fallback: Message used when nothing more specific is available.

    Returns:
        The HTTPException to raise; this function never raises it itself.
    """
    if isinstance(exc, HTTPException):
        return exc

    # Try to extract a meaningful error message from the Wix API response.
    wix_error_detail = None
    response = getattr(exc, 'response', None)
    if response is not None:
        try:
            err_body = response.json()
            if isinstance(err_body, dict):
                wix_error_detail = err_body.get('message') or err_body.get('error') or err_body.get('details')
        except Exception:
            # Body is not JSON: fall back to a truncated raw body. getattr
            # keeps the mapper itself from raising on response-like objects
            # that have no ``text`` attribute.
            raw_text = getattr(response, 'text', None)
            wix_error_detail = raw_text[:300] if raw_text else None

    if isinstance(exc, requests.HTTPError):
        status = exc.response.status_code if exc.response is not None else None
        # Plain ``or`` chain: the previous conditional expression bound looser
        # than ``or`` and discarded the Wix detail whenever str(exc) was empty.
        msg = wix_error_detail or str(exc) or fallback

        if status == 401:
            return HTTPException(
                status_code=401,
                detail="Wix authorization failed. Please reconnect your Wix account."
            )
        if status == 403:
            return HTTPException(
                status_code=403,
                detail="Wix permission denied. Ensure your OAuth app has blog permissions (BLOG.CREATE-DRAFT)."
            )
        if status == 404:
            return HTTPException(
                status_code=502,
                detail="Wix API endpoint not found. The blog feature may not be enabled on this site."
            )
        if status == 429:
            return HTTPException(
                status_code=429,
                detail="Wix rate limit exceeded. Please wait a moment and try again."
            )
        if status == 500:
            return HTTPException(
                status_code=502,
                detail="Wix server error. This is usually temporary — please try again."
            )
        if status in (502, 503, 504):
            return HTTPException(
                status_code=502,
                detail="Wix service temporarily unavailable. Please try again in a moment."
            )
        return HTTPException(status_code=502, detail=msg or fallback)

    if isinstance(exc, requests.RequestException):
        return HTTPException(
            status_code=502,
            detail="Network error connecting to Wix. Please check your connection and try again."
        )

    # Handle WixAPIError from our retry/API layer
    from services.integrations.wix.retry import WixAPIError
    if isinstance(exc, WixAPIError):
        status = exc.status_code
        msg = exc.response_body or str(exc)
        if status == 401:
            return HTTPException(
                status_code=401,
                detail="Wix authorization failed. Please reconnect your Wix account."
            )
        if status == 403:
            return HTTPException(
                status_code=403,
                detail="Wix permission denied. Ensure your OAuth app has blog permissions (BLOG.CREATE-DRAFT)."
            )
        if status == 404:
            return HTTPException(
                status_code=502,
                detail="Wix API endpoint not found. Ensure the site ID is correct and the blog feature is enabled."
            )
        if status == 429:
            return HTTPException(
                status_code=429,
                detail="Wix rate limit exceeded. Please wait a moment and try again."
            )
        if status in (500, 502, 503, 504):
            return HTTPException(
                status_code=502,
                detail="Wix service temporarily unavailable. Please try again in a moment."
            )
        return HTTPException(status_code=status or 502, detail=msg or fallback)

    # For validation errors from blog_publisher
    error_str = str(exc)
    if "validation failed" in error_str.lower():
        return HTTPException(status_code=400, detail=error_str)

    return HTTPException(status_code=500, detail=f"{fallback}: {error_str}")
|
||||
|
||||
|
||||
def _resolve_valid_wix_token(current_user: dict) -> Dict[str, Any]:
|
||||
user_id = _get_current_user_id(current_user)
|
||||
tokens = wix_oauth_service.get_user_tokens(user_id)
|
||||
if tokens:
|
||||
logger.info(f"Wix token resolved from DB for user {user_id[:8]}...")
|
||||
return tokens[0]
|
||||
|
||||
token_status = wix_oauth_service.get_user_token_status(user_id)
|
||||
@@ -66,14 +155,25 @@ def _resolve_valid_wix_token(current_user: dict) -> Dict[str, Any]:
|
||||
if not expired_tokens:
|
||||
raise HTTPException(status_code=401, detail="Wix account not connected")
|
||||
|
||||
MAX_REFRESH_ATTEMPTS = 3
|
||||
attempt = 0
|
||||
for candidate in expired_tokens:
|
||||
if attempt >= MAX_REFRESH_ATTEMPTS:
|
||||
logger.warning(f"Wix token refresh: reached max {MAX_REFRESH_ATTEMPTS} attempts for user {user_id[:8]}...")
|
||||
break
|
||||
refresh_token = candidate.get("refresh_token")
|
||||
token_id = candidate.get("id")
|
||||
if not refresh_token:
|
||||
continue
|
||||
attempt += 1
|
||||
if attempt > 1:
|
||||
backoff = min(2 ** (attempt - 1), 8)
|
||||
logger.info(f"Wix token refresh: attempt {attempt}/{MAX_REFRESH_ATTEMPTS}, waiting {backoff}s...")
|
||||
time.sleep(backoff)
|
||||
try:
|
||||
refreshed = wix_service.refresh_access_token(refresh_token)
|
||||
except Exception as exc:
|
||||
logger.warning(f"Wix token refresh attempt {attempt} failed: {str(exc)[:120]}")
|
||||
continue
|
||||
|
||||
wix_oauth_service.update_tokens(
|
||||
@@ -83,21 +183,34 @@ def _resolve_valid_wix_token(current_user: dict) -> Dict[str, Any]:
|
||||
expires_in=refreshed.get("expires_in"),
|
||||
token_id=token_id,
|
||||
)
|
||||
|
||||
site_id = candidate.get("site_id")
|
||||
if not site_id:
|
||||
meta_info = extract_meta_from_token(refreshed.get("access_token"))
|
||||
site_id = meta_info.get('metaSiteId') or site_id
|
||||
logger.info(f"Wix token refreshed successfully on attempt {attempt} for user {user_id[:8]}...")
|
||||
return {
|
||||
"access_token": refreshed.get("access_token"),
|
||||
"refresh_token": refreshed.get("refresh_token", refresh_token),
|
||||
"member_id": candidate.get("member_id"),
|
||||
"site_id": candidate.get("site_id"),
|
||||
"site_id": site_id,
|
||||
}
|
||||
|
||||
raise HTTPException(status_code=401, detail="Wix token expired and cannot be refreshed")
|
||||
|
||||
|
||||
class WixAuthRequest(BaseModel):
|
||||
"""Request model for Wix authentication"""
|
||||
code: str
|
||||
state: str
|
||||
"""Request model for Wix authentication.
|
||||
Supports two modes:
|
||||
1. Backend exchanges code: requires code + code_verifier
|
||||
2. Frontend already exchanged: provides access_token directly
|
||||
"""
|
||||
code: Optional[str] = None
|
||||
state: Optional[str] = None
|
||||
code_verifier: Optional[str] = None
|
||||
access_token: Optional[str] = None
|
||||
refresh_token: Optional[str] = None
|
||||
expires_in: Optional[int] = None
|
||||
token_type: Optional[str] = "Bearer"
|
||||
|
||||
|
||||
class WixPublishRequest(BaseModel):
|
||||
@@ -112,6 +225,7 @@ class WixPublishRequest(BaseModel):
|
||||
publish: bool = True
|
||||
access_token: Optional[str] = None
|
||||
member_id: Optional[str] = None
|
||||
site_id: Optional[str] = None
|
||||
seo_metadata: Optional[Dict[str, Any]] = None
|
||||
class WixCreateCategoryRequest(BaseModel):
|
||||
access_token: str
|
||||
@@ -217,39 +331,97 @@ async def handle_oauth_callback(request: WixAuthRequest, current_user: dict = De
|
||||
if not user_id:
|
||||
raise HTTPException(status_code=400, detail="User ID not found")
|
||||
|
||||
access_token: str | None = None
|
||||
refresh_token: str | None = None
|
||||
expires_in: int | None = None
|
||||
token_type: str = "Bearer"
|
||||
site_info: dict = {}
|
||||
site_id: str | None = None
|
||||
member_id: str | None = None
|
||||
permissions: dict = {}
|
||||
|
||||
# MODE 2: Frontend already exchanged the code (preferred — avoids PKCE verifier mismatch)
|
||||
if request.access_token:
|
||||
logger.info(f"Wix callback mode=FRONTEND_TOKEN for user {user_id}")
|
||||
access_token = request.access_token
|
||||
refresh_token = request.refresh_token
|
||||
expires_in = request.expires_in
|
||||
token_type = request.token_type or "Bearer"
|
||||
|
||||
# Non-fatal enrichment
|
||||
try:
|
||||
site_info = wix_service.get_site_info(access_token)
|
||||
site_id = site_info.get('siteId') or site_info.get('site_id')
|
||||
if not site_id and site_info.get('_no_site'):
|
||||
meta_info = extract_meta_from_token(access_token)
|
||||
site_id = meta_info.get('metaSiteId')
|
||||
except Exception as e:
|
||||
logger.warning(f"get_site_info failed (non-fatal): {e}")
|
||||
try:
|
||||
member_id = wix_service.extract_member_id_from_access_token(access_token)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
permissions = wix_service.check_blog_permissions(access_token, site_id=site_id)
|
||||
except Exception as e:
|
||||
logger.warning(f"check_blog_permissions failed (non-fatal): {e}")
|
||||
|
||||
# MODE 1: Backend exchanges code (legacy / requires correct code_verifier)
|
||||
elif request.code:
|
||||
if not request.state:
|
||||
raise HTTPException(status_code=400, detail="Missing OAuth state")
|
||||
code_verifier = request.code_verifier
|
||||
if not code_verifier:
|
||||
code_verifier = wix_oauth_service.consume_pkce_verifier(user_id=user_id, state=request.state)
|
||||
if code_verifier:
|
||||
logger.info(f"Fallback: using DB-stored code_verifier for user {user_id}")
|
||||
if not code_verifier:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Invalid or expired OAuth state. Please restart Wix connection."
|
||||
)
|
||||
# Exchange code for tokens
|
||||
logger.info(f"Wix callback mode=BACKEND_EXCHANGE for user {user_id}")
|
||||
tokens = wix_service.exchange_code_for_tokens(request.code, code_verifier=code_verifier)
|
||||
logger.info(f"Token exchange succeeded for user {user_id}")
|
||||
access_token = tokens['access_token']
|
||||
refresh_token = tokens.get('refresh_token')
|
||||
expires_in = tokens.get('expires_in')
|
||||
token_type = tokens.get('token_type', 'Bearer')
|
||||
|
||||
# Get site information to extract site_id and member_id
|
||||
site_info = wix_service.get_site_info(tokens['access_token'])
|
||||
site_id = site_info.get('siteId') or site_info.get('site_id')
|
||||
|
||||
# Extract member_id from token if possible
|
||||
member_id = None
|
||||
try:
|
||||
member_id = wix_service.extract_member_id_from_access_token(tokens['access_token'])
|
||||
site_info = wix_service.get_site_info(access_token)
|
||||
site_id = site_info.get('siteId') or site_info.get('site_id')
|
||||
if not site_id and site_info.get('_no_site'):
|
||||
meta_info = extract_meta_from_token(access_token)
|
||||
site_id = meta_info.get('metaSiteId') or site_id
|
||||
except Exception as e:
|
||||
logger.warning(f"get_site_info failed (non-fatal): {e}")
|
||||
try:
|
||||
meta_info = extract_meta_from_token(access_token)
|
||||
site_id = meta_info.get('metaSiteId') or site_id
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
member_id = wix_service.extract_member_id_from_access_token(access_token)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
permissions = wix_service.check_blog_permissions(access_token, site_id=site_id)
|
||||
except Exception as e:
|
||||
logger.warning(f"check_blog_permissions failed (non-fatal): {e}")
|
||||
else:
|
||||
raise HTTPException(status_code=400, detail="Missing code or access_token")
|
||||
|
||||
# Check permissions
|
||||
permissions = wix_service.check_blog_permissions(tokens['access_token'])
|
||||
if not access_token:
|
||||
raise HTTPException(status_code=500, detail="No access_token available")
|
||||
|
||||
# Store tokens securely in database
|
||||
stored = wix_oauth_service.store_tokens(
|
||||
user_id=user_id,
|
||||
access_token=tokens['access_token'],
|
||||
refresh_token=tokens.get('refresh_token'),
|
||||
expires_in=tokens.get('expires_in'),
|
||||
token_type=tokens.get('token_type', 'Bearer'),
|
||||
scope=tokens.get('scope'),
|
||||
access_token=access_token,
|
||||
refresh_token=refresh_token,
|
||||
expires_in=expires_in,
|
||||
token_type=token_type,
|
||||
site_id=site_id,
|
||||
member_id=member_id
|
||||
)
|
||||
@@ -260,10 +432,10 @@ async def handle_oauth_callback(request: WixAuthRequest, current_user: dict = De
|
||||
return {
|
||||
"success": True,
|
||||
"tokens": {
|
||||
"access_token": tokens['access_token'],
|
||||
"refresh_token": tokens.get('refresh_token'),
|
||||
"expires_in": tokens.get('expires_in'),
|
||||
"token_type": tokens.get('token_type', 'Bearer')
|
||||
"access_token": access_token,
|
||||
"refresh_token": refresh_token,
|
||||
"expires_in": expires_in,
|
||||
"token_type": token_type
|
||||
},
|
||||
"site_info": site_info,
|
||||
"permissions": permissions,
|
||||
@@ -288,11 +460,25 @@ async def handle_oauth_callback_get(code: str, state: Optional[str] = None, requ
|
||||
if not code_verifier:
|
||||
raise HTTPException(status_code=400, detail="Invalid or expired OAuth state. Please reconnect Wix.")
|
||||
tokens = wix_service.exchange_code_for_tokens(code, code_verifier=code_verifier)
|
||||
|
||||
# Non-fatal: get site info and permissions
|
||||
site_info = {}
|
||||
permissions = {}
|
||||
site_id = None
|
||||
try:
|
||||
site_info = wix_service.get_site_info(tokens['access_token'])
|
||||
permissions = wix_service.check_blog_permissions(tokens['access_token'])
|
||||
site_id = site_info.get('siteId') or site_info.get('site_id')
|
||||
if not site_id and site_info.get('_no_site'):
|
||||
meta_info = extract_meta_from_token(tokens['access_token'])
|
||||
site_id = meta_info.get('metaSiteId')
|
||||
except Exception as e:
|
||||
logger.warning(f"GET callback: get_site_info non-fatal: {e}")
|
||||
try:
|
||||
permissions = wix_service.check_blog_permissions(tokens['access_token'], site_id=site_id)
|
||||
except Exception as e:
|
||||
logger.warning(f"GET callback: check_blog_permissions non-fatal: {e}")
|
||||
|
||||
# Store tokens in database if we have user_id
|
||||
site_id = site_info.get('siteId') or site_info.get('site_id')
|
||||
member_id = None
|
||||
try:
|
||||
member_id = wix_service.extract_member_id_from_access_token(tokens['access_token'])
|
||||
@@ -359,17 +545,34 @@ async def get_connection_status(current_user: dict = Depends(get_current_user))
|
||||
try:
|
||||
token_info = _resolve_valid_wix_token(current_user)
|
||||
access_token = token_info["access_token"]
|
||||
site_id = token_info.get("site_id")
|
||||
|
||||
# Check site info — distinguish "no site" from "token expired"
|
||||
site_info = wix_service.get_site_info(access_token)
|
||||
permissions = wix_service.check_blog_permissions(access_token)
|
||||
if site_info.get("_auth_failed"):
|
||||
return {
|
||||
"connected": False,
|
||||
"has_permissions": False,
|
||||
"error": "Wix token expired — please reconnect",
|
||||
"reconnect_required": True
|
||||
}
|
||||
|
||||
# If get_site_info returned _no_site, try extracting metaSiteId from token
|
||||
if site_info.get("_no_site") and not site_id:
|
||||
meta_info = extract_meta_from_token(access_token)
|
||||
site_id = meta_info.get('metaSiteId')
|
||||
|
||||
permissions = wix_service.check_blog_permissions(access_token, site_id=site_id)
|
||||
return {
|
||||
"connected": True,
|
||||
"has_permissions": permissions.get("has_permissions", False),
|
||||
"site_info": site_info,
|
||||
"permissions": permissions
|
||||
"permissions": permissions,
|
||||
"site_id": site_id,
|
||||
}
|
||||
except HTTPException as e:
|
||||
if e.status_code == 401:
|
||||
return {"connected": False, "has_permissions": False, "error": "Wix account not connected"}
|
||||
return {"connected": False, "has_permissions": False, "error": "Wix account not connected", "reconnect_required": True}
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to check connection status: {e}")
|
||||
@@ -406,13 +609,21 @@ async def publish_to_wix(request: WixPublishRequest, current_user: dict = Depend
|
||||
access_token unless they want to override the stored one.
|
||||
"""
|
||||
try:
|
||||
site_id = request.site_id
|
||||
if request.access_token:
|
||||
from services.integrations.wix.utils import normalize_token_string
|
||||
access_token = normalize_token_string(request.access_token)
|
||||
logger.info(f"Wix publish: using frontend-fallback token for user {_get_current_user_id(current_user)[:8]}...")
|
||||
else:
|
||||
try:
|
||||
token_info = _resolve_valid_wix_token(current_user)
|
||||
access_token = token_info["access_token"]
|
||||
if not site_id:
|
||||
site_id = token_info.get("site_id")
|
||||
if not site_id:
|
||||
meta_info = extract_meta_from_token(access_token)
|
||||
site_id = meta_info.get('metaSiteId')
|
||||
logger.info(f"Wix publish: using backend DB token for user {_get_current_user_id(current_user)[:8]}...")
|
||||
except HTTPException:
|
||||
access_token = None
|
||||
|
||||
@@ -422,19 +633,41 @@ async def publish_to_wix(request: WixPublishRequest, current_user: dict = Depend
|
||||
"error": "Wix account not connected. Connect your Wix account first.",
|
||||
}
|
||||
|
||||
if not request.content or not request.content.strip():
|
||||
return {
|
||||
"success": False,
|
||||
"error": "Content cannot be empty. Please write your blog post before publishing.",
|
||||
}
|
||||
|
||||
content_length = len(request.content.strip())
|
||||
if content_length > 50000:
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"Content is {content_length // 1000}K characters — maximum is 50K. Please shorten your content.",
|
||||
}
|
||||
|
||||
content_warning = None
|
||||
if content_length > 30000:
|
||||
content_warning = f"Content is {content_length // 1000}K characters. Very long posts may take longer to publish on Wix."
|
||||
logger.warning(f"Wix publish: large content ({content_length} chars) for user {_get_current_user_id(current_user)[:8]}...")
|
||||
|
||||
member_id = request.member_id
|
||||
if not member_id:
|
||||
member_id = wix_service.extract_member_id_from_access_token(access_token)
|
||||
if not member_id:
|
||||
try:
|
||||
member_info = wix_service.get_current_member(access_token)
|
||||
if member_info and isinstance(member_info, dict):
|
||||
member_id = (member_info.get("member") or {}).get("id") or member_info.get("id")
|
||||
except Exception as e:
|
||||
logger.warning(f"Wix: could not resolve member ID from token: {e}")
|
||||
if not member_id:
|
||||
return {
|
||||
"success": False,
|
||||
"error": "Unable to resolve Wix member ID. Please reconnect your Wix account.",
|
||||
}
|
||||
|
||||
# Resolve categories: accept IDs or names (looked up/created)
|
||||
# Resolve categories/tags: precedence is top-level params > seo_metadata fallback
|
||||
category_ids = request.category_ids or request.category_names
|
||||
tag_ids = request.tag_ids or request.tag_names
|
||||
|
||||
@@ -445,6 +678,9 @@ async def publish_to_wix(request: WixPublishRequest, current_user: dict = Depend
|
||||
if not tag_ids and seo_metadata.get("blog_tags"):
|
||||
tag_ids = seo_metadata.get("blog_tags")
|
||||
|
||||
if seo_metadata.get("url_slug"):
|
||||
logger.info(f"Wix publish: using SEO url_slug for post slug: {seo_metadata.get('url_slug')[:50]}")
|
||||
|
||||
# Ensure category_ids and tag_ids are lists of strings (not ints)
|
||||
if category_ids:
|
||||
category_ids = [str(c) for c in category_ids if c is not None]
|
||||
@@ -461,6 +697,7 @@ async def publish_to_wix(request: WixPublishRequest, current_user: dict = Depend
|
||||
publish=request.publish,
|
||||
member_id=member_id,
|
||||
seo_metadata=seo_metadata,
|
||||
site_id=site_id,
|
||||
)
|
||||
post = result.get("draftPost") or result.get("post") or result
|
||||
raw_url = post.get("url")
|
||||
@@ -470,11 +707,14 @@ async def publish_to_wix(request: WixPublishRequest, current_user: dict = Depend
|
||||
post_url = raw_url
|
||||
else:
|
||||
post_url = None
|
||||
publish_warnings = result.get("_warnings", [])
|
||||
all_warnings = [w for w in [content_warning] + publish_warnings if w]
|
||||
return {
|
||||
"success": True,
|
||||
"post_id": str(post.get("id", "")),
|
||||
"url": post_url,
|
||||
"publish_state": "PUBLISHED" if request.publish else "DRAFT"
|
||||
"publish_state": "PUBLISHED" if request.publish else "DRAFT",
|
||||
**({"warning": " | ".join(all_warnings)} if all_warnings else {}),
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to publish to Wix: {e}")
|
||||
@@ -758,11 +998,13 @@ async def test_publish_real(payload: Dict[str, Any], _: Dict[str, Any] = Depends
|
||||
seo_metadata=seo_metadata,
|
||||
)
|
||||
|
||||
publish_warnings = result.get("_warnings", [])
|
||||
return {
|
||||
"success": True,
|
||||
"post_id": (result.get("draftPost") or result.get("post") or {}).get("id"),
|
||||
"url": (result.get("draftPost") or result.get("post") or {}).get("url"),
|
||||
"message": "Blog post published to Wix",
|
||||
**({"warning": " | ".join(publish_warnings)} if publish_warnings else {}),
|
||||
}
|
||||
except HTTPException:
|
||||
raise
|
||||
|
||||
@@ -167,10 +167,10 @@ class SceneVideoRenderResponse(BaseModel):
|
||||
|
||||
class CombineVideosRequest(BaseModel):
|
||||
"""Request model for combining multiple scene videos."""
|
||||
video_urls: List[str] = Field(..., description="List of scene video URLs to combine in order")
|
||||
scene_video_urls: List[str] = Field(..., description="List of scene video URLs to combine in order")
|
||||
video_plan: Optional[Dict[str, Any]] = Field(None, description="Original video plan (for metadata)")
|
||||
resolution: str = Field("720p", pattern="^(480p|720p|1080p)$", description="Target resolution for output")
|
||||
title: Optional[str] = Field(None, description="Optional title for the final video")
|
||||
title: Optional[str] = Field(None, description="Optional title for the combined video")
|
||||
|
||||
|
||||
class CombineVideosResponse(BaseModel):
|
||||
@@ -187,13 +187,6 @@ class VideoListResponse(BaseModel):
|
||||
message: str = "Videos fetched successfully"
|
||||
|
||||
|
||||
class CombineVideosRequest(BaseModel):
|
||||
"""Request model for combining multiple scene videos."""
|
||||
scene_video_urls: List[str] = Field(..., description="List of scene video URLs to combine")
|
||||
resolution: str = Field("720p", pattern="^(480p|720p|1080p)$", description="Output video resolution")
|
||||
title: Optional[str] = Field(None, description="Optional title for the combined video")
|
||||
|
||||
|
||||
class VideoRenderResponse(BaseModel):
|
||||
"""Response model for video rendering."""
|
||||
success: bool
|
||||
@@ -721,85 +714,6 @@ async def get_render_status(
|
||||
)
|
||||
|
||||
|
||||
@router.post("/render/combine", response_model=VideoRenderResponse)
|
||||
async def combine_videos(
|
||||
request: CombineVideosRequest,
|
||||
background_tasks: BackgroundTasks,
|
||||
current_user: Dict[str, Any] = Depends(get_current_user),
|
||||
db: Session = Depends(get_db),
|
||||
) -> VideoRenderResponse:
|
||||
"""
|
||||
Combine multiple scene videos into a final video.
|
||||
Returns task_id for polling.
|
||||
"""
|
||||
try:
|
||||
user_id = require_authenticated_user(current_user)
|
||||
|
||||
# Subscription validation
|
||||
pricing_service = PricingService(db)
|
||||
validate_scene_animation_operation(
|
||||
pricing_service=pricing_service,
|
||||
user_id=user_id
|
||||
)
|
||||
|
||||
if not request.scene_video_urls or len(request.scene_video_urls) < 2:
|
||||
return VideoRenderResponse(
|
||||
success=False,
|
||||
message="At least two scene videos are required to combine."
|
||||
)
|
||||
|
||||
task_id = task_manager.create_task("youtube_combine_video")
|
||||
logger.info(
|
||||
f"[YouTubeAPI] Created combine task {task_id} for user {user_id}, videos={len(request.scene_video_urls)}, resolution={request.resolution}"
|
||||
)
|
||||
|
||||
initial_status = task_manager.get_task_status(task_id)
|
||||
if not initial_status:
|
||||
logger.error(f"[YouTubeAPI] Failed to create combine task {task_id} - task not found immediately after creation")
|
||||
return VideoRenderResponse(
|
||||
success=False,
|
||||
message="Failed to create combine task. Please try again."
|
||||
)
|
||||
|
||||
try:
|
||||
background_tasks.add_task(
|
||||
_execute_combine_video_task,
|
||||
task_id=task_id,
|
||||
scene_video_urls=request.scene_video_urls,
|
||||
user_id=user_id,
|
||||
resolution=request.resolution,
|
||||
title=request.title,
|
||||
)
|
||||
logger.info(f"[YouTubeAPI] Background combine task added for {task_id}")
|
||||
except Exception as bg_error:
|
||||
logger.error(f"[YouTubeAPI] Failed to add combine background task for {task_id}: {bg_error}", exc_info=True)
|
||||
task_manager.update_task_status(
|
||||
task_id,
|
||||
"failed",
|
||||
error=str(bg_error),
|
||||
message="Failed to start combine task"
|
||||
)
|
||||
return VideoRenderResponse(
|
||||
success=False,
|
||||
message=f"Failed to start combine task: {str(bg_error)}"
|
||||
)
|
||||
|
||||
return VideoRenderResponse(
|
||||
success=True,
|
||||
task_id=task_id,
|
||||
message="Video combination started."
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"[YouTubeAPI] Error starting combine: {e}", exc_info=True)
|
||||
return VideoRenderResponse(
|
||||
success=False,
|
||||
message=f"Failed to start combine: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
def _execute_video_render_task(
|
||||
task_id: str,
|
||||
scenes: List[Dict[str, Any]],
|
||||
@@ -1270,20 +1184,21 @@ async def combine_scene_videos(
|
||||
user_id=user_id
|
||||
)
|
||||
|
||||
if not request.video_urls or len(request.video_urls) < 2:
|
||||
if not request.scene_video_urls or len(request.scene_video_urls) < 2:
|
||||
return CombineVideosResponse(
|
||||
success=False,
|
||||
task_id=None,
|
||||
message="At least two videos are required to combine."
|
||||
message="At least two scene videos are required to combine."
|
||||
)
|
||||
|
||||
# Pre-validate that referenced video files exist and are within youtube_videos dir
|
||||
user_workspace = UserWorkspaceManager(db)
|
||||
workspace_info = user_workspace.get_user_workspace(user_id)
|
||||
youtube_video_dir = Path(workspace_info['workspace_path']) / "content" / "videos" if workspace_info and workspace_info.get('workspace_path') else YOUTUBE_VIDEO_DIR
|
||||
base_dir = Path(__file__).parent.parent.parent.parent
|
||||
youtube_video_dir = base_dir / "youtube_videos"
|
||||
legacy_video_dir = base_dir / "youtube_videos"
|
||||
missing_files = []
|
||||
for url in request.video_urls:
|
||||
filename = Path(url).name # strips query params if present
|
||||
video_path = youtube_video_dir / filename
|
||||
for url in request.scene_video_urls:
|
||||
filename = Path(url).name
|
||||
# prevent directory traversal
|
||||
if ".." in filename or "/" in filename or "\\" in filename:
|
||||
return CombineVideosResponse(
|
||||
@@ -1291,7 +1206,12 @@ async def combine_scene_videos(
|
||||
task_id=None,
|
||||
message=f"Invalid video filename: {filename}"
|
||||
)
|
||||
video_path = youtube_video_dir / filename
|
||||
if not video_path.exists():
|
||||
legacy_path = legacy_video_dir / filename
|
||||
if legacy_path.exists():
|
||||
video_path = legacy_path
|
||||
else:
|
||||
missing_files.append(filename)
|
||||
if missing_files:
|
||||
return CombineVideosResponse(
|
||||
@@ -1303,7 +1223,7 @@ async def combine_scene_videos(
|
||||
# Create task
|
||||
task_id = task_manager.create_task("youtube_video_combine")
|
||||
logger.info(
|
||||
f"[YouTubeAPI] Created combine task {task_id} for user {user_id}, videos={len(request.video_urls)}, resolution={request.resolution}"
|
||||
f"[YouTubeAPI] Created combine task {task_id} for user {user_id}, videos={len(request.scene_video_urls)}, resolution={request.resolution}"
|
||||
)
|
||||
|
||||
initial_status = task_manager.get_task_status(task_id)
|
||||
@@ -1320,7 +1240,7 @@ async def combine_scene_videos(
|
||||
background_tasks.add_task(
|
||||
_execute_combine_video_task,
|
||||
task_id=task_id,
|
||||
scene_video_urls=request.video_urls,
|
||||
scene_video_urls=request.scene_video_urls,
|
||||
user_id=user_id,
|
||||
resolution=request.resolution,
|
||||
title=request.title,
|
||||
@@ -1343,7 +1263,7 @@ async def combine_scene_videos(
|
||||
return CombineVideosResponse(
|
||||
success=True,
|
||||
task_id=task_id,
|
||||
message=f"Combining {len(request.video_urls)} videos...",
|
||||
message=f"Combining {len(request.scene_video_urls)} videos...",
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
"""
|
||||
Task Manager for YouTube Creator Studio
|
||||
|
||||
Reuses the Story Writer task manager pattern for async video rendering.
|
||||
Delegates to the hybrid DB-backed + in-memory YouTubeTaskManager.
|
||||
Maintains backward compatibility with the Story Writer TaskManager API.
|
||||
"""
|
||||
|
||||
from api.story_writer.task_manager import TaskManager
|
||||
|
||||
# Shared task manager instance
|
||||
task_manager = TaskManager()
|
||||
from services.youtube.youtube_task_manager import task_manager
|
||||
|
||||
__all__ = ["task_manager"]
|
||||
@@ -126,19 +126,14 @@ seo_tools_router = None
|
||||
if _is_full_mode():
|
||||
from routers.seo_tools import router as seo_tools_router
|
||||
|
||||
# Skip Facebook Writer, LinkedIn, and other non-essential routes in feature-only modes
|
||||
# Also skip other heavy services that trigger PersonaAnalysisService initialization
|
||||
# Skip heavy services in feature-only modes (PersonaAnalysisService, etc.)
|
||||
if _is_full_mode():
|
||||
from api.facebook_writer.routers import facebook_router
|
||||
from routers.linkedin import router as linkedin_router
|
||||
from api.linkedin_image_generation import router as linkedin_image_router
|
||||
from api.brainstorm import router as brainstorm_router
|
||||
from api.images import router as images_router
|
||||
from api.assets_serving import router as assets_serving_router
|
||||
from routers.image_studio import router as image_studio_router
|
||||
from routers.product_marketing import router as product_marketing_router
|
||||
from routers.campaign_creator import router as campaign_creator_router
|
||||
from routers.backlink_outreach import router as backlink_outreach_router
|
||||
else:
|
||||
# In feature-only modes, only load essential assets router
|
||||
from api.assets_serving import router as assets_serving_router
|
||||
@@ -147,7 +142,6 @@ else:
|
||||
image_studio_router = None
|
||||
product_marketing_router = None
|
||||
campaign_creator_router = None
|
||||
backlink_outreach_router = None
|
||||
|
||||
# Import hallucination detector router
|
||||
try:
|
||||
@@ -683,8 +677,6 @@ if _is_full_mode():
|
||||
app.include_router(product_marketing_router)
|
||||
if campaign_creator_router:
|
||||
app.include_router(campaign_creator_router)
|
||||
if backlink_outreach_router:
|
||||
app.include_router(backlink_outreach_router)
|
||||
|
||||
router_group_status["platform_extensions"] = {
|
||||
"mounted": True,
|
||||
@@ -799,12 +791,31 @@ async def startup_event():
|
||||
else:
|
||||
logger.info(f"[FEATURE-MODE] Skipping scheduler startup (features: {enabled_features})")
|
||||
|
||||
# Check Wix API key configuration
|
||||
# Recover stale YouTube tasks on startup
|
||||
if _is_feature_enabled("youtube"):
|
||||
try:
|
||||
from api.youtube.task_manager import task_manager
|
||||
from services.database import get_all_user_ids
|
||||
user_ids = get_all_user_ids()
|
||||
recovered = 0
|
||||
for uid in user_ids:
|
||||
try:
|
||||
count = task_manager.recover_stale_tasks(uid)
|
||||
recovered += count
|
||||
except Exception:
|
||||
pass
|
||||
if recovered > 0:
|
||||
logger.info(f"[STARTUP] Recovered {recovered} stale YouTube tasks across {len(user_ids)} users")
|
||||
except Exception as e:
|
||||
logger.warning(f"[STARTUP] YouTube task recovery skipped: {e}")
|
||||
|
||||
# Check Wix configuration (OAuth-based, API key optional)
|
||||
wix_api_key = os.getenv('WIX_API_KEY')
|
||||
if wix_api_key:
|
||||
logger.warning(f"✅ WIX_API_KEY loaded ({len(wix_api_key)} chars, starts with '{wix_api_key[:10]}...')")
|
||||
else:
|
||||
logger.warning("⚠️ WIX_API_KEY not found in environment - Wix publishing may fail")
|
||||
logger.info(f"WIX_API_KEY loaded ({len(wix_api_key)} chars)")
|
||||
wix_client_id = os.getenv('WIX_CLIENT_ID')
|
||||
if not wix_client_id:
|
||||
logger.warning("⚠️ WIX_CLIENT_ID not found in environment - Wix OAuth connection will fail")
|
||||
|
||||
elapsed = time.time() - startup_start
|
||||
logger.info(f"ALwrity backend started successfully in {elapsed:.1f}s")
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 525 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 200 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 307 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 377 KiB |
@@ -13,7 +13,7 @@ builtins.Union = typing.Union
|
||||
from models.onboarding import APIKey, WebsiteAnalysis, ResearchPreferences, PersonaData, CompetitorAnalysis
|
||||
|
||||
|
||||
from fastapi import FastAPI, HTTPException, Depends, Request, BackgroundTasks
|
||||
from fastapi import FastAPI, HTTPException, Depends, Request, BackgroundTasks, Query
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.responses import FileResponse
|
||||
@@ -137,6 +137,11 @@ from api.seo_dashboard import (
|
||||
get_sif_indexing_health,
|
||||
get_guardian_audit,
|
||||
get_keyword_gaps,
|
||||
get_serp_gaps,
|
||||
get_competitor_content,
|
||||
get_content_gap_radar,
|
||||
generate_content_from_gap,
|
||||
GenerateContentRequest,
|
||||
)
|
||||
|
||||
# Initialize FastAPI app
|
||||
@@ -391,6 +396,64 @@ async def keyword_gaps_endpoint(
|
||||
return await get_keyword_gaps(current_user, site_url)
|
||||
|
||||
|
||||
@app.get("/api/seo-dashboard/serp-gaps")
|
||||
async def serp_gaps_endpoint(
|
||||
current_user: dict = Depends(get_current_user),
|
||||
topics: Optional[List[str]] = None,
|
||||
):
|
||||
"""
|
||||
Get SERP gap analysis — detect which competitors rank for given topics.
|
||||
|
||||
Uses Google Custom Search `site:` queries per competitor domain to detect
|
||||
ranking presence. If no topics are provided, derives them from the user's
|
||||
latest SIF semantic gap analysis (up to 12 topics).
|
||||
"""
|
||||
return await get_serp_gaps(current_user, topics)
|
||||
|
||||
|
||||
@app.get("/api/seo-dashboard/competitor-content")
|
||||
async def competitor_content_endpoint(
|
||||
current_user: dict = Depends(get_current_user),
|
||||
topics: Optional[List[str]] = None,
|
||||
):
|
||||
"""
|
||||
Get competitor content deep-dive for gap topics using Exa.
|
||||
|
||||
Scopes Exa neural search to known competitor domains and returns
|
||||
full text, highlights, and summaries for competitive analysis.
|
||||
If no topics provided, derives up to 6 from the latest SIF semantic gaps.
|
||||
"""
|
||||
return await get_competitor_content(current_user, topics)
|
||||
|
||||
|
||||
@app.get("/api/seo-dashboard/content-gap-radar")
|
||||
async def content_gap_radar_endpoint(
|
||||
current_user: dict = Depends(get_current_user),
|
||||
bypass_cache: bool = Query(False, description="Bypass 24h cache"),
|
||||
):
|
||||
"""
|
||||
Run the Content Gap Radar pipeline — full Phase 3 agent.
|
||||
|
||||
Orchestrates SIF semantic gap analysis, SERP ranking presence (Google CSE),
|
||||
competitor content deep-dive (Exa), and trend momentum scoring into a single
|
||||
ROI-ranked list of content opportunities.
|
||||
"""
|
||||
return await get_content_gap_radar(current_user, bypass_cache=bypass_cache)
|
||||
|
||||
|
||||
@app.post("/api/seo-dashboard/content-gap-radar/generate-content")
|
||||
async def generate_content_from_gap_endpoint(
|
||||
request: GenerateContentRequest,
|
||||
current_user: dict = Depends(get_current_user),
|
||||
):
|
||||
"""
|
||||
Generate a content brief from a content gap radar item and save it
|
||||
as a blog ContentAsset. Navigate to /blog-writer with the returned
|
||||
asset_id to resume in the full Blog Writer workflow.
|
||||
"""
|
||||
return await generate_content_from_gap(request, current_user)
|
||||
|
||||
|
||||
# Comprehensive SEO Analysis endpoints
|
||||
@app.post("/api/seo-dashboard/analyze-comprehensive")
|
||||
async def analyze_seo_comprehensive_endpoint(request: SEOAnalysisRequest):
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
"""Authentication middleware for ALwrity backend."""
|
||||
|
||||
import os
|
||||
import base64
|
||||
import inspect
|
||||
from typing import Optional, Dict, Any
|
||||
from fastapi import HTTPException, Depends, status, Request, Query
|
||||
@@ -50,6 +51,7 @@ class ClerkAuthMiddleware:
|
||||
# Cache for PyJWKClient to avoid repeated JWKS fetches
|
||||
self._jwks_client_cache = {}
|
||||
self._jwks_url_cache = None
|
||||
self._issuer_cache = None # Pre-configured Clerk issuer for iss validation
|
||||
|
||||
if not self.clerk_secret_key and not self.disable_auth:
|
||||
logger.warning("CLERK_SECRET_KEY not found, authentication may fail")
|
||||
@@ -58,14 +60,26 @@ class ClerkAuthMiddleware:
|
||||
if CLERK_AUTH_AVAILABLE and not self.disable_auth:
|
||||
try:
|
||||
if self.clerk_secret_key and self.clerk_publishable_key:
|
||||
# Extract instance from publishable key for JWKS URL
|
||||
# Extract instance from publishable key for JWKS URL and issuer validation
|
||||
# Format: pk_test_<instance>.<domain> or pk_live_<instance>.<domain>
|
||||
# Production keys may have base64-encoded instance IDs
|
||||
parts = self.clerk_publishable_key.replace('pk_test_', '').replace('pk_live_', '').split('.')
|
||||
if len(parts) >= 1:
|
||||
# Extract the domain from publishable key or use default
|
||||
# Clerk URLs are typically: https://<instance>.clerk.accounts.dev
|
||||
instance = parts[0]
|
||||
jwks_url = f"https://{instance}.clerk.accounts.dev/.well-known/jwks.json"
|
||||
# Attempt base64 decode (production Clerk keys encode the instance)
|
||||
raw_instance = parts[0]
|
||||
try:
|
||||
padded = raw_instance + '=' * (4 - len(raw_instance) % 4) if len(raw_instance) % 4 else raw_instance
|
||||
decoded_bytes = base64.b64decode(padded)
|
||||
instance = decoded_bytes.decode('utf-8').rstrip('\x00 $\n\r\t')
|
||||
except Exception:
|
||||
instance = raw_instance
|
||||
|
||||
# If decoded value contains a dot, it's already a full domain path
|
||||
if '.' in instance:
|
||||
issuer_url = f"https://{instance}"
|
||||
else:
|
||||
issuer_url = f"https://{instance}.clerk.accounts.dev"
|
||||
jwks_url = f"{issuer_url}/.well-known/jwks.json"
|
||||
|
||||
# Create Clerk configuration with JWKS URL
|
||||
clerk_config = ClerkConfig(
|
||||
@@ -76,6 +90,7 @@ class ClerkAuthMiddleware:
|
||||
self.clerk_bearer = ClerkHTTPBearer(clerk_config)
|
||||
logger.info(f"fastapi-clerk-auth initialized successfully with JWKS URL: {jwks_url}")
|
||||
self._jwks_url_cache = jwks_url
|
||||
self._issuer_cache = issuer_url # Pin issuer for VULN-001 fix
|
||||
else:
|
||||
logger.warning("Could not extract instance from publishable key")
|
||||
self.clerk_bearer = None
|
||||
@@ -118,19 +133,29 @@ class ClerkAuthMiddleware:
|
||||
import jwt
|
||||
from jwt import PyJWKClient
|
||||
|
||||
# Get the JWKS URL from the token header
|
||||
# Get the unverified header for key ID lookup
|
||||
unverified_header = jwt.get_unverified_header(token)
|
||||
|
||||
# Decode token to get issuer for JWKS URL
|
||||
# --- SECURITY FIX (VULN-001): Validate issuer before any JWKS fetch ---
|
||||
# Pre-configured issuer and JWKS URL derived from CLERK_PUBLISHABLE_KEY
|
||||
# NEVER use the token's 'iss' claim to construct the JWKS URL (GHSA-426f-p74m-73fv)
|
||||
expected_issuer = self._issuer_cache
|
||||
jwks_url = self._jwks_url_cache
|
||||
if not expected_issuer or not jwks_url:
|
||||
raise Exception("Clerk issuer/JWKS URL not configured at startup")
|
||||
|
||||
# Decode token to validate the issuer claim against the pre-configured value
|
||||
# WARNING: We must first validate 'iss' before trusting anything else
|
||||
unverified_claims = jwt.decode(token, options={"verify_signature": False})
|
||||
issuer = unverified_claims.get('iss', '')
|
||||
token_issuer = unverified_claims.get('iss', '')
|
||||
if token_issuer != expected_issuer:
|
||||
logger.error(
|
||||
f"Issuer mismatch: token claims '{token_issuer}' "
|
||||
f"but expected '{expected_issuer}'"
|
||||
)
|
||||
return None
|
||||
|
||||
# Construct JWKS URL from issuer
|
||||
jwks_url = f"{issuer}/.well-known/jwks.json" if issuer else self._jwks_url_cache or ""
|
||||
if not jwks_url:
|
||||
raise Exception("Unable to resolve JWKS URL for Clerk verification")
|
||||
|
||||
# Use cached PyJWKClient to avoid repeated JWKS fetches
|
||||
# Use cached PyJWKClient with pinned jwks_url (never derived from token)
|
||||
if jwks_url not in self._jwks_client_cache:
|
||||
logger.info(f"Creating new PyJWKClient for {jwks_url} with caching enabled")
|
||||
# Create client with caching enabled (cache_keys=True keeps keys in memory)
|
||||
@@ -145,11 +170,13 @@ class ClerkAuthMiddleware:
|
||||
|
||||
# Verify and decode the token with clock skew tolerance
|
||||
# Add 300 seconds (5 minutes) leeway to handle clock skew and token refresh delays
|
||||
# SECURITY: Always pass issuer= to verify the token's 'iss' matches expected (VULN-001)
|
||||
decoded_token = jwt.decode(
|
||||
token,
|
||||
signing_key.key,
|
||||
algorithms=["RS256"],
|
||||
options={"verify_signature": True, "verify_exp": True},
|
||||
issuer=expected_issuer,
|
||||
options={"verify_signature": True, "verify_exp": True, "verify_iss": True},
|
||||
leeway=300 # Allow 5 minutes leeway for token refresh during navigation
|
||||
)
|
||||
|
||||
@@ -273,7 +300,7 @@ async def get_current_user(
|
||||
user_agent = request.headers.get('user-agent', 'unknown')
|
||||
|
||||
if hasattr(request.headers, 'items'):
|
||||
all_headers = {k: v[:50] if len(v) > 50 else v for k, v in request.headers.items()}
|
||||
all_headers = {k: (v[:50] if len(v) > 50 else v) for k, v in request.headers.items() if k.lower() != 'authorization'}
|
||||
except:
|
||||
pass
|
||||
|
||||
@@ -285,7 +312,6 @@ async def get_current_user(
|
||||
f"🔒 AUTHENTICATION ERROR: No credentials provided for authenticated endpoint: {endpoint_path} "
|
||||
f"(client_ip={request.client.host if request.client else 'unknown'}, "
|
||||
f"auth_header_received={'YES' if auth_header else 'NO'}, "
|
||||
f"auth_header_value={auth_header[:50] + '...' if auth_header and len(auth_header) > 50 else (auth_header or 'None')}, "
|
||||
f"all_headers={list(all_headers.keys())}, "
|
||||
f"user_agent={user_agent})"
|
||||
)
|
||||
|
||||
@@ -46,6 +46,7 @@ class OutreachAttempt(Base):
|
||||
decision_reason = Column(Text, nullable=True)
|
||||
sent_at = Column(DateTime, nullable=True)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, index=True)
|
||||
message_id = Column(String(255), nullable=True, index=True)
|
||||
|
||||
|
||||
class OutreachReply(Base):
|
||||
|
||||
@@ -220,6 +220,8 @@ class BlogSectionRequest(BaseModel):
|
||||
tone: Optional[str] = None
|
||||
persona: Optional[PersonaInfo] = None
|
||||
mode: Optional[str] = "polished" # 'draft' | 'polished'
|
||||
research: Optional[BlogResearchResponse] = None
|
||||
competitive_advantage: Optional[str] = None
|
||||
|
||||
|
||||
class BlogSectionResponse(BaseModel):
|
||||
|
||||
@@ -36,6 +36,7 @@ class SearchEngine(str, Enum):
|
||||
METAPHOR = "metaphor"
|
||||
GOOGLE = "google"
|
||||
TAVILY = "tavily"
|
||||
EXA = "exa"
|
||||
|
||||
|
||||
class GroundingLevel(str, Enum):
|
||||
@@ -57,7 +58,7 @@ class LinkedInPostRequest(BaseModel):
|
||||
include_hashtags: bool = Field(default=True, description="Whether to include hashtags")
|
||||
include_call_to_action: bool = Field(default=True, description="Whether to include call to action")
|
||||
research_enabled: bool = Field(default=True, description="Whether to include research-backed content")
|
||||
search_engine: SearchEngine = Field(default=SearchEngine.GOOGLE, description="Search engine for research")
|
||||
search_engine: SearchEngine = Field(default=SearchEngine.EXA, description="Search engine for research")
|
||||
max_length: int = Field(default=3000, description="Maximum character count", ge=100, le=3000)
|
||||
grounding_level: GroundingLevel = Field(default=GroundingLevel.ENHANCED, description="Level of content grounding")
|
||||
include_citations: bool = Field(default=True, description="Whether to include inline citations")
|
||||
@@ -94,7 +95,7 @@ class LinkedInArticleRequest(BaseModel):
|
||||
include_images: bool = Field(default=True, description="Whether to generate image suggestions")
|
||||
seo_optimization: bool = Field(default=True, description="Whether to include SEO optimization")
|
||||
research_enabled: bool = Field(default=True, description="Whether to include research-backed content")
|
||||
search_engine: SearchEngine = Field(default=SearchEngine.GOOGLE, description="Search engine for research")
|
||||
search_engine: SearchEngine = Field(default=SearchEngine.EXA, description="Search engine for research")
|
||||
word_count: int = Field(default=1500, description="Target word count", ge=500, le=5000)
|
||||
grounding_level: GroundingLevel = Field(default=GroundingLevel.ENHANCED, description="Level of content grounding")
|
||||
include_citations: bool = Field(default=True, description="Whether to include inline citations")
|
||||
@@ -129,9 +130,11 @@ class LinkedInCarouselRequest(BaseModel):
|
||||
number_of_slides: int = Field(default=5, description="Number of slides", ge=3, le=10)
|
||||
include_cover_slide: bool = Field(default=True, description="Whether to include a cover slide")
|
||||
include_cta_slide: bool = Field(default=True, description="Whether to include a call-to-action slide")
|
||||
key_points: Optional[List[str]] = Field(None, description="Specific key points to cover", max_items=10)
|
||||
research_enabled: bool = Field(default=True, description="Whether to include research-backed content")
|
||||
search_engine: SearchEngine = Field(default=SearchEngine.GOOGLE, description="Search engine for research")
|
||||
search_engine: SearchEngine = Field(default=SearchEngine.EXA, description="Search engine for research")
|
||||
grounding_level: GroundingLevel = Field(default=GroundingLevel.ENHANCED, description="Level of content grounding")
|
||||
color_scheme: str = Field(default="professional", description="Color scheme for PDF rendering: professional, creative, industry, dark, minimal")
|
||||
include_citations: bool = Field(default=True, description="Whether to include inline citations")
|
||||
|
||||
class Config:
|
||||
@@ -144,9 +147,11 @@ class LinkedInCarouselRequest(BaseModel):
|
||||
"number_of_slides": 6,
|
||||
"include_cover_slide": True,
|
||||
"include_cta_slide": True,
|
||||
"key_points": ["Remote collaboration tools", "Work-life balance", "Productivity metrics"],
|
||||
"research_enabled": True,
|
||||
"search_engine": "google",
|
||||
"grounding_level": "enhanced",
|
||||
"color_scheme": "professional",
|
||||
"include_citations": True
|
||||
}
|
||||
}
|
||||
@@ -161,8 +166,9 @@ class LinkedInVideoScriptRequest(BaseModel):
|
||||
video_duration: int = Field(default=60, description="Target video duration in seconds", ge=30, le=300)
|
||||
include_captions: bool = Field(default=True, description="Whether to include captions")
|
||||
include_thumbnail_suggestions: bool = Field(default=True, description="Whether to include thumbnail suggestions")
|
||||
key_points: Optional[List[str]] = Field(None, description="Specific key points to cover in the video", max_items=10)
|
||||
research_enabled: bool = Field(default=True, description="Whether to include research-backed content")
|
||||
search_engine: SearchEngine = Field(default=SearchEngine.GOOGLE, description="Search engine for research")
|
||||
search_engine: SearchEngine = Field(default=SearchEngine.EXA, description="Search engine for research")
|
||||
grounding_level: GroundingLevel = Field(default=GroundingLevel.ENHANCED, description="Level of content grounding")
|
||||
include_citations: bool = Field(default=True, description="Whether to include inline citations")
|
||||
|
||||
@@ -176,6 +182,7 @@ class LinkedInVideoScriptRequest(BaseModel):
|
||||
"video_duration": 90,
|
||||
"include_captions": True,
|
||||
"include_thumbnail_suggestions": True,
|
||||
"key_points": ["Zero trust architecture", "Phishing prevention", "Incident response"],
|
||||
"research_enabled": True,
|
||||
"search_engine": "google",
|
||||
"grounding_level": "enhanced",
|
||||
@@ -193,7 +200,7 @@ class LinkedInCommentResponseRequest(BaseModel):
|
||||
response_length: str = Field(default="medium", description="Length of response: short, medium, long")
|
||||
include_questions: bool = Field(default=True, description="Whether to include engaging questions")
|
||||
research_enabled: bool = Field(default=False, description="Whether to include research-backed content")
|
||||
search_engine: SearchEngine = Field(default=SearchEngine.GOOGLE, description="Search engine for research")
|
||||
search_engine: SearchEngine = Field(default=SearchEngine.EXA, description="Search engine for research")
|
||||
grounding_level: GroundingLevel = Field(default=GroundingLevel.BASIC, description="Level of content grounding")
|
||||
|
||||
class Config:
|
||||
@@ -452,3 +459,23 @@ class LinkedInCommentResponseResult(BaseModel):
|
||||
generation_metadata: Dict[str, Any] = {}
|
||||
error: Optional[str] = None
|
||||
grounding_status: Optional[Dict[str, Any]] = Field(None, description="Grounding operation status")
|
||||
|
||||
|
||||
class LinkedInEditContentRequest(BaseModel):
    """Request model for AI-powered LinkedIn content editing."""

    # Required: the text to be edited (must be non-empty).
    content: str = Field(
        ...,
        description="Content to edit",
        min_length=1,
    )
    # Required: which edit operation to perform (free-form string here;
    # the accepted values are listed in the description only).
    edit_type: str = Field(
        ...,
        description="Type of edit: professionalize, optimize_engagement, add_hashtags, adjust_tone, expand, condense, add_cta",
    )
    # Optional context that shapes the edit.
    industry: Optional[str] = Field(
        None,
        description="Industry context for the edit",
    )
    tone: Optional[str] = Field(
        None,
        description="Target tone: professional, conversational, authoritative, educational, friendly",
    )
    target_audience: Optional[str] = Field(
        None,
        description="Target audience for the content",
    )
    parameters: Optional[Dict[str, Any]] = Field(
        None,
        description="Additional parameters specific to edit type",
    )
|
||||
|
||||
|
||||
class LinkedInEditContentResponse(BaseModel):
    """Response model for AI-powered LinkedIn content editing."""

    # Outcome flag; defaults to True.
    success: bool = True
    # Edited text, when available.
    content: Optional[str] = None
    # Echo of the edit operation (required, no default).
    edit_type: str
    # Optional generation metadata.
    provider: Optional[str] = None
    model: Optional[str] = None
    # Error description, when one is reported.
    error: Optional[str] = None
|
||||
@@ -318,7 +318,7 @@ class SIFIndexingTask(Base):
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
|
||||
user_id = Column(String(255), nullable=False, index=True)
|
||||
website_url = Column(String(500), nullable=False, index=True)
|
||||
website_url = Column(String(500), nullable=True, index=True)
|
||||
|
||||
status = Column(String(50), default='active', index=True)
|
||||
|
||||
@@ -331,7 +331,7 @@ class SIFIndexingTask(Base):
|
||||
failure_pattern = Column(JSON, nullable=True)
|
||||
|
||||
next_execution = Column(DateTime, nullable=True, index=True)
|
||||
frequency_hours = Column(Integer, default=48) # Default 48 hours
|
||||
frequency_hours = Column(Integer, default=48)
|
||||
|
||||
payload = Column(JSON, nullable=True)
|
||||
|
||||
@@ -346,6 +346,7 @@ class SIFIndexingTask(Base):
|
||||
|
||||
__table_args__ = (
|
||||
Index('idx_sif_indexing_tasks_user_site', 'user_id', 'website_url'),
|
||||
Index('idx_sif_indexing_tasks_user_only', 'user_id'),
|
||||
Index('idx_sif_indexing_tasks_next_execution', 'next_execution'),
|
||||
Index('idx_sif_indexing_tasks_status', 'status'),
|
||||
)
|
||||
@@ -387,7 +388,7 @@ class MarketTrendsTask(Base):
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
|
||||
user_id = Column(String(255), nullable=False, index=True)
|
||||
website_url = Column(String(500), nullable=False, index=True)
|
||||
website_url = Column(String(500), nullable=True, index=True)
|
||||
|
||||
status = Column(String(50), default="active", index=True)
|
||||
|
||||
@@ -415,6 +416,7 @@ class MarketTrendsTask(Base):
|
||||
|
||||
__table_args__ = (
|
||||
Index("idx_market_trends_tasks_user_site", "user_id", "website_url"),
|
||||
Index("idx_market_trends_tasks_user_only", "user_id"),
|
||||
Index("idx_market_trends_tasks_next_execution", "next_execution"),
|
||||
Index("idx_market_trends_tasks_status", "status"),
|
||||
)
|
||||
|
||||
63
backend/models/youtube_task_models.py
Normal file
63
backend/models/youtube_task_models.py
Normal file
@@ -0,0 +1,63 @@
|
||||
"""
|
||||
YouTube Video Task Models
|
||||
|
||||
Database models for persistent tracking of YouTube video render,
|
||||
combine, and publish tasks. Replaces the in-memory dict approach
|
||||
so tasks survive server restarts.
|
||||
"""
|
||||
|
||||
import enum
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy import Column, Integer, String, DateTime, JSON, Text, Float, Enum, Index
|
||||
from models.subscription_models import Base
|
||||
|
||||
|
||||
class YouTubeTaskType(enum.Enum):
    """Kinds of YouTube task that can be tracked."""

    # Video rendering
    RENDER = "render"
    SCENE_RENDER = "scene_render"
    # Post-processing and delivery
    COMBINE = "combine"
    PUBLISH = "publish"
    # Asset generation
    IMAGE_GENERATION = "image_generation"
    AUDIO_GENERATION = "audio_generation"
|
||||
|
||||
|
||||
class YouTubeTaskStatus(enum.Enum):
    """Status values recorded for a tracked YouTube task."""

    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
|
||||
|
||||
|
||||
def _utc_now():
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


class YouTubeVideoTask(Base):
    """
    Persistent task tracking for YouTube Creator operations.

    Stores task state in PostgreSQL so that in-progress renders,
    combines, and publishes survive server restarts. The frontend
    can resume polling after a restart and recover results.
    """

    __tablename__ = "youtube_video_tasks"

    # --- Identity -------------------------------------------------------
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Unique external task identifier (36 chars — presumably a UUID string;
    # confirm against the code that creates tasks).
    task_id = Column(String(36), unique=True, nullable=False, index=True)
    user_id = Column(String(255), nullable=False, index=True)

    # --- Classification and state ---------------------------------------
    task_type = Column(
        Enum(YouTubeTaskType),
        nullable=False,
        default=YouTubeTaskType.RENDER,
    )
    status = Column(
        Enum(YouTubeTaskStatus),
        nullable=False,
        default=YouTubeTaskStatus.PENDING,
    )

    # --- Progress reporting ---------------------------------------------
    progress = Column(Float, default=0.0)
    message = Column(String(500), nullable=True)

    # --- Payloads --------------------------------------------------------
    request_data = Column(JSON, nullable=True)
    result = Column(JSON, nullable=True)
    error = Column(Text, nullable=True)

    # --- Timestamps ------------------------------------------------------
    # NOTE(review): the defaults produce timezone-aware UTC datetimes while
    # the columns are plain DateTime (no timezone=True) — confirm the
    # database driver stores and returns these as intended.
    created_at = Column(DateTime, default=_utc_now, nullable=False)
    updated_at = Column(
        DateTime,
        default=_utc_now,
        onupdate=_utc_now,
        nullable=False,
    )
    completed_at = Column(DateTime, nullable=True)

    # Composite indexes for per-user lookups plus one on creation time.
    __table_args__ = (
        Index('idx_youtube_task_user_status', 'user_id', 'status'),
        Index('idx_youtube_task_user_type', 'user_id', 'task_type'),
        Index('idx_youtube_task_created', 'created_at'),
    )
|
||||
74
backend/requirements-linkedin.txt
Normal file
74
backend/requirements-linkedin.txt
Normal file
@@ -0,0 +1,74 @@
|
||||
# =====================================================
|
||||
# ALwrity LinkedIn-Only Requirements
|
||||
# Lean subset for linkedin-only demo mode
|
||||
# =====================================================
|
||||
|
||||
# Core Web Server
|
||||
fastapi>=0.115.14
|
||||
starlette>=0.40.0,<0.47.0
|
||||
sse-starlette<3.0.0
|
||||
uvicorn>=0.24.0
|
||||
uvicorn[standard]>=0.24.0
|
||||
gunicorn>=21.0.0
|
||||
|
||||
# Server utilities
|
||||
python-multipart>=0.0.6
|
||||
python-dotenv>=1.0.0
|
||||
loguru>=0.7.2
|
||||
tenacity>=8.2.3
|
||||
pydantic>=2.5.2,<3.0.0
|
||||
typing-extensions>=4.8.0
|
||||
setuptools>=65.0.0
|
||||
|
||||
# Auth & Database
|
||||
fastapi-clerk-auth>=0.0.7
|
||||
PyJWT>=2.8.0
|
||||
cryptography>=41.0.0
|
||||
sqlalchemy>=2.0.25
|
||||
|
||||
# Payment
|
||||
stripe>=8.0.0
|
||||
|
||||
# HTTP clients
|
||||
httpx>=0.28.1
|
||||
aiohttp>=3.9.0
|
||||
requests>=2.31.0
|
||||
|
||||
# AI - needed for content generation and image prompts
|
||||
openai>=1.3.0
|
||||
google-genai>=1.0.0
|
||||
exa-py==1.9.1
|
||||
|
||||
# Text processing
|
||||
markdown>=3.5.0
|
||||
beautifulsoup4>=4.12.0
|
||||
|
||||
# Data processing
|
||||
numpy>=1.24.0
|
||||
pandas>=2.0.0
|
||||
|
||||
# Image processing - needed for LinkedIn image generation/editing
|
||||
Pillow>=10.0.0
|
||||
|
||||
# Testing
|
||||
pytest>=7.4.0
|
||||
pytest-asyncio>=0.21.0
|
||||
|
||||
# Task scheduling - needed for content calendar
|
||||
apscheduler>=3.10.0
|
||||
|
||||
# Utilities
|
||||
redis>=5.0.0
|
||||
schedule>=1.2.0
|
||||
aiofiles>=23.2.0
|
||||
psutil>=5.9.0
|
||||
|
||||
# Google APIs
|
||||
google-api-python-client>=2.100.0
|
||||
google-auth>=2.23.0
|
||||
google-auth-oauthlib>=1.0.0
|
||||
|
||||
# Other utilities
|
||||
python-dateutil>=2.8.0
|
||||
jinja2>=3.1.0
|
||||
pydantic-settings>=2.0.0
|
||||
@@ -12,6 +12,8 @@ tenacity>=8.2.3
|
||||
pydantic>=2.5.2,<3.0.0
|
||||
typing-extensions>=4.8.0
|
||||
|
||||
reportlab==4.5.1
|
||||
|
||||
# Auth
|
||||
PyJWT>=2.8.0
|
||||
cryptography>=41.0.0
|
||||
|
||||
@@ -22,7 +22,10 @@ from services.backlink_outreach_models import (
|
||||
SuppressionAddRequest,
|
||||
)
|
||||
from services.backlink_outreach_service import backlink_outreach_service
|
||||
from services.backlink_outreach_storage import BacklinkOutreachStorageService
|
||||
from services.backlink_outreach_storage import (
|
||||
BacklinkCampaignNotFoundError,
|
||||
BacklinkOutreachStorageService,
|
||||
)
|
||||
from services.backlink_outreach_sender import backlink_outreach_sender
|
||||
from services.backlink_outreach_reply_monitor import backlink_outreach_reply_monitor
|
||||
from services.backlink_outreach_template_generator import (
|
||||
@@ -68,7 +71,7 @@ async def discover_backlink_opportunities(
|
||||
payload: BacklinkKeywordInput,
|
||||
current_user: Dict[str, Any] = Depends(get_current_user),
|
||||
):
|
||||
return backlink_outreach_service.discover_opportunities(payload.keyword, payload.max_results)
|
||||
return await backlink_outreach_service.discover_opportunities_async(payload.keyword, payload.max_results)
|
||||
|
||||
|
||||
@router.get("/migration-coverage")
|
||||
@@ -84,12 +87,25 @@ async def get_backlink_migration_coverage(
|
||||
async def discover_deep_backlink_opportunities(
|
||||
payload: DeepKeywordInput,
|
||||
current_user: Dict[str, Any] = Depends(get_current_user),
|
||||
scrape_timeout_seconds: float = Query(15.0, ge=1.0, le=60.0),
|
||||
scrape_max_concurrency: int = Query(5, ge=1, le=20),
|
||||
):
|
||||
"""Enhanced discovery using Exa neural search + DuckDuckGo with full-page scraping."""
|
||||
user_id = _resolve_user_id(current_user)
|
||||
result = await backlink_outreach_service.deep_discover(payload.keyword, payload.max_results)
|
||||
storage = None
|
||||
if payload.campaign_id:
|
||||
storage = BacklinkOutreachStorageService()
|
||||
if not storage.get_campaign(payload.campaign_id, user_id):
|
||||
raise HTTPException(status_code=404, detail="Campaign not found")
|
||||
|
||||
result = await backlink_outreach_service.deep_discover(
|
||||
payload.keyword,
|
||||
payload.max_results,
|
||||
user_id=user_id,
|
||||
scrape_timeout_seconds=scrape_timeout_seconds,
|
||||
scrape_max_concurrency=scrape_max_concurrency,
|
||||
)
|
||||
if payload.campaign_id:
|
||||
saved = 0
|
||||
save_failed = 0
|
||||
for opp in result.get("opportunities", []):
|
||||
@@ -183,7 +199,9 @@ async def add_campaign_lead(
|
||||
notes=payload.notes,
|
||||
)
|
||||
return lead
|
||||
except Exception as e:
|
||||
except BacklinkCampaignNotFoundError:
|
||||
raise HTTPException(status_code=404, detail="Campaign not found")
|
||||
except Exception:
|
||||
raise HTTPException(status_code=500, detail="Failed to add lead")
|
||||
|
||||
|
||||
@@ -192,18 +210,48 @@ async def bulk_update_lead_status(
|
||||
payload: BulkStatusUpdateRequest,
|
||||
current_user: Dict[str, Any] = Depends(get_current_user),
|
||||
):
|
||||
"""Bulk update lead statuses."""
|
||||
"""Bulk update lead statuses for leads owned by the current user."""
|
||||
user_id = _resolve_user_id(current_user)
|
||||
storage = BacklinkOutreachStorageService()
|
||||
access_issues = storage.get_lead_access_issues(
|
||||
payload.lead_ids, user_id, campaign_id=payload.campaign_id
|
||||
)
|
||||
if access_issues["unauthorized"]:
|
||||
raise HTTPException(
|
||||
status_code=403,
|
||||
detail={
|
||||
"message": "One or more leads do not belong to the current user",
|
||||
"lead_ids": access_issues["unauthorized"],
|
||||
},
|
||||
)
|
||||
if access_issues["missing"]:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail={
|
||||
"message": "One or more leads were not found",
|
||||
"lead_ids": access_issues["missing"],
|
||||
},
|
||||
)
|
||||
|
||||
updated = 0
|
||||
failed: list[str] = []
|
||||
for lid in payload.lead_ids:
|
||||
try:
|
||||
lead = storage.update_lead_status(lid, user_id, payload.status, payload.notes)
|
||||
lead = storage.update_lead_status(
|
||||
lid,
|
||||
user_id,
|
||||
payload.status,
|
||||
payload.notes,
|
||||
campaign_id=payload.campaign_id,
|
||||
)
|
||||
if lead:
|
||||
updated += 1
|
||||
else:
|
||||
failed.append(lid)
|
||||
except PermissionError:
|
||||
raise HTTPException(
|
||||
status_code=403, detail="Lead does not belong to the current user"
|
||||
)
|
||||
except Exception:
|
||||
failed.append(lid)
|
||||
return BulkStatusUpdateResponse(updated=updated, failed=failed)
|
||||
@@ -218,7 +266,18 @@ async def update_lead_status(
|
||||
"""Update lead status (discovered -> contacted -> replied -> placed)."""
|
||||
user_id = _resolve_user_id(current_user)
|
||||
storage = BacklinkOutreachStorageService()
|
||||
lead = storage.update_lead_status(lead_id, user_id, payload.status, payload.notes)
|
||||
try:
|
||||
lead = storage.update_lead_status(
|
||||
lead_id,
|
||||
user_id,
|
||||
payload.status,
|
||||
payload.notes,
|
||||
campaign_id=payload.campaign_id,
|
||||
)
|
||||
except PermissionError:
|
||||
raise HTTPException(
|
||||
status_code=403, detail="Lead does not belong to the current user"
|
||||
)
|
||||
if not lead:
|
||||
raise HTTPException(status_code=404, detail="Lead not found")
|
||||
return lead
|
||||
@@ -260,42 +319,95 @@ async def send_outreach(
|
||||
subject = backlink_outreach_sender.personalize(tmpl.get("subject_template", subject), variables)
|
||||
body = backlink_outreach_sender.personalize(tmpl.get("body_template", body), variables)
|
||||
|
||||
sender_validation = backlink_outreach_sender.validate_sender_alias(payload.sender_email)
|
||||
if not sender_validation.authorized:
|
||||
return SendOutreachResponse(
|
||||
attempt_id="",
|
||||
status="failed",
|
||||
policy_allowed=False,
|
||||
policy_reasons=sender_validation.failure_reasons,
|
||||
effective_sender_email=sender_validation.effective_sender_email or None,
|
||||
)
|
||||
|
||||
try:
|
||||
result = backlink_outreach_service.send_outreach(
|
||||
SendOutreachRequest(
|
||||
lead_id=payload.lead_id,
|
||||
campaign_id=payload.campaign_id,
|
||||
user_id=user_id,
|
||||
workspace_id=payload.workspace_id,
|
||||
sender_email=payload.sender_email,
|
||||
sender_email=sender_validation.effective_sender_email,
|
||||
subject=subject,
|
||||
body=body,
|
||||
idempotency_key=payload.idempotency_key,
|
||||
sender_identity=payload.sender_identity,
|
||||
legal_basis=payload.legal_basis,
|
||||
contact_discovery_source=payload.contact_discovery_source,
|
||||
recipient_region=payload.recipient_region,
|
||||
recipient_region_source=payload.recipient_region_source,
|
||||
consent_status=payload.consent_status,
|
||||
approved_by_human=payload.approved_by_human,
|
||||
unsubscribe_url=payload.unsubscribe_url,
|
||||
one_click_unsubscribe=payload.one_click_unsubscribe,
|
||||
)
|
||||
)
|
||||
except Exception:
|
||||
existing = storage.get_attempt_by_idempotency_key(payload.idempotency_key, user_id=user_id)
|
||||
if existing:
|
||||
result = backlink_outreach_service.response_from_attempt(existing, duplicate=True)
|
||||
if sender_validation.effective_sender_email:
|
||||
result.effective_sender_email = sender_validation.effective_sender_email
|
||||
return result
|
||||
raise HTTPException(status_code=409, detail="Unable to reserve idempotency key")
|
||||
|
||||
result.effective_sender_email = sender_validation.effective_sender_email
|
||||
|
||||
lead_email = ""
|
||||
if result.attempt_id:
|
||||
if result.attempt_id and result.status == "approved" and not result.duplicate:
|
||||
lead = storage.get_lead(payload.lead_id, user_id=user_id)
|
||||
lead_email = (lead.get("email") or "") if lead else ""
|
||||
|
||||
if result.policy_allowed and lead_email:
|
||||
sent = await backlink_outreach_sender.send_email(
|
||||
if result.status == "approved" and result.policy_allowed and not result.duplicate and lead_email:
|
||||
domain = lead_email.split("@")[-1] if "@" in lead_email else "unknown"
|
||||
|
||||
user_within_cap, _ = storage.try_increment_user_send_counter(user_id)
|
||||
domain_within_cap, _ = storage.try_increment_domain_send_counter(domain, user_id=user_id)
|
||||
if not (user_within_cap and domain_within_cap):
|
||||
reasons = []
|
||||
if not user_within_cap:
|
||||
reasons.append("user_daily_cap_exceeded")
|
||||
if not domain_within_cap:
|
||||
reasons.append("domain_daily_cap_exceeded")
|
||||
reason_str = f"rate_limit_hit; retry_policy={backlink_outreach_service.SMTP_RETRY_POLICY}"
|
||||
storage.update_attempt_status(result.attempt_id, "blocked", decision_reason=reason_str, user_id=user_id)
|
||||
result.status = "blocked"
|
||||
result.policy_reasons = reasons
|
||||
else:
|
||||
send_result = await backlink_outreach_sender.send_email(
|
||||
to_email=lead_email,
|
||||
subject=subject,
|
||||
body=body,
|
||||
from_email=payload.sender_email,
|
||||
)
|
||||
status = "sent" if sent else "failed"
|
||||
storage.update_attempt_status(result.attempt_id, status, user_id=user_id)
|
||||
result.status = status
|
||||
if sent:
|
||||
if send_result.success:
|
||||
storage.update_attempt_status(result.attempt_id, "sent", user_id=user_id)
|
||||
result.status = "sent"
|
||||
result.effective_sender_email = send_result.effective_sender_email or result.effective_sender_email
|
||||
if send_result.message_id:
|
||||
storage.update_attempt_message_id(result.attempt_id, send_result.message_id, user_id=user_id)
|
||||
storage.mark_idempotency(payload.idempotency_key, user_id)
|
||||
storage.increment_user_send_counter(user_id)
|
||||
domain = lead_email.split("@")[-1] if "@" in lead_email else "unknown"
|
||||
storage.increment_domain_send_counter(domain, user_id=user_id)
|
||||
elif result.policy_allowed and not lead_email:
|
||||
storage.update_attempt_status(result.attempt_id, "failed", user_id=user_id)
|
||||
else:
|
||||
reason = f"smtp_send_failed; retry_policy={backlink_outreach_service.SMTP_RETRY_POLICY}"
|
||||
storage.update_attempt_status(result.attempt_id, "failed", decision_reason=reason, user_id=user_id)
|
||||
result.status = "failed"
|
||||
result.policy_reasons = ["smtp_send_failed"]
|
||||
result.retry_policy = backlink_outreach_service.SMTP_RETRY_POLICY
|
||||
elif result.status == "approved" and result.policy_allowed and not result.duplicate and not lead_email:
|
||||
reason = f"lead_has_no_email; retry_policy={backlink_outreach_service.SMTP_RETRY_POLICY}"
|
||||
storage.update_attempt_status(result.attempt_id, "failed", decision_reason=reason, user_id=user_id)
|
||||
result.status = "failed"
|
||||
result.policy_reasons = (result.policy_reasons or []) + ["lead_has_no_email"]
|
||||
result.retry_policy = backlink_outreach_service.SMTP_RETRY_POLICY
|
||||
|
||||
return result
|
||||
|
||||
@@ -350,7 +462,18 @@ async def poll_replies(
|
||||
if storage.reply_exists(from_email, subject, user_id=user_id):
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
attempt_id = ""
|
||||
in_reply_to = raw.get("in_reply_to", "")
|
||||
references = raw.get("references", "")
|
||||
if in_reply_to:
|
||||
attempt_id = storage.find_attempt_by_message_id(in_reply_to, user_id=user_id) or ""
|
||||
if not attempt_id and references:
|
||||
mid = references.split()[-1]
|
||||
attempt_id = storage.find_attempt_by_message_id(mid, user_id=user_id) or ""
|
||||
if not attempt_id:
|
||||
attempt_id = storage.find_attempt_by_from_email(from_email, user_id=user_id) or ""
|
||||
|
||||
reply = storage.add_reply(
|
||||
attempt_id=attempt_id,
|
||||
from_email=from_email,
|
||||
|
||||
@@ -7,9 +7,10 @@ proper error handling, monitoring, and documentation.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks, Request
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi.responses import JSONResponse, FileResponse
|
||||
from typing import Dict, Any, Optional
|
||||
import time
|
||||
import json
|
||||
from loguru import logger
|
||||
from pathlib import Path
|
||||
|
||||
@@ -17,11 +18,17 @@ from models.linkedin_models import (
|
||||
LinkedInPostRequest, LinkedInArticleRequest, LinkedInCarouselRequest,
|
||||
LinkedInVideoScriptRequest, LinkedInCommentResponseRequest,
|
||||
LinkedInPostResponse, LinkedInArticleResponse, LinkedInCarouselResponse,
|
||||
LinkedInVideoScriptResponse, LinkedInCommentResponseResult
|
||||
LinkedInVideoScriptResponse, LinkedInCommentResponseResult,
|
||||
LinkedInEditContentRequest, LinkedInEditContentResponse
|
||||
)
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from services.linkedin_service import LinkedInService
|
||||
from services.linkedin.carousel import LinkedInCarouselPDFRenderer
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from utils.text_asset_tracker import save_and_track_text_content
|
||||
from models.api_monitoring import APIRequest
|
||||
from sqlalchemy import func
|
||||
from collections import defaultdict
|
||||
|
||||
# Initialize the LinkedIn service instance
|
||||
linkedin_service = LinkedInService()
|
||||
@@ -29,6 +36,34 @@ from services.subscription.monitoring_middleware import DatabaseAPIMonitor
|
||||
from services.database import get_db as get_db_dependency
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
# Simple in-memory rate limiter: {user_id: [timestamp, ...]}
# The store is a plain module-level dict, so the counters live only in this
# process and are reset whenever the module is reloaded.
_rate_limit_store: Dict[str, list] = defaultdict(list)
RATE_LIMIT_MAX_REQUESTS = 30
RATE_LIMIT_WINDOW = 60  # seconds


def check_rate_limit(user_id: str) -> Optional[int]:
    """Record a request for *user_id* and report whether it is rate limited.

    Uses a sliding window: timestamps older than ``RATE_LIMIT_WINDOW`` seconds
    are discarded, and the request is rejected once
    ``RATE_LIMIT_MAX_REQUESTS`` timestamps remain inside the window.

    Args:
        user_id: Key identifying the caller (any string).

    Returns:
        ``None`` when the request is allowed (its timestamp is recorded), or
        the number of whole seconds to wait before retrying when it is not.
        The wait is rounded *up* and is never less than 1, so the value is
        always truthy for a limited request; callers test it with
        ``if retry_after:``.
    """
    import math  # local import keeps this block self-contained

    now = time.time()
    window_start = now - RATE_LIMIT_WINDOW

    # Prune old entries
    recent = [t for t in _rate_limit_store[user_id] if t > window_start]
    _rate_limit_store[user_id] = recent

    if len(recent) >= RATE_LIMIT_MAX_REQUESTS:
        # The oldest surviving timestamp decides when a slot frees up.
        remaining = recent[0] + RATE_LIMIT_WINDOW - now
        # Truncating with int() yields 0 for sub-second waits, which callers
        # would read as "not limited"; round up and clamp to at least 1.
        return max(1, math.ceil(remaining))

    recent.append(now)
    return None
|
||||
|
||||
# Machine-readable error identifiers placed in the "code" field of error payloads.
ERROR_CODES = dict(
    VALIDATION='LINKEDIN_ERR_001',
    GENERATION_FAILED='LINKEDIN_ERR_002',
    RATE_LIMITED='LINKEDIN_ERR_003',
    SAVE_FAILED='LINKEDIN_ERR_004',
    NOT_FOUND='LINKEDIN_ERR_404',
)


def error_response(code: str, message: str) -> dict:
    """Build the structured error payload ``{"code": ..., "message": ...}``.

    Args:
        code: Error identifier, normally one of the ``ERROR_CODES`` values.
        message: Human-readable description of the failure.

    Returns:
        A new dict with exactly the keys ``code`` and ``message``.
    """
    payload = dict(code=code, message=message)
    return payload
|
||||
|
||||
# Initialize router
|
||||
router = APIRouter(
|
||||
prefix="/api/linkedin",
|
||||
@@ -112,10 +147,10 @@ async def generate_post(
|
||||
|
||||
# Validate request
|
||||
if not request.topic.strip():
|
||||
raise HTTPException(status_code=422, detail="Topic cannot be empty")
|
||||
raise HTTPException(status_code=422, detail=error_response(ERROR_CODES['VALIDATION'], "Topic cannot be empty"))
|
||||
|
||||
if not request.industry.strip():
|
||||
raise HTTPException(status_code=422, detail="Industry cannot be empty")
|
||||
raise HTTPException(status_code=422, detail=error_response(ERROR_CODES['VALIDATION'], "Industry cannot be empty"))
|
||||
|
||||
# Extract user_id
|
||||
user_id = None
|
||||
@@ -124,22 +159,30 @@ async def generate_post(
|
||||
if not user_id:
|
||||
user_id = http_request.headers.get("X-User-ID") or http_request.headers.get("Authorization")
|
||||
|
||||
# Rate limit check
|
||||
retry_after = check_rate_limit(user_id or 'anonymous')
|
||||
if retry_after:
|
||||
raise HTTPException(
|
||||
status_code=429,
|
||||
detail=error_response(ERROR_CODES['RATE_LIMITED'], f"Rate limit exceeded. Retry after {retry_after} seconds."),
|
||||
headers={"Retry-After": str(retry_after)}
|
||||
)
|
||||
|
||||
# Generate post content
|
||||
response = await linkedin_service.generate_linkedin_post(request)
|
||||
|
||||
if not response.success:
|
||||
raise HTTPException(status_code=500, detail=error_response(ERROR_CODES['GENERATION_FAILED'], response.error or "Post generation failed"))
|
||||
|
||||
# Log successful request
|
||||
duration = time.time() - start_time
|
||||
background_tasks.add_task(
|
||||
log_api_request, http_request, db, duration, 200
|
||||
)
|
||||
|
||||
if not response.success:
|
||||
raise HTTPException(status_code=500, detail=response.error)
|
||||
|
||||
# Save and track text content (non-blocking)
|
||||
# Save and track text content
|
||||
if user_id and response.data and response.data.content:
|
||||
try:
|
||||
# Combine all text content
|
||||
text_content = response.data.content
|
||||
if response.data.call_to_action:
|
||||
text_content += f"\n\nCall to Action: {response.data.call_to_action}"
|
||||
@@ -166,7 +209,7 @@ async def generate_post(
|
||||
subdirectory="posts"
|
||||
)
|
||||
except Exception as track_error:
|
||||
logger.warning(f"Failed to track LinkedIn post asset: {track_error}")
|
||||
logger.error(f"Failed to track LinkedIn post asset: {track_error}")
|
||||
|
||||
logger.info(f"Successfully generated LinkedIn post in {duration:.2f} seconds")
|
||||
return response
|
||||
@@ -177,14 +220,13 @@ async def generate_post(
|
||||
duration = time.time() - start_time
|
||||
logger.error(f"Error generating LinkedIn post: {str(e)}")
|
||||
|
||||
# Log failed request
|
||||
background_tasks.add_task(
|
||||
log_api_request, http_request, db, duration, 500
|
||||
)
|
||||
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to generate LinkedIn post: {str(e)}"
|
||||
detail=error_response(ERROR_CODES['GENERATION_FAILED'], f"Failed to generate LinkedIn post: {str(e)}")
|
||||
)
|
||||
|
||||
|
||||
@@ -222,10 +264,10 @@ async def generate_article(
|
||||
|
||||
# Validate request
|
||||
if not request.topic.strip():
|
||||
raise HTTPException(status_code=422, detail="Topic cannot be empty")
|
||||
raise HTTPException(status_code=422, detail=error_response(ERROR_CODES['VALIDATION'], "Topic cannot be empty"))
|
||||
|
||||
if not request.industry.strip():
|
||||
raise HTTPException(status_code=422, detail="Industry cannot be empty")
|
||||
raise HTTPException(status_code=422, detail=error_response(ERROR_CODES['VALIDATION'], "Industry cannot be empty"))
|
||||
|
||||
# Extract user_id
|
||||
user_id = None
|
||||
@@ -234,17 +276,16 @@ async def generate_article(
|
||||
if not user_id:
|
||||
user_id = http_request.headers.get("X-User-ID") or http_request.headers.get("Authorization")
|
||||
|
||||
# Rate limit check
|
||||
retry_after = check_rate_limit(user_id or 'anonymous')
|
||||
if retry_after:
|
||||
raise HTTPException(status_code=429, detail=error_response(ERROR_CODES['RATE_LIMITED'], f"Rate limit exceeded. Retry after {retry_after} seconds."), headers={"Retry-After": str(retry_after)})
|
||||
|
||||
# Generate article content
|
||||
response = await linkedin_service.generate_linkedin_article(request)
|
||||
|
||||
# Log successful request
|
||||
duration = time.time() - start_time
|
||||
background_tasks.add_task(
|
||||
log_api_request, http_request, db, duration, 200
|
||||
)
|
||||
|
||||
if not response.success:
|
||||
raise HTTPException(status_code=500, detail=response.error)
|
||||
raise HTTPException(status_code=500, detail=error_response(ERROR_CODES['GENERATION_FAILED'], response.error or "Article generation failed"))
|
||||
|
||||
# Save and track text content (non-blocking)
|
||||
if user_id and response.data:
|
||||
@@ -282,7 +323,7 @@ async def generate_article(
|
||||
file_extension=".md"
|
||||
)
|
||||
except Exception as track_error:
|
||||
logger.warning(f"Failed to track LinkedIn article asset: {track_error}")
|
||||
logger.error(f"Failed to track LinkedIn article asset: {track_error}")
|
||||
|
||||
logger.info(f"Successfully generated LinkedIn article in {duration:.2f} seconds")
|
||||
return response
|
||||
@@ -300,7 +341,7 @@ async def generate_article(
|
||||
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to generate LinkedIn article: {str(e)}"
|
||||
detail=error_response(ERROR_CODES['GENERATION_FAILED'], f"Failed to generate LinkedIn article: {str(e)}")
|
||||
)
|
||||
|
||||
|
||||
@@ -337,13 +378,13 @@ async def generate_carousel(
|
||||
|
||||
# Validate request
|
||||
if not request.topic.strip():
|
||||
raise HTTPException(status_code=422, detail="Topic cannot be empty")
|
||||
raise HTTPException(status_code=422, detail=error_response(ERROR_CODES['VALIDATION'], "Topic cannot be empty"))
|
||||
|
||||
if not request.industry.strip():
|
||||
raise HTTPException(status_code=422, detail="Industry cannot be empty")
|
||||
raise HTTPException(status_code=422, detail=error_response(ERROR_CODES['VALIDATION'], "Industry cannot be empty"))
|
||||
|
||||
if request.slide_count < 3 or request.slide_count > 15:
|
||||
raise HTTPException(status_code=422, detail="Slide count must be between 3 and 15")
|
||||
if request.number_of_slides < 3 or request.number_of_slides > 15:
|
||||
raise HTTPException(status_code=422, detail=error_response(ERROR_CODES['VALIDATION'], "Number of slides must be between 3 and 15"))
|
||||
|
||||
# Extract user_id
|
||||
user_id = None
|
||||
@@ -352,18 +393,23 @@ async def generate_carousel(
|
||||
if not user_id:
|
||||
user_id = http_request.headers.get("X-User-ID") or http_request.headers.get("Authorization")
|
||||
|
||||
# Rate limit check
|
||||
retry_after = check_rate_limit(user_id or 'anonymous')
|
||||
if retry_after:
|
||||
raise HTTPException(status_code=429, detail=error_response(ERROR_CODES['RATE_LIMITED'], f"Rate limit exceeded. Retry after {retry_after} seconds."), headers={"Retry-After": str(retry_after)})
|
||||
|
||||
# Generate carousel content
|
||||
response = await linkedin_service.generate_linkedin_carousel(request)
|
||||
|
||||
if not response.success:
|
||||
raise HTTPException(status_code=500, detail=error_response(ERROR_CODES['GENERATION_FAILED'], response.error or "Carousel generation failed"))
|
||||
|
||||
# Log successful request
|
||||
duration = time.time() - start_time
|
||||
background_tasks.add_task(
|
||||
log_api_request, http_request, db, duration, 200
|
||||
)
|
||||
|
||||
if not response.success:
|
||||
raise HTTPException(status_code=500, detail=response.error)
|
||||
|
||||
# Save and track text content (non-blocking)
|
||||
if user_id and response.data:
|
||||
try:
|
||||
@@ -381,10 +427,10 @@ async def generate_carousel(
|
||||
source_module="linkedin_writer",
|
||||
title=f"LinkedIn Carousel: {response.data.title[:80] if response.data.title else request.topic[:80]}",
|
||||
description=f"LinkedIn carousel for {request.industry} industry",
|
||||
prompt=f"Topic: {request.topic}\nIndustry: {request.industry}\nSlides: {getattr(request, 'number_of_slides', request.slide_count if hasattr(request, 'slide_count') else 5)}",
|
||||
prompt=f"Topic: {request.topic}\nIndustry: {request.industry}\nSlides: {request.number_of_slides}",
|
||||
tags=["linkedin", "carousel", request.industry.lower().replace(' ', '_')],
|
||||
asset_metadata={
|
||||
"slide_count": len(response.data.slides),
|
||||
"number_of_slides": len(response.data.slides),
|
||||
"has_cover": response.data.cover_slide is not None,
|
||||
"has_cta": response.data.cta_slide is not None
|
||||
},
|
||||
@@ -392,7 +438,7 @@ async def generate_carousel(
|
||||
file_extension=".md"
|
||||
)
|
||||
except Exception as track_error:
|
||||
logger.warning(f"Failed to track LinkedIn carousel asset: {track_error}")
|
||||
logger.error(f"Failed to track LinkedIn carousel asset: {track_error}")
|
||||
|
||||
logger.info(f"Successfully generated LinkedIn carousel in {duration:.2f} seconds")
|
||||
return response
|
||||
@@ -410,10 +456,82 @@ async def generate_carousel(
|
||||
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to generate LinkedIn carousel: {str(e)}"
|
||||
detail=error_response(ERROR_CODES['GENERATION_FAILED'], f"Failed to generate LinkedIn carousel: {str(e)}")
|
||||
)
|
||||
|
||||
|
||||
@router.post(
    "/generate-carousel-pdf",
    summary="Render Carousel as PDF",
    description="""
    Generate LinkedIn carousel content and render it as a PDF document.

    Generates carousel content (slides with title, content, visual_elements)
    from the request and renders the slides into visually appealing slide
    images composed into a PDF ready for LinkedIn upload (1.91:1 aspect ratio,
    max 300 slides, max 100MB).
    """
)
async def generate_carousel_pdf(
    request: LinkedInCarouselRequest,
    background_tasks: BackgroundTasks,
    http_request: Request,
    db: Session = Depends(get_db),
    current_user: Optional[Dict[str, Any]] = Depends(get_current_user)
):
    """Generate carousel content and render as PDF.

    Applies the same input validation and per-user rate limit as
    ``/generate-carousel``, generates the carousel content, then hands it to
    ``LinkedInCarouselPDFRenderer``.

    Returns:
        A ``FileResponse`` for the rendered file when the renderer result
        contains ``pdf_path``; otherwise a ``JSONResponse`` with ``success``,
        ``pdf_bytes`` and ``metadata`` taken from the renderer result.

    Raises:
        HTTPException: 422 for invalid input, 429 when rate limited, 500 when
            content generation or PDF rendering fails. Every detail is an
            ``error_response`` payload.
    """
    import base64  # local import: only used by the JSON fallback below

    start_time = time.time()

    try:
        # Validate request (same rules as /generate-carousel)
        if not request.topic.strip():
            raise HTTPException(status_code=422, detail=error_response(ERROR_CODES['VALIDATION'], "Topic cannot be empty"))

        if not request.industry.strip():
            raise HTTPException(status_code=422, detail=error_response(ERROR_CODES['VALIDATION'], "Industry cannot be empty"))

        if request.number_of_slides < 3 or request.number_of_slides > 15:
            raise HTTPException(status_code=422, detail=error_response(ERROR_CODES['VALIDATION'], "Number of slides must be between 3 and 15"))

        # Extract user_id
        user_id = None
        if current_user:
            user_id = str(current_user.get('id', '') or current_user.get('sub', ''))
        if not user_id:
            user_id = http_request.headers.get("X-User-ID") or http_request.headers.get("Authorization")

        # Rate limit check. Compared against None so that a wait of 0 seconds
        # is still treated as "limited".
        retry_after = check_rate_limit(user_id or 'anonymous')
        if retry_after is not None:
            raise HTTPException(
                status_code=429,
                detail=error_response(ERROR_CODES['RATE_LIMITED'], f"Rate limit exceeded. Retry after {retry_after} seconds."),
                headers={"Retry-After": str(retry_after)}
            )

        # First generate carousel content
        content_result = await linkedin_service.generate_linkedin_carousel(request)

        if not content_result.success or not content_result.data:
            raise HTTPException(
                status_code=500,
                detail=error_response(ERROR_CODES['GENERATION_FAILED'], content_result.error or "Carousel generation failed")
            )

        carousel_data = content_result.data.model_dump()

        # Then render to PDF
        renderer = LinkedInCarouselPDFRenderer()
        pdf_result = await renderer.render_carousel_to_pdf(
            carousel_data=carousel_data,
            color_scheme=request.color_scheme,
            user_id=user_id,
        )

        if not pdf_result.get('success'):
            raise HTTPException(
                status_code=500,
                detail=error_response(ERROR_CODES['GENERATION_FAILED'], pdf_result.get('error') or 'PDF rendering failed')
            )

        duration = time.time() - start_time
        background_tasks.add_task(log_api_request, http_request, db, duration, 200)

        pdf_path = pdf_result.get('pdf_path')
        if pdf_path:
            # The topic is user input: keep only characters that are safe in a
            # download filename (spaces still become underscores, as before).
            safe_topic = "".join(
                ch if ch.isalnum() or ch in "-_" else "_"
                for ch in request.topic[:30]
            )
            return FileResponse(
                path=pdf_path,
                media_type="application/pdf",
                filename=f"linkedin_carousel_{safe_topic}.pdf"
            )

        pdf_bytes = pdf_result.get('pdf_bytes')
        if isinstance(pdf_bytes, (bytes, bytearray)):
            # Raw bytes cannot be JSON-serialized; send them base64-encoded.
            # NOTE(review): confirm which type the renderer returns here and
            # what encoding the client expects.
            pdf_bytes = base64.b64encode(pdf_bytes).decode('ascii')

        return JSONResponse(content={
            'success': True,
            'pdf_bytes': pdf_bytes,
            'metadata': pdf_result.get('metadata'),
        })

    except HTTPException:
        raise
    except Exception as e:
        duration = time.time() - start_time
        logger.error(f"Error generating carousel PDF: {str(e)}")

        # Log failed request
        background_tasks.add_task(log_api_request, http_request, db, duration, 500)

        raise HTTPException(status_code=500, detail=error_response(ERROR_CODES['GENERATION_FAILED'], f"Failed to generate carousel PDF: {str(e)}"))
|
||||
|
||||
|
||||
@router.post(
|
||||
"/generate-video-script",
|
||||
response_model=LinkedInVideoScriptResponse,
|
||||
@@ -447,14 +565,14 @@ async def generate_video_script(
|
||||
|
||||
# Validate request
|
||||
if not request.topic.strip():
|
||||
raise HTTPException(status_code=422, detail="Topic cannot be empty")
|
||||
raise HTTPException(status_code=422, detail=error_response(ERROR_CODES['VALIDATION'], "Topic cannot be empty"))
|
||||
|
||||
if not request.industry.strip():
|
||||
raise HTTPException(status_code=422, detail="Industry cannot be empty")
|
||||
raise HTTPException(status_code=422, detail=error_response(ERROR_CODES['VALIDATION'], "Industry cannot be empty"))
|
||||
|
||||
video_duration = getattr(request, 'video_duration', getattr(request, 'video_length', 60))
|
||||
if video_duration < 15 or video_duration > 300:
|
||||
raise HTTPException(status_code=422, detail="Video length must be between 15 and 300 seconds")
|
||||
raise HTTPException(status_code=422, detail=error_response(ERROR_CODES['VALIDATION'], "Video length must be between 15 and 300 seconds"))
|
||||
|
||||
# Extract user_id
|
||||
user_id = None
|
||||
@@ -463,18 +581,23 @@ async def generate_video_script(
|
||||
if not user_id:
|
||||
user_id = http_request.headers.get("X-User-ID") or http_request.headers.get("Authorization")
|
||||
|
||||
# Rate limit check
|
||||
retry_after = check_rate_limit(user_id or 'anonymous')
|
||||
if retry_after:
|
||||
raise HTTPException(status_code=429, detail=error_response(ERROR_CODES['RATE_LIMITED'], f"Rate limit exceeded. Retry after {retry_after} seconds."), headers={"Retry-After": str(retry_after)})
|
||||
|
||||
# Generate video script content
|
||||
response = await linkedin_service.generate_linkedin_video_script(request)
|
||||
|
||||
if not response.success:
|
||||
raise HTTPException(status_code=500, detail=error_response(ERROR_CODES['GENERATION_FAILED'], response.error or "Video script generation failed"))
|
||||
|
||||
# Log successful request
|
||||
duration = time.time() - start_time
|
||||
background_tasks.add_task(
|
||||
log_api_request, http_request, db, duration, 200
|
||||
)
|
||||
|
||||
if not response.success:
|
||||
raise HTTPException(status_code=500, detail=response.error)
|
||||
|
||||
# Save and track text content (non-blocking)
|
||||
if user_id and response.data:
|
||||
try:
|
||||
@@ -514,7 +637,7 @@ async def generate_video_script(
|
||||
file_extension=".md"
|
||||
)
|
||||
except Exception as track_error:
|
||||
logger.warning(f"Failed to track LinkedIn video script asset: {track_error}")
|
||||
logger.error(f"Failed to track LinkedIn video script asset: {track_error}")
|
||||
|
||||
logger.info(f"Successfully generated LinkedIn video script in {duration:.2f} seconds")
|
||||
return response
|
||||
@@ -532,7 +655,7 @@ async def generate_video_script(
|
||||
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to generate LinkedIn video script: {str(e)}"
|
||||
detail=error_response(ERROR_CODES['GENERATION_FAILED'], f"Failed to generate LinkedIn video script: {str(e)}")
|
||||
)
|
||||
|
||||
|
||||
@@ -572,10 +695,10 @@ async def generate_comment_response(
|
||||
post_context = getattr(request, 'post_context', getattr(request, 'original_post', ''))
|
||||
|
||||
if not original_comment.strip():
|
||||
raise HTTPException(status_code=422, detail="Original comment cannot be empty")
|
||||
raise HTTPException(status_code=422, detail=error_response(ERROR_CODES['VALIDATION'], "Original comment cannot be empty"))
|
||||
|
||||
if not post_context.strip():
|
||||
raise HTTPException(status_code=422, detail="Post context cannot be empty")
|
||||
raise HTTPException(status_code=422, detail=error_response(ERROR_CODES['VALIDATION'], "Post context cannot be empty"))
|
||||
|
||||
# Extract user_id
|
||||
user_id = None
|
||||
@@ -584,18 +707,23 @@ async def generate_comment_response(
|
||||
if not user_id:
|
||||
user_id = http_request.headers.get("X-User-ID") or http_request.headers.get("Authorization")
|
||||
|
||||
# Rate limit check
|
||||
retry_after = check_rate_limit(user_id or 'anonymous')
|
||||
if retry_after:
|
||||
raise HTTPException(status_code=429, detail=error_response(ERROR_CODES['RATE_LIMITED'], f"Rate limit exceeded. Retry after {retry_after} seconds."), headers={"Retry-After": str(retry_after)})
|
||||
|
||||
# Generate comment response
|
||||
response = await linkedin_service.generate_linkedin_comment_response(request)
|
||||
|
||||
if not response.success:
|
||||
raise HTTPException(status_code=500, detail=error_response(ERROR_CODES['GENERATION_FAILED'], response.error or "Comment response generation failed"))
|
||||
|
||||
# Log successful request
|
||||
duration = time.time() - start_time
|
||||
background_tasks.add_task(
|
||||
log_api_request, http_request, db, duration, 200
|
||||
)
|
||||
|
||||
if not response.success:
|
||||
raise HTTPException(status_code=500, detail=response.error)
|
||||
|
||||
# Save and track text content (non-blocking)
|
||||
if user_id and hasattr(response, 'response') and response.response:
|
||||
try:
|
||||
@@ -626,7 +754,7 @@ async def generate_comment_response(
|
||||
file_extension=".md"
|
||||
)
|
||||
except Exception as track_error:
|
||||
logger.warning(f"Failed to track LinkedIn comment response asset: {track_error}")
|
||||
logger.error(f"Failed to track LinkedIn comment response asset: {track_error}")
|
||||
|
||||
logger.info(f"Successfully generated LinkedIn comment response in {duration:.2f} seconds")
|
||||
return response
|
||||
@@ -644,7 +772,7 @@ async def generate_comment_response(
|
||||
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to generate LinkedIn comment response: {str(e)}"
|
||||
detail=error_response(ERROR_CODES['GENERATION_FAILED'], f"Failed to generate LinkedIn comment response: {str(e)}")
|
||||
)
|
||||
|
||||
|
||||
@@ -691,6 +819,128 @@ async def get_content_types():
|
||||
}
|
||||
|
||||
|
||||
@router.post(
    "/edit-content",
    response_model=LinkedInEditContentResponse,
    summary="Edit LinkedIn Content with AI",
    description="""
    Apply AI-powered edits to LinkedIn content.

    Supported edit types:
    - professionalize: Rewrite content with professional business language
    - optimize_engagement: Optimize hook and structure for maximum engagement
    - add_hashtags: Generate relevant, industry-specific hashtags
    - adjust_tone: Rewrite content in a different tone (professional, conversational, authoritative, etc.)
    - expand: Add depth, examples, and insights to content
    - condense: Shorten content while preserving key messages
    - add_cta: Generate a contextual call-to-action
    """
)
async def edit_linkedin_content(
    request: LinkedInEditContentRequest,
    current_user: Optional[Dict[str, Any]] = Depends(get_current_user)
):
    """Edit LinkedIn content using AI-powered text generation.

    Looks up a system prompt, temperature and token budget for
    ``request.edit_type``, builds a user prompt from the request fields and
    calls ``llm_text_gen``.

    Returns:
        ``LinkedInEditContentResponse``. Failures (empty content, unknown
        edit type, empty model output, or any exception) are reported with
        ``success=False`` and an ``error`` message rather than by raising.
    """
    # Local import: fastapi is already a dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    try:
        # Extract user_id for subscription checking
        user_id = None
        if current_user:
            user_id = str(current_user.get('id', '') or current_user.get('sub', ''))

        if not request.content.strip():
            return LinkedInEditContentResponse(
                success=False, error="Content cannot be empty", edit_type=request.edit_type
            )

        # Build the system prompt based on edit type
        system_prompts = {
            "professionalize": "You are a professional business writer. Rewrite the following LinkedIn content to be more professional, polished, and industry-appropriate. Maintain the original message but use sophisticated business language, improve sentence structure, and ensure a confident executive presence.",
            "optimize_engagement": "You are a LinkedIn engagement strategist. Rewrite the following content to maximize engagement. Strengthen the hook in the first 2 lines, add thought-provoking elements, improve readability with shorter sentences, and ensure the content encourages comments and shares.",
            "add_hashtags": "You are a LinkedIn hashtag strategist. Generate 5 highly relevant, industry-specific hashtags for the following content. Return the original content unchanged, followed by two newlines and the hashtags on a single line.",
            "adjust_tone": "You are a LinkedIn tone specialist. Rewrite the following content in the specified tone while preserving all key information and the overall message.",
            "expand": "You are a LinkedIn content strategist. Expand the following content by adding relevant examples, data points, actionable insights, and deeper analysis. Maintain the original structure but add substantial value while keeping it LinkedIn-appropriate (under 3000 characters).",
            "condense": "You are a LinkedIn editing specialist. Condense the following content to be more concise and impactful. Remove filler words, tighten sentences, and preserve only the strongest points. Keep the core message intact.",
            "add_cta": "You are a LinkedIn conversion strategist. Add a compelling, contextual call-to-action to the following content. The CTA should feel natural, not salesy, and should encourage meaningful engagement (comments, connections, or discussions)."
        }

        system_prompt = system_prompts.get(request.edit_type)
        if not system_prompt:
            return LinkedInEditContentResponse(
                success=False, error=f"Unknown edit type: {request.edit_type}", edit_type=request.edit_type
            )

        # Build the user prompt with context
        user_prompt = f"Content to edit:\n\n{request.content}\n\n"
        if request.industry:
            user_prompt += f"Industry: {request.industry}\n"
        if request.tone:
            user_prompt += f"Target tone: {request.tone}\n"
        if request.target_audience:
            user_prompt += f"Target audience: {request.target_audience}\n"
        if request.parameters:
            user_prompt += f"Additional context: {json.dumps(request.parameters)}\n"

        user_prompt += "\nReturn ONLY the edited content without any explanations, labels, or markdown formatting."

        # Per-edit-type sampling temperature.
        temperature = {
            "professionalize": 0.3,
            "optimize_engagement": 0.7,
            "add_hashtags": 0.4,
            "adjust_tone": 0.5,
            "expand": 0.7,
            "condense": 0.3,
            "add_cta": 0.6,
        }.get(request.edit_type, 0.5)

        # Per-edit-type output budget.
        max_tokens = {
            "expand": 2048,
            "professionalize": 1024,
            "optimize_engagement": 1024,
            "adjust_tone": 1024,
            "condense": 1024,
            "add_cta": 1024,
            "add_hashtags": 512,
        }.get(request.edit_type, 1024)

        # Generate edited content using provider-agnostic gateway.
        # llm_text_gen is a synchronous call; run it in the thread pool so the
        # event loop stays free to serve other requests while it works.
        edited = await run_in_threadpool(
            llm_text_gen,
            prompt=user_prompt,
            system_prompt=system_prompt,
            user_id=user_id,
            flow_type=f"linkedin_edit_{request.edit_type}",
            max_tokens=max_tokens,
            temperature=temperature
        )

        if not edited:
            return LinkedInEditContentResponse(
                success=False, error="AI editing returned empty result", edit_type=request.edit_type
            )

        edited = edited.strip()

        logger.info(f"LinkedIn content edited successfully via {request.edit_type}")
        return LinkedInEditContentResponse(
            success=True,
            content=edited,
            edit_type=request.edit_type,
            provider="llm_text_gen",
            model="provider-agnostic"
        )

    except Exception as e:
        # loguru has no ``exc_info`` keyword: extra kwargs are fed to
        # ``str.format`` on the message, which breaks when the error text
        # contains braces. ``opt(exception=True)`` attaches the traceback and
        # the exception is passed as a format argument instead.
        logger.opt(exception=True).error("Error editing LinkedIn content: {}", e)
        return LinkedInEditContentResponse(
            success=False, error=f"Editing failed: {str(e)}", edit_type=request.edit_type
        )
|
||||
|
||||
|
||||
@router.get(
|
||||
"/usage-stats",
|
||||
summary="Get Usage Statistics",
|
||||
@@ -699,30 +949,29 @@ async def get_content_types():
|
||||
async def get_usage_stats(db: Session = Depends(get_db)):
|
||||
"""Get usage statistics for LinkedIn content generation."""
|
||||
try:
|
||||
# This would query the database for actual usage stats
|
||||
# For now, returning mock data
|
||||
base = db.query(APIRequest).filter(APIRequest.path.like('/api/linkedin/%'))
|
||||
total = base.count()
|
||||
successful = base.filter(APIRequest.status_code < 400).count()
|
||||
|
||||
avg_dur = base.with_entities(func.avg(APIRequest.duration)).scalar() or 0
|
||||
|
||||
content_types = {
|
||||
"posts": base.filter(APIRequest.path.like('%generate-post')).count(),
|
||||
"articles": base.filter(APIRequest.path.like('%generate-article')).count(),
|
||||
"carousels": base.filter(APIRequest.path.like('%generate-carousel')).count(),
|
||||
"video_scripts": base.filter(APIRequest.path.like('%generate-video-script')).count(),
|
||||
"comment_responses": base.filter(APIRequest.path.like('%generate-comment-response')).count(),
|
||||
}
|
||||
|
||||
return {
|
||||
"total_requests": 1250,
|
||||
"content_types": {
|
||||
"posts": 650,
|
||||
"articles": 320,
|
||||
"carousels": 180,
|
||||
"video_scripts": 70,
|
||||
"comment_responses": 30
|
||||
},
|
||||
"success_rate": 0.96,
|
||||
"average_generation_time": 4.2,
|
||||
"top_industries": [
|
||||
"Technology",
|
||||
"Healthcare",
|
||||
"Finance",
|
||||
"Marketing",
|
||||
"Education"
|
||||
]
|
||||
"total_requests": total,
|
||||
"content_types": content_types,
|
||||
"success_rate": round(successful / max(total, 1), 2),
|
||||
"average_generation_time": round(float(avg_dur), 2),
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Error retrieving usage stats: {str(e)}")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Failed to retrieve usage statistics"
|
||||
detail=error_response(ERROR_CODES['GENERATION_FAILED'], "Failed to retrieve usage statistics")
|
||||
)
|
||||
@@ -30,6 +30,7 @@ from services.seo_tools.on_page_seo_service import OnPageSEOService
|
||||
from services.seo_tools.technical_seo_service import TechnicalSEOService
|
||||
from services.seo_tools.enterprise_seo_service import EnterpriseSEOService
|
||||
from services.seo_tools.gsc_analyzer_service import GSCAnalyzerService
|
||||
from services.seo_tools.gsc_strategy_insights_service import GSCStrategyInsightsService
|
||||
from services.seo_tools.content_strategy_service import ContentStrategyService
|
||||
from services.seo_tools.llm_insights_service import LLMInsightsService
|
||||
from services.database import get_session_for_user
|
||||
@@ -199,6 +200,34 @@ class KeywordExpansionRequest(BaseModel):
|
||||
content_analysis: Dict[str, Any] = Field(..., description="Content analysis data")
|
||||
target_difficulty: Optional[str] = Field(None, description="Target difficulty (low/medium/high)")
|
||||
|
||||
# ==================== GSC STRATEGY INSIGHTS REQUEST MODELS ====================
|
||||
|
||||
class GSCStrategyInsightsRequest(BaseModel):
    """Request model for GSC strategy insights (dashboard context)"""

    # Required; every other field has a default.
    site_url: HttpUrl = Field(..., description="Website URL registered in GSC")
    include_trends: bool = Field(True, description="Include trend analysis")
    include_competitive: bool = Field(False, description="Include competitive analysis (Phase 2)")
    # Constrained to the inclusive range 5..100.
    top_n: int = Field(20, ge=5, le=100, description="Number of top opportunities to return")
|
||||
|
||||
class GSCOpportunityRankingRequest(BaseModel):
    """Request model for ROI-ranked opportunities"""

    site_url: HttpUrl = Field(..., description="Website URL registered in GSC")
    # Free-form strings: the allowed values are listed in the descriptions but
    # are not enforced by this model.
    ranking_metric: str = Field("roi_score", description="Metric to rank by (roi_score/effort/impact/timeline)")
    severity_filter: Optional[str] = Field(None, description="Filter by severity (critical/high/medium/low/watch)")
    # Constrained to the inclusive range 5..100.
    limit: int = Field(20, ge=5, le=100, description="Number of opportunities to return")
|
||||
|
||||
class GSCTrendAnalysisRequest(BaseModel):
    """Request model for performance trend analysis"""

    # Required: the property whose history is analysed.
    site_url: HttpUrl = Field(
        ...,
        description="Website URL registered in GSC",
    )

    # Free-form string; the accepted values are only listed in the description.
    metric: str = Field(
        "all",
        description="Metric to analyze (position/impressions/clicks/ctr/all)",
    )

    # Look-back window; validation rejects values outside 7..365 days.
    days_back: int = Field(
        90,
        ge=7,
        le=365,
        description="Days of historical data to analyze",
    )
|
||||
|
||||
class GSCHealthMetricsRequest(BaseModel):
    """Request model for health metrics calculation"""

    # Required: the property whose health is scored.
    site_url: HttpUrl = Field(
        ...,
        description="Website URL registered in GSC",
    )

    # Both optional sections are requested by default.
    include_distribution: bool = Field(
        True,
        description="Include keyword distribution breakdown",
    )
    include_trends: bool = Field(
        True,
        description="Include trend comparison",
    )
|
||||
|
||||
# Exception Handler
|
||||
async def handle_seo_tool_exception(func_name: str, error: Exception, request_data: Dict) -> ErrorResponse:
|
||||
"""Handle exceptions from SEO tools with intelligent logging"""
|
||||
@@ -1102,6 +1131,236 @@ async def get_content_opportunities_report(
|
||||
return await handle_seo_tool_exception("get_content_opportunities_report", e, request.dict())
|
||||
|
||||
|
||||
# ==================== GSC STRATEGY INSIGHTS ENDPOINTS (Dashboard-Focused) ====================
|
||||
|
||||
@router.post("/gsc/strategy-insights", response_model=BaseResponse)
@log_api_call
async def get_gsc_strategy_insights(
    request: GSCStrategyInsightsRequest,
    current_user: dict = Depends(get_current_user)
) -> Union[BaseResponse, ErrorResponse]:
    """
    Get comprehensive strategy insights from GSC data for SEO Dashboard.

    Provides strategic insights optimized for dashboard display:
    - Ranked opportunities by ROI score (0-100)
    - Health metrics with trend comparison
    - Quick summary of key insights
    - Optional: Performance trends and competitive positioning

    ROI Scoring Formula:
    ROI = 0.40×traffic_impact + 0.30×ease + 0.20×competitive + 0.10×momentum

    Severity Levels:
    - CRITICAL: 80-100 (immediate action)
    - HIGH: 60-79 (high priority)
    - MEDIUM: 40-59 (medium priority)
    - LOW: 20-39 (low priority)
    - WATCH: <20 (monitoring)
    """
    start_time = datetime.utcnow()

    try:
        # Keep a missing id as None: str(None) would hand the service the
        # literal user id "None".
        raw_user_id = current_user.get("id") if current_user else None
        user_id = None if raw_user_id is None else str(raw_user_id)

        service = GSCStrategyInsightsService()
        insights = await service.get_dashboard_strategy(
            user_id=user_id,
            site_url=str(request.site_url),
            include_trends=request.include_trends,
            include_competitive=request.include_competitive,
            top_n=request.top_n
        )

        execution_time = (datetime.utcnow() - start_time).total_seconds()

        return BaseResponse(
            success=True,
            message="GSC strategy insights generated successfully",
            execution_time=execution_time,
            data=insights
        )

    except Exception as e:
        # Endpoint boundary: log with traceback, then convert the failure into
        # the shared error response instead of propagating.
        logger.error(f"GSC strategy insights failed: {str(e)}", exc_info=True)
        return await handle_seo_tool_exception("get_gsc_strategy_insights", e, request.dict())
|
||||
|
||||
|
||||
@router.post("/gsc/opportunity-ranking", response_model=BaseResponse)
@log_api_call
async def get_ranked_opportunities(
    request: GSCOpportunityRankingRequest,
    current_user: dict = Depends(get_current_user)
) -> Union[BaseResponse, ErrorResponse]:
    """
    Get ROI-ranked opportunities from GSC data.

    Returns opportunities sorted by specified metric:
    - roi_score: ROI-weighted score (recommended)
    - effort: Easiest to implement first
    - impact: Highest traffic impact first
    - timeline: Fastest results first

    Optional filtering by severity level:
    - critical: 80-100 ROI (immediate action required)
    - high: 60-79 ROI (high priority)
    - medium: 40-59 ROI (medium priority)
    - low: 20-39 ROI (low priority)
    - watch: <20 ROI (monitoring)

    Each opportunity includes:
    - ROI score and severity level
    - Implementation effort (hours)
    - Timeline to impact (weeks)
    - Recommendations
    - Related keywords
    """
    start_time = datetime.utcnow()

    def _severity_label(value: Any) -> str:
        """Reduce a severity (plain string or enum member) to a lower-case label."""
        value = getattr(value, "value", value)
        return "" if value is None else str(value).strip().lower()

    # Opportunity field and sort direction for each re-ranking metric.
    # 'roi_score' is absent on purpose: the order returned by the service is kept.
    sort_specs = {
        'effort': ('effort_hours', False),
        'impact': ('estimated_impact', True),
        'timeline': ('timeline_weeks', False),
    }

    try:
        # current_user is not read here; the dependency still runs
        # get_current_user for this route.
        service = GSCStrategyInsightsService()
        opportunities = await service._get_ranked_opportunities(
            site_url=str(request.site_url),
            top_n=request.limit
        )

        if opportunities.get('status') == 'success':
            ranked = opportunities.get('opportunities') or []

            # Filter by severity if specified. Labels are compared
            # case-insensitively so "CRITICAL" and "critical" both match.
            # NOTE(review): the filter runs after the service applied top_n, so
            # fewer than `limit` rows may come back - confirm this is intended.
            if request.severity_filter:
                wanted = _severity_label(request.severity_filter)
                ranked = [
                    opp for opp in ranked
                    if _severity_label(opp.get('severity')) == wanted
                ]
                opportunities['opportunities'] = ranked

            # Sort by metric. Missing or None values sort as 0 instead of
            # raising TypeError when compared with numbers.
            spec = sort_specs.get(request.ranking_metric)
            if spec is not None:
                field_name, descending = spec
                opportunities['opportunities'] = sorted(
                    ranked,
                    key=lambda opp: opp.get(field_name) or 0,
                    reverse=descending,
                )
            elif request.ranking_metric != 'roi_score':
                logger.warning(
                    f"Unknown ranking_metric '{request.ranking_metric}'; keeping ROI order"
                )

        execution_time = (datetime.utcnow() - start_time).total_seconds()

        return BaseResponse(
            success=True,
            message="Ranked opportunities retrieved successfully",
            execution_time=execution_time,
            data=opportunities
        )

    except Exception as e:
        # Endpoint boundary: log with traceback, then convert the failure into
        # the shared error response instead of propagating.
        logger.error(f"Ranked opportunities failed: {str(e)}", exc_info=True)
        return await handle_seo_tool_exception("get_ranked_opportunities", e, request.dict())
|
||||
|
||||
|
||||
@router.post("/gsc/health-metrics", response_model=BaseResponse)
@log_api_call
async def get_health_metrics(
    request: GSCHealthMetricsRequest,
    current_user: dict = Depends(get_current_user)
) -> Union[BaseResponse, ErrorResponse]:
    """
    Get comprehensive health metrics for SEO Dashboard.

    Returns overall SEO health with:
    - Health score (0-100)
    - Health trend (up/down/stable)
    - Keyword position distribution
    - Average metrics (position, CTR, etc.)
    - Optional: Trend comparison vs period ago

    Health Score Calculation:
    Score = 0.60×(Page1_Keywords%) + 0.30×CTR_vs_Benchmark + 0.10×Growth_Rate

    Interpretation:
    - 80-100: Excellent SEO health
    - 60-79: Good SEO health
    - 40-59: Needs improvement
    - 0-39: Critical issues
    """
    start_time = datetime.utcnow()

    try:
        # current_user is not read here; the dependency still runs
        # get_current_user for this route.
        # NOTE(review): request.include_distribution and request.include_trends
        # are accepted but never forwarded - the service call below receives
        # only the site URL. Confirm whether the service should honour them.
        service = GSCStrategyInsightsService()
        metrics = await service._calculate_health_metrics(
            site_url=str(request.site_url)
        )

        execution_time = (datetime.utcnow() - start_time).total_seconds()

        return BaseResponse(
            success=True,
            message="Health metrics calculated successfully",
            execution_time=execution_time,
            data=metrics
        )

    except Exception as e:
        # Endpoint boundary: log with traceback, then convert the failure into
        # the shared error response instead of propagating.
        logger.error(f"Health metrics calculation failed: {str(e)}", exc_info=True)
        return await handle_seo_tool_exception("get_health_metrics", e, request.dict())
|
||||
|
||||
|
||||
@router.post("/gsc/trend-analysis", response_model=BaseResponse)
@log_api_call
async def analyze_gsc_trends(
    request: GSCTrendAnalysisRequest,
    current_user: dict = Depends(get_current_user)
) -> Union[BaseResponse, ErrorResponse]:
    """
    Analyze performance trends from GSC data.

    Returns trend analysis for specified metrics:
    - position: Ranking trend for keywords
    - impressions: Search volume trends
    - clicks: Click trend
    - ctr: Click-through rate trend
    - all: All metrics combined

    For each metric includes:
    - Current value
    - Value from 30/90 days ago
    - Trend direction (up/down/stable)
    - Trend percentage change
    - Momentum (acceleration of trend)
    - Seasonal patterns
    - Anomalies detected

    Note: This feature requires historical data collection.
    Phase 1: Manual trend calculation from snapshots.
    Phase 2: Automated historical tracking.
    """
    start_time = datetime.utcnow()

    try:
        # current_user is not read here; the dependency still runs
        # get_current_user for this route.
        # NOTE(review): request.metric and request.days_back are validated but
        # never forwarded - the service call below receives only the site URL.
        # Confirm whether the service should honour them.
        service = GSCStrategyInsightsService()
        trends = await service._analyze_performance_trends(
            site_url=str(request.site_url)
        )

        execution_time = (datetime.utcnow() - start_time).total_seconds()

        return BaseResponse(
            success=True,
            message="Trend analysis completed",
            execution_time=execution_time,
            data=trends
        )

    except Exception as e:
        # Endpoint boundary: log with traceback, then convert the failure into
        # the shared error response instead of propagating.
        logger.error(f"Trend analysis failed: {str(e)}", exc_info=True)
        return await handle_seo_tool_exception("analyze_gsc_trends", e, request.dict())
|
||||
|
||||
|
||||
@router.get("/enterprise/health", response_model=BaseResponse)
|
||||
@log_api_call
|
||||
async def check_enterprise_services_health() -> BaseResponse:
|
||||
|
||||
86
backend/scripts/create_youtube_tasks_tables.py
Normal file
86
backend/scripts/create_youtube_tasks_tables.py
Normal file
@@ -0,0 +1,86 @@
|
||||
"""
|
||||
Create YouTube Video Tasks Table
|
||||
|
||||
Standalone script to create the youtube_video_tasks table in all user
|
||||
databases. Also recovers stale in-flight tasks by marking them as failed.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
||||
|
||||
from loguru import logger
|
||||
from models.youtube_task_models import YouTubeVideoTask, Base
|
||||
from models.subscription_models import Base as SubscriptionBase
|
||||
from services.database import get_engine_for_user, _user_engines
|
||||
from sqlalchemy import inspect
|
||||
|
||||
|
||||
def create_youtube_tasks_tables():
    """Create the youtube_video_tasks table in every known user database.

    For each database the tables are created if missing, then every task still
    marked 'pending' or 'processing' is flagged as failed, since no worker can
    still own it when this script runs. When no user database can be listed,
    the default engine is used instead.

    Returns:
        int: Number of databases processed without an error.
    """
    from services.database import get_all_user_dbs
    from sqlalchemy.orm import sessionmaker

    created = 0
    skipped = 0
    recovered = 0

    try:
        user_dbs = get_all_user_dbs()
    except Exception as e:
        # Best-effort fallback to the default database, but record why the
        # listing failed instead of hiding it.
        logger.warning(f"Could not list user databases: {e}")
        user_dbs = []

    if not user_dbs:
        logger.warning("No user databases found. Creating table in default database.")
        user_dbs = [None]

    for user_id in user_dbs:
        try:
            if user_id:
                engine = get_engine_for_user(user_id)
            else:
                from services.database import default_engine
                if not default_engine:
                    logger.error("No default engine available")
                    skipped += 1
                    continue
                engine = default_engine

            SubscriptionBase.metadata.create_all(bind=engine, checkfirst=True)
            # The model may be declared on a different declarative Base than
            # SubscriptionBase (not visible from this script). Creating its
            # table explicitly is a no-op when it already exists.
            YouTubeVideoTask.__table__.create(bind=engine, checkfirst=True)

            # Recover stale tasks
            SessionLocal = sessionmaker(bind=engine)
            db = SessionLocal()
            try:
                stale = db.query(YouTubeVideoTask).filter(
                    YouTubeVideoTask.status.in_([
                        'pending', 'processing',
                    ])
                ).all()

                for task in stale:
                    task.status = 'failed'
                    task.error = 'Task interrupted by server restart'
                    task.message = 'Recovered on table creation'

                if stale:
                    db.commit()
                    # Count only after the commit succeeded so a rollback does
                    # not inflate the summary.
                    recovered += len(stale)
                    logger.info(f"Recovered {len(stale)} stale tasks for user {user_id}")
            except Exception as e:
                # Recovery is best-effort; table creation above still counts.
                logger.warning(f"Failed to recover stale tasks for user {user_id}: {e}")
                db.rollback()
            finally:
                db.close()

            created += 1
            logger.info(f"Created youtube_video_tasks table for user {user_id}")
        except Exception as e:
            logger.error(f"Failed to create table for user {user_id}: {e}")
            skipped += 1

    logger.info(f"YouTube task table creation complete: {created} created, {skipped} skipped, {recovered} recovered")
    return created


# Entry point when the script is executed directly.
if __name__ == "__main__":
    create_youtube_tasks_tables()
|
||||
@@ -1,7 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pydantic import BaseModel, Field, HttpUrl, EmailStr
|
||||
from pydantic import BaseModel, Field, HttpUrl
|
||||
from typing import Dict, List, Optional
|
||||
from typing_extensions import Literal
|
||||
|
||||
|
||||
class BacklinkKeywordInput(BaseModel):
|
||||
@@ -10,7 +11,7 @@ class BacklinkKeywordInput(BaseModel):
|
||||
|
||||
|
||||
class OpportunityContactInfo(BaseModel):
|
||||
email: Optional[EmailStr] = None
|
||||
email: Optional[str] = None
|
||||
contact_page: Optional[HttpUrl] = None
|
||||
|
||||
|
||||
@@ -93,8 +94,9 @@ class LeadListResponse(BaseModel):
|
||||
|
||||
|
||||
class LeadStatusUpdateRequest(BaseModel):
|
||||
status: str = Field(..., min_length=1)
|
||||
status: Literal["discovered", "contacted", "replied", "placed", "bounced", "unsubscribed"]
|
||||
notes: Optional[str] = None
|
||||
campaign_id: Optional[str] = Field(default=None, min_length=1)
|
||||
|
||||
|
||||
class CampaignDetailResponse(BaseModel):
|
||||
@@ -148,6 +150,21 @@ class OutreachStatusRecord(BaseModel):
|
||||
notes: Optional[str] = None
|
||||
|
||||
|
||||
|
||||
class SenderIdentity(BaseModel):
    # Who an outreach email is sent on behalf of. Every field has a default
    # (empty string or None), so none of them is required by the schema.
    name: str = Field(
        "",
        description="Human sender name displayed to the recipient",
    )
    email: str = Field("")
    organization: str = Field(
        "",
        description="Organization or brand responsible for the outreach",
    )
    physical_mailing_address: str = Field(
        "",
        description="Postal address required for commercial outreach compliance",
    )
    reply_to_email: Optional[str] = Field(
        default=None,
        description="Optional reply-to mailbox if different from sender email",
    )
|
||||
|
||||
|
||||
class OneClickUnsubscribe(BaseModel):
    # One-click unsubscribe settings attached to an outreach message;
    # disabled unless `enabled` is set explicitly.
    enabled: bool = Field(False)
    mailto: Optional[str] = Field(
        default=None,
        description="Mailbox for one-click unsubscribe requests",
    )
    header_value: Optional[str] = Field(
        default=None,
        description="List-Unsubscribe / one-click unsubscribe header value",
    )
|
||||
|
||||
|
||||
class SendOutreachRequest(BaseModel):
|
||||
lead_id: str = Field(..., min_length=1)
|
||||
campaign_id: str = Field(..., min_length=1)
|
||||
@@ -157,6 +174,15 @@ class SendOutreachRequest(BaseModel):
|
||||
subject: str = Field(..., min_length=1)
|
||||
body: str = Field(..., min_length=1)
|
||||
idempotency_key: str = Field(..., min_length=8)
|
||||
sender_identity: Optional[SenderIdentity] = None
|
||||
legal_basis: str = Field(default="")
|
||||
contact_discovery_source: str = Field(default="")
|
||||
recipient_region: str = Field(default="unknown")
|
||||
recipient_region_source: str = Field(default="user_attested", min_length=2)
|
||||
consent_status: str = Field(default="unknown", min_length=2)
|
||||
approved_by_human: bool = False
|
||||
unsubscribe_url: Optional[HttpUrl] = None
|
||||
one_click_unsubscribe: Optional[OneClickUnsubscribe] = None
|
||||
template_id: Optional[str] = Field(None, description="Optional template ID for personalization")
|
||||
template_variables: Optional[dict] = Field(None, description="Variable values for template personalization")
|
||||
|
||||
@@ -166,6 +192,9 @@ class SendOutreachResponse(BaseModel):
|
||||
status: str
|
||||
policy_allowed: bool
|
||||
policy_reasons: List[str] = Field(default_factory=list)
|
||||
effective_sender_email: Optional[str] = None
|
||||
duplicate: bool = False
|
||||
retry_policy: Optional[str] = None
|
||||
|
||||
|
||||
class OutreachAttemptRecord(BaseModel):
|
||||
@@ -240,10 +269,15 @@ class PolicyValidationRequest(BaseModel):
|
||||
recipient_email: str = Field(..., min_length=1)
|
||||
recipient_domain: str
|
||||
recipient_region: str = Field(default="unknown")
|
||||
legal_basis: str = Field(..., min_length=2)
|
||||
recipient_region_source: str = Field(default="user_attested", min_length=2)
|
||||
legal_basis: str = Field(default="")
|
||||
contact_discovery_source: str = Field(default="")
|
||||
consent_status: str = Field(default="unknown", min_length=2)
|
||||
approved_by_human: bool = False
|
||||
unsubscribe_url: Optional[HttpUrl] = None
|
||||
sender_identity: str = Field(..., min_length=3)
|
||||
one_click_unsubscribe: Optional[OneClickUnsubscribe] = None
|
||||
sender_identity: Optional[SenderIdentity] = None
|
||||
sender_email: Optional[str] = Field(None, description="Transport sender email, if separate from identity")
|
||||
idempotency_key: str = Field(..., min_length=8)
|
||||
|
||||
|
||||
@@ -296,8 +330,9 @@ class ConversionFunnelResponse(BaseModel):
|
||||
|
||||
class BulkStatusUpdateRequest(BaseModel):
|
||||
lead_ids: List[str] = Field(..., min_length=1)
|
||||
status: str = Field(..., min_length=1)
|
||||
status: Literal["discovered", "contacted", "replied", "placed", "bounced", "unsubscribed"]
|
||||
notes: Optional[str] = None
|
||||
campaign_id: Optional[str] = Field(default=None, min_length=1)
|
||||
|
||||
|
||||
class BulkStatusUpdateResponse(BaseModel):
|
||||
|
||||
@@ -104,6 +104,8 @@ class BacklinkOutreachReplyMonitor:
|
||||
from_email = parsed_msg.get("From", "")
|
||||
subject = parsed_msg.get("Subject", "")
|
||||
received_at = parsed_msg.get("Date", "")
|
||||
in_reply_to = parsed_msg.get("In-Reply-To", "")
|
||||
references = parsed_msg.get("References", "")
|
||||
|
||||
# Extract body
|
||||
body = ""
|
||||
@@ -137,6 +139,8 @@ class BacklinkOutreachReplyMonitor:
|
||||
"body": body[:5000],
|
||||
"classification": classification,
|
||||
"received_at": received_at_iso,
|
||||
"in_reply_to": in_reply_to,
|
||||
"references": references,
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to parse reply: {e}")
|
||||
|
||||
@@ -8,11 +8,10 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import quote, urlparse
|
||||
|
||||
import requests
|
||||
import httpx
|
||||
from bs4 import BeautifulSoup
|
||||
from loguru import logger
|
||||
|
||||
@@ -34,26 +33,47 @@ class BacklinkOutreachScraper:
|
||||
# -- Public API --
|
||||
|
||||
async def deep_discover(
|
||||
self, keyword: str, max_results: int = 15
|
||||
self,
|
||||
keyword: str,
|
||||
max_results: int = 15,
|
||||
scrape_timeout_seconds: float = 15.0,
|
||||
scrape_max_concurrency: int = 5,
|
||||
) -> Dict[str, Any]:
|
||||
"""Discover guest-post opportunities using Exa, falling back to DuckDuckGo."""
|
||||
if self._is_exa_available():
|
||||
logger.info(f"[BacklinkScraper] Using Exa for keyword: {keyword}")
|
||||
return await self._discover_with_exa(keyword, max_results)
|
||||
logger.info(f"[BacklinkScraper] Exa unavailable, falling back to DuckDuckGo for: {keyword}")
|
||||
return await self._discover_with_duckduckgo(keyword, max_results)
|
||||
return await self._discover_with_duckduckgo(
|
||||
keyword,
|
||||
max_results,
|
||||
scrape_timeout_seconds=scrape_timeout_seconds,
|
||||
scrape_max_concurrency=scrape_max_concurrency,
|
||||
)
|
||||
|
||||
def scrape_urls(self, urls: List[str]) -> List[Dict[str, Any]]:
|
||||
"""Fetch full page content for a list of URLs using Exa get_contents."""
|
||||
async def scrape_urls(
|
||||
self,
|
||||
urls: List[str],
|
||||
timeout_seconds: float = 15.0,
|
||||
max_concurrency: int = 5,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Fetch full page content with non-blocking fallbacks and bounded concurrency."""
|
||||
exa = self._get_exa_sdk()
|
||||
if not exa:
|
||||
return self._scrape_urls_fallback(urls)
|
||||
return await self._scrape_urls_fallback(
|
||||
urls, timeout_seconds=timeout_seconds, max_concurrency=max_concurrency
|
||||
)
|
||||
loop = asyncio.get_running_loop()
|
||||
try:
|
||||
result = exa.get_contents(urls, text={"max_characters": 5000})
|
||||
result = await loop.run_in_executor(
|
||||
None, lambda: exa.get_contents(urls, text={"max_characters": 5000})
|
||||
)
|
||||
return self._parse_get_contents_result(result)
|
||||
except Exception as e:
|
||||
logger.warning(f"[BacklinkScraper] Exa get_contents failed: {e}")
|
||||
return self._scrape_urls_fallback(urls)
|
||||
return await self._scrape_urls_fallback(
|
||||
urls, timeout_seconds=timeout_seconds, max_concurrency=max_concurrency
|
||||
)
|
||||
|
||||
# -- Availability --
|
||||
|
||||
@@ -207,12 +227,19 @@ class BacklinkOutreachScraper:
|
||||
|
||||
# -- DuckDuckGo Fallback Discovery --
|
||||
|
||||
async def _discover_with_duckduckgo(self, keyword: str, max_results: int) -> Dict[str, Any]:
|
||||
async def _discover_with_duckduckgo(
|
||||
self,
|
||||
keyword: str,
|
||||
max_results: int,
|
||||
scrape_timeout_seconds: float = 15.0,
|
||||
scrape_max_concurrency: int = 5,
|
||||
) -> Dict[str, Any]:
|
||||
queries = self._generate_search_queries(keyword)
|
||||
dedup: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
async with httpx.AsyncClient(timeout=httpx.Timeout(12.0), follow_redirects=True) as client:
|
||||
for query in queries[:4]:
|
||||
rows = self._duckduckgo_search(query)
|
||||
rows = await self._duckduckgo_search(query, client=client)
|
||||
for row in rows:
|
||||
norm_url = self._normalize_url(row.get("url", ""))
|
||||
if not norm_url or norm_url in dedup:
|
||||
@@ -220,11 +247,15 @@ class BacklinkOutreachScraper:
|
||||
dedup[norm_url] = row
|
||||
if len(dedup) >= max_results:
|
||||
break
|
||||
time.sleep(0.4)
|
||||
await asyncio.sleep(0.4)
|
||||
|
||||
# Scrape discovered URLs with Exa get_contents (or fallback)
|
||||
urls_to_scrape = list(dedup.keys())[:max_results]
|
||||
scraped = self.scrape_urls(urls_to_scrape)
|
||||
scraped = await self.scrape_urls(
|
||||
urls_to_scrape,
|
||||
timeout_seconds=scrape_timeout_seconds,
|
||||
max_concurrency=scrape_max_concurrency,
|
||||
)
|
||||
scraped_map = {self._normalize_url(s.get("url", "")): s for s in scraped}
|
||||
|
||||
# Merge DDG results with scraped content
|
||||
@@ -250,13 +281,20 @@ class BacklinkOutreachScraper:
|
||||
"opportunities": opportunities,
|
||||
}
|
||||
|
||||
def _duckduckgo_search(self, query: str, retries: int = 2) -> List[Dict[str, Any]]:
|
||||
encoded = requests.utils.quote(query)
|
||||
async def _duckduckgo_search(
|
||||
self,
|
||||
query: str,
|
||||
retries: int = 2,
|
||||
client: Optional[httpx.AsyncClient] = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
encoded = quote(query)
|
||||
url = f"https://duckduckgo.com/html/?q={encoded}"
|
||||
headers = {"User-Agent": "Mozilla/5.0 ALwrityBacklinkBot/1.0"}
|
||||
|
||||
async def _request(active_client: httpx.AsyncClient) -> List[Dict[str, Any]]:
|
||||
for attempt in range(retries + 1):
|
||||
try:
|
||||
resp = requests.get(url, headers=headers, timeout=12)
|
||||
resp = await active_client.get(url, headers=headers)
|
||||
resp.raise_for_status()
|
||||
soup = BeautifulSoup(resp.text, "html.parser")
|
||||
results = []
|
||||
@@ -272,29 +310,47 @@ class BacklinkOutreachScraper:
|
||||
"highlights": [],
|
||||
})
|
||||
return results
|
||||
except Exception:
|
||||
except (httpx.HTTPError, httpx.TimeoutException):
|
||||
if attempt == retries:
|
||||
return []
|
||||
time.sleep(0.6 * (attempt + 1))
|
||||
await asyncio.sleep(0.6 * (attempt + 1))
|
||||
return []
|
||||
|
||||
def _scrape_urls_fallback(self, urls: List[str]) -> List[Dict[str, Any]]:
|
||||
"""Basic HTTP scrape when Exa is unavailable."""
|
||||
results = []
|
||||
if client is not None:
|
||||
return await _request(client)
|
||||
|
||||
async with httpx.AsyncClient(timeout=httpx.Timeout(12.0), follow_redirects=True) as owned_client:
|
||||
return await _request(owned_client)
|
||||
|
||||
async def _scrape_urls_fallback(
|
||||
self,
|
||||
urls: List[str],
|
||||
timeout_seconds: float = 15.0,
|
||||
max_concurrency: int = 5,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Basic async HTTP scrape when Exa is unavailable."""
|
||||
headers = {"User-Agent": "Mozilla/5.0 ALwrityBacklinkBot/1.0"}
|
||||
for url in urls[:5]:
|
||||
semaphore = asyncio.Semaphore(max(1, max_concurrency))
|
||||
timeout = httpx.Timeout(timeout_seconds)
|
||||
|
||||
async def scrape_one(client: httpx.AsyncClient, url: str) -> Optional[Dict[str, Any]]:
|
||||
async with semaphore:
|
||||
try:
|
||||
resp = requests.get(url, headers=headers, timeout=15)
|
||||
resp = await client.get(url, headers=headers)
|
||||
resp.raise_for_status()
|
||||
soup = BeautifulSoup(resp.text, "html.parser")
|
||||
for tag in soup(["script", "style", "nav", "footer", "header"]):
|
||||
tag.decompose()
|
||||
text = soup.get_text(separator=" ", strip=True)
|
||||
title = soup.title.get_text(strip=True) if soup.title else ""
|
||||
results.append({"url": url, "title": title, "text": text[:5000], "highlights": [], "summary": ""})
|
||||
except Exception:
|
||||
continue
|
||||
return results
|
||||
return {"url": url, "title": title, "text": text[:5000], "highlights": [], "summary": ""}
|
||||
except (httpx.HTTPError, httpx.TimeoutException):
|
||||
return None
|
||||
|
||||
async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
|
||||
tasks = [scrape_one(client, url) for url in urls]
|
||||
scraped = await asyncio.gather(*tasks)
|
||||
return [row for row in scraped if row]
|
||||
|
||||
# -- Enrichment Pipeline --
|
||||
|
||||
|
||||
@@ -6,9 +6,11 @@ import os
|
||||
import ssl
|
||||
import smtplib
|
||||
import asyncio
|
||||
from dataclasses import dataclass, field
|
||||
from email.mime.text import MIMEText
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
from typing import Optional
|
||||
from typing import List, Optional, Set
|
||||
from uuid import uuid4
|
||||
from loguru import logger
|
||||
|
||||
|
||||
@@ -17,11 +19,27 @@ SMTP_PORT = int(os.getenv("SMTP_PORT", "587"))
|
||||
SMTP_USERNAME = os.getenv("SMTP_USERNAME", "")
|
||||
SMTP_PASSWORD = os.getenv("SMTP_PASSWORD", "")
|
||||
SMTP_FROM_EMAIL = os.getenv("SMTP_FROM_EMAIL", SMTP_USERNAME)
|
||||
SMTP_ALLOWED_FROM_EMAILS = os.getenv("SMTP_ALLOWED_FROM_EMAILS", "")
|
||||
SMTP_USE_TLS = os.getenv("SMTP_USE_TLS", "true").lower() in ("true", "1", "yes")
|
||||
SMTP_VERIFY_TLS = os.getenv("SMTP_VERIFY_TLS", "true").lower() in ("true", "1", "yes")
|
||||
SMTP_SEND_TIMEOUT = int(os.getenv("SMTP_SEND_TIMEOUT", "30"))
|
||||
|
||||
|
||||
@dataclass
class SenderAuthorizationResult:
    """Outcome of checking whether a From address may be used for sending."""

    # True when the requested sender passed validation.
    authorized: bool
    # Normalized address that was evaluated; empty when none could be resolved.
    effective_sender_email: str = field(default="")
    # Machine-readable reason codes; empty when authorized.
    failure_reasons: List[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class SendEmailResult:
    """Outcome of a single outreach email send attempt."""

    # True when the message was handed to the SMTP server successfully.
    success: bool
    # Address that was used (or would have been used) as the sender.
    effective_sender_email: str = field(default="")
    # Message-ID of the sent message; empty when nothing was sent.
    message_id: str = field(default="")
    # Machine-readable reason codes; empty on success.
    failure_reasons: List[str] = field(default_factory=list)
|
||||
|
||||
|
||||
class BacklinkOutreachSender:
|
||||
def __init__(self):
|
||||
self._host = SMTP_HOST
|
||||
@@ -29,6 +47,7 @@ class BacklinkOutreachSender:
|
||||
self._username = SMTP_USERNAME
|
||||
self._password = SMTP_PASSWORD
|
||||
self._from_email = SMTP_FROM_EMAIL or SMTP_USERNAME
|
||||
self._allowed_from_emails = SMTP_ALLOWED_FROM_EMAILS
|
||||
self._use_tls = SMTP_USE_TLS
|
||||
self._verify_tls = SMTP_VERIFY_TLS
|
||||
self._timeout = SMTP_SEND_TIMEOUT
|
||||
@@ -36,23 +55,75 @@ class BacklinkOutreachSender:
|
||||
def is_configured(self) -> bool:
|
||||
return bool(self._username and self._password)
|
||||
|
||||
@staticmethod
def _normalize_email(email: Optional[str]) -> str:
    """Return *email* trimmed and lower-cased; None or empty input yields ""."""
    if not email:
        return ""
    return email.strip().lower()
|
||||
|
||||
def _allowed_sender_aliases(self) -> Set[str]:
    """Return every normalized address this sender may use as From.

    The set combines the comma-separated allow-list with the configured
    default sender and the SMTP username; blank entries are dropped.
    """
    candidates = [
        *self._allowed_from_emails.split(","),
        self._from_email,
        self._username,
    ]
    normalized = (self._normalize_email(candidate) for candidate in candidates)
    return {address for address in normalized if address}
|
||||
|
||||
def validate_sender_alias(self, from_email: Optional[str] = None) -> SenderAuthorizationResult:
    """Decide whether *from_email* may be used as the sender address.

    When *from_email* is blank, the configured default sender (falling back
    to the SMTP username) is evaluated instead.

    Returns:
        A SenderAuthorizationResult carrying the normalized address that was
        evaluated and, on rejection, one of the reason codes
        "smtp_not_configured", "smtp_sender_missing" or
        "sender_alias_not_authorized".
    """
    fallback = self._normalize_email(self._from_email or self._username)
    candidate = self._normalize_email(from_email) or fallback

    # Guard clauses, checked in order: configuration, presence, allow-list.
    if not self.is_configured():
        return SenderAuthorizationResult(
            authorized=False,
            effective_sender_email=candidate,
            failure_reasons=["smtp_not_configured"],
        )

    if not candidate:
        return SenderAuthorizationResult(
            authorized=False,
            failure_reasons=["smtp_sender_missing"],
        )

    if candidate in self._allowed_sender_aliases():
        return SenderAuthorizationResult(
            authorized=True,
            effective_sender_email=candidate,
        )

    return SenderAuthorizationResult(
        authorized=False,
        effective_sender_email=candidate,
        failure_reasons=["sender_alias_not_authorized"],
    )
|
||||
|
||||
async def send_email(
|
||||
self,
|
||||
to_email: str,
|
||||
subject: str,
|
||||
body: str,
|
||||
from_email: Optional[str] = None,
|
||||
) -> bool:
|
||||
if not self.is_configured():
|
||||
logger.error("SMTP not configured: set SMTP_USERNAME and SMTP_PASSWORD")
|
||||
return False
|
||||
) -> SendEmailResult:
|
||||
sender_validation = self.validate_sender_alias(from_email)
|
||||
if not sender_validation.authorized:
|
||||
logger.error(f"SMTP sender validation failed: {sender_validation.failure_reasons}")
|
||||
return SendEmailResult(
|
||||
success=False,
|
||||
effective_sender_email=sender_validation.effective_sender_email,
|
||||
failure_reasons=sender_validation.failure_reasons,
|
||||
)
|
||||
|
||||
sender = from_email or self._from_email
|
||||
sender = sender_validation.effective_sender_email
|
||||
|
||||
msg_id = f"<{uuid4().hex}@{sender.split('@')[-1] if '@' in sender else 'outreach.local'}>"
|
||||
msg = MIMEMultipart("alternative")
|
||||
msg["From"] = sender
|
||||
msg["To"] = to_email
|
||||
msg["Subject"] = subject
|
||||
msg["Message-ID"] = msg_id
|
||||
msg.attach(MIMEText(body, "plain"))
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
@@ -78,7 +149,13 @@ class BacklinkOutreachSender:
|
||||
logger.error(f"Unexpected error sending to {to_email}: {e}")
|
||||
return False
|
||||
|
||||
return await loop.run_in_executor(None, _send)
|
||||
success = await loop.run_in_executor(None, _send)
|
||||
return SendEmailResult(
|
||||
success=success,
|
||||
effective_sender_email=sender,
|
||||
message_id=msg_id if success else "",
|
||||
failure_reasons=[] if success else ["smtp_send_failed"],
|
||||
)
|
||||
|
||||
def personalize(self, template: str, variables: dict) -> str:
|
||||
"""Replace {placeholder} variables in a template string."""
|
||||
|
||||
@@ -4,10 +4,11 @@ from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
from urllib.parse import quote
|
||||
import asyncio
|
||||
import re
|
||||
import time
|
||||
|
||||
import requests
|
||||
import httpx
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
import csv
|
||||
@@ -22,9 +23,6 @@ from services.backlink_outreach_models import (
|
||||
)
|
||||
from services.backlink_outreach_storage import BacklinkOutreachStorageService
|
||||
|
||||
DEFAULT_USER_DAILY_CAP = 100
|
||||
DEFAULT_DOMAIN_DAILY_CAP = 20
|
||||
|
||||
@dataclass
|
||||
class SearchResult:
|
||||
url: str
|
||||
@@ -55,14 +53,22 @@ class BacklinkOutreachService:
|
||||
f"{normalized} + 'Submit article'",
|
||||
]
|
||||
|
||||
def search_for_urls(self, query: str, timeout_seconds: int = 12, retries: int = 2) -> List[SearchResult]:
|
||||
encoded_query = requests.utils.quote(query)
|
||||
async def search_for_urls(
|
||||
self,
|
||||
query: str,
|
||||
timeout_seconds: int = 12,
|
||||
retries: int = 2,
|
||||
client: Optional[httpx.AsyncClient] = None,
|
||||
) -> List[SearchResult]:
|
||||
"""Search DuckDuckGo HTML using a non-blocking HTTP client."""
|
||||
encoded_query = quote(query)
|
||||
url = f"https://duckduckgo.com/html/?q={encoded_query}"
|
||||
headers = {"User-Agent": "Mozilla/5.0 ALwrityBacklinkBot/1.0"}
|
||||
|
||||
async def _request(active_client: httpx.AsyncClient) -> List[SearchResult]:
|
||||
for attempt in range(retries + 1):
|
||||
try:
|
||||
response = requests.get(url, headers=headers, timeout=timeout_seconds)
|
||||
response = await active_client.get(url, headers=headers)
|
||||
response.raise_for_status()
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
rows: List[SearchResult] = []
|
||||
@@ -79,18 +85,26 @@ class BacklinkOutreachService:
|
||||
)
|
||||
)
|
||||
return rows
|
||||
except Exception:
|
||||
except (httpx.HTTPError, httpx.TimeoutException):
|
||||
if attempt == retries:
|
||||
return []
|
||||
time.sleep(0.6 * (attempt + 1))
|
||||
await asyncio.sleep(0.6 * (attempt + 1))
|
||||
return []
|
||||
|
||||
def discover_opportunities(self, keyword: str, max_results: int = 10) -> Dict[str, Any]:
|
||||
if client is not None:
|
||||
return await _request(client)
|
||||
|
||||
timeout = httpx.Timeout(timeout_seconds)
|
||||
async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as owned_client:
|
||||
return await _request(owned_client)
|
||||
|
||||
async def discover_opportunities_async(self, keyword: str, max_results: int = 10) -> Dict[str, Any]:
|
||||
queries = self.generate_guest_post_queries(keyword)[:4]
|
||||
dedup: Dict[str, SearchResult] = {}
|
||||
|
||||
async with httpx.AsyncClient(timeout=httpx.Timeout(12.0), follow_redirects=True) as client:
|
||||
for query in queries:
|
||||
for result in self.search_for_urls(query):
|
||||
for result in await self.search_for_urls(query, client=client):
|
||||
normalized_url = self._normalize_url(result.url)
|
||||
if not normalized_url or normalized_url in dedup:
|
||||
continue
|
||||
@@ -99,7 +113,7 @@ class BacklinkOutreachService:
|
||||
break
|
||||
if len(dedup) >= max_results:
|
||||
break
|
||||
time.sleep(0.4)
|
||||
await asyncio.sleep(0.4)
|
||||
|
||||
opportunities: List[OpportunityRecord] = []
|
||||
for normalized_url, row in dedup.items():
|
||||
@@ -118,6 +132,10 @@ class BacklinkOutreachService:
|
||||
|
||||
return {"keyword": keyword, "queries": queries, "opportunities": opportunities}
|
||||
|
||||
def discover_opportunities(self, keyword: str, max_results: int = 10) -> Dict[str, Any]:
|
||||
"""Synchronous compatibility wrapper for non-async callers."""
|
||||
return asyncio.run(self.discover_opportunities_async(keyword, max_results))
|
||||
|
||||
def _normalize_url(self, url: str) -> str:
|
||||
u = (url or "").strip()
|
||||
if not u:
|
||||
@@ -144,32 +162,76 @@ class BacklinkOutreachService:
|
||||
def _get_storage(self) -> BacklinkOutreachStorageService:
|
||||
return BacklinkOutreachStorageService()
|
||||
|
||||
CONSENT_REQUIRED_REGIONS = {"eu", "eea", "uk", "ca"}
|
||||
MANUAL_REVIEW_REGIONS = {"unknown", "br", "cn", "jp", "kr"}
|
||||
LOW_CONFIDENCE_REGION_SOURCES = {"tld_inference", "domain_tld", "inferred", "unknown"}
|
||||
VALID_LEGAL_BASES = {"legitimate_interest", "consent", "contract"}
|
||||
VALID_CONSENT_STATUSES = {"explicit", "implied", "not_required", "unknown"}
|
||||
|
||||
@staticmethod
|
||||
def _has_one_click_unsubscribe(payload: PolicyValidationRequest) -> bool:
|
||||
one_click = payload.one_click_unsubscribe
|
||||
if not one_click or not one_click.enabled:
|
||||
return False
|
||||
return bool(one_click.mailto or (one_click.header_value or "").strip())
|
||||
|
||||
def validate_send_policy(self, payload: PolicyValidationRequest) -> PolicyValidationResponse:
|
||||
reasons: List[str] = []
|
||||
storage = self._get_storage()
|
||||
|
||||
legal_basis = payload.legal_basis.strip().lower()
|
||||
recipient_region = payload.recipient_region.strip().lower()
|
||||
region_source = payload.recipient_region_source.strip().lower()
|
||||
consent_status = payload.consent_status.strip().lower()
|
||||
discovery_source = payload.contact_discovery_source.strip()
|
||||
sender = payload.sender_identity
|
||||
|
||||
if payload.workspace_id.startswith("new-") and not payload.approved_by_human:
|
||||
reasons.append("human_review_required_for_new_workspace")
|
||||
if payload.legal_basis.lower() not in {"legitimate_interest", "consent", "contract"}:
|
||||
reasons.append("invalid_legal_basis")
|
||||
if payload.recipient_region.lower() in {"eu", "eea"} and payload.legal_basis.lower() != "consent":
|
||||
reasons.append("region_requires_explicit_consent")
|
||||
if not legal_basis:
|
||||
reasons.append("legal_basis_required")
|
||||
elif legal_basis not in self.VALID_LEGAL_BASES:
|
||||
reasons.append("invalid_legal_basis_recorded")
|
||||
if not discovery_source:
|
||||
reasons.append("contact_discovery_source_required")
|
||||
if consent_status not in self.VALID_CONSENT_STATUSES:
|
||||
reasons.append("invalid_consent_status")
|
||||
|
||||
if len(payload.sender_identity.strip()) < 3:
|
||||
reasons.append("sender_identity_required")
|
||||
has_unsubscribe = bool(payload.unsubscribe_url) or self._has_one_click_unsubscribe(payload)
|
||||
if not has_unsubscribe:
|
||||
reasons.append("unsubscribe_url_or_one_click_unsubscribe_required")
|
||||
|
||||
if not sender:
|
||||
reasons.append("complete_sender_identity_required")
|
||||
else:
|
||||
sender_email = str(sender.email).strip()
|
||||
if not sender.name.strip():
|
||||
reasons.append("sender_name_required")
|
||||
if not sender_email:
|
||||
reasons.append("sender_email_required")
|
||||
elif not re.match(r"^[^@\s]+@[^@\s]+\.[^@\s]+$", sender_email):
|
||||
reasons.append("sender_email_invalid")
|
||||
if not sender.organization.strip():
|
||||
reasons.append("sender_organization_required")
|
||||
if not sender.physical_mailing_address.strip():
|
||||
reasons.append("sender_physical_mailing_address_required")
|
||||
if payload.sender_email and sender_email.lower() != str(payload.sender_email).lower():
|
||||
reasons.append("sender_identity_email_mismatch")
|
||||
|
||||
if recipient_region in self.CONSENT_REQUIRED_REGIONS:
|
||||
if legal_basis != "consent" or consent_status != "explicit":
|
||||
reasons.append("region_requires_recorded_explicit_consent")
|
||||
elif recipient_region in self.MANUAL_REVIEW_REGIONS and not payload.approved_by_human:
|
||||
reasons.append("manual_review_required_for_recipient_region")
|
||||
|
||||
if region_source in self.LOW_CONFIDENCE_REGION_SOURCES and not payload.approved_by_human:
|
||||
reasons.append("manual_review_required_for_tld_or_unknown_region_source")
|
||||
|
||||
if storage.is_suppressed(str(payload.recipient_email), payload.recipient_domain, user_id=payload.user_id):
|
||||
reasons.append("recipient_suppressed")
|
||||
if storage.check_idempotency(payload.idempotency_key, user_id=payload.user_id):
|
||||
reasons.append("duplicate_idempotency_key")
|
||||
|
||||
user_count = storage.get_user_send_count(payload.user_id)
|
||||
domain_count = storage.get_domain_send_count(payload.recipient_domain, user_id=payload.user_id)
|
||||
if user_count >= DEFAULT_USER_DAILY_CAP:
|
||||
reasons.append("user_daily_cap_exceeded")
|
||||
if domain_count >= DEFAULT_DOMAIN_DAILY_CAP:
|
||||
reasons.append("domain_daily_cap_exceeded")
|
||||
|
||||
allowed = len(reasons) == 0
|
||||
final_status = "approved" if allowed else "blocked"
|
||||
|
||||
@@ -199,15 +261,82 @@ class BacklinkOutreachService:
|
||||
return "au"
|
||||
return "unknown"
|
||||
|
||||
|
||||
SMTP_RETRY_POLICY = "manual_retry_with_new_idempotency_key"
|
||||
|
||||
@staticmethod
|
||||
def _decision_parts(attempt: Optional[dict]) -> List[str]:
|
||||
if not attempt:
|
||||
return []
|
||||
reason = attempt.get("decision_reason") or ""
|
||||
return [part.strip() for part in reason.split(";") if part.strip()]
|
||||
|
||||
def response_from_attempt(self, attempt: Optional[dict], duplicate: bool = False) -> SendOutreachResponse:
|
||||
if not attempt:
|
||||
return SendOutreachResponse(
|
||||
attempt_id="",
|
||||
status="duplicate",
|
||||
policy_allowed=False,
|
||||
policy_reasons=["duplicate_idempotency_key"],
|
||||
duplicate=True,
|
||||
)
|
||||
|
||||
status = attempt.get("status", "failed")
|
||||
parts = self._decision_parts(attempt)
|
||||
retry_policy = next((part.split("=", 1)[1] for part in parts if part.startswith("retry_policy=")), None)
|
||||
reasons = [part for part in parts if not part.startswith("retry_policy=")]
|
||||
if not retry_policy and ("smtp_send_failed" in reasons or "lead_has_no_email" in reasons):
|
||||
retry_policy = self.SMTP_RETRY_POLICY
|
||||
policy_allowed = status in {"queued", "approved", "sent", "failed"} and not any(
|
||||
reason.startswith("human_review_required")
|
||||
or reason in {
|
||||
"invalid_legal_basis",
|
||||
"region_requires_explicit_consent",
|
||||
"sender_identity_required",
|
||||
"recipient_suppressed",
|
||||
"user_daily_cap_exceeded",
|
||||
"domain_daily_cap_exceeded",
|
||||
}
|
||||
for reason in reasons
|
||||
)
|
||||
if status == "blocked":
|
||||
policy_allowed = False
|
||||
return SendOutreachResponse(
|
||||
attempt_id=attempt.get("attempt_id", ""),
|
||||
status=status,
|
||||
policy_allowed=policy_allowed,
|
||||
policy_reasons=reasons,
|
||||
duplicate=duplicate,
|
||||
retry_policy=retry_policy,
|
||||
)
|
||||
|
||||
def send_outreach(self, request: SendOutreachRequest) -> SendOutreachResponse:
|
||||
storage = self._get_storage()
|
||||
lead = storage.get_lead(request.lead_id, user_id=request.user_id)
|
||||
if not lead:
|
||||
return SendOutreachResponse(attempt_id="", status="failed", policy_allowed=False, policy_reasons=["lead_not_found"])
|
||||
|
||||
reservation = storage.reserve_attempt_idempotency(
|
||||
lead_id=request.lead_id,
|
||||
campaign_id=request.campaign_id,
|
||||
idempotency_key=request.idempotency_key,
|
||||
sender_email=request.sender_email,
|
||||
subject=request.subject,
|
||||
body=request.body,
|
||||
user_id=request.user_id,
|
||||
)
|
||||
if not reservation.get("reserved"):
|
||||
return self.response_from_attempt(reservation.get("attempt"), duplicate=True)
|
||||
|
||||
attempt = reservation.get("attempt") or {}
|
||||
attempt_id = attempt.get("attempt_id", "")
|
||||
domain = lead.get("domain", request.sender_email.split("@")[-1] if "@" in request.sender_email else "unknown")
|
||||
recipient_region = (request.recipient_region or "unknown").strip().lower()
|
||||
if recipient_region == "unknown":
|
||||
recipient_region = self._infer_region(domain)
|
||||
legal_basis = "consent" if recipient_region == "eu" else "legitimate_interest"
|
||||
region_source = "tld_inference" if recipient_region != "unknown" else request.recipient_region_source
|
||||
else:
|
||||
region_source = request.recipient_region_source
|
||||
|
||||
policy_req = PolicyValidationRequest(
|
||||
user_id=request.user_id,
|
||||
@@ -216,31 +345,32 @@ class BacklinkOutreachService:
|
||||
recipient_email=lead.get("email", ""),
|
||||
recipient_domain=domain,
|
||||
recipient_region=recipient_region,
|
||||
legal_basis=legal_basis,
|
||||
approved_by_human=False,
|
||||
unsubscribe_url=None,
|
||||
sender_identity=request.sender_email,
|
||||
recipient_region_source=region_source,
|
||||
legal_basis=request.legal_basis,
|
||||
contact_discovery_source=request.contact_discovery_source,
|
||||
consent_status=request.consent_status,
|
||||
approved_by_human=request.approved_by_human,
|
||||
unsubscribe_url=request.unsubscribe_url,
|
||||
one_click_unsubscribe=request.one_click_unsubscribe,
|
||||
sender_identity=request.sender_identity,
|
||||
sender_email=request.sender_email,
|
||||
idempotency_key=request.idempotency_key,
|
||||
)
|
||||
policy = self.validate_send_policy(policy_req)
|
||||
|
||||
attempt = storage.add_attempt(
|
||||
lead_id=request.lead_id,
|
||||
campaign_id=request.campaign_id,
|
||||
idempotency_key=request.idempotency_key,
|
||||
sender_email=request.sender_email,
|
||||
subject=request.subject,
|
||||
body=request.body,
|
||||
status="approved" if policy.allowed else "blocked",
|
||||
updated_attempt = storage.update_attempt_status(
|
||||
attempt_id,
|
||||
"approved" if policy.allowed else "blocked",
|
||||
decision_reason="; ".join(policy.reasons) if policy.reasons else None,
|
||||
user_id=request.user_id,
|
||||
)
|
||||
) or attempt
|
||||
|
||||
return SendOutreachResponse(
|
||||
attempt_id=attempt.get("attempt_id", ""),
|
||||
status=attempt.get("status", "failed"),
|
||||
attempt_id=updated_attempt.get("attempt_id", attempt_id),
|
||||
status=updated_attempt.get("status", "failed"),
|
||||
policy_allowed=policy.allowed,
|
||||
policy_reasons=policy.reasons,
|
||||
effective_sender_email=request.sender_email,
|
||||
)
|
||||
|
||||
def get_reporting_snapshot(self, user_id: str = "default") -> Dict[str, Any]:
|
||||
@@ -323,11 +453,23 @@ class BacklinkOutreachService:
|
||||
writer.writerows([{k: self._sanitize_csv_value(v) for k, v in row.items()}])
|
||||
return output.getvalue()
|
||||
|
||||
async def deep_discover(self, keyword: str, max_results: int = 15) -> Dict[str, Any]:
|
||||
async def deep_discover(
|
||||
self,
|
||||
keyword: str,
|
||||
max_results: int = 15,
|
||||
user_id: Optional[str] = None,
|
||||
scrape_timeout_seconds: float = 15.0,
|
||||
scrape_max_concurrency: int = 5,
|
||||
) -> Dict[str, Any]:
|
||||
"""Enhanced discovery using Exa neural search + DuckDuckGo with full-page scraping."""
|
||||
from services.backlink_outreach_scraper import BacklinkOutreachScraper
|
||||
scraper = BacklinkOutreachScraper(user_id=self._user_id if hasattr(self, '_user_id') else None)
|
||||
return await scraper.deep_discover(keyword, max_results)
|
||||
scraper = BacklinkOutreachScraper(user_id=user_id)
|
||||
return await scraper.deep_discover(
|
||||
keyword,
|
||||
max_results,
|
||||
scrape_timeout_seconds=scrape_timeout_seconds,
|
||||
scrape_max_concurrency=scrape_max_concurrency,
|
||||
)
|
||||
|
||||
def get_migration_coverage(self) -> Dict[str, Any]:
|
||||
implemented = [
|
||||
|
||||
@@ -6,6 +6,9 @@ from datetime import datetime, date
|
||||
from uuid import uuid4
|
||||
from typing import List, Optional
|
||||
from sqlalchemy import text as sql_text, func as sa_func
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
|
||||
LEAD_VALID_STATUSES = frozenset({"discovered", "contacted", "replied", "placed", "bounced", "unsubscribed"})
|
||||
|
||||
from services.database import get_session_for_user
|
||||
from models.backlink_outreach_models import (
|
||||
@@ -16,6 +19,14 @@ from models.backlink_outreach_models import (
|
||||
)
|
||||
|
||||
|
||||
class BacklinkCampaignNotFoundError(RuntimeError):
|
||||
"""Raised when a backlink campaign is missing or not owned by the user."""
|
||||
|
||||
|
||||
DEFAULT_USER_DAILY_CAP = 100
|
||||
DEFAULT_DOMAIN_DAILY_CAP = 20
|
||||
|
||||
|
||||
class BacklinkOutreachStorageService:
|
||||
_NEW_LEAD_COLUMNS = [
|
||||
"url", "page_title", "snippet", "confidence_score", "discovery_source", "notes"
|
||||
@@ -120,6 +131,14 @@ class BacklinkOutreachStorageService:
|
||||
|
||||
# -- Lead CRUD --
|
||||
|
||||
def _campaign_belongs_to_user(self, db, campaign_id: str, user_id: str) -> bool:
|
||||
return (
|
||||
db.query(BacklinkCampaign)
|
||||
.filter(BacklinkCampaign.id == campaign_id, BacklinkCampaign.user_id == user_id)
|
||||
.first()
|
||||
is not None
|
||||
)
|
||||
|
||||
def add_lead(
|
||||
self,
|
||||
campaign_id: str,
|
||||
@@ -138,6 +157,17 @@ class BacklinkOutreachStorageService:
|
||||
if not db:
|
||||
raise RuntimeError("Database session unavailable")
|
||||
try:
|
||||
if not self._campaign_belongs_to_user(db, campaign_id, user_id):
|
||||
raise BacklinkCampaignNotFoundError("Campaign not found")
|
||||
|
||||
existing = (
|
||||
db.query(BacklinkLead)
|
||||
.filter(BacklinkLead.campaign_id == campaign_id, BacklinkLead.url == url)
|
||||
.first()
|
||||
)
|
||||
if existing:
|
||||
return self._lead_to_dict(existing)
|
||||
|
||||
lead = BacklinkLead(
|
||||
id=f"bl_{uuid4().hex[:16]}",
|
||||
campaign_id=campaign_id,
|
||||
@@ -164,12 +194,25 @@ class BacklinkOutreachStorageService:
|
||||
if not db:
|
||||
raise RuntimeError("Database session unavailable")
|
||||
try:
|
||||
if not self._campaign_belongs_to_user(db, campaign_id, user_id):
|
||||
raise BacklinkCampaignNotFoundError("Campaign not found")
|
||||
|
||||
existing_urls = {
|
||||
row[0]
|
||||
for row in db.query(BacklinkLead.url)
|
||||
.filter(BacklinkLead.campaign_id == campaign_id)
|
||||
.all()
|
||||
}
|
||||
|
||||
added = []
|
||||
for data in leads_data:
|
||||
url = data.get("url", "")
|
||||
if url in existing_urls:
|
||||
continue
|
||||
lead = BacklinkLead(
|
||||
id=f"bl_{uuid4().hex[:16]}",
|
||||
campaign_id=campaign_id,
|
||||
url=data.get("url", ""),
|
||||
url=url,
|
||||
domain=data.get("domain", ""),
|
||||
page_title=data.get("page_title", ""),
|
||||
snippet=data.get("snippet", ""),
|
||||
@@ -182,6 +225,7 @@ class BacklinkOutreachStorageService:
|
||||
)
|
||||
db.add(lead)
|
||||
added.append(lead)
|
||||
existing_urls.add(url)
|
||||
db.commit()
|
||||
return [self._lead_to_dict(l) for l in added]
|
||||
finally:
|
||||
@@ -204,8 +248,16 @@ class BacklinkOutreachStorageService:
|
||||
db.close()
|
||||
|
||||
def update_lead_status(
|
||||
self, lead_id: str, user_id: str, status: str, notes: Optional[str] = None
|
||||
self,
|
||||
lead_id: str,
|
||||
user_id: str,
|
||||
status: str,
|
||||
notes: Optional[str] = None,
|
||||
campaign_id: Optional[str] = None,
|
||||
) -> Optional[dict]:
|
||||
if status not in LEAD_VALID_STATUSES:
|
||||
raise ValueError(f"Invalid status '{status}'. Valid values: {sorted(LEAD_VALID_STATUSES)}")
|
||||
|
||||
self._ensure_tables(user_id)
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
@@ -214,6 +266,18 @@ class BacklinkOutreachStorageService:
|
||||
lead = db.query(BacklinkLead).filter(BacklinkLead.id == lead_id).first()
|
||||
if not lead:
|
||||
return None
|
||||
|
||||
campaign = (
|
||||
db.query(BacklinkCampaign)
|
||||
.filter(BacklinkCampaign.id == lead.campaign_id, BacklinkCampaign.user_id == user_id)
|
||||
.first()
|
||||
)
|
||||
if not campaign:
|
||||
raise PermissionError("Lead does not belong to the current user")
|
||||
|
||||
if campaign_id and lead.campaign_id != campaign_id:
|
||||
return None
|
||||
|
||||
lead.status = status
|
||||
if notes is not None:
|
||||
lead.notes = notes
|
||||
@@ -222,6 +286,44 @@ class BacklinkOutreachStorageService:
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
def get_lead_access_issues(
|
||||
self, lead_ids: List[str], user_id: str, campaign_id: Optional[str] = None
|
||||
) -> dict:
|
||||
self._ensure_tables(user_id)
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
return {"missing": list(dict.fromkeys(lead_ids)), "unauthorized": []}
|
||||
try:
|
||||
unique_lead_ids = list(dict.fromkeys(lead_ids))
|
||||
access_rows = self._get_lead_access_rows(db, unique_lead_ids)
|
||||
missing: List[str] = []
|
||||
unauthorized: List[str] = []
|
||||
for lid in unique_lead_ids:
|
||||
access = access_rows.get(lid)
|
||||
if not access:
|
||||
missing.append(lid)
|
||||
elif access["user_id"] != user_id:
|
||||
unauthorized.append(lid)
|
||||
elif campaign_id and access["campaign_id"] != campaign_id:
|
||||
missing.append(lid)
|
||||
return {"missing": missing, "unauthorized": unauthorized}
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
def _get_lead_access_rows(self, db, lead_ids: List[str]) -> dict:
|
||||
if not lead_ids:
|
||||
return {}
|
||||
rows = (
|
||||
db.query(BacklinkLead.id, BacklinkLead.campaign_id, BacklinkCampaign.user_id)
|
||||
.outerjoin(BacklinkCampaign, BacklinkLead.campaign_id == BacklinkCampaign.id)
|
||||
.filter(BacklinkLead.id.in_(lead_ids))
|
||||
.all()
|
||||
)
|
||||
return {
|
||||
row.id: {"campaign_id": row.campaign_id, "user_id": row.user_id}
|
||||
for row in rows
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _lead_to_dict(lead) -> dict:
|
||||
return {
|
||||
@@ -241,6 +343,79 @@ class BacklinkOutreachStorageService:
|
||||
|
||||
# -- Outreach Attempt CRUD --
|
||||
|
||||
|
||||
def get_attempt_by_idempotency_key(self, idempotency_key: str, user_id: str = "default") -> Optional[dict]:
|
||||
"""Return the existing attempt for an idempotency key visible to the user."""
|
||||
self._ensure_tables(user_id)
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
return None
|
||||
try:
|
||||
attempt = (
|
||||
db.query(OutreachAttempt)
|
||||
.join(BacklinkCampaign, OutreachAttempt.campaign_id == BacklinkCampaign.id)
|
||||
.filter(
|
||||
OutreachAttempt.idempotency_key == idempotency_key,
|
||||
BacklinkCampaign.user_id == user_id,
|
||||
)
|
||||
.first()
|
||||
)
|
||||
return self._attempt_to_dict(attempt) if attempt else None
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
def reserve_attempt_idempotency(
|
||||
self,
|
||||
lead_id: str,
|
||||
campaign_id: str,
|
||||
idempotency_key: str,
|
||||
sender_email: str = "",
|
||||
subject: str = "",
|
||||
body: str = "",
|
||||
user_id: str = "default",
|
||||
) -> dict:
|
||||
"""Atomically reserve an outreach idempotency key by creating the attempt row.
|
||||
|
||||
Returns {"reserved": True, "attempt": attempt_dict} for the caller that won
|
||||
the reservation, or {"reserved": False, "attempt": existing_attempt_or_none}
|
||||
when the unique key already exists. Duplicate rows are detected by the
|
||||
database unique constraint so concurrent requests do not both proceed to
|
||||
policy approval or SMTP delivery.
|
||||
"""
|
||||
self._ensure_tables(user_id)
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
raise RuntimeError("Database session unavailable")
|
||||
try:
|
||||
attempt = OutreachAttempt(
|
||||
id=f"att_{uuid4().hex[:16]}",
|
||||
lead_id=lead_id,
|
||||
campaign_id=campaign_id,
|
||||
idempotency_key=idempotency_key,
|
||||
sender_email=sender_email,
|
||||
subject=subject,
|
||||
body=body,
|
||||
status="queued",
|
||||
created_at=datetime.utcnow(),
|
||||
)
|
||||
db.add(attempt)
|
||||
db.commit()
|
||||
return {"reserved": True, "attempt": self._attempt_to_dict(attempt)}
|
||||
except IntegrityError:
|
||||
db.rollback()
|
||||
existing = (
|
||||
db.query(OutreachAttempt)
|
||||
.join(BacklinkCampaign, OutreachAttempt.campaign_id == BacklinkCampaign.id)
|
||||
.filter(
|
||||
OutreachAttempt.idempotency_key == idempotency_key,
|
||||
BacklinkCampaign.user_id == user_id,
|
||||
)
|
||||
.first()
|
||||
)
|
||||
return {"reserved": False, "attempt": self._attempt_to_dict(existing) if existing else None}
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
def add_attempt(
|
||||
self,
|
||||
lead_id: str,
|
||||
@@ -273,6 +448,20 @@ class BacklinkOutreachStorageService:
|
||||
db.add(attempt)
|
||||
db.commit()
|
||||
return self._attempt_to_dict(attempt)
|
||||
except IntegrityError:
|
||||
db.rollback()
|
||||
existing = (
|
||||
db.query(OutreachAttempt)
|
||||
.join(BacklinkCampaign, OutreachAttempt.campaign_id == BacklinkCampaign.id)
|
||||
.filter(
|
||||
OutreachAttempt.idempotency_key == idempotency_key,
|
||||
BacklinkCampaign.user_id == user_id,
|
||||
)
|
||||
.first()
|
||||
)
|
||||
if existing:
|
||||
return self._attempt_to_dict(existing)
|
||||
raise
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
@@ -325,6 +514,7 @@ class BacklinkOutreachStorageService:
|
||||
"decision_reason": attempt.decision_reason,
|
||||
"sent_at": attempt.sent_at.isoformat() if attempt.sent_at else None,
|
||||
"created_at": attempt.created_at.isoformat() if attempt.created_at else None,
|
||||
"message_id": attempt.message_id or "",
|
||||
}
|
||||
|
||||
def find_attempt_by_from_email(self, from_email: str, user_id: str = "default") -> Optional[str]:
|
||||
@@ -346,6 +536,37 @@ class BacklinkOutreachStorageService:
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
def update_attempt_message_id(self, attempt_id: str, message_id: str, user_id: str = "default") -> Optional[dict]:
|
||||
self._ensure_tables(user_id)
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
return None
|
||||
try:
|
||||
attempt = db.query(OutreachAttempt).filter(OutreachAttempt.id == attempt_id).first()
|
||||
if not attempt:
|
||||
return None
|
||||
attempt.message_id = message_id
|
||||
db.commit()
|
||||
return self._attempt_to_dict(attempt)
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
def find_attempt_by_message_id(self, message_id: str, user_id: str = "default") -> Optional[str]:
|
||||
self._ensure_tables(user_id)
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
return None
|
||||
try:
|
||||
clean = message_id.strip()
|
||||
attempt = (
|
||||
db.query(OutreachAttempt)
|
||||
.filter(OutreachAttempt.message_id == clean)
|
||||
.first()
|
||||
)
|
||||
return attempt.id if attempt else None
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
# -- Outreach Reply CRUD --
|
||||
|
||||
def reply_exists(self, from_email: str, subject: str, user_id: str = "default") -> bool:
|
||||
@@ -678,6 +899,9 @@ class BacklinkOutreachStorageService:
|
||||
db.add(entry)
|
||||
db.commit()
|
||||
return {"idempotency_key": idempotency_key}
|
||||
except IntegrityError:
|
||||
db.rollback()
|
||||
return {"idempotency_key": idempotency_key}
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
@@ -686,27 +910,6 @@ class BacklinkOutreachStorageService:
|
||||
def _today(self) -> date:
|
||||
return date.today()
|
||||
|
||||
def increment_user_send_counter(self, user_id: str) -> int:
|
||||
self._ensure_tables(user_id)
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
return 0
|
||||
try:
|
||||
today = self._today()
|
||||
row_id = f"scu_{uuid4().hex[:16]}"
|
||||
db.execute(sql_text(
|
||||
"INSERT INTO backlink_send_counters_user (id, user_id, date, count) "
|
||||
"VALUES (:id, :uid, :dt, 1) "
|
||||
"ON CONFLICT (user_id, date) DO UPDATE SET count = count + 1"
|
||||
), {"id": row_id, "uid": user_id, "dt": today})
|
||||
db.commit()
|
||||
result = db.query(SendCounterUser.count).filter(
|
||||
SendCounterUser.user_id == user_id, SendCounterUser.date == today
|
||||
).first()
|
||||
return result[0] if result else 0
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
def get_user_send_count(self, user_id: str) -> int:
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
@@ -722,28 +925,6 @@ class BacklinkOutreachStorageService:
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
def increment_domain_send_counter(self, domain: str, user_id: str = "default") -> int:
|
||||
self._ensure_tables(user_id)
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
return 0
|
||||
try:
|
||||
today = self._today()
|
||||
domain_lower = domain.lower()
|
||||
row_id = f"scd_{uuid4().hex[:16]}"
|
||||
db.execute(sql_text(
|
||||
"INSERT INTO backlink_send_counters_domain (id, domain, date, count) "
|
||||
"VALUES (:id, :dom, :dt, 1) "
|
||||
"ON CONFLICT (domain, date) DO UPDATE SET count = count + 1"
|
||||
), {"id": row_id, "dom": domain_lower, "dt": today})
|
||||
db.commit()
|
||||
result = db.query(SendCounterDomain.count).filter(
|
||||
SendCounterDomain.domain == domain_lower, SendCounterDomain.date == today
|
||||
).first()
|
||||
return result[0] if result else 0
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
def get_domain_send_count(self, domain: str, user_id: str = "default") -> int:
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
@@ -759,6 +940,73 @@ class BacklinkOutreachStorageService:
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
def try_increment_user_send_counter(self, user_id: str) -> tuple:
|
||||
"""Atomically check cap and increment. Returns (within_cap, new_count)."""
|
||||
self._ensure_tables(user_id)
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
return True, 0
|
||||
try:
|
||||
today = self._today()
|
||||
current = (
|
||||
db.query(SendCounterUser.count)
|
||||
.filter(SendCounterUser.user_id == user_id, SendCounterUser.date == today)
|
||||
.scalar()
|
||||
) or 0
|
||||
if current >= DEFAULT_USER_DAILY_CAP:
|
||||
db.close()
|
||||
return False, current
|
||||
row_id = f"scu_{uuid4().hex[:16]}"
|
||||
db.execute(sql_text(
|
||||
"INSERT INTO backlink_send_counters_user (id, user_id, date, count) "
|
||||
"VALUES (:id, :uid, :dt, 1) "
|
||||
"ON CONFLICT (user_id, date) DO UPDATE SET count = count + 1"
|
||||
), {"id": row_id, "uid": user_id, "dt": today})
|
||||
db.commit()
|
||||
result = db.query(SendCounterUser.count).filter(
|
||||
SendCounterUser.user_id == user_id, SendCounterUser.date == today
|
||||
).first()
|
||||
return True, result[0] if result else 0
|
||||
except Exception:
|
||||
db.rollback()
|
||||
return True, 0
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
def try_increment_domain_send_counter(self, domain: str, user_id: str = "default") -> tuple:
|
||||
"""Atomically check cap and increment. Returns (within_cap, new_count)."""
|
||||
self._ensure_tables(user_id)
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
return True, 0
|
||||
try:
|
||||
today = self._today()
|
||||
domain_lower = domain.lower()
|
||||
current = (
|
||||
db.query(SendCounterDomain.count)
|
||||
.filter(SendCounterDomain.domain == domain_lower, SendCounterDomain.date == today)
|
||||
.scalar()
|
||||
) or 0
|
||||
if current >= DEFAULT_DOMAIN_DAILY_CAP:
|
||||
db.close()
|
||||
return False, current
|
||||
row_id = f"scd_{uuid4().hex[:16]}"
|
||||
db.execute(sql_text(
|
||||
"INSERT INTO backlink_send_counters_domain (id, domain, date, count) "
|
||||
"VALUES (:id, :dom, :dt, 1) "
|
||||
"ON CONFLICT (domain, date) DO UPDATE SET count = count + 1"
|
||||
), {"id": row_id, "dom": domain_lower, "dt": today})
|
||||
db.commit()
|
||||
result = db.query(SendCounterDomain.count).filter(
|
||||
SendCounterDomain.domain == domain_lower, SendCounterDomain.date == today
|
||||
).first()
|
||||
return True, result[0] if result else 0
|
||||
except Exception:
|
||||
db.rollback()
|
||||
return True, 0
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
# -- Audit Log --
|
||||
|
||||
def add_audit_log(
|
||||
|
||||
@@ -6,7 +6,7 @@ Provider parity:
|
||||
- No direct provider coupling here; Google grounding remains in research only
|
||||
"""
|
||||
|
||||
from typing import Any, Dict
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from .source_url_manager import SourceURLManager
|
||||
@@ -22,11 +22,12 @@ class EnhancedContentGenerator:
|
||||
self.transitioner = TransitionGenerator()
|
||||
self.flow = FlowAnalyzer()
|
||||
|
||||
async def generate_section(self, section: Any, research: Any, mode: str = "polished", user_id: str = None) -> Dict[str, Any]:
|
||||
async def generate_section(self, section: Any, research: Any = None, mode: str = "polished", user_id: str = None, competitive_advantage: str = "") -> Dict[str, Any]:
|
||||
prev_summary = self.memory.build_previous_sections_summary(limit=2)
|
||||
urls = self.url_manager.pick_relevant_urls(section, research)
|
||||
prompt = self._build_prompt(section, research, prev_summary, urls)
|
||||
# Provider-agnostic text generation (respect GPT_PROVIDER & circuit-breaker)
|
||||
research_context, section_sources = self._build_research_context(section)
|
||||
urls = self.url_manager.pick_relevant_urls(section, research) if not research_context else []
|
||||
global_research_context = self._build_global_research_context(research, competitive_advantage)
|
||||
prompt = self._build_prompt(section, prev_summary, research_context, urls, global_research_context)
|
||||
content_text: str = ""
|
||||
try:
|
||||
ai_resp = llm_text_gen(
|
||||
@@ -40,29 +41,22 @@ class EnhancedContentGenerator:
|
||||
elif isinstance(ai_resp, str):
|
||||
content_text = ai_resp
|
||||
else:
|
||||
# Fallback best-effort extraction
|
||||
content_text = str(ai_resp or "")
|
||||
except Exception as e:
|
||||
content_text = ""
|
||||
|
||||
result = {
|
||||
"content": content_text,
|
||||
"sources": [{"title": u.get("title", ""), "url": u.get("url", "")} for u in urls] if urls else [],
|
||||
"sources": section_sources,
|
||||
}
|
||||
# Generate transition and compute intelligent flow metrics
|
||||
previous_text = prev_summary
|
||||
current_text = result.get("content", "")
|
||||
transition = self.transitioner.generate_transition(previous_text, getattr(section, 'heading', 'This section'), use_llm=True)
|
||||
metrics = self.flow.assess_flow(previous_text, current_text, use_llm=True)
|
||||
|
||||
# Update memory for subsequent sections and store continuity snapshot
|
||||
if current_text:
|
||||
self.memory.update_with_section(getattr(section, 'id', 'unknown'), current_text, use_llm=True)
|
||||
|
||||
# Return enriched result
|
||||
result["transition"] = transition
|
||||
result["continuity_metrics"] = metrics
|
||||
# Persist a lightweight continuity snapshot for API access
|
||||
try:
|
||||
sid = getattr(section, 'id', 'unknown')
|
||||
if not hasattr(self, "_last_continuity"):
|
||||
@@ -72,22 +66,188 @@ class EnhancedContentGenerator:
|
||||
pass
|
||||
return result
|
||||
|
||||
def _build_prompt(self, section: Any, research: Any, prev_summary: str, urls: list) -> str:
|
||||
def _build_research_context(self, section: Any) -> tuple:
|
||||
"""Build a rich research context block from the section's mapped sources.
|
||||
|
||||
Returns (context_string, sources_list) where context_string is the
|
||||
formatted research context for the prompt, and sources_list contains
|
||||
{title, url} dicts for downstream use.
|
||||
|
||||
When section.references is empty, returns ("", []) — the caller should
|
||||
handle this as a research gap and avoid generating unsupported claims.
|
||||
"""
|
||||
references = getattr(section, 'references', []) or []
|
||||
if not references:
|
||||
return ("", [])
|
||||
|
||||
context_parts = []
|
||||
sources_out = []
|
||||
for i, ref in enumerate(references, 1):
|
||||
if isinstance(ref, dict):
|
||||
title = ref.get('title', '')
|
||||
excerpt = ref.get('excerpt', '')
|
||||
highlights = ref.get('highlights', []) or []
|
||||
summary = ref.get('summary', '')
|
||||
url = ref.get('url', '')
|
||||
content = ref.get('content', '') or ''
|
||||
author = ref.get('author', '') or ''
|
||||
source_type = ref.get('source_type', '') or ''
|
||||
credibility_score = ref.get('credibility_score')
|
||||
published_at = ref.get('published_at', '') or ''
|
||||
else:
|
||||
title = getattr(ref, 'title', '')
|
||||
excerpt = getattr(ref, 'excerpt', '')
|
||||
highlights = getattr(ref, 'highlights', []) or []
|
||||
summary = getattr(ref, 'summary', '')
|
||||
url = getattr(ref, 'url', '')
|
||||
content = getattr(ref, 'content', '') or ''
|
||||
author = getattr(ref, 'author', '') or ''
|
||||
source_type = getattr(ref, 'source_type', '') or ''
|
||||
credibility_score = getattr(ref, 'credibility_score', None)
|
||||
published_at = getattr(ref, 'published_at', '') or ''
|
||||
|
||||
sources_out.append({"title": title, "url": url})
|
||||
|
||||
attribution_parts = []
|
||||
if author:
|
||||
attribution_parts.append(f"by {author}")
|
||||
if source_type:
|
||||
attribution_parts.append(f"[{source_type}]")
|
||||
attribution = " ".join(attribution_parts)
|
||||
credibility_tag = ""
|
||||
if credibility_score is not None:
|
||||
try:
|
||||
score = float(credibility_score)
|
||||
if score >= 0.9:
|
||||
credibility_tag = " (high-credibility)"
|
||||
elif score >= 0.75:
|
||||
credibility_tag = " (credible)"
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
recency_tag = ""
|
||||
if published_at:
|
||||
recency_tag = f" (published {published_at[:10]})" if len(published_at) >= 10 else f" (published {published_at})"
|
||||
|
||||
header = f"Source {i}: {title}"
|
||||
if attribution:
|
||||
header += f" {attribution}"
|
||||
header += f"{credibility_tag}{recency_tag}"
|
||||
part = header + "\n"
|
||||
if summary:
|
||||
part += f" Summary: {summary[:1000]}\n"
|
||||
if excerpt:
|
||||
part += f" Key excerpt: {excerpt[:1000]}\n"
|
||||
if content and not summary and not excerpt:
|
||||
part += f" Content: {content[:800]}\n"
|
||||
if highlights:
|
||||
part += " Key findings:\n"
|
||||
for h in highlights[:3]:
|
||||
h_text = h[:500] if h else ''
|
||||
if h_text:
|
||||
part += f" - {h_text}\n"
|
||||
|
||||
context_parts.append(part)
|
||||
|
||||
return ("\n".join(context_parts), sources_out)
|
||||
|
||||
def _build_global_research_context(self, research: Any, competitive_advantage: str = "") -> str:
|
||||
"""Build global research context from the full BlogResearchResponse object.
|
||||
|
||||
Extracts keyword_analysis, competitor_analysis, search_queries,
|
||||
and competitive_advantage into a compact context block that provides
|
||||
the LLM with strategic direction beyond per-section sources.
|
||||
"""
|
||||
if research is None:
|
||||
return ""
|
||||
parts = []
|
||||
|
||||
ka = getattr(research, 'keyword_analysis', None) or {}
|
||||
if ka:
|
||||
primary = ka.get('primary', [])
|
||||
secondary = ka.get('secondary', [])
|
||||
search_intent = ka.get('search_intent', '')
|
||||
kw_lines = []
|
||||
if primary:
|
||||
kw_lines.append(f"Primary keywords: {', '.join(primary[:10])}")
|
||||
if secondary:
|
||||
kw_lines.append(f"Secondary keywords: {', '.join(secondary[:10])}")
|
||||
if search_intent:
|
||||
kw_lines.append(f"Search intent: {search_intent}")
|
||||
if kw_lines:
|
||||
parts.append("=== KEYWORD & SEARCH STRATEGY ===\n" + "\n".join(kw_lines))
|
||||
|
||||
ca = getattr(research, 'competitor_analysis', None) or {}
|
||||
if ca:
|
||||
ca_lines = []
|
||||
content_gaps = ca.get('content_gaps', [])
|
||||
if content_gaps:
|
||||
ca_lines.append(f"Content gaps (address these): {', '.join(content_gaps[:5])}")
|
||||
industry_leaders = ca.get('industry_leaders', [])
|
||||
if industry_leaders:
|
||||
ca_lines.append(f"Industry leaders: {', '.join(industry_leaders[:5])}")
|
||||
opportunities = ca.get('opportunities', [])
|
||||
if opportunities:
|
||||
ca_lines.append(f"Opportunities: {', '.join(opportunities[:5])}")
|
||||
if ca_lines:
|
||||
parts.append("=== COMPETITIVE LANDSCAPE ===\n" + "\n".join(ca_lines))
|
||||
|
||||
sq = getattr(research, 'search_queries', None) or []
|
||||
if sq:
|
||||
parts.append(f"=== SEARCH INTENT SIGNALS ===\nOriginal search queries: {', '.join(sq[:8])}")
|
||||
|
||||
if competitive_advantage:
|
||||
parts.append(f"=== COMPETITIVE ADVANTAGE ===\nEmphasize this differentiator: {competitive_advantage}")
|
||||
|
||||
return "\n\n".join(parts) if parts else ""
|
||||
|
||||
def _build_prompt(self, section: Any, prev_summary: str, research_context: str, urls: list, global_research_context: str = "") -> str:
|
||||
heading = getattr(section, 'heading', 'Section')
|
||||
key_points = getattr(section, 'key_points', [])
|
||||
keywords = getattr(section, 'keywords', [])
|
||||
subheadings = getattr(section, 'subheadings', []) or []
|
||||
target_words = getattr(section, 'target_words', 300)
|
||||
url_block = "\n".join([f"- {u.get('title','')} ({u.get('url','')})" for u in urls]) if urls else "(no specific URLs provided)"
|
||||
|
||||
return (
|
||||
prompt = (
|
||||
f"You are writing the blog section '{heading}'.\n\n"
|
||||
f"Context summary (previous sections): {prev_summary}\n\n"
|
||||
f"Authoring requirements:\n"
|
||||
f"- Target word count: ~{target_words}\n"
|
||||
f"- Use the following key points: {', '.join(key_points)}\n"
|
||||
f"- Include these keywords naturally: {', '.join(keywords)}\n"
|
||||
f"- Cite insights from these sources when relevant (do not output raw URLs):\n{url_block}\n\n"
|
||||
"Write engaging, well-structured markdown with clear paragraphs (2-4 sentences each) separated by double line breaks."
|
||||
)
|
||||
|
||||
if subheadings:
|
||||
prompt += f"- Cover these subtopics: {', '.join(subheadings)}\n"
|
||||
|
||||
if global_research_context:
|
||||
prompt += f"\n{global_research_context}\n\n"
|
||||
|
||||
if research_context:
|
||||
prompt += (
|
||||
f"\nResearch sources for this section (use these facts, statistics, "
|
||||
f"and insights to support your writing):\n{research_context}\n\n"
|
||||
"IMPORTANT: Base your writing on the research sources above. "
|
||||
"Use specific facts, statistics, and data from these sources. "
|
||||
"Do not invent numbers, statistics, or claims not supported by the research.\n"
|
||||
)
|
||||
elif urls:
|
||||
import logging
|
||||
logging.getLogger('content_generator').warning(
|
||||
f"No research context for section '{heading}' — falling back to bare URLs"
|
||||
)
|
||||
url_lines = []
|
||||
for u in urls:
|
||||
if isinstance(u, dict):
|
||||
url_lines.append(f"- {u.get('title','')} ({u.get('url','')})")
|
||||
else:
|
||||
url_lines.append(f"- {u}")
|
||||
prompt += f"\nReference URLs (consult for additional context):\n" + "\n".join(url_lines) + "\n"
|
||||
|
||||
prompt += (
|
||||
"\nWrite engaging, well-structured markdown with clear paragraphs "
|
||||
"(2-4 sentences each) separated by double line breaks."
|
||||
)
|
||||
|
||||
return prompt
|
||||
|
||||
|
||||
|
||||
@@ -7,10 +7,9 @@ Uses Gemini API for intelligent analysis while minimizing API calls through cach
|
||||
from typing import Dict, Optional
|
||||
from loguru import logger
|
||||
import hashlib
|
||||
import json
|
||||
|
||||
# Import the common gemini provider
|
||||
from services.llm_providers.gemini_provider import gemini_structured_json_response
|
||||
# Provider-agnostic LLM dispatcher (respects GPT_PROVIDER env var)
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
|
||||
|
||||
class FlowAnalyzer:
|
||||
@@ -21,7 +20,7 @@ class FlowAnalyzer:
|
||||
self._rule_cache: Dict[str, Dict[str, float]] = {}
|
||||
logger.info("✅ FlowAnalyzer initialized with LLM-based analysis")
|
||||
|
||||
def assess_flow(self, previous_text: str, current_text: str, use_llm: bool = True) -> Dict[str, float]:
|
||||
def assess_flow(self, previous_text: str, current_text: str, use_llm: bool = True, user_id: str = None) -> Dict[str, float]:
|
||||
"""
|
||||
Return flow metrics in range 0..1.
|
||||
|
||||
@@ -29,6 +28,7 @@ class FlowAnalyzer:
|
||||
previous_text: Previous section content
|
||||
current_text: Current section content
|
||||
use_llm: Whether to use LLM analysis (default: True for significant content)
|
||||
user_id: Clerk user ID for subscription checking
|
||||
"""
|
||||
if not current_text:
|
||||
return {"flow": 0.0, "consistency": 0.0, "progression": 0.0}
|
||||
@@ -46,7 +46,7 @@ class FlowAnalyzer:
|
||||
|
||||
if should_use_llm:
|
||||
try:
|
||||
metrics = self._llm_flow_analysis(previous_text, current_text)
|
||||
metrics = self._llm_flow_analysis(previous_text, current_text, user_id=user_id)
|
||||
self._cache[cache_key] = metrics
|
||||
logger.info("LLM-based flow analysis completed")
|
||||
return metrics
|
||||
@@ -71,8 +71,8 @@ class FlowAnalyzer:
|
||||
# Use LLM if: substantial content (>100 words) OR has meaningful previous context
|
||||
return word_count > 100 or has_previous
|
||||
|
||||
def _llm_flow_analysis(self, previous_text: str, current_text: str) -> Dict[str, float]:
|
||||
"""Use Gemini API for intelligent flow analysis."""
|
||||
def _llm_flow_analysis(self, previous_text: str, current_text: str, user_id: str = None) -> Dict[str, float]:
|
||||
"""Use LLM for intelligent flow analysis (provider-agnostic)."""
|
||||
|
||||
# Truncate content to minimize tokens while keeping context
|
||||
prev_truncated = (previous_text[-300:] if previous_text else "") if previous_text else ""
|
||||
@@ -103,22 +103,20 @@ Return ONLY a JSON object with these exact keys: flow, consistency, progression
|
||||
}
|
||||
|
||||
try:
|
||||
result = gemini_structured_json_response(
|
||||
result = llm_text_gen(
|
||||
prompt=prompt,
|
||||
schema=schema,
|
||||
temperature=0.2, # Low temperature for consistent scoring
|
||||
max_tokens=1000 # Increased tokens for better analysis
|
||||
json_struct=schema,
|
||||
system_prompt=None,
|
||||
user_id=user_id,
|
||||
temperature=0.2,
|
||||
max_tokens=1000
|
||||
)
|
||||
|
||||
if result.parsed:
|
||||
return {
|
||||
"flow": float(result.parsed.get("flow", 0.6)),
|
||||
"consistency": float(result.parsed.get("consistency", 0.6)),
|
||||
"progression": float(result.parsed.get("progression", 0.6))
|
||||
"flow": float(result.get("flow", 0.6)),
|
||||
"consistency": float(result.get("consistency", 0.6)),
|
||||
"progression": float(result.get("progression", 0.6))
|
||||
}
|
||||
else:
|
||||
logger.warning("LLM response parsing failed, using fallback")
|
||||
return self._rule_based_analysis(previous_text, current_text)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"LLM flow analysis error: {e}")
|
||||
|
||||
@@ -28,18 +28,17 @@ class IntroductionGenerator:
|
||||
) -> str:
|
||||
"""Build a prompt for generating blog introductions."""
|
||||
|
||||
# Extract key research insights
|
||||
keyword_analysis = research.keyword_analysis or {}
|
||||
content_angles = research.suggested_angles or []
|
||||
competitor_analysis = research.competitor_analysis or {}
|
||||
search_queries = research.search_queries or []
|
||||
|
||||
# Get a summary of the first few sections for context
|
||||
section_summaries = []
|
||||
for i, section in enumerate(outline[:3], 1):
|
||||
section_id = section.id
|
||||
content = sections_content.get(section_id, '')
|
||||
if content:
|
||||
# Take first 200 chars as summary
|
||||
summary = content[:200] + '...' if len(content) > 200 else content
|
||||
summary = content[:300] + '...' if len(content) > 300 else content
|
||||
section_summaries.append(f"{i}. {section.heading}: {summary}")
|
||||
|
||||
sections_text = '\n'.join(section_summaries) if section_summaries else "Content sections are being generated."
|
||||
@@ -47,13 +46,56 @@ class IntroductionGenerator:
|
||||
primary_kw_text = ', '.join(primary_keywords) if primary_keywords else "the topic"
|
||||
content_angle_text = ', '.join(content_angles[:3]) if content_angles else "General insights"
|
||||
|
||||
return f"""Generate exactly 3 varied blog introductions for the following blog post.
|
||||
# Build keyword strategy block from actual keyword_analysis
|
||||
keyword_block = ""
|
||||
all_keywords = []
|
||||
if keyword_analysis:
|
||||
primary_kw = keyword_analysis.get('primary', [])
|
||||
secondary_kw = keyword_analysis.get('secondary', [])
|
||||
if primary_kw:
|
||||
all_keywords.extend(primary_kw[:5])
|
||||
if secondary_kw:
|
||||
all_keywords.extend(secondary_kw[:5])
|
||||
si = keyword_analysis.get('search_intent', '')
|
||||
if si:
|
||||
keyword_block += f"\nSearch intent: {si}"
|
||||
if all_keywords:
|
||||
keyword_block = f"Target keywords: {', '.join(all_keywords)}" + keyword_block
|
||||
|
||||
# Build competitive landscape block
|
||||
competitive_block = ""
|
||||
if competitor_analysis:
|
||||
gaps = competitor_analysis.get('content_gaps', [])
|
||||
leaders = competitor_analysis.get('industry_leaders', [])
|
||||
opportunities = competitor_analysis.get('opportunities', [])
|
||||
advantages = competitor_analysis.get('competitive_advantages', [])
|
||||
comp_lines = []
|
||||
if advantages:
|
||||
comp_lines.append(f"Key differentiators: {', '.join(advantages[:3])}")
|
||||
if gaps:
|
||||
comp_lines.append(f"Content gaps to address: {', '.join(gaps[:3])}")
|
||||
if leaders:
|
||||
comp_lines.append(f"Industry leaders: {', '.join(leaders[:3])}")
|
||||
if opportunities:
|
||||
comp_lines.append(f"Opportunities: {', '.join(opportunities[:3])}")
|
||||
if comp_lines:
|
||||
competitive_block = "\n".join(comp_lines)
|
||||
|
||||
# Build search intent context
|
||||
search_block = ""
|
||||
if search_queries:
|
||||
search_block = f"Original search queries: {', '.join(search_queries[:5])}"
|
||||
|
||||
prompt = f"""Generate exactly 3 varied blog introductions for the following blog post.
|
||||
|
||||
BLOG TITLE: {blog_title}
|
||||
|
||||
PRIMARY KEYWORDS: {primary_kw_text}
|
||||
SEARCH INTENT: {search_intent}
|
||||
CONTENT ANGLES: {content_angle_text}
|
||||
{keyword_block}
|
||||
{f"COMPETITIVE LANDSCAPE:\n{competitive_block}" if competitive_block else ""}
|
||||
{f"SEARCH CONTEXT:\n{search_block}" if search_block else ""}
|
||||
|
||||
BLOG CONTENT SUMMARY:
|
||||
{sections_text}
|
||||
@@ -69,6 +111,7 @@ REQUIREMENTS FOR EACH INTRODUCTION:
|
||||
3. Third: Story/statistic-focused (use a compelling fact or narrative hook)
|
||||
- Maintain a professional yet engaging tone
|
||||
- Avoid generic phrases - be specific and benefit-driven
|
||||
- Where possible, incorporate specific insights from the competitive landscape and search intent above
|
||||
|
||||
Return ONLY a JSON array of exactly 3 introductions:
|
||||
[
|
||||
@@ -76,6 +119,7 @@ Return ONLY a JSON array of exactly 3 introductions:
|
||||
"Second introduction (80-120 words, benefit-focused)",
|
||||
"Third introduction (80-120 words, story/statistic-focused)"
|
||||
]"""
|
||||
return prompt
|
||||
|
||||
def get_introduction_schema(self) -> Dict[str, Any]:
|
||||
"""Get the JSON schema for introduction generation."""
|
||||
|
||||
@@ -129,9 +129,9 @@ class BlogWriterService:
|
||||
"""Enhance a section using AI."""
|
||||
return await self.outline_service.enhance_section_with_ai(section, focus)
|
||||
|
||||
async def optimize_outline_with_ai(self, outline: List[BlogOutlineSection], focus: str = "general optimization") -> List[BlogOutlineSection]:
|
||||
async def optimize_outline_with_ai(self, outline: List[BlogOutlineSection], focus: str = "general optimization", research_context: str = "") -> List[BlogOutlineSection]:
|
||||
"""Optimize entire outline for better flow and SEO."""
|
||||
return await self.outline_service.optimize_outline_with_ai(outline, focus)
|
||||
return await self.outline_service.optimize_outline_with_ai(outline, focus, research_context=research_context)
|
||||
|
||||
def rebalance_word_counts(self, outline: List[BlogOutlineSection], target_words: int) -> List[BlogOutlineSection]:
|
||||
"""Rebalance word count distribution across sections."""
|
||||
@@ -140,14 +140,15 @@ class BlogWriterService:
|
||||
# Content Generation Methods
|
||||
async def generate_section(self, request: BlogSectionRequest, user_id: str = None) -> BlogSectionResponse:
|
||||
"""Generate section content from outline."""
|
||||
# Compose research-lite object with minimal continuity summary if available
|
||||
research_ctx: Any = getattr(request, 'research', None)
|
||||
research_ctx = request.research
|
||||
competitive_advantage = request.competitive_advantage
|
||||
try:
|
||||
ai_result = await self.content_generator.generate_section(
|
||||
section=request.section,
|
||||
research=research_ctx,
|
||||
mode=(request.mode or "polished"),
|
||||
user_id=user_id
|
||||
user_id=user_id,
|
||||
competitive_advantage=competitive_advantage,
|
||||
)
|
||||
markdown = ai_result.get('content') or ai_result.get('markdown') or ''
|
||||
citations = []
|
||||
@@ -339,8 +340,19 @@ class BlogWriterService:
|
||||
)
|
||||
|
||||
async def publish(self, request: BlogPublishRequest) -> BlogPublishResponse:
|
||||
"""Publish content to specified platform."""
|
||||
# TODO: Move to content module
|
||||
"""Publish content to specified platform.
|
||||
|
||||
NOTE: This endpoint is a STUB / placeholder. The actual publish flow
|
||||
bypasses this method entirely — the frontend calls platform-specific
|
||||
endpoints directly:
|
||||
- Wix: POST /api/wix/publish (wix_routes.py)
|
||||
- WordPress: POST /api/wordpress/publish (routers/wordpress.py)
|
||||
|
||||
TODO: Either remove this stub or wire it as a unified dispatcher that
|
||||
routes to the correct platform service. Keep alive until the new
|
||||
unified publish flow (pre-publish checklist + schedule + history) is
|
||||
built and this becomes the single entry point for all publishing.
|
||||
"""
|
||||
return BlogPublishResponse(success=True, platform=request.platform, url="https://example.com/post")
|
||||
|
||||
async def generate_medium_blog_with_progress(self, req: MediumBlogGenerateRequest, task_id: str, user_id: str, db: Session = None) -> MediumBlogGenerateResult:
|
||||
@@ -359,9 +371,11 @@ class BlogWriterService:
|
||||
async def analyze_flow_basic(self, request: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Analyze flow metrics for entire blog using single AI call (cost-effective)."""
|
||||
try:
|
||||
import asyncio
|
||||
# Extract blog content from request
|
||||
sections = request.get("sections", [])
|
||||
title = request.get("title", "Untitled Blog")
|
||||
user_id = request.get("user_id")
|
||||
|
||||
if not sections:
|
||||
return {"error": "No sections provided for analysis"}
|
||||
@@ -397,8 +411,7 @@ class BlogWriterService:
|
||||
Provide detailed analysis with specific, actionable suggestions for improvement.
|
||||
"""
|
||||
|
||||
# Use Gemini for structured analysis
|
||||
from services.llm_providers.gemini_provider import gemini_structured_json_response
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
|
||||
schema = {
|
||||
"type": "object",
|
||||
@@ -440,12 +453,17 @@ class BlogWriterService:
|
||||
"required": ["overall_flow_score", "overall_consistency_score", "overall_progression_score", "overall_coherence_score", "sections", "overall_suggestions"]
|
||||
}
|
||||
|
||||
result = gemini_structured_json_response(
|
||||
prompt=analysis_prompt,
|
||||
schema=schema,
|
||||
temperature=0.3,
|
||||
max_tokens=4096,
|
||||
system_prompt=system_prompt
|
||||
result = await asyncio.to_thread(
|
||||
llm_text_gen,
|
||||
analysis_prompt,
|
||||
system_prompt,
|
||||
schema,
|
||||
user_id,
|
||||
None, # preferred_hf_models
|
||||
None, # preferred_provider
|
||||
None, # flow_type
|
||||
4096, # max_tokens
|
||||
0.3 # temperature
|
||||
)
|
||||
|
||||
if result and not result.get("error"):
|
||||
@@ -466,6 +484,7 @@ class BlogWriterService:
|
||||
# Use the existing enhanced content generator for detailed analysis
|
||||
sections = request.get("sections", [])
|
||||
title = request.get("title", "Untitled Blog")
|
||||
user_id = request.get("user_id")
|
||||
|
||||
if not sections:
|
||||
return {"error": "No sections provided for analysis"}
|
||||
@@ -485,7 +504,8 @@ class BlogWriterService:
|
||||
flow_metrics = self.content_generator.flow.assess_flow(
|
||||
prev_section_content,
|
||||
section_content,
|
||||
use_llm=True
|
||||
use_llm=True,
|
||||
user_id=user_id
|
||||
)
|
||||
|
||||
results.append({
|
||||
|
||||
@@ -40,8 +40,10 @@ class GroundingContextEngine:
|
||||
}
|
||||
|
||||
# Temporal relevance patterns
|
||||
cy = str(datetime.now().year)
|
||||
ny = str(datetime.now().year + 1)
|
||||
self.temporal_patterns = {
|
||||
'recent': ['2024', '2025', 'latest', 'new', 'recent', 'current', 'updated'],
|
||||
'recent': [cy, ny, 'latest', 'new', 'recent', 'current', 'updated'],
|
||||
'trending': ['trend', 'emerging', 'growing', 'increasing', 'rising'],
|
||||
'evergreen': ['fundamental', 'basic', 'principles', 'foundation', 'core']
|
||||
}
|
||||
@@ -239,9 +241,23 @@ class GroundingContextEngine:
|
||||
else:
|
||||
authority_distribution['low'] += 1
|
||||
|
||||
# Extract actual high-authority sources from chunks
|
||||
high_authority_sources = []
|
||||
for chunk in grounding_metadata.grounding_chunks:
|
||||
chunk_authority = self._calculate_chunk_authority(chunk)
|
||||
if chunk_authority >= 0.8:
|
||||
high_authority_sources.append({
|
||||
'title': chunk.title if chunk.title else 'Unknown Source',
|
||||
'url': chunk.url if chunk.url else '',
|
||||
'score': round(chunk_authority, 3)
|
||||
})
|
||||
# Sort by authority score descending, keep top 5
|
||||
high_authority_sources.sort(key=lambda x: x['score'], reverse=True)
|
||||
high_authority_sources = high_authority_sources[:5]
|
||||
|
||||
return {
|
||||
'average_authority_score': sum(authority_scores) / len(authority_scores) if authority_scores else 0.0,
|
||||
'high_authority_sources': [{'title': 'High Authority Source', 'url': 'example.com', 'score': 0.9}], # Placeholder
|
||||
'high_authority_sources': high_authority_sources,
|
||||
'authority_distribution': dict(authority_distribution)
|
||||
}
|
||||
|
||||
|
||||
@@ -137,6 +137,15 @@ class KeywordCurator:
|
||||
lines.append(f"### Competitive advantage signal (must weave into narrative): {content_gap[0]}")
|
||||
lines.append(" → This is your primary differentiation hook. Surface it prominently in the unique value section.")
|
||||
|
||||
lines.append("")
|
||||
lines.append("### SUGGESTED SECTION → KEYWORD MAPPING")
|
||||
lines.append("Map each outline section's keyword focus according to its narrative role:")
|
||||
lines.append("- Hook / Introduction → lead with primary and trending keywords for timeliness & relevance")
|
||||
lines.append("- Problem / Pain Point → anchor on secondary and long-tail keywords (informational intent)")
|
||||
lines.append("- Solution / How-To → weave in primary and secondary keywords for solution-oriented search")
|
||||
lines.append("- Comparison / Analysis → embed semantic keywords to prevent topical drift into tangents")
|
||||
lines.append("- Case Studies / Evidence → surface content gap keywords as differentiation proof points")
|
||||
lines.append("- Future / Trends → leverage trending and content gap keywords for forward-looking authority")
|
||||
lines.append("")
|
||||
lines.append("GUIDELINE: Treat these as the primary keyword anchors. You may include closely related")
|
||||
lines.append("intent-matching variations where natural, but avoid inserting every raw research keyword.")
|
||||
@@ -176,7 +185,11 @@ class KeywordCurator:
|
||||
slot_key: Optional[str] = None,
|
||||
) -> List[str]:
|
||||
"""
|
||||
Pick up to N items from a keyword list.
|
||||
Pick up to N items from a keyword list with diversity sampling.
|
||||
|
||||
When the raw list is significantly larger than the limit, selects
|
||||
evenly-spaced entries to capture semantic diversity rather than
|
||||
just the first N entries.
|
||||
|
||||
Args:
|
||||
data: The raw keyword_analysis dict.
|
||||
@@ -184,11 +197,24 @@ class KeywordCurator:
|
||||
slot_key: The internal slot name for looking up the limit.
|
||||
Falls back to source_key if not provided.
|
||||
Returns:
|
||||
Sliced list of at most N strings.
|
||||
List of at most N strings with diversity sampling.
|
||||
"""
|
||||
limit_key = slot_key or source_key
|
||||
limit = self.SLOTS.get(limit_key, 5)
|
||||
raw: Any = data.get(source_key, [])
|
||||
if not isinstance(raw, list):
|
||||
return []
|
||||
if len(raw) <= limit:
|
||||
return raw
|
||||
if len(raw) <= limit * 2:
|
||||
return raw[:limit]
|
||||
indices = set()
|
||||
if limit >= 2:
|
||||
indices.add(0)
|
||||
indices.add(len(raw) - 1)
|
||||
step = (len(raw) - 1) / max(limit - 1, 1)
|
||||
for i in range(1, limit - 1):
|
||||
indices.add(int(round(i * step)))
|
||||
else:
|
||||
indices.add(0)
|
||||
return [raw[i] for i in sorted(indices) if i < len(raw)][:limit]
|
||||
|
||||
@@ -52,6 +52,44 @@ class OutlineGenerator:
|
||||
raw_analysis = research.keyword_analysis if research else {}
|
||||
return self.keyword_curator.curate(raw_analysis)
|
||||
|
||||
def _build_optimization_context(self, research) -> str:
|
||||
"""Build a compact research context for the outline optimizer.
|
||||
Provides keywords, competitor data, and top source summaries so
|
||||
the optimizer doesn't run blind to the research."""
|
||||
if not research:
|
||||
return ""
|
||||
parts = []
|
||||
kw = research.keyword_analysis if research.keyword_analysis else {}
|
||||
primary = kw.get('primary', [])
|
||||
if primary:
|
||||
parts.append(f"Primary keywords: {', '.join(primary[:5])}")
|
||||
search_intent = kw.get('search_intent', '')
|
||||
if search_intent:
|
||||
parts.append(f"Search intent: {search_intent}")
|
||||
comp = research.competitor_analysis if research.competitor_analysis else {}
|
||||
top_competitors = comp.get('top_competitors', [])
|
||||
if top_competitors:
|
||||
parts.append(f"Top competitors: {', '.join(str(c) for c in top_competitors[:5])}")
|
||||
content_gaps = kw.get('content_gaps', [])
|
||||
if content_gaps:
|
||||
parts.append(f"Content gaps: {'; '.join(str(g) for g in content_gaps[:5])}")
|
||||
opportunities = comp.get('opportunities', [])
|
||||
if opportunities:
|
||||
parts.append(f"Opportunities: {'; '.join(str(o) for o in opportunities[:5])}")
|
||||
sources = research.sources if research.sources else []
|
||||
if sources:
|
||||
top_sources = sorted(sources, key=lambda s: s.credibility_score or 0.8, reverse=True)[:5]
|
||||
source_lines = []
|
||||
for s in top_sources:
|
||||
line = f"- {s.title}"
|
||||
if s.summary:
|
||||
line += f": {s.summary[:150]}"
|
||||
elif s.excerpt:
|
||||
line += f": {s.excerpt[:150]}"
|
||||
source_lines.append(line)
|
||||
parts.append("Key research sources:\n" + "\n".join(source_lines))
|
||||
return "\n".join(parts)
|
||||
|
||||
async def generate(self, request: BlogOutlineRequest, user_id: str) -> BlogOutlineResponse:
|
||||
"""
|
||||
Generate AI-powered outline using research results.
|
||||
@@ -102,7 +140,7 @@ class OutlineGenerator:
|
||||
|
||||
# Run parallel processing for speed optimization (user_id required)
|
||||
mapped_sections, grounding_insights = await self.parallel_processor.run_parallel_processing_async(
|
||||
outline_sections, research, user_id
|
||||
outline_sections, research, user_id, competitive_advantage=selected_competitive_advantage or ""
|
||||
)
|
||||
|
||||
# Enhance sections with grounding insights
|
||||
@@ -113,7 +151,8 @@ class OutlineGenerator:
|
||||
|
||||
# Optimize outline for better flow, SEO, and engagement (user_id required)
|
||||
logger.info("Optimizing outline for better flow and engagement...")
|
||||
optimized_sections = await self.outline_optimizer.optimize(grounding_enhanced_sections, "comprehensive optimization", user_id)
|
||||
optimization_context = self._build_optimization_context(research)
|
||||
optimized_sections = await self.outline_optimizer.optimize(grounding_enhanced_sections, "comprehensive optimization", user_id, research_context=optimization_context)
|
||||
|
||||
# Rebalance word counts for optimal distribution
|
||||
target_words = request.word_count or 1500
|
||||
@@ -124,7 +163,8 @@ class OutlineGenerator:
|
||||
content_angle_titles = self.title_generator.extract_content_angle_titles(research)
|
||||
|
||||
# Combine AI-generated titles with content angles (full primary keywords for title variety)
|
||||
title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords)
|
||||
research_topic = getattr(request, 'topic', '') or ''
|
||||
title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords, research_topic)
|
||||
|
||||
logger.info(f"Generated optimized outline with {len(balanced_sections)} sections and {len(title_options)} title options")
|
||||
|
||||
@@ -201,7 +241,7 @@ class OutlineGenerator:
|
||||
|
||||
# Run parallel processing for speed optimization (user_id required for subscription checks)
|
||||
mapped_sections, grounding_insights = await self.parallel_processor.run_parallel_processing(
|
||||
outline_sections, research, user_id, task_id
|
||||
outline_sections, research, user_id, task_id, competitive_advantage=selected_competitive_advantage or ""
|
||||
)
|
||||
|
||||
# Enhance sections with grounding insights (depends on both previous tasks)
|
||||
@@ -212,7 +252,8 @@ class OutlineGenerator:
|
||||
|
||||
# Optimize outline for better flow, SEO, and engagement (user_id required for subscription checks)
|
||||
await task_manager.update_progress(task_id, "🎯 Optimizing outline for better flow and engagement...")
|
||||
optimized_sections = await self.outline_optimizer.optimize(grounding_enhanced_sections, "comprehensive optimization", user_id)
|
||||
optimization_context = self._build_optimization_context(research)
|
||||
optimized_sections = await self.outline_optimizer.optimize(grounding_enhanced_sections, "comprehensive optimization", user_id, research_context=optimization_context)
|
||||
|
||||
# Rebalance word counts for optimal distribution
|
||||
await task_manager.update_progress(task_id, "⚖️ Rebalancing word count distribution...")
|
||||
@@ -224,7 +265,8 @@ class OutlineGenerator:
|
||||
content_angle_titles = self.title_generator.extract_content_angle_titles(research)
|
||||
|
||||
# Combine AI-generated titles with content angles (full primary keywords for title variety)
|
||||
title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords)
|
||||
research_topic = getattr(request, 'topic', '') or ''
|
||||
title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords, research_topic)
|
||||
|
||||
await task_manager.update_progress(task_id, "✅ Outline generation and optimization completed successfully!")
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ Outline Optimizer - AI-powered outline optimization and rebalancing.
|
||||
Optimizes outlines for better flow, SEO, and engagement.
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
from typing import List, Dict, Any, Optional
|
||||
from loguru import logger
|
||||
|
||||
from models.blog_models import BlogOutlineSection
|
||||
@@ -13,13 +13,14 @@ from models.blog_models import BlogOutlineSection
|
||||
class OutlineOptimizer:
|
||||
"""Optimizes outlines for better flow, SEO, and engagement."""
|
||||
|
||||
async def optimize(self, outline: List[BlogOutlineSection], focus: str, user_id: str) -> List[BlogOutlineSection]:
|
||||
async def optimize(self, outline: List[BlogOutlineSection], focus: str, user_id: str, research_context: str = "") -> List[BlogOutlineSection]:
|
||||
"""Optimize entire outline for better flow, SEO, and engagement.
|
||||
|
||||
Args:
|
||||
outline: List of outline sections to optimize
|
||||
focus: Optimization focus (e.g., "general optimization")
|
||||
user_id: User ID (required for subscription checks and usage tracking)
|
||||
research_context: Optional research context to ground optimization
|
||||
|
||||
Returns:
|
||||
List of optimized outline sections
|
||||
@@ -40,19 +41,28 @@ Current Outline:
|
||||
Optimization Focus: {focus}
|
||||
|
||||
Goals: Improve narrative flow, enhance SEO, increase engagement, ensure comprehensive coverage.
|
||||
"""
|
||||
if research_context:
|
||||
optimization_prompt += f"""
|
||||
Research Context (use this to ground your optimization in real data):
|
||||
{research_context}
|
||||
|
||||
Ensure the optimized outline reflects the research insights above — headings should address the key topics, keywords should align with search intent, and sections should cover the most important angles from the research.
|
||||
"""
|
||||
|
||||
optimization_prompt += """
|
||||
Return JSON format:
|
||||
{{
|
||||
{
|
||||
"outline": [
|
||||
{{
|
||||
{
|
||||
"heading": "Optimized heading",
|
||||
"subheadings": ["subheading 1", "subheading 2"],
|
||||
"key_points": ["point 1", "point 2"],
|
||||
"target_words": 300,
|
||||
"keywords": ["keyword1", "keyword2"]
|
||||
}}
|
||||
}
|
||||
]
|
||||
}}"""
|
||||
}"""
|
||||
|
||||
try:
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
@@ -112,26 +122,34 @@ Return JSON format:
|
||||
return outline
|
||||
|
||||
def rebalance_word_counts(self, outline: List[BlogOutlineSection], target_words: int) -> List[BlogOutlineSection]:
|
||||
"""Rebalance word count distribution across sections."""
|
||||
"""Rebalance word count distribution across sections, weighting by source count."""
|
||||
total_sections = len(outline)
|
||||
if total_sections == 0:
|
||||
return outline
|
||||
|
||||
# Calculate target distribution
|
||||
intro_words = int(target_words * 0.12) # 12% for intro
|
||||
conclusion_words = int(target_words * 0.12) # 12% for conclusion
|
||||
intro_words = int(target_words * 0.12)
|
||||
conclusion_words = int(target_words * 0.12)
|
||||
main_content_words = target_words - intro_words - conclusion_words
|
||||
|
||||
# Distribute main content words across sections
|
||||
words_per_section = main_content_words // total_sections
|
||||
remainder = main_content_words % total_sections
|
||||
# Weight sections by research density (sections with more sources get more words)
|
||||
main_sections = outline[1:-1] if total_sections > 2 else outline
|
||||
source_weights = []
|
||||
for section in main_sections:
|
||||
ref_count = len(getattr(section, 'references', []) or [])
|
||||
source_weights.append(1.0 + ref_count * 0.5)
|
||||
|
||||
total_weight = sum(source_weights) if source_weights else len(main_sections)
|
||||
|
||||
for i, section in enumerate(outline):
|
||||
if i == 0: # First section (intro)
|
||||
if i == 0 and total_sections > 2:
|
||||
section.target_words = intro_words
|
||||
elif i == total_sections - 1: # Last section (conclusion)
|
||||
elif i == total_sections - 1 and total_sections > 2:
|
||||
section.target_words = conclusion_words
|
||||
else: # Main content sections
|
||||
section.target_words = words_per_section + (1 if i < remainder else 0)
|
||||
else:
|
||||
main_idx = i - 1 if total_sections > 2 else i
|
||||
if main_idx < len(source_weights):
|
||||
section.target_words = int(main_content_words * source_weights[main_idx] / total_weight)
|
||||
else:
|
||||
section.target_words = main_content_words // max(len(main_sections), 1)
|
||||
|
||||
return outline
|
||||
|
||||
@@ -233,9 +233,9 @@ class OutlineService:
|
||||
"""Enhance a section using AI with research context."""
|
||||
return await self.section_enhancer.enhance(section, focus)
|
||||
|
||||
async def optimize_outline_with_ai(self, outline: List[BlogOutlineSection], focus: str = "general optimization") -> List[BlogOutlineSection]:
|
||||
async def optimize_outline_with_ai(self, outline: List[BlogOutlineSection], focus: str = "general optimization", research_context: str = "") -> List[BlogOutlineSection]:
|
||||
"""Optimize entire outline for better flow, SEO, and engagement."""
|
||||
return await self.outline_optimizer.optimize(outline, focus)
|
||||
return await self.outline_optimizer.optimize(outline, focus, research_context=research_context)
|
||||
|
||||
def rebalance_word_counts(self, outline: List[BlogOutlineSection], target_words: int) -> List[BlogOutlineSection]:
|
||||
"""Rebalance word count distribution across sections."""
|
||||
|
||||
@@ -17,7 +17,7 @@ class ParallelProcessor:
|
||||
self.source_mapper = source_mapper
|
||||
self.grounding_engine = grounding_engine
|
||||
|
||||
async def run_parallel_processing(self, outline_sections, research, user_id: str, task_id: str = None) -> Tuple[Any, Any]:
|
||||
async def run_parallel_processing(self, outline_sections, research, user_id: str, task_id: str = None, competitive_advantage: str = "") -> Tuple[Any, Any]:
|
||||
"""
|
||||
Run source mapping and grounding insights extraction in parallel.
|
||||
|
||||
@@ -26,6 +26,7 @@ class ParallelProcessor:
|
||||
research: Research data object
|
||||
user_id: User ID (required for subscription checks and usage tracking)
|
||||
task_id: Optional task ID for progress updates
|
||||
competitive_advantage: Selected competitive advantage for preferential source matching
|
||||
|
||||
Returns:
|
||||
Tuple of (mapped_sections, grounding_insights)
|
||||
@@ -44,7 +45,7 @@ class ParallelProcessor:
|
||||
|
||||
# Run these tasks in parallel to save time
|
||||
source_mapping_task = asyncio.create_task(
|
||||
self._run_source_mapping(outline_sections, research, task_id, user_id)
|
||||
self._run_source_mapping(outline_sections, research, task_id, user_id, competitive_advantage)
|
||||
)
|
||||
|
||||
grounding_insights_task = asyncio.create_task(
|
||||
@@ -59,7 +60,7 @@ class ParallelProcessor:
|
||||
|
||||
return mapped_sections, grounding_insights
|
||||
|
||||
async def run_parallel_processing_async(self, outline_sections, research, user_id: str) -> Tuple[Any, Any]:
|
||||
async def run_parallel_processing_async(self, outline_sections, research, user_id: str, competitive_advantage: str = "") -> Tuple[Any, Any]:
|
||||
"""
|
||||
Run parallel processing without progress updates (for non-progress methods).
|
||||
|
||||
@@ -67,6 +68,7 @@ class ParallelProcessor:
|
||||
outline_sections: List of outline sections to process
|
||||
research: Research data object
|
||||
user_id: User ID (required for subscription checks and usage tracking)
|
||||
competitive_advantage: Selected competitive advantage for preferential source matching
|
||||
|
||||
Returns:
|
||||
Tuple of (mapped_sections, grounding_insights)
|
||||
@@ -81,7 +83,7 @@ class ParallelProcessor:
|
||||
|
||||
# Run these tasks in parallel to save time
|
||||
source_mapping_task = asyncio.create_task(
|
||||
self._run_source_mapping_async(outline_sections, research, user_id)
|
||||
self._run_source_mapping_async(outline_sections, research, user_id, competitive_advantage)
|
||||
)
|
||||
|
||||
grounding_insights_task = asyncio.create_task(
|
||||
@@ -96,12 +98,12 @@ class ParallelProcessor:
|
||||
|
||||
return mapped_sections, grounding_insights
|
||||
|
||||
async def _run_source_mapping(self, outline_sections, research, task_id, user_id: str):
|
||||
async def _run_source_mapping(self, outline_sections, research, task_id, user_id: str, competitive_advantage: str = ""):
|
||||
"""Run source mapping in parallel."""
|
||||
if task_id:
|
||||
from api.blog_writer.task_manager import task_manager
|
||||
await task_manager.update_progress(task_id, "🔗 Applying intelligent source-to-section mapping...")
|
||||
return self.source_mapper.map_sources_to_sections(outline_sections, research, user_id)
|
||||
return self.source_mapper.map_sources_to_sections(outline_sections, research, user_id, competitive_advantage=competitive_advantage)
|
||||
|
||||
async def _run_grounding_insights_extraction(self, research, task_id):
|
||||
"""Run grounding insights extraction in parallel."""
|
||||
@@ -110,10 +112,10 @@ class ParallelProcessor:
|
||||
await task_manager.update_progress(task_id, "🧠 Extracting grounding metadata insights...")
|
||||
return self.grounding_engine.extract_contextual_insights(research.grounding_metadata)
|
||||
|
||||
async def _run_source_mapping_async(self, outline_sections, research, user_id: str):
|
||||
async def _run_source_mapping_async(self, outline_sections, research, user_id: str, competitive_advantage: str = ""):
|
||||
"""Run source mapping in parallel (async version without progress updates)."""
|
||||
logger.info("Applying intelligent source-to-section mapping...")
|
||||
return self.source_mapper.map_sources_to_sections(outline_sections, research, user_id)
|
||||
return self.source_mapper.map_sources_to_sections(outline_sections, research, user_id, competitive_advantage=competitive_advantage)
|
||||
|
||||
async def _run_grounding_insights_extraction_async(self, research):
|
||||
"""Run grounding insights extraction in parallel (async version without progress updates)."""
|
||||
|
||||
@@ -36,12 +36,89 @@ class PromptBuilder:
|
||||
competitor_text = ', '.join(research.competitor_analysis.get('top_competitors', [])) if research and research.competitor_analysis else "Not available"
|
||||
opportunity_text = ', '.join(research.competitor_analysis.get('opportunities', [])) if research and research.competitor_analysis else "Not available"
|
||||
advantages_text = ', '.join(research.competitor_analysis.get('competitive_advantages', [])) if research and research.competitor_analysis else "Not available"
|
||||
competitor_headings_text = ', '.join(research.competitor_analysis.get('competitor_headings', [])[:3]) if research and research.competitor_analysis and research.competitor_analysis.get('competitor_headings') else ""
|
||||
content_gaps_text = ', '.join(research.competitor_analysis.get('content_gaps', [])) if research and research.competitor_analysis and research.competitor_analysis.get('content_gaps') else ""
|
||||
industry_leaders_text = ', '.join(research.competitor_analysis.get('industry_leaders', [])) if research and research.competitor_analysis and research.competitor_analysis.get('industry_leaders') else ""
|
||||
|
||||
# Extract additional UI-mapped context fields
|
||||
analysis_insights_text = (research.keyword_analysis.get('analysis_insights', '') or '') if research and research.keyword_analysis else ''
|
||||
market_positioning_text = (research.competitor_analysis.get('market_positioning', '') or '') if research and research.competitor_analysis else ''
|
||||
difficulty_score = research.keyword_analysis.get('difficulty', None) if research and research.keyword_analysis else None
|
||||
|
||||
# Extract search queries as intent signals
|
||||
search_queries_text = ', '.join(research.search_queries) if research and hasattr(research, 'search_queries') and research.search_queries else ""
|
||||
|
||||
# Build numbered source list — all sources with index, title, excerpt, and highlights
|
||||
# The LLM will reference these indices when assigning sources to sections
|
||||
source_list_text = ""
|
||||
if sources:
|
||||
source_lines = []
|
||||
for i, src in enumerate(sources, 1):
|
||||
title = getattr(src, 'title', '') or ''
|
||||
excerpt = getattr(src, 'excerpt', '') or ''
|
||||
highlights = getattr(src, 'highlights', []) or []
|
||||
summary = getattr(src, 'summary', '') or ''
|
||||
source_type = getattr(src, 'source_type', '') or ''
|
||||
author = getattr(src, 'author', '') or ''
|
||||
|
||||
line = f" [{i}] {title}"
|
||||
if source_type:
|
||||
line += f" [{source_type}]"
|
||||
if author:
|
||||
line += f" by {author}"
|
||||
if summary:
|
||||
line += f" — {summary[:1000]}"
|
||||
elif excerpt:
|
||||
line += f" — {excerpt[:1000]}"
|
||||
if highlights:
|
||||
line += f" | Key findings: {'; '.join(h[:250] for h in highlights[:3])}"
|
||||
source_lines.append(line)
|
||||
if source_lines:
|
||||
source_list_text = "RESEARCH SOURCES (numbered for reference):\n" + "\n".join(source_lines)
|
||||
|
||||
# Top factual excerpts for depth (keep as supplement)
|
||||
source_excerpts_text = ""
|
||||
if sources:
|
||||
sorted_sources = sorted(
|
||||
[s for s in sources if (s.excerpt or s.summary)],
|
||||
key=lambda s: s.credibility_score or 0.8, reverse=True
|
||||
)[:5]
|
||||
excerpts = []
|
||||
for i, src in enumerate(sorted_sources, 1):
|
||||
excerpt = src.excerpt or src.summary or ""
|
||||
if len(excerpt) > 500:
|
||||
excerpt = excerpt[:497] + "..."
|
||||
excerpts.append(f" {i}. \"{src.title}\" — {excerpt}")
|
||||
if excerpts:
|
||||
source_excerpts_text = "DETAILED FACTS FROM TOP SOURCES:\n" + "\n".join(excerpts)
|
||||
|
||||
# Extract recency: newest source publication date
|
||||
newest_date_str = ""
|
||||
if sources:
|
||||
valid_dates = [s.published_at for s in sources if s.published_at]
|
||||
if valid_dates:
|
||||
try:
|
||||
parsed = [d for d in valid_dates if d[:4].isdigit()]
|
||||
if parsed:
|
||||
sorted_dates = sorted(parsed, reverse=True)
|
||||
newest_date_str = f"Most Recent Source: {sorted_dates[0]}"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Extract top grounding evidence snippets as verified data points
|
||||
grounding_evidence_text = ""
|
||||
if research and research.grounding_metadata and research.grounding_metadata.grounding_supports:
|
||||
supports = research.grounding_metadata.grounding_supports
|
||||
top_supports = [s for s in supports if s.segment_text and len(s.segment_text) > 20][:5]
|
||||
if top_supports:
|
||||
evidence_parts = []
|
||||
for i, s in enumerate(top_supports, 1):
|
||||
text = s.segment_text[:400]
|
||||
if len(s.segment_text) > 400:
|
||||
text += "..."
|
||||
evidence_parts.append(f" {i}. {text}")
|
||||
grounding_evidence_text = "VERIFIED EVIDENCE (high-confidence snippets):\n" + "\n".join(evidence_parts)
|
||||
|
||||
# Build selected angle prominence section
|
||||
if selected_content_angle and selected_content_angle.strip():
|
||||
selected_angle_section = f"""
|
||||
@@ -106,8 +183,17 @@ Top Competitors: {competitor_text}
|
||||
Market Opportunities: {opportunity_text}
|
||||
Competitive Advantages: {advantages_text}
|
||||
{f"Market Positioning: {market_positioning_text}" if market_positioning_text else ""}
|
||||
{f"Competitor Headings (AVOID duplicating): {competitor_headings_text}" if competitor_headings_text else ""}
|
||||
{f"Content Gaps (MUST address these gaps): {content_gaps_text}" if content_gaps_text else ""}
|
||||
{f"Industry Leaders: {industry_leaders_text}" if industry_leaders_text else ""}
|
||||
{f"Search Intent Signals: {search_queries_text}" if search_queries_text else ""}
|
||||
|
||||
RESEARCH SOURCES: {len(sources)} authoritative sources available
|
||||
{source_list_text}
|
||||
{newest_date_str}
|
||||
|
||||
{source_excerpts_text}
|
||||
|
||||
{grounding_evidence_text}
|
||||
|
||||
{f"CUSTOM INSTRUCTIONS: {custom_instructions}" if custom_instructions else ""}
|
||||
|
||||
@@ -118,8 +204,9 @@ STRATEGIC REQUIREMENTS:
|
||||
- Create SEO-optimized headings with natural keyword integration
|
||||
- Surface the strongest research-backed angles within the outline
|
||||
- Build logical narrative flow from problem to solution
|
||||
- Include data-driven insights from research sources
|
||||
- Address content gaps and market opportunities
|
||||
- Include data-driven insights from research sources — use the numbered sources above
|
||||
- For each section, assign the most relevant source indices using the [N] numbers above
|
||||
- Address content gaps and market opportunities — if content gaps are listed, dedicate sections to fill those gaps
|
||||
- Optimize for search intent and user questions
|
||||
- Ensure engaging, actionable content throughout
|
||||
|
||||
@@ -136,7 +223,8 @@ Return JSON format:
|
||||
"subheadings": ["Subheading 1", "Subheading 2", "Subheading 3"],
|
||||
"key_points": ["Key point 1", "Key point 2", "Key point 3"],
|
||||
"target_words": 300,
|
||||
"keywords": ["keyword 1", "keyword 2"]
|
||||
"keywords": ["keyword 1", "keyword 2"],
|
||||
"source_indices": [1, 3, 5]
|
||||
}}
|
||||
]
|
||||
}}"""
|
||||
@@ -170,9 +258,14 @@ Return JSON format:
|
||||
"keywords": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"}
|
||||
},
|
||||
"source_indices": {
|
||||
"type": "array",
|
||||
"items": {"type": "integer"},
|
||||
"description": "Indices of research sources (from the numbered list above) that support this section"
|
||||
}
|
||||
},
|
||||
"required": ["heading", "subheadings", "key_points", "target_words", "keywords"]
|
||||
"required": ["heading", "subheadings", "key_points", "target_words", "keywords", "source_indices"]
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
@@ -100,18 +100,37 @@ class ResponseProcessor:
|
||||
raise ValueError(f"AI outline generation failed: {error_str}")
|
||||
|
||||
def convert_to_sections(self, outline_data: Dict[str, Any], sources: List) -> List[BlogOutlineSection]:
|
||||
"""Convert outline data to BlogOutlineSection objects."""
|
||||
"""Convert outline data to BlogOutlineSection objects.
|
||||
|
||||
If the LLM assigned source_indices to sections, populate references
|
||||
directly from those indices. Indices are 1-based (matching the [N]
|
||||
labels in the prompt) — converted to 0-based for list access.
|
||||
Sections without source_indices will be populated by the algorithmic
|
||||
source mapper in a later step.
|
||||
"""
|
||||
outline_sections = []
|
||||
for i, section_data in enumerate(outline_data.get('outline', [])):
|
||||
if not isinstance(section_data, dict) or 'heading' not in section_data:
|
||||
continue
|
||||
|
||||
# Parse LLM-assigned source indices (1-based)
|
||||
raw_indices = section_data.get('source_indices', [])
|
||||
section_refs = []
|
||||
if raw_indices and sources:
|
||||
for idx in raw_indices:
|
||||
try:
|
||||
source_idx = int(idx) - 1 # Convert 1-based → 0-based
|
||||
if 0 <= source_idx < len(sources):
|
||||
section_refs.append(sources[source_idx])
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
section = BlogOutlineSection(
|
||||
id=f"s{i+1}",
|
||||
heading=section_data.get('heading', f'Section {i+1}'),
|
||||
subheadings=section_data.get('subheadings', []),
|
||||
key_points=section_data.get('key_points', []),
|
||||
references=[], # Will be populated by intelligent mapping
|
||||
references=section_refs, # LLM-assigned if provided, else []
|
||||
target_words=section_data.get('target_words', 200),
|
||||
keywords=section_data.get('keywords', [])
|
||||
)
|
||||
|
||||
@@ -41,10 +41,33 @@ class SourceToSectionMapper:
|
||||
'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
|
||||
'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did',
|
||||
'will', 'would', 'could', 'should', 'may', 'might', 'must', 'can', 'this', 'that', 'these', 'those',
|
||||
'how', 'what', 'when', 'where', 'why', 'who', 'which', 'how', 'much', 'many', 'more', 'most',
|
||||
'how', 'what', 'when', 'where', 'why', 'who', 'which', 'much', 'many', 'more', 'most',
|
||||
'some', 'any', 'all', 'each', 'every', 'other', 'another', 'such', 'no', 'not', 'only', 'own',
|
||||
'same', 'so', 'than', 'too', 'very', 'just', 'now', 'here', 'there', 'up', 'down', 'out', 'off',
|
||||
'over', 'under', 'again', 'further', 'then', 'once'
|
||||
'over', 'under', 'again', 'further', 'then', 'once', 'also', 'into', 'about', 'between',
|
||||
'through', 'during', 'before', 'after', 'above', 'below', 'from', 'since', 'until', 'while',
|
||||
'because', 'however', 'therefore', 'thus', 'hence', 'yet', 'still', 'already', 'even'
|
||||
}
|
||||
|
||||
# Common abbreviation/synonym pairs for fuzzy matching
|
||||
self._synonym_map = {
|
||||
'ai': ['artificial intelligence', 'machine intelligence'],
|
||||
'ml': ['machine learning'],
|
||||
'dl': ['deep learning'],
|
||||
'nlp': ['natural language processing'],
|
||||
'iot': ['internet of things'],
|
||||
'saas': ['software as a service'],
|
||||
'b2b': ['business to business'],
|
||||
'b2c': ['business to consumer'],
|
||||
'cx': ['customer experience'],
|
||||
'ux': ['user experience'],
|
||||
'roi': ['return on investment'],
|
||||
'kpi': ['key performance indicator'],
|
||||
'crm': ['customer relationship management'],
|
||||
'erp': ['enterprise resource planning'],
|
||||
'seo': ['search engine optimization'],
|
||||
'cto': ['chief technology officer'],
|
||||
'vp': ['vice president'],
|
||||
}
|
||||
|
||||
logger.info("✅ SourceToSectionMapper initialized with intelligent mapping algorithms")
|
||||
@@ -53,15 +76,21 @@ class SourceToSectionMapper:
|
||||
self,
|
||||
sections: List[BlogOutlineSection],
|
||||
research_data: BlogResearchResponse,
|
||||
user_id: str
|
||||
user_id: str,
|
||||
competitive_advantage: str = ""
|
||||
) -> List[BlogOutlineSection]:
|
||||
"""
|
||||
Map research sources to outline sections using intelligent algorithms.
|
||||
|
||||
Sections that already have LLM-assigned references (from source_indices
|
||||
in the outline prompt) are preserved. Algorithmic mapping fills gaps
|
||||
for sections without LLM-assigned sources.
|
||||
|
||||
Args:
|
||||
sections: List of outline sections to map sources to
|
||||
research_data: Research data containing sources and metadata
|
||||
user_id: User ID (required for subscription checks and usage tracking)
|
||||
competitive_advantage: Selected competitive advantage to preferentially match
|
||||
|
||||
Returns:
|
||||
List of outline sections with intelligently mapped sources
|
||||
@@ -76,16 +105,39 @@ class SourceToSectionMapper:
|
||||
logger.warning("No sections or sources to map")
|
||||
return sections
|
||||
|
||||
logger.info(f"Mapping {len(research_data.sources)} sources to {len(sections)} sections")
|
||||
# Separate sections with LLM-assigned references from those without
|
||||
sections_with_refs = [s for s in sections if s.references]
|
||||
sections_without_refs = [s for s in sections if not s.references]
|
||||
|
||||
# Step 1: Algorithmic mapping
|
||||
mapping_results = self._algorithmic_source_mapping(sections, research_data)
|
||||
logger.info(
|
||||
f"Mapping {len(research_data.sources)} sources to {len(sections)} sections "
|
||||
f"({len(sections_with_refs)} with LLM-assigned references, "
|
||||
f"{len(sections_without_refs)} need algorithmic mapping)"
|
||||
)
|
||||
|
||||
# Step 2: AI validation and improvement (single prompt, user_id required for subscription checks)
|
||||
if sections_without_refs:
|
||||
# Step 1: Algorithmic mapping for sections without LLM-assigned references
|
||||
mapping_results = self._algorithmic_source_mapping(sections_without_refs, research_data, competitive_advantage)
|
||||
|
||||
# Step 2: AI validation and improvement
|
||||
validated_mapping = self._ai_validate_mapping(mapping_results, research_data, user_id)
|
||||
|
||||
# Step 3: Apply validated mapping to sections
|
||||
mapped_sections = self._apply_mapping_to_sections(sections, validated_mapping)
|
||||
# Step 3: Apply mapping only to sections that need it
|
||||
mapped_sections_with = self._apply_mapping_to_sections(sections_without_refs, validated_mapping)
|
||||
else:
|
||||
mapped_sections_with = []
|
||||
|
||||
# Combine: keep LLM-assigned sections as-is, add algorithmically mapped ones
|
||||
mapped_sections = list(sections_with_refs) + mapped_sections_with
|
||||
|
||||
# Preserve original ordering
|
||||
original_ids = [s.id for s in sections]
|
||||
mapped_sections.sort(key=lambda s: original_ids.index(s.id) if s.id in original_ids else 999)
|
||||
|
||||
# Warn if any section still has zero references
|
||||
for s in mapped_sections:
|
||||
if not s.references:
|
||||
logger.warning(f"Section '{s.heading}' (id={s.id}) has ZERO sources — content generator will use keyword-based fallback")
|
||||
|
||||
logger.info("✅ Source-to-section mapping completed successfully")
|
||||
return mapped_sections
|
||||
@@ -93,7 +145,8 @@ class SourceToSectionMapper:
|
||||
def _algorithmic_source_mapping(
|
||||
self,
|
||||
sections: List[BlogOutlineSection],
|
||||
research_data: BlogResearchResponse
|
||||
research_data: BlogResearchResponse,
|
||||
competitive_advantage: str = ""
|
||||
) -> Dict[str, List[Tuple[ResearchSource, float]]]:
|
||||
"""
|
||||
Perform algorithmic mapping of sources to sections.
|
||||
@@ -101,6 +154,7 @@ class SourceToSectionMapper:
|
||||
Args:
|
||||
sections: List of outline sections
|
||||
research_data: Research data with sources
|
||||
competitive_advantage: Selected competitive advantage to boost matching
|
||||
|
||||
Returns:
|
||||
Dictionary mapping section IDs to list of (source, score) tuples
|
||||
@@ -114,7 +168,7 @@ class SourceToSectionMapper:
|
||||
# Calculate multi-dimensional relevance score
|
||||
semantic_score = self._calculate_semantic_similarity(section, source)
|
||||
keyword_score = self._calculate_keyword_relevance(section, source, research_data)
|
||||
contextual_score = self._calculate_contextual_relevance(section, source, research_data)
|
||||
contextual_score = self._calculate_contextual_relevance(section, source, research_data, competitive_advantage)
|
||||
|
||||
# Weighted total score
|
||||
total_score = (
|
||||
@@ -140,38 +194,54 @@ class SourceToSectionMapper:
|
||||
def _calculate_semantic_similarity(self, section: BlogOutlineSection, source: ResearchSource) -> float:
|
||||
"""
|
||||
Calculate semantic similarity between section and source.
|
||||
|
||||
Args:
|
||||
section: Outline section
|
||||
source: Research source
|
||||
|
||||
Returns:
|
||||
Semantic similarity score (0.0 to 1.0)
|
||||
Uses word overlap, stem matching, bigram overlap, title-boost, and synonym expansion.
|
||||
"""
|
||||
# Extract text content for comparison
|
||||
section_text = self._extract_section_text(section)
|
||||
source_text = self._extract_source_text(source)
|
||||
|
||||
# Calculate word overlap
|
||||
section_words = self._extract_meaningful_words(section_text)
|
||||
source_words = self._extract_meaningful_words(source_text)
|
||||
|
||||
if not section_words or not source_words:
|
||||
return 0.0
|
||||
|
||||
# Calculate Jaccard similarity
|
||||
intersection = len(set(section_words) & set(source_words))
|
||||
union = len(set(section_words) | set(source_words))
|
||||
section_set = set(section_words)
|
||||
source_set = set(source_words)
|
||||
|
||||
jaccard_similarity = intersection / union if union > 0 else 0.0
|
||||
# 1. Jaccard similarity on raw words
|
||||
intersection = len(section_set & source_set)
|
||||
union = len(section_set | source_set)
|
||||
jaccard = intersection / union if union > 0 else 0.0
|
||||
|
||||
# Boost score for exact phrase matches
|
||||
phrase_boost = self._calculate_phrase_similarity(section_text, source_text)
|
||||
# 2. Stem matching — catches word variants (e.g., "running" vs "runs")
|
||||
section_stems = set(self._stem_word(w) for w in section_words)
|
||||
source_stems = set(self._stem_word(w) for w in source_words)
|
||||
stem_intersection = len(section_stems & source_stems)
|
||||
stem_union = len(section_stems | source_stems)
|
||||
stem_similarity = stem_intersection / stem_union if stem_union > 0 else 0.0
|
||||
|
||||
# Combine Jaccard similarity with phrase boost
|
||||
semantic_score = min(1.0, jaccard_similarity + phrase_boost)
|
||||
# 3. Bigram overlap — catches multi-word concepts (e.g., "machine learning")
|
||||
section_bigrams = set(self._extract_bigrams(section_text))
|
||||
source_bigrams = set(self._extract_bigrams(source_text))
|
||||
bigram_overlap = len(section_bigrams & source_bigrams)
|
||||
bigram_score = min(0.3, bigram_overlap * 0.1) if (section_bigrams or source_bigrams) else 0.0
|
||||
|
||||
return semantic_score
|
||||
# 4. Title-boost — section heading matching source title is a strong signal
|
||||
heading = (section.heading or '').lower()
|
||||
source_title = (source.title or '').lower()
|
||||
heading_words = set(self._extract_meaningful_words(heading))
|
||||
title_words = set(self._extract_meaningful_words(source_title))
|
||||
title_overlap = len(heading_words & title_words) / len(heading_words | title_words) if (heading_words or title_words) else 0.0
|
||||
title_boost = min(0.3, title_overlap * 0.5)
|
||||
|
||||
# 5. Synonym expansion — expand abbreviations and match across synonym pairs
|
||||
synonym_score = self._calculate_synonym_overlap(section_words, source_words)
|
||||
|
||||
# Combine: Jaccard + stem give base, bigram + title + synonyms boost
|
||||
base_similarity = max(jaccard, stem_similarity)
|
||||
combined = min(1.0, base_similarity + bigram_score + title_boost + synonym_score + 0.0)
|
||||
|
||||
return combined
|
||||
|
||||
def _calculate_keyword_relevance(
|
||||
self,
|
||||
@@ -219,7 +289,8 @@ class SourceToSectionMapper:
|
||||
self,
|
||||
section: BlogOutlineSection,
|
||||
source: ResearchSource,
|
||||
research_data: BlogResearchResponse
|
||||
research_data: BlogResearchResponse,
|
||||
competitive_advantage: str = ""
|
||||
) -> float:
|
||||
"""
|
||||
Calculate contextual relevance based on section content and source context.
|
||||
@@ -228,6 +299,7 @@ class SourceToSectionMapper:
|
||||
section: Outline section
|
||||
source: Research source
|
||||
research_data: Research data with context
|
||||
competitive_advantage: Selected competitive advantage to boost matching
|
||||
|
||||
Returns:
|
||||
Contextual relevance score (0.0 to 1.0)
|
||||
@@ -264,6 +336,15 @@ class SourceToSectionMapper:
|
||||
industry_score = sum(1 for word in industry_words if word in source_text) / len(industry_words) if industry_words else 0.0
|
||||
contextual_score += industry_score * 0.2
|
||||
|
||||
# 4. Competitive advantage boost — sources that match the advantage get a score lift
|
||||
if competitive_advantage:
|
||||
advantage_words = set(self._extract_meaningful_words(competitive_advantage.lower()))
|
||||
if advantage_words:
|
||||
advantage_in_section = sum(1 for w in advantage_words if w in section_text) / len(advantage_words)
|
||||
advantage_in_source = sum(1 for w in advantage_words if w in source_text) / len(advantage_words)
|
||||
if advantage_in_section > 0.3 and advantage_in_source > 0.3:
|
||||
contextual_score += 0.25 * (advantage_in_section + advantage_in_source)
|
||||
|
||||
return min(1.0, contextual_score)
|
||||
|
||||
def _ai_validate_mapping(
|
||||
@@ -360,10 +441,15 @@ class SourceToSectionMapper:
|
||||
return " ".join(text_parts)
|
||||
|
||||
def _extract_source_text(self, source: ResearchSource) -> str:
|
||||
"""Extract all text content from a source."""
|
||||
"""Extract all text content from a source, including full text for better matching."""
|
||||
text_parts = [source.title]
|
||||
if source.summary:
|
||||
text_parts.append(source.summary)
|
||||
if source.excerpt:
|
||||
text_parts.append(source.excerpt)
|
||||
content = getattr(source, 'content', '') or ''
|
||||
if content:
|
||||
text_parts.append(content[:500])
|
||||
return " ".join(text_parts)
|
||||
|
||||
def _extract_meaningful_words(self, text: str) -> List[str]:
|
||||
@@ -382,6 +468,41 @@ class SourceToSectionMapper:
|
||||
|
||||
return meaningful_words
|
||||
|
||||
def _stem_word(self, word: str) -> str:
|
||||
"""Rudimentary suffix-stripping stemmer for English words."""
|
||||
if len(word) <= 3:
|
||||
return word
|
||||
for suffix in ['ization', 'ation', 'tion', 'sion', 'ment', 'ness', 'ity', 'ing', 'able', 'ible', 'ful', 'less', 'ous', 'ive', 'ally', 'ly', 'er', 'ed', 'es', 's']:
|
||||
if word.endswith(suffix) and len(word) - len(suffix) >= 3:
|
||||
return word[:-len(suffix)]
|
||||
return word
|
||||
|
||||
def _extract_bigrams(self, text: str) -> List[str]:
|
||||
"""Extract meaningful two-word phrases from text."""
|
||||
words = self._extract_meaningful_words(text)
|
||||
if len(words) < 2:
|
||||
return []
|
||||
return [f"{words[i]} {words[i+1]}" for i in range(len(words) - 1)]
|
||||
|
||||
def _calculate_synonym_overlap(self, section_words: List[str], source_words: List[str]) -> float:
|
||||
"""Score overlap via abbreviation/synonym expansion."""
|
||||
section_set = set(section_words)
|
||||
source_set = set(source_words)
|
||||
extra_matches = 0
|
||||
total_terms = len(section_set | source_set) or 1
|
||||
|
||||
for abbr, expansions in self._synonym_map.items():
|
||||
abbr_in_section = abbr in section_set
|
||||
abbr_in_source = abbr in source_set
|
||||
for expansion in expansions:
|
||||
exp_words = set(expansion.split())
|
||||
exp_in_section = exp_words.issubset(section_set)
|
||||
exp_in_source = exp_words.issubset(source_set)
|
||||
if (abbr_in_section and exp_in_source) or (abbr_in_source and exp_in_section):
|
||||
extra_matches += 1
|
||||
|
||||
return min(0.2, extra_matches * 0.05)
|
||||
|
||||
def _calculate_phrase_similarity(self, text1: str, text2: str) -> float:
|
||||
"""Calculate phrase similarity boost score."""
|
||||
if not text1 or not text2:
|
||||
|
||||
@@ -54,58 +54,58 @@ class TitleGenerator:
|
||||
Returns:
|
||||
Formatted title string
|
||||
"""
|
||||
if not angle or len(angle.strip()) < 10: # Too short to be a good title
|
||||
if not angle or len(angle.strip()) < 10:
|
||||
return ""
|
||||
|
||||
# Clean up the angle
|
||||
cleaned_angle = angle.strip()
|
||||
|
||||
# Capitalize first letter of each sentence and proper nouns
|
||||
sentences = cleaned_angle.split('. ')
|
||||
formatted_sentences = []
|
||||
for sentence in sentences:
|
||||
if sentence.strip():
|
||||
# Use title case for better formatting
|
||||
formatted_sentence = sentence.strip().title()
|
||||
formatted_sentences.append(formatted_sentence)
|
||||
|
||||
formatted_title = '. '.join(formatted_sentences)
|
||||
|
||||
# Ensure it ends with proper punctuation
|
||||
if not formatted_title.endswith(('.', '!', '?')):
|
||||
formatted_title += '.'
|
||||
# Use sentence case: capitalize first letter, rest as-is
|
||||
if cleaned_angle:
|
||||
cleaned_angle = cleaned_angle[0].upper() + cleaned_angle[1:]
|
||||
|
||||
# Limit length to reasonable blog title size
|
||||
if len(formatted_title) > 200:
|
||||
formatted_title = formatted_title[:197] + "..."
|
||||
if len(cleaned_angle) > 120:
|
||||
cleaned_angle = cleaned_angle[:117] + "..."
|
||||
|
||||
return formatted_title
|
||||
return cleaned_angle
|
||||
|
||||
def combine_title_options(self, ai_titles: List[str], content_angle_titles: List[str], primary_keywords: List[str]) -> List[str]:
|
||||
def combine_title_options(self, ai_titles: List[str], content_angle_titles: List[str], primary_keywords: List[str], research_topic: str = "") -> List[str]:
|
||||
"""
|
||||
Combine AI-generated titles with content angle titles, ensuring variety and quality.
|
||||
|
||||
AI titles (proper SEO titles generated by LLM) take priority.
|
||||
Content angle titles (long-format descriptions) are used as fallback.
|
||||
The research topic is the last resort when nothing else exists.
|
||||
|
||||
Args:
|
||||
ai_titles: AI-generated title options
|
||||
content_angle_titles: Titles derived from content angles
|
||||
ai_titles: AI-generated title options (proper blog titles, 50-65 chars)
|
||||
content_angle_titles: Titles derived from content angles (longer, descriptive)
|
||||
primary_keywords: Primary keywords for fallback generation
|
||||
research_topic: Original user research topic as ultimate fallback
|
||||
|
||||
Returns:
|
||||
Combined list of title options (max 6 total)
|
||||
"""
|
||||
all_titles = []
|
||||
|
||||
# Add content angle titles first (these are research-based and valuable)
|
||||
for title in content_angle_titles[:3]: # Limit to top 3 content angles
|
||||
if title and title not in all_titles:
|
||||
all_titles.append(title)
|
||||
|
||||
# Add AI-generated titles
|
||||
# 1. AI-generated titles first (proper SEO titles from LLM)
|
||||
for title in ai_titles:
|
||||
if title and title not in all_titles:
|
||||
all_titles.append(title)
|
||||
|
||||
# Note: Removed fallback titles as requested - only use research and AI-generated titles
|
||||
# 2. Content angle titles as fallback (research-based, but verbose)
|
||||
for title in content_angle_titles[:3]:
|
||||
if title and title not in all_titles:
|
||||
all_titles.append(title)
|
||||
|
||||
# 3. Research topic as last resort when nothing was generated
|
||||
if not all_titles and research_topic:
|
||||
all_titles.append(research_topic)
|
||||
|
||||
# 4. Primary keyword fallback as absolute last resort
|
||||
if not all_titles and primary_keywords:
|
||||
kw = primary_keywords[0]
|
||||
all_titles.append(kw)
|
||||
|
||||
# Limit to 6 titles maximum for UI usability
|
||||
final_titles = all_titles[:6]
|
||||
@@ -115,9 +115,10 @@ class TitleGenerator:
|
||||
|
||||
def generate_fallback_titles(self, primary_keywords: List[str]) -> List[str]:
    """Generate fallback titles when AI generation fails.

    Args:
        primary_keywords: Primary keywords; only the first one is used.
            When the list is empty the placeholder "Topic" is used instead.

    Returns:
        Three template-based titles. The last one embeds the current
        calendar year rather than a hard-coded year.
    """
    from datetime import datetime

    primary_keyword = primary_keywords[0] if primary_keywords else "Topic"
    return [
        f"The Complete Guide to {primary_keyword}",
        f"{primary_keyword}: Everything You Need to Know",
        # Trailing comma keeps this entry from merging with any neighbour.
        f"How to Master {primary_keyword} in {datetime.now().year}",
    ]
|
||||
|
||||
@@ -18,7 +18,7 @@ class CompetitorAnalyzer:
|
||||
Analyze the following research content and extract competitor insights:
|
||||
|
||||
Research Content:
|
||||
{content[:3000]}
|
||||
{content[:8000]}
|
||||
|
||||
Extract and analyze:
|
||||
1. Top competitors mentioned (companies, brands, platforms)
|
||||
|
||||
@@ -17,7 +17,7 @@ class ContentAngleGenerator:
|
||||
Analyze the following research content and create strategic content angles for: {topic} in {industry}
|
||||
|
||||
Research Content:
|
||||
{content[:3000]}
|
||||
{content[:8000]}
|
||||
|
||||
Create 7 compelling content angles that:
|
||||
1. Leverage current trends and data from the research
|
||||
|
||||
@@ -432,7 +432,7 @@ class ResearchDataFilter:
|
||||
'how to', 'guide', 'tutorial', 'steps', 'process', 'method',
|
||||
'best practices', 'tips', 'strategies', 'techniques', 'approach',
|
||||
'comparison', 'vs', 'versus', 'difference', 'pros and cons',
|
||||
'trends', 'future', '2024', '2025', 'emerging', 'new'
|
||||
'trends', 'future', str(datetime.now().year), str(datetime.now().year + 1), 'emerging', 'new'
|
||||
]
|
||||
|
||||
for indicator in actionable_indicators:
|
||||
|
||||
@@ -7,6 +7,8 @@ Neural search implementation using Exa API for high-quality, citation-rich resea
|
||||
from exa_py import Exa
|
||||
import os
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlparse
|
||||
from typing import List, Dict, Any
|
||||
from loguru import logger
|
||||
from models.subscription_models import APIProvider
|
||||
@@ -355,6 +357,125 @@ class ExaResearchProvider(BaseProvider):
|
||||
|
||||
return None
|
||||
|
||||
def _calculate_credibility_score(self, result) -> float:
|
||||
"""Dynamic credibility score based on domain authority, recency, and content substance."""
|
||||
scores = []
|
||||
weights = []
|
||||
|
||||
# Domain authority (weight: 3) — most important signal
|
||||
url = result.url if hasattr(result, 'url') else ''
|
||||
domain_score = self._score_domain_authority(url)
|
||||
scores.append(domain_score)
|
||||
weights.append(3)
|
||||
|
||||
# Recency (weight: 2) — fresher content is more valuable
|
||||
recency_score = self._score_recency(result)
|
||||
scores.append(recency_score)
|
||||
weights.append(2)
|
||||
|
||||
# Content substance (weight: 2) — richer content = more substantive source
|
||||
substance_score = self._score_substance(result)
|
||||
scores.append(substance_score)
|
||||
weights.append(2)
|
||||
|
||||
# Exa relevance score (weight: 2) — Exa's own relevance ranking
|
||||
exa_score = 0.5
|
||||
if hasattr(result, 'score') and result.score is not None:
|
||||
exa_score = float(result.score)
|
||||
scores.append(exa_score)
|
||||
weights.append(2)
|
||||
|
||||
total = sum(s * w for s, w in zip(scores, weights))
|
||||
total_weight = sum(weights)
|
||||
return round(total / total_weight, 3)
|
||||
|
||||
@staticmethod
|
||||
def _score_domain_authority(url: str) -> float:
|
||||
if not url:
|
||||
return 0.5
|
||||
try:
|
||||
domain = urlparse(url).netloc.lower()
|
||||
except Exception:
|
||||
return 0.5
|
||||
if domain.startswith('www.'):
|
||||
domain = domain[4:]
|
||||
|
||||
# Tier 1: Government, educational, major research
|
||||
if domain.endswith('.gov') or domain.endswith('.edu'):
|
||||
return 0.95
|
||||
if domain in ('arxiv.org', 'pubmed.ncbi.nlm.nih.gov', 'ncbi.nlm.nih.gov',
|
||||
'scholar.google.com', 'researchgate.net', 'sciencedaily.com',
|
||||
'nature.com', 'science.org', 'pnas.org'):
|
||||
return 0.92
|
||||
|
||||
# Tier 2: Major established news and professional publications
|
||||
tier2 = {
|
||||
'reuters.com', 'apnews.com', 'bbc.com', 'bbc.co.uk', 'npr.org',
|
||||
'wsj.com', 'nytimes.com', 'economist.com', 'bloomberg.com',
|
||||
'theguardian.com', 'ft.com', 'washingtonpost.com',
|
||||
'forbes.com', 'hbr.org', 'techcrunch.com', 'wired.com',
|
||||
'cnn.com', 'nbcnews.com', 'cbsnews.com', 'abcnews.go.com',
|
||||
}
|
||||
# Extract base domain
|
||||
parts = domain.split('.')
|
||||
base = '.'.join(parts[-2:]) if len(parts) >= 2 else domain
|
||||
if base in tier2:
|
||||
return 0.88
|
||||
|
||||
# Tier 3: Industry research and established .org
|
||||
tier3 = {
|
||||
'statista.com', 'pewresearch.org', 'gartner.com', 'mckinsey.com',
|
||||
'deloitte.com', 'pwc.com', 'ey.com', 'kpmg.com',
|
||||
'hubspot.com', 'moz.com', 'searchengineland.com',
|
||||
'neilpatel.com', 'backlinko.com', 'copyblogger.com',
|
||||
}
|
||||
if base in tier3:
|
||||
return 0.80
|
||||
if domain.endswith('.org'):
|
||||
return 0.75
|
||||
|
||||
return 0.60
|
||||
|
||||
def _score_recency(self, result) -> float:
|
||||
if not hasattr(result, 'publishedDate') or not result.publishedDate:
|
||||
return 0.70
|
||||
try:
|
||||
published = datetime.strptime(result.publishedDate[:10], '%Y-%m-%d')
|
||||
days_old = (datetime.now() - published).days
|
||||
if days_old < 30:
|
||||
return 1.0
|
||||
elif days_old < 180:
|
||||
return 0.90
|
||||
elif days_old < 365:
|
||||
return 0.80
|
||||
elif days_old < 730:
|
||||
return 0.65
|
||||
elif days_old < 1825:
|
||||
return 0.45
|
||||
else:
|
||||
return 0.25
|
||||
except Exception:
|
||||
return 0.70
|
||||
|
||||
def _score_substance(self, result) -> float:
|
||||
total_chars = 0
|
||||
if hasattr(result, 'highlights') and result.highlights:
|
||||
total_chars += sum(len(h or '') for h in result.highlights)
|
||||
if hasattr(result, 'summary') and result.summary:
|
||||
total_chars += len(result.summary)
|
||||
if hasattr(result, 'text') and result.text:
|
||||
total_chars += len(result.text)
|
||||
|
||||
if total_chars > 2000:
|
||||
return 0.95
|
||||
elif total_chars > 1000:
|
||||
return 0.85
|
||||
elif total_chars > 500:
|
||||
return 0.75
|
||||
elif total_chars > 100:
|
||||
return 0.60
|
||||
return 0.40
|
||||
|
||||
def _transform_sources(self, results):
|
||||
"""Transform Exa results to ResearchSource format."""
|
||||
sources = []
|
||||
@@ -368,7 +489,7 @@ class ExaResearchProvider(BaseProvider):
|
||||
'title': result.title if hasattr(result, 'title') else '',
|
||||
'url': result.url if hasattr(result, 'url') else '',
|
||||
'excerpt': self._get_excerpt(result),
|
||||
'credibility_score': 0.85, # Exa results are high quality
|
||||
'credibility_score': self._calculate_credibility_score(result),
|
||||
'published_at': result.publishedDate if hasattr(result, 'publishedDate') else None,
|
||||
'index': idx,
|
||||
'source_type': source_type,
|
||||
@@ -388,7 +509,7 @@ class ExaResearchProvider(BaseProvider):
|
||||
if hasattr(result, 'summary') and result.summary:
|
||||
return result.summary
|
||||
if hasattr(result, 'text') and result.text:
|
||||
return result.text[:500]
|
||||
return result.text[:1000]
|
||||
return ''
|
||||
|
||||
def _determine_source_type(self, url):
|
||||
|
||||
@@ -19,7 +19,7 @@ class KeywordAnalyzer:
|
||||
Analyze the following research content and extract comprehensive keyword insights for: {', '.join(original_keywords)}
|
||||
|
||||
Research Content:
|
||||
{content[:3000]} # Limit to avoid token limits
|
||||
{content[:8000]}
|
||||
|
||||
Extract and analyze:
|
||||
1. Primary keywords (main topic terms)
|
||||
|
||||
@@ -250,10 +250,32 @@ class ResearchService:
|
||||
if 'content' not in locals() or 'sources' not in locals():
|
||||
raise RuntimeError(f"{config.provider.value} research did not return content or sources. Research failed.")
|
||||
|
||||
# Build compact all-source summary for richer analysis
|
||||
analysis_content = self._build_analysis_content(sources)
|
||||
|
||||
# Run dedicated competitor search for richer competitor intelligence
|
||||
competitor_content = analysis_content
|
||||
try:
|
||||
comp_query = f"top {industry} companies or competitors {topic}"
|
||||
comp_results = await exa_provider.simple_search(
|
||||
query=comp_query, num_results=5, user_id=user_id,
|
||||
)
|
||||
if comp_results:
|
||||
comp_lines = ["COMPETITOR SEARCH RESULTS:"]
|
||||
for r in comp_results:
|
||||
title = r.get('title', '')
|
||||
text = (r.get('text', '') or '')[:400]
|
||||
comp_lines.append(f"- {title}")
|
||||
if text:
|
||||
comp_lines.append(f" {text[:200]}")
|
||||
competitor_content = "\n".join(comp_lines) + "\n\n" + analysis_content
|
||||
except Exception as e:
|
||||
logger.warning(f"Competitor search failed (non-critical): {e}")
|
||||
|
||||
# Continue with common analysis (same for both providers)
|
||||
keyword_analysis = self.keyword_analyzer.analyze(content, request.keywords, user_id=user_id)
|
||||
competitor_analysis = self.competitor_analyzer.analyze(content, user_id=user_id)
|
||||
suggested_angles = self.content_angle_generator.generate(content, topic, industry, user_id=user_id)
|
||||
keyword_analysis = self.keyword_analyzer.analyze(analysis_content, request.keywords, user_id=user_id)
|
||||
competitor_analysis = self.competitor_analyzer.analyze(competitor_content, user_id=user_id)
|
||||
suggested_angles = self.content_angle_generator.generate(analysis_content, topic, industry, user_id=user_id)
|
||||
|
||||
logger.info(f"Research completed successfully with {len(sources)} sources and {len(search_queries)} search queries")
|
||||
|
||||
@@ -586,9 +608,30 @@ class ResearchService:
|
||||
|
||||
# Continue with common analysis (same for both providers)
|
||||
await task_manager.update_progress(task_id, "🔍 Analyzing keywords and content angles...")
|
||||
keyword_analysis = self.keyword_analyzer.analyze(content, request.keywords, user_id=user_id)
|
||||
competitor_analysis = self.competitor_analyzer.analyze(content, user_id=user_id)
|
||||
suggested_angles = self.content_angle_generator.generate(content, topic, industry, user_id=user_id)
|
||||
analysis_content = self._build_analysis_content(sources)
|
||||
|
||||
# Run dedicated competitor search for richer competitor intelligence
|
||||
competitor_content = analysis_content
|
||||
try:
|
||||
comp_query = f"top {industry} companies or competitors {topic}"
|
||||
comp_results = await exa_provider.simple_search(
|
||||
query=comp_query, num_results=5, user_id=user_id,
|
||||
)
|
||||
if comp_results:
|
||||
comp_lines = ["COMPETITOR SEARCH RESULTS:"]
|
||||
for r in comp_results:
|
||||
title = r.get('title', '')
|
||||
text = (r.get('text', '') or '')[:400]
|
||||
comp_lines.append(f"- {title}")
|
||||
if text:
|
||||
comp_lines.append(f" {text[:200]}")
|
||||
competitor_content = "\n".join(comp_lines) + "\n\n" + analysis_content
|
||||
except Exception as e:
|
||||
logger.warning(f"Competitor search failed (non-critical): {e}")
|
||||
|
||||
keyword_analysis = self.keyword_analyzer.analyze(analysis_content, request.keywords, user_id=user_id)
|
||||
competitor_analysis = self.competitor_analyzer.analyze(competitor_content, user_id=user_id)
|
||||
suggested_angles = self.content_angle_generator.generate(analysis_content, topic, industry, user_id=user_id)
|
||||
|
||||
await task_manager.update_progress(task_id, "💾 Caching results for future use...")
|
||||
logger.info(f"Research completed successfully with {len(sources)} sources and {len(search_queries)} search queries")
|
||||
@@ -720,7 +763,7 @@ class ResearchService:
|
||||
url=src.get("url", ""),
|
||||
excerpt=src.get("content", "")[:500] if src.get("content") else f"Source from {src.get('title', 'web')}",
|
||||
credibility_score=float(src.get("credibility_score", 0.8)),
|
||||
published_at=str(src.get("publication_date", "2024-01-01")),
|
||||
published_at=str(src.get("publication_date", f"{datetime.now().year}-01-01")),
|
||||
index=src.get("index"),
|
||||
source_type=src.get("type", "web")
|
||||
)
|
||||
@@ -780,6 +823,33 @@ class ResearchService:
|
||||
web_search_queries=search_queries or [],
|
||||
)
|
||||
|
||||
def _build_analysis_content(self, sources: List[Dict[str, Any]]) -> str:
|
||||
"""Build compact all-source summary for LLM analysis.
|
||||
|
||||
Each source is distilled to one line with title, key content, and highlights.
|
||||
This ensures ALL sources are visible to keyword, competitor, and angle
|
||||
analyzers instead of only the first few (raw content[:3000]).
|
||||
"""
|
||||
if not sources:
|
||||
return ""
|
||||
lines = []
|
||||
for src in sources:
|
||||
title = src.get('title', '') or ''
|
||||
summary = src.get('summary', '') or ''
|
||||
highlights = src.get('highlights', []) or []
|
||||
excerpt = src.get('excerpt', '') or ''
|
||||
part = f"• {title}"
|
||||
if summary:
|
||||
part += f" — {summary[:250]}"
|
||||
elif excerpt:
|
||||
part += f" — {excerpt[:250]}"
|
||||
if highlights:
|
||||
findings = [h[:120] for h in highlights[:2] if h]
|
||||
if findings:
|
||||
part += f" | {'; '.join(findings)}"
|
||||
lines.append(part)
|
||||
return "\n".join(lines)
|
||||
|
||||
def _normalize_cached_research_data(self, cached_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Normalize cached research data to fix None values in confidence_scores.
|
||||
|
||||
@@ -6,6 +6,7 @@ Different strategies for executing research based on depth and focus.
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Any
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
|
||||
from models.blog_models import BlogResearchRequest, ResearchMode, ResearchConfig
|
||||
@@ -87,7 +88,7 @@ Provide analysis in this EXACT format:
|
||||
- For each: Quote/claim, source URL, published date, metric/context.
|
||||
|
||||
REQUIREMENTS:
|
||||
- Every claim MUST include a source URL (authoritative, recent: 2024-2025 preferred).
|
||||
- Every claim MUST include a source URL (authoritative, recent: {datetime.now().year}-{datetime.now().year + 1} preferred).
|
||||
- Use concrete numbers, dates, outcomes; avoid generic advice.
|
||||
- Keep bullets tight and scannable for spoken narration."""
|
||||
return prompt.strip()
|
||||
@@ -116,7 +117,7 @@ Research Topic: "{topic}"{date_filter}{source_filter}
|
||||
|
||||
Provide COMPLETE analysis in this EXACT format:
|
||||
|
||||
## WHAT'S CHANGED (2024-2025)
|
||||
## WHAT'S CHANGED ({datetime.now().year}-{datetime.now().year + 1})
|
||||
[5-7 concise trend bullets with numbers + source URLs]
|
||||
|
||||
## PROOF & NUMBERS
|
||||
@@ -151,7 +152,7 @@ Primary (3), Secondary (8-10), Long-tail (5-7) with intent hints.
|
||||
VERIFICATION REQUIREMENTS:
|
||||
- Minimum 2 authoritative sources per major claim.
|
||||
- Prefer industry reports > research papers > news > blogs.
|
||||
- 2024-2025 data strongly preferred.
|
||||
- {datetime.now().year}-{datetime.now().year + 1} data strongly preferred.
|
||||
- All numbers must include timeframe and methodology.
|
||||
- Every bullet must be concise for spoken narration and actionable for {target_audience}."""
|
||||
return prompt.strip()
|
||||
@@ -213,7 +214,7 @@ REQUIREMENTS:
|
||||
- Cite all claims with authoritative source URLs
|
||||
- Include specific numbers, dates, examples
|
||||
- Focus on actionable insights for {target_audience}
|
||||
- Use 2024-2025 data when available"""
|
||||
- Use {datetime.now().year}-{datetime.now().year + 1} data when available"""
|
||||
return prompt.strip()
|
||||
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ Leverages existing non-AI SEO tools and uses single AI prompt for structured ana
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import math
|
||||
import re
|
||||
import textstat
|
||||
from datetime import datetime
|
||||
@@ -34,7 +35,7 @@ class BlogContentSEOAnalyzer:
|
||||
|
||||
logger.info("BlogContentSEOAnalyzer initialized")
|
||||
|
||||
async def analyze_blog_content(self, blog_content: str, research_data: Dict[str, Any], blog_title: Optional[str] = None, user_id: str = None) -> Dict[str, Any]:
|
||||
async def analyze_blog_content(self, blog_content: str, research_data: Dict[str, Any], blog_title: Optional[str] = None, user_id: str = None, outline: Optional[List[Dict[str, Any]]] = None, competitive_advantage: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Main analysis method with parallel processing
|
||||
|
||||
@@ -43,6 +44,8 @@ class BlogContentSEOAnalyzer:
|
||||
research_data: Research data containing keywords and other insights
|
||||
blog_title: Optional blog title
|
||||
user_id: Clerk user ID for subscription checking (required)
|
||||
outline: Optional outline sections for context-aware analysis
|
||||
competitive_advantage: Optional competitive advantage for context
|
||||
|
||||
Returns:
|
||||
Comprehensive SEO analysis results
|
||||
@@ -52,21 +55,24 @@ class BlogContentSEOAnalyzer:
|
||||
try:
|
||||
logger.info("Starting blog content SEO analysis")
|
||||
|
||||
# Extract keywords from research data
|
||||
keywords_data = self._extract_keywords_from_research(research_data)
|
||||
logger.info(f"Extracted keywords: {keywords_data}")
|
||||
# Extract research context (keywords + competitor data + search queries)
|
||||
research_context = self._extract_research_context(research_data)
|
||||
logger.info(f"Extracted research context with {len(research_context.get('primary', []))} primary keywords")
|
||||
|
||||
# Phase 1: Run non-AI analyzers in parallel
|
||||
logger.info("Running non-AI analyzers in parallel")
|
||||
non_ai_results = await self._run_non_ai_analyzers(blog_content, keywords_data)
|
||||
non_ai_results = await self._run_non_ai_analyzers(blog_content, research_context)
|
||||
|
||||
# Phase 2: Single AI analysis for structured insights
|
||||
# Phase 2: Single AI analysis for structured insights (with outline + competitive context)
|
||||
logger.info("Running AI analysis")
|
||||
ai_insights = await self._run_ai_analysis(blog_content, keywords_data, non_ai_results, user_id=user_id)
|
||||
ai_insights = await self._run_ai_analysis(
|
||||
blog_content, research_context, non_ai_results, user_id=user_id,
|
||||
outline=outline, competitive_advantage=competitive_advantage
|
||||
)
|
||||
|
||||
# Phase 3: Compile and format results
|
||||
logger.info("Compiling results")
|
||||
results = self._compile_blog_seo_results(non_ai_results, ai_insights, keywords_data)
|
||||
results = self._compile_blog_seo_results(non_ai_results, ai_insights, research_context)
|
||||
|
||||
logger.info(f"SEO analysis completed. Overall score: {results.get('overall_score', 0)}")
|
||||
return results
|
||||
@@ -76,14 +82,19 @@ class BlogContentSEOAnalyzer:
|
||||
# Fail fast - don't return fallback data
|
||||
raise e
|
||||
|
||||
def _extract_keywords_from_research(self, research_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Extract keywords from research data"""
|
||||
def _extract_research_context(self, research_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Extract research context from research data including keywords, competitor data, and search queries.
|
||||
|
||||
Previously only extracted keyword_analysis. Now also extracts:
|
||||
- competitor_analysis (content_gaps, industry_leaders, opportunities, competitive_advantages)
|
||||
- search_queries
|
||||
- suggested_angles
|
||||
"""
|
||||
try:
|
||||
logger.info(f"Extracting keywords from research data: {research_data}")
|
||||
logger.info(f"Extracting research context from research data")
|
||||
|
||||
# Extract keywords from research data structure
|
||||
keyword_analysis = research_data.get('keyword_analysis', {})
|
||||
logger.info(f"Found keyword_analysis: {keyword_analysis}")
|
||||
|
||||
# Handle different possible structures
|
||||
primary_keywords = []
|
||||
@@ -109,17 +120,37 @@ class BlogContentSEOAnalyzer:
|
||||
'long_tail': long_tail_keywords,
|
||||
'semantic': semantic_keywords,
|
||||
'all_keywords': all_keywords,
|
||||
'search_intent': keyword_analysis.get('search_intent', 'informational')
|
||||
'search_intent': keyword_analysis.get('search_intent', 'informational'),
|
||||
}
|
||||
|
||||
logger.info(f"Extracted keywords: {result}")
|
||||
# Extract competitor analysis
|
||||
competitor_analysis = research_data.get('competitor_analysis', {})
|
||||
if competitor_analysis:
|
||||
result['content_gaps'] = competitor_analysis.get('content_gaps', [])
|
||||
result['industry_leaders'] = competitor_analysis.get('industry_leaders', [])
|
||||
result['opportunities'] = competitor_analysis.get('opportunities', [])
|
||||
result['competitive_advantages'] = competitor_analysis.get('competitive_advantages', [])
|
||||
else:
|
||||
result['content_gaps'] = []
|
||||
result['industry_leaders'] = []
|
||||
result['opportunities'] = []
|
||||
result['competitive_advantages'] = []
|
||||
|
||||
# Extract search queries
|
||||
search_queries = research_data.get('search_queries', [])
|
||||
result['search_queries'] = search_queries if isinstance(search_queries, list) else []
|
||||
|
||||
# Extract suggested angles
|
||||
suggested_angles = research_data.get('suggested_angles', [])
|
||||
result['suggested_angles'] = suggested_angles if isinstance(suggested_angles, list) else []
|
||||
|
||||
logger.info(f"Extracted research context: {len(primary_keywords)} primary keywords, {len(result.get('content_gaps', []))} content gaps, {len(result.get('search_queries', []))} search queries")
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to extract keywords from research data: {e}")
|
||||
logger.error(f"Failed to extract research context from research data: {e}")
|
||||
logger.error(f"Research data structure: {research_data}")
|
||||
# Fail fast - don't return empty keywords
|
||||
raise ValueError(f"Keyword extraction failed: {e}")
|
||||
raise ValueError(f"Research context extraction failed: {e}")
|
||||
|
||||
async def _run_non_ai_analyzers(self, blog_content: str, keywords_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Run all non-AI analyzers in parallel for maximum performance"""
|
||||
@@ -170,10 +201,24 @@ class BlogContentSEOAnalyzer:
|
||||
sentences = len(re.findall(r'[.!?]+', content))
|
||||
|
||||
# Blog-specific structure analysis
|
||||
content_lower = content.lower()
|
||||
first_500 = content_lower[:500] if len(content) > 500 else content_lower
|
||||
last_500 = content_lower[-500:] if len(content) > 500 else content_lower
|
||||
has_introduction = any('introduction' in line.lower() or 'overview' in line.lower()
|
||||
for line in lines[:10])
|
||||
for line in lines[:10]) or any(
|
||||
phrase in first_500 for phrase in [
|
||||
'in this', 'this article', 'this guide', 'this post',
|
||||
'we will', "you'll learn", "let's explore", "whether you're",
|
||||
'in this section', 'this blog post', 'here we', 'today we',
|
||||
"we'll explore", "we'll cover", "we'll dive"
|
||||
])
|
||||
has_conclusion = any('conclusion' in line.lower() or 'summary' in line.lower()
|
||||
for line in lines[-10:])
|
||||
for line in lines[-10:]) or any(
|
||||
phrase in last_500 for phrase in [
|
||||
'in conclusion', 'to summarize', 'in summary', 'bottom line',
|
||||
'key takeaways', 'remember that', "as we've seen", 'wrapping up',
|
||||
'final thoughts', 'to conclude', 'in short', 'overall'
|
||||
])
|
||||
has_cta = any('call to action' in line.lower() or 'learn more' in line.lower()
|
||||
for line in lines)
|
||||
|
||||
@@ -187,7 +232,7 @@ class BlogContentSEOAnalyzer:
|
||||
'has_conclusion': has_conclusion,
|
||||
'has_call_to_action': has_cta,
|
||||
'structure_score': structure_score,
|
||||
'recommendations': self._get_structure_recommendations(sections, has_introduction, has_conclusion)
|
||||
'recommendations': self._get_structure_recommendations(sections, has_introduction, has_conclusion, content)
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Content structure analysis failed: {e}")
|
||||
@@ -332,33 +377,36 @@ class BlogContentSEOAnalyzer:
|
||||
raise e
|
||||
|
||||
# Helper methods for calculations and scoring
|
||||
|
||||
@staticmethod
|
||||
def _sigmoid(x: float, midpoint: float = 0.0, steepness: float = 1.0) -> float:
|
||||
"""Sigmoid function for smooth scoring curves. Returns 0-1."""
|
||||
try:
|
||||
return 1.0 / (1.0 + math.exp(-steepness * (x - midpoint)))
|
||||
except OverflowError:
|
||||
return 0.0 if x < midpoint else 1.0
|
||||
|
||||
def _calculate_structure_score(self, sections: int, paragraphs: int, has_intro: bool, has_conclusion: bool) -> int:
|
||||
"""Calculate content structure score"""
|
||||
score = 0
|
||||
"""Calculate content structure score using continuous curves instead of rigid brackets.
|
||||
|
||||
# Section count (optimal: 3-8 sections)
|
||||
if 3 <= sections <= 8:
|
||||
score += 30
|
||||
elif sections < 3:
|
||||
score += 15
|
||||
else:
|
||||
score += 20
|
||||
Sections: optimal around 5, steep penalties below 3 or above 10.
|
||||
Paragraphs: optimal around 12, steep penalties below 5 or above 25.
|
||||
Intro/conclusion: binary bonuses.
|
||||
"""
|
||||
# Section score: peaks around 4-6, decays smoothly for low or high counts
|
||||
section_score = self._sigmoid(sections, midpoint=4, steepness=0.8) * 40
|
||||
if sections > 8:
|
||||
section_score = max(section_score * 0.7, 10)
|
||||
|
||||
# Paragraph count (optimal: 8-20 paragraphs)
|
||||
if 8 <= paragraphs <= 20:
|
||||
score += 30
|
||||
elif paragraphs < 8:
|
||||
score += 15
|
||||
else:
|
||||
score += 20
|
||||
# Paragraph score: peaks around 12, decays for low or high counts
|
||||
para_score = self._sigmoid(paragraphs, midpoint=10, steepness=0.3) * 40
|
||||
if paragraphs > 25:
|
||||
para_score = max(para_score * 0.6, 8)
|
||||
|
||||
# Introduction and conclusion
|
||||
if has_intro:
|
||||
score += 20
|
||||
if has_conclusion:
|
||||
score += 20
|
||||
intro_score = 10 if has_intro else 0
|
||||
conclusion_score = 10 if has_conclusion else 0
|
||||
|
||||
return min(score, 100)
|
||||
return int(min(max(section_score + para_score + intro_score + conclusion_score, 5), 100))
|
||||
|
||||
def _calculate_keyword_density(self, content: str, keyword: str) -> float:
|
||||
"""Calculate keyword density percentage"""
|
||||
@@ -397,21 +445,20 @@ class BlogContentSEOAnalyzer:
|
||||
return total_words / len(paragraphs)
|
||||
|
||||
def _calculate_readability_score(self, metrics: Dict[str, float]) -> int:
|
||||
"""Calculate overall readability score"""
|
||||
# Flesch Reading Ease (0-100, higher is better)
|
||||
flesch_score = metrics.get('flesch_reading_ease', 0)
|
||||
"""Calculate readability score using a continuous sigmoid curve on Flesch Reading Ease.
|
||||
|
||||
# Convert to 0-100 scale
|
||||
if flesch_score >= 80:
|
||||
return 90
|
||||
elif flesch_score >= 60:
|
||||
return 80
|
||||
elif flesch_score >= 40:
|
||||
return 70
|
||||
elif flesch_score >= 20:
|
||||
return 60
|
||||
else:
|
||||
return 50
|
||||
Maps Flesch 0-100 to a score that:
|
||||
- Below 30: 25-45 (hard to read)
|
||||
- 30-50: 45-65 (moderate)
|
||||
- 50-70: 65-85 (good range)
|
||||
- 70-90: 85-95 (excellent)
|
||||
- Above 90: 95-100 (very easy)
|
||||
"""
|
||||
flesch = metrics.get('flesch_reading_ease', 0)
|
||||
score = self._sigmoid(flesch, midpoint=50, steepness=0.06) * 70 + 25
|
||||
if flesch > 80:
|
||||
score = min(score + 5, 100)
|
||||
return int(min(max(score, 20), 100))
|
||||
|
||||
def _determine_target_audience(self, metrics: Dict[str, float]) -> str:
|
||||
"""Determine target audience based on readability metrics"""
|
||||
@@ -427,183 +474,228 @@ class BlogContentSEOAnalyzer:
|
||||
return "Graduate level"
|
||||
|
||||
def _calculate_content_depth_score(self, word_count: int, vocabulary_diversity: float) -> int:
|
||||
"""Calculate content depth score"""
|
||||
score = 0
|
||||
"""Calculate content depth score using continuous curves.
|
||||
|
||||
# Word count (optimal: 800-2000 words)
|
||||
if 800 <= word_count <= 2000:
|
||||
score += 50
|
||||
elif word_count < 800:
|
||||
score += 30
|
||||
else:
|
||||
score += 40
|
||||
Word count: sigmoid peaks around 1200, gentle decay for long content.
|
||||
Vocabulary diversity: sigmoid peaks around 0.55, decay for low or high diversity.
|
||||
"""
|
||||
# Word count score: optimal around 1000-1500, smooth decay below 500
|
||||
word_score = self._sigmoid(word_count, midpoint=800, steepness=0.005) * 55
|
||||
if word_count > 3000:
|
||||
word_score = min(word_score, 40)
|
||||
elif word_count < 300:
|
||||
word_score = min(word_score, 15)
|
||||
|
||||
# Vocabulary diversity (optimal: 0.4-0.7)
|
||||
if 0.4 <= vocabulary_diversity <= 0.7:
|
||||
score += 50
|
||||
elif vocabulary_diversity < 0.4:
|
||||
score += 30
|
||||
else:
|
||||
score += 40
|
||||
# Vocabulary diversity score: optimal around 0.5-0.65, too high is repetitive, too low is shallow
|
||||
diversity_score = self._sigmoid(vocabulary_diversity, midpoint=0.45, steepness=12) * 45
|
||||
if vocabulary_diversity < 0.3:
|
||||
diversity_score = min(diversity_score, 15)
|
||||
|
||||
return min(score, 100)
|
||||
return int(min(max(word_score + diversity_score, 5), 100))
|
||||
|
||||
def _calculate_flow_score(self, transition_count: int, word_count: int) -> int:
|
||||
"""Calculate content flow score"""
|
||||
"""Calculate content flow score using continuous curve.
|
||||
|
||||
Transition density is typically low (most content has 0.5-3 per 100 words
|
||||
of the specific transition words we track). The sigmoid midpoint is set at 1.0
|
||||
with moderate steepness to produce a reasonable spread.
|
||||
"""
|
||||
if word_count == 0:
|
||||
return 0
|
||||
return 15
|
||||
|
||||
transition_density = transition_count / (word_count / 100)
|
||||
|
||||
# Optimal transition density: 1-3 per 100 words
|
||||
if 1 <= transition_density <= 3:
|
||||
return 90
|
||||
elif transition_density < 1:
|
||||
return 60
|
||||
else:
|
||||
return 70
|
||||
# Sigmoid centered at 1.0 (decent density), moderate steepness
|
||||
score = self._sigmoid(transition_density, midpoint=1.0, steepness=2.5) * 50 + 40
|
||||
if transition_density > 5:
|
||||
score = max(score - 10, 35)
|
||||
return int(min(max(score, 15), 100))
|
||||
|
||||
def _calculate_heading_hierarchy_score(self, h1: List[str], h2: List[str], h3: List[str]) -> int:
|
||||
"""Calculate heading hierarchy score"""
|
||||
score = 0
|
||||
"""Calculate heading hierarchy score using continuous curves.
|
||||
|
||||
# Should have exactly 1 H1
|
||||
if len(h1) == 1:
|
||||
score += 40
|
||||
elif len(h1) == 0:
|
||||
score += 20
|
||||
H1: 1 is ideal, score decays for 0 or 2+.
|
||||
H2: 4-6 is ideal, score decays for low or high counts.
|
||||
H3: presence adds bonus.
|
||||
"""
|
||||
# H1 score: clear peak at 1
|
||||
h1_count = len(h1)
|
||||
if h1_count == 1:
|
||||
h1_score = 40
|
||||
elif h1_count == 0:
|
||||
h1_score = 15
|
||||
else:
|
||||
score += 10
|
||||
h1_score = max(40 // h1_count, 8)
|
||||
|
||||
# Should have 3-8 H2 headings
|
||||
if 3 <= len(h2) <= 8:
|
||||
score += 40
|
||||
elif len(h2) < 3:
|
||||
score += 20
|
||||
else:
|
||||
score += 30
|
||||
# H2 score: sigmoid peaks around 4-6
|
||||
h2_count = len(h2)
|
||||
h2_score = self._sigmoid(h2_count, midpoint=4, steepness=1.0) * 40
|
||||
if h2_count == 0:
|
||||
h2_score = 5
|
||||
elif h2_count > 10:
|
||||
h2_score = max(h2_score * 0.6, 10)
|
||||
|
||||
# H3 headings are optional but good for structure
|
||||
if len(h3) > 0:
|
||||
score += 20
|
||||
# H3 bonus: presence is good, diminishing returns
|
||||
h3_score = min(len(h3) * 5, 20)
|
||||
|
||||
return min(score, 100)
|
||||
return int(min(max(h1_score + h2_score + h3_score, 10), 100))
|
||||
|
||||
def _calculate_keyword_score(self, keyword_analysis: Dict[str, Any]) -> int:
|
||||
"""Calculate keyword optimization score"""
|
||||
score = 0
|
||||
"""Calculate keyword optimization score using continuous curves.
|
||||
|
||||
Density: sigmoid centered at 2%, smooth peak.
|
||||
Heading presence: binary bonus per keyword.
|
||||
Early occurrence: sigmoid bonus.
|
||||
Missing/over-optimization: smooth penalties.
|
||||
"""
|
||||
density_score = 0
|
||||
heading_bonus = 0
|
||||
early_bonus = 0
|
||||
|
||||
# Check keyword density (optimal: 1-3%)
|
||||
densities = keyword_analysis.get('keyword_density', {})
|
||||
for keyword, density in densities.items():
|
||||
if 1 <= density <= 3:
|
||||
score += 30
|
||||
elif density < 1:
|
||||
score += 15
|
||||
else:
|
||||
score += 10
|
||||
keyword_count = max(len(densities), 1)
|
||||
|
||||
# Check keyword distribution
|
||||
for keyword, density in densities.items():
|
||||
# Density score: smooth peak at 1-3%, sigmoid curve
|
||||
density_contribution = self._sigmoid(density, midpoint=2.0, steepness=2.0) * 30
|
||||
if density > 4:
|
||||
density_contribution *= 0.5 # penalty for over-optimization
|
||||
density_score += density_contribution
|
||||
|
||||
density_score = density_score / keyword_count
|
||||
|
||||
# Heading presence bonus
|
||||
distributions = keyword_analysis.get('keyword_distribution', {})
|
||||
for keyword, dist in distributions.items():
|
||||
if dist.get('in_headings', False):
|
||||
score += 20
|
||||
if dist.get('first_occurrence', -1) < 100: # Early occurrence
|
||||
score += 20
|
||||
heading_bonus += 15
|
||||
first_occ = dist.get('first_occurrence', -1)
|
||||
if isinstance(first_occ, (int, float)) and 0 <= first_occ < 150:
|
||||
early_bonus += int(self._sigmoid(first_occ, midpoint=75, steepness=-0.04) * 15)
|
||||
|
||||
# Penalize missing keywords
|
||||
missing = len(keyword_analysis.get('missing_keywords', []))
|
||||
score -= missing * 10
|
||||
# Penalize missing keywords and over-optimization
|
||||
missing_penalty = len(keyword_analysis.get('missing_keywords', [])) * 8
|
||||
over_opt_penalty = len(keyword_analysis.get('over_optimization', [])) * 12
|
||||
|
||||
# Penalize over-optimization
|
||||
over_opt = len(keyword_analysis.get('over_optimization', []))
|
||||
score -= over_opt * 15
|
||||
|
||||
return max(0, min(score, 100))
|
||||
raw = density_score + heading_bonus + early_bonus - missing_penalty - over_opt_penalty
|
||||
return int(min(max(raw, 5), 100))
|
||||
|
||||
def _calculate_weighted_score(self, scores: Dict[str, int]) -> int:
|
||||
"""Calculate weighted overall score"""
|
||||
"""Calculate weighted overall score.
|
||||
|
||||
AI insight engagement_score is unreliable (no ground truth) so it's excluded
|
||||
from the overall score. The remaining 5 categories are re-weighted to sum to 1.0.
|
||||
AI insights are still reported in category_scores for display but don't affect
|
||||
the overall score.
|
||||
"""
|
||||
weights = {
|
||||
'structure': 0.2,
|
||||
'structure': 0.20,
|
||||
'keywords': 0.25,
|
||||
'readability': 0.2,
|
||||
'quality': 0.15,
|
||||
'headings': 0.1,
|
||||
'ai_insights': 0.1
|
||||
'readability': 0.20,
|
||||
'quality': 0.20,
|
||||
'headings': 0.15,
|
||||
}
|
||||
|
||||
weighted_sum = sum(scores.get(key, 0) * weight for key, weight in weights.items())
|
||||
return int(weighted_sum)
|
||||
return int(min(max(weighted_sum, 0), 100))
|
||||
|
||||
# Recommendation methods
|
||||
def _get_structure_recommendations(self, sections: int, has_intro: bool, has_conclusion: bool) -> List[str]:
|
||||
"""Get structure recommendations"""
|
||||
def _get_structure_recommendations(self, sections: int, has_intro: bool, has_conclusion: bool, content: str = '') -> List[str]:
|
||||
"""Get structure recommendations based on actual content analysis"""
|
||||
recommendations = []
|
||||
|
||||
if sections < 3:
|
||||
recommendations.append("Add more sections to improve content structure")
|
||||
recommendations.append("Add more sections to improve content structure and topic coverage")
|
||||
elif sections > 8:
|
||||
recommendations.append("Consider combining some sections for better flow")
|
||||
recommendations.append("Consider combining some sections for better flow and readability")
|
||||
|
||||
if not has_intro:
|
||||
recommendations.append("Add an introduction section to set context")
|
||||
# More robust intro detection: check first 200 chars for first-person address,
|
||||
# question, or general hook — not just keyword matching
|
||||
first_200 = (content[:500] if content else '').lower()
|
||||
intro_indicators = any([
|
||||
has_intro,
|
||||
'?' in first_200[:200],
|
||||
any(phrase in first_200 for phrase in ['in this', 'this article', 'this guide', 'this post', 'we will', "you'll learn", "let's explore", "whether you're"]),
|
||||
first_200.strip().startswith('# '),
|
||||
])
|
||||
if not intro_indicators:
|
||||
recommendations.append("Add an introduction that hooks the reader and previews key topics")
|
||||
|
||||
if not has_conclusion:
|
||||
recommendations.append("Add a conclusion section to summarize key points")
|
||||
# More robust conclusion detection
|
||||
last_500 = (content[-500:] if content else '').lower()
|
||||
conclusion_indicators = any([
|
||||
has_conclusion,
|
||||
any(phrase in last_500 for phrase in ['in conclusion', 'to summarize', 'in summary', 'bottom line', 'key takeaways', 'remember that', 'as we\'ve seen']),
|
||||
])
|
||||
if not conclusion_indicators:
|
||||
recommendations.append("Add a conclusion to summarize key points and provide next steps")
|
||||
|
||||
return recommendations
|
||||
|
||||
def _get_readability_recommendations(self, metrics: Dict[str, float], avg_sentence_length: float) -> List[str]:
|
||||
"""Get readability recommendations"""
|
||||
"""Get readability recommendations with specific, actionable guidance"""
|
||||
recommendations = []
|
||||
|
||||
flesch_score = metrics.get('flesch_reading_ease', 0)
|
||||
|
||||
if flesch_score < 60:
|
||||
recommendations.append("Simplify language and use shorter sentences")
|
||||
if flesch_score < 30:
|
||||
recommendations.append("Content is very difficult to read — shorten sentences, use simpler words, and break up complex ideas")
|
||||
elif flesch_score < 50:
|
||||
recommendations.append("Content is fairly complex — consider simplifying some sentences and adding more plain-language explanations")
|
||||
|
||||
if avg_sentence_length > 20:
|
||||
recommendations.append("Break down long sentences for better readability")
|
||||
if avg_sentence_length > 25:
|
||||
recommendations.append(f"Average sentence length is {avg_sentence_length:.0f} words — aim for 15-20 words per sentence for better readability")
|
||||
elif avg_sentence_length > 20:
|
||||
recommendations.append("Some sentences may be too long — try breaking a few into shorter ones for easier reading")
|
||||
|
||||
if flesch_score > 80:
|
||||
recommendations.append("Consider adding more technical depth for expert audience")
|
||||
if flesch_score > 80 and flesch_score < 95:
|
||||
recommendations.append("Readability is very good — consider adding slightly more technical depth for expert credibility")
|
||||
|
||||
return recommendations
|
||||
|
||||
def _get_content_quality_recommendations(self, word_count: int, vocabulary_diversity: float, transition_count: int) -> List[str]:
|
||||
"""Get content quality recommendations"""
|
||||
"""Get content quality recommendations with specific, actionable guidance"""
|
||||
recommendations = []
|
||||
|
||||
if word_count < 800:
|
||||
recommendations.append("Expand content with more detailed explanations")
|
||||
elif word_count > 2000:
|
||||
recommendations.append("Consider breaking into multiple posts")
|
||||
if word_count < 400:
|
||||
recommendations.append("Content is significantly underdeveloped — expand with detailed explanations, examples, and supporting evidence")
|
||||
elif word_count < 800:
|
||||
recommendations.append("Content is thin — add depth with specific examples, data points, and detailed explanations for each section")
|
||||
elif word_count > 3000:
|
||||
recommendations.append("Content is very long — consider whether all sections are necessary or if some could be a separate post")
|
||||
|
||||
if vocabulary_diversity < 0.4:
|
||||
recommendations.append("Use more varied vocabulary to improve engagement")
|
||||
if vocabulary_diversity < 0.35:
|
||||
recommendations.append("Vocabulary is highly repetitive — use synonyms and varied phrasing to improve engagement")
|
||||
elif vocabulary_diversity < 0.45:
|
||||
recommendations.append("Vocabulary variety could be improved — try rephrasing repeated terms for more natural flow")
|
||||
|
||||
if transition_count < 3:
|
||||
recommendations.append("Add more transition words to improve flow")
|
||||
if transition_count < 2:
|
||||
recommendations.append("Very few transition words found — add connectors like 'however', 'therefore', 'furthermore' between ideas")
|
||||
elif transition_count < 5:
|
||||
recommendations.append("Add more transition words to improve the flow between paragraphs and sections")
|
||||
|
||||
return recommendations
|
||||
|
||||
def _get_heading_recommendations(self, h1: List[str], h2: List[str], h3: List[str]) -> List[str]:
|
||||
"""Get heading recommendations"""
|
||||
"""Get heading recommendations with specific, actionable guidance"""
|
||||
recommendations = []
|
||||
|
||||
if len(h1) == 0:
|
||||
recommendations.append("Add a main H1 heading")
|
||||
recommendations.append("Add a main H1 heading — this is the primary title for both readers and search engines")
|
||||
elif len(h1) > 1:
|
||||
recommendations.append("Use only one H1 heading per post")
|
||||
recommendations.append(f"Found {len(h1)} H1 headings — use only one H1 per post for clarity. Convert extras to H2.")
|
||||
|
||||
if len(h2) < 3:
|
||||
recommendations.append("Add more H2 headings to structure content")
|
||||
elif len(h2) > 8:
|
||||
recommendations.append("Consider using H3 headings for better hierarchy")
|
||||
recommendations.append(f"Only {len(h2)} H2 headings found — add section headings to break up content and improve scanning")
|
||||
elif len(h2) > 10:
|
||||
recommendations.append(f"{len(h2)} H2 headings may be too many — consider using H3 subheadings within sections for better hierarchy")
|
||||
|
||||
if len(h2) >= 3 and len(h3) == 0 and len(h2) > 5:
|
||||
recommendations.append("Consider adding H3 subheadings within longer H2 sections for better content hierarchy")
|
||||
|
||||
return recommendations
|
||||
|
||||
async def _run_ai_analysis(self, blog_content: str, keywords_data: Dict[str, Any], non_ai_results: Dict[str, Any], user_id: str = None) -> Dict[str, Any]:
|
||||
async def _run_ai_analysis(self, blog_content: str, keywords_data: Dict[str, Any], non_ai_results: Dict[str, Any], user_id: str = None, outline: Optional[List[Dict[str, Any]]] = None, competitive_advantage: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""Run single AI analysis for structured insights (provider-agnostic)"""
|
||||
if not user_id:
|
||||
raise ValueError("user_id is required for subscription checking. Please provide Clerk user ID.")
|
||||
@@ -612,7 +704,9 @@ class BlogContentSEOAnalyzer:
|
||||
context = {
|
||||
'blog_content': blog_content,
|
||||
'keywords_data': keywords_data,
|
||||
'non_ai_results': non_ai_results
|
||||
'non_ai_results': non_ai_results,
|
||||
'outline': outline or [],
|
||||
'competitive_advantage': competitive_advantage or '',
|
||||
}
|
||||
|
||||
# Create AI prompt for structured analysis
|
||||
@@ -624,10 +718,18 @@ class BlogContentSEOAnalyzer:
|
||||
"content_quality_insights": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"engagement_score": {"type": "number"},
|
||||
"value_proposition": {"type": "string"},
|
||||
"content_gaps": {"type": "array", "items": {"type": "string"}},
|
||||
"improvement_suggestions": {"type": "array", "items": {"type": "string"}}
|
||||
"improvement_suggestions": {"type": "array", "items": {"type": "string"}},
|
||||
"content_depth_indicators": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"has_specific_data_points": {"type": "boolean"},
|
||||
"has_examples_or_illustrations": {"type": "boolean"},
|
||||
"has_actionable_takeaways": {"type": "boolean"},
|
||||
"depth_assessment": {"type": "string"}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"seo_optimization_insights": {
|
||||
@@ -648,13 +750,12 @@ class BlogContentSEOAnalyzer:
|
||||
"ux_improvements": {"type": "array", "items": {"type": "string"}}
|
||||
}
|
||||
},
|
||||
"competitive_analysis": {
|
||||
"content_strengths": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content_differentiation": {"type": "string"},
|
||||
"unique_value": {"type": "string"},
|
||||
"competitive_advantages": {"type": "array", "items": {"type": "string"}},
|
||||
"market_positioning": {"type": "string"}
|
||||
"strongest_sections": {"type": "array", "items": {"type": "string"}},
|
||||
"unique_value_points": {"type": "array", "items": {"type": "string"}},
|
||||
"reader_value_assessment": {"type": "string"}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -675,35 +776,83 @@ class BlogContentSEOAnalyzer:
|
||||
raise e
|
||||
|
||||
def _create_ai_analysis_prompt(self, context: Dict[str, Any]) -> str:
|
||||
"""Create AI analysis prompt"""
|
||||
"""Create AI analysis prompt with research context and outline awareness"""
|
||||
blog_content = context['blog_content']
|
||||
keywords_data = context['keywords_data']
|
||||
non_ai_results = context['non_ai_results']
|
||||
outline = context.get('outline', [])
|
||||
competitive_advantage = context.get('competitive_advantage', '')
|
||||
|
||||
# Build outline context
|
||||
outline_text = ""
|
||||
if outline:
|
||||
section_names = []
|
||||
for sec in outline[:8]:
|
||||
heading = sec.get('heading', '') if isinstance(sec, dict) else getattr(sec, 'heading', '')
|
||||
subheadings = sec.get('subheadings', []) if isinstance(sec, dict) else getattr(sec, 'subheadings', [])
|
||||
sub_text = f" (subtopics: {', '.join(subheadings[:4])})" if subheadings else ""
|
||||
target_words = sec.get('target_words', '') if isinstance(sec, dict) else getattr(sec, 'target_words', '')
|
||||
word_text = f" [~{target_words} words]" if target_words else ""
|
||||
section_names.append(f" - {heading}{sub_text}{word_text}")
|
||||
outline_text = "\n".join(section_names)
|
||||
|
||||
# Build research context block
|
||||
research_block = ""
|
||||
content_gaps = keywords_data.get('content_gaps', [])
|
||||
competitive_advantages = keywords_data.get('competitive_advantages', [])
|
||||
search_queries = keywords_data.get('search_queries', [])
|
||||
suggested_angles = keywords_data.get('suggested_angles', [])
|
||||
industry_leaders = keywords_data.get('industry_leaders', [])
|
||||
|
||||
if content_gaps:
|
||||
research_block += f"\nCONTENT GAPS (from competitor analysis): {', '.join(content_gaps[:5])}"
|
||||
if competitive_advantages:
|
||||
research_block += f"\nOUR COMPETITIVE ADVANTAGES: {', '.join(competitive_advantages[:3])}"
|
||||
if competitive_advantage:
|
||||
research_block += f"\nFOCUSED COMPETITIVE ADVANTAGE: {competitive_advantage}"
|
||||
if search_queries:
|
||||
research_block += f"\nORIGINAL SEARCH QUERIES: {', '.join(search_queries[:5])}"
|
||||
if suggested_angles:
|
||||
research_block += f"\nPLANNED CONTENT ANGLES: {', '.join(suggested_angles[:3])}"
|
||||
if industry_leaders:
|
||||
research_block += f"\nINDUSTRY LEADERS: {', '.join(industry_leaders[:3])}"
|
||||
|
||||
prompt = f"""
|
||||
Analyze this blog content for SEO optimization and user experience. Provide structured insights based on the content and keyword data.
|
||||
Analyze this blog content for SEO optimization and user experience. Provide structured insights based ONLY on what is actually present in the content and keyword data. Do NOT fabricate data, statistics, competitor names, or case studies that are not in the content.
|
||||
|
||||
BLOG CONTENT:
|
||||
{blog_content[:2000]}...
|
||||
{blog_content[:3000]}...
|
||||
|
||||
KEYWORDS DATA:
|
||||
Primary Keywords: {keywords_data.get('primary', [])}
|
||||
Long-tail Keywords: {keywords_data.get('long_tail', [])}
|
||||
Semantic Keywords: {keywords_data.get('semantic', [])}
|
||||
Search Intent: {keywords_data.get('search_intent', 'informational')}
|
||||
Search Intent: {keywords_data.get('search_intent', 'informational')}{research_block}
|
||||
|
||||
NON-AI ANALYSIS RESULTS:
|
||||
Structure Score: {non_ai_results.get('content_structure', {}).get('structure_score', 0)}
|
||||
Readability Score: {non_ai_results.get('readability_analysis', {}).get('readability_score', 0)}
|
||||
Content Quality Score: {non_ai_results.get('content_quality', {}).get('content_depth_score', 0)}
|
||||
MEASURED ANALYSIS RESULTS:
|
||||
Structure Score: {non_ai_results.get('content_structure', {}).get('structure_score', 0)}/100
|
||||
Readability Score: {non_ai_results.get('readability_analysis', {}).get('readability_score', 0)}/100
|
||||
Content Quality Score: {non_ai_results.get('content_quality', {}).get('content_depth_score', 0)}/100
|
||||
Heading Hierarchy Score: {non_ai_results.get('heading_structure', {}).get('heading_hierarchy_score', 0)}/100
|
||||
Word Count: {non_ai_results.get('content_quality', {}).get('word_count', 0)}
|
||||
Sections: {non_ai_results.get('content_structure', {}).get('total_sections', 0)}
|
||||
Has Introduction: {non_ai_results.get('content_structure', {}).get('has_introduction', False)}
|
||||
Has Conclusion: {non_ai_results.get('content_structure', {}).get('has_conclusion', False)}{f"""
|
||||
|
||||
Please provide:
|
||||
1. Content Quality Insights: Assess engagement potential, value proposition, content gaps, and improvement suggestions
|
||||
2. SEO Optimization Insights: Evaluate keyword optimization, content relevance, search intent alignment, and SEO improvements
|
||||
3. User Experience Insights: Analyze content flow, readability, engagement factors, and UX improvements
|
||||
4. Competitive Analysis: Identify content differentiation, unique value, competitive advantages, and market positioning
|
||||
PLANNED OUTLINE STRUCTURE:
|
||||
{outline_text}""" if outline_text else ""}
|
||||
{f"""
|
||||
|
||||
Focus on actionable insights that can improve the blog's performance and user engagement.
|
||||
FOCUSED ADVANTAGE: {competitive_advantage}""" if competitive_advantage else ""}
|
||||
|
||||
IMPORTANT: SEO metadata (title tag, meta description, Open Graph tags, Twitter cards, JSON-LD schema) will be generated in a separate step. Do NOT recommend adding or improving meta descriptions, title tags, OG tags, or structured data markup — focus only on content-level improvements.
|
||||
|
||||
Provide:
|
||||
1. Content Quality Insights: Assess the value proposition based on actual content. Identify specific content gaps (what TOPICS from the planned outline or competitor analysis are missing; do NOT suggest adding case studies unless the content references specific studies). Suggest improvements grounded in what the content currently covers.
|
||||
2. Content Depth Indicators: Objectively assess whether the content contains specific data points, examples, or actionable takeaways. These are binary assessments based on what's actually in the text.
|
||||
3. SEO Optimization Insights: Evaluate keyword optimization based on the provided keyword data. Assess content relevance and search intent alignment relative to the original search queries.
|
||||
4. User Experience Insights: Analyze content flow and readability. Identify engagement factors present in the text.
|
||||
5. Content Strengths: Identify the strongest sections of the content by heading name. Note unique value points the content provides. Do NOT invent competitive advantages — only describe what makes THIS content valuable based on the competitive advantages and content gaps listed above.
|
||||
"""
|
||||
|
||||
return prompt
|
||||
@@ -719,13 +868,28 @@ class BlogContentSEOAnalyzer:
|
||||
raise ValueError("AI insights are missing")
|
||||
|
||||
# Calculate category scores
|
||||
# Compute ai_depth_score from measurable content_depth_indicators instead of
|
||||
# hallucinated engagement_score. If depth_indicators are present, score based on
|
||||
# boolean flags; otherwise default to 50 (neutral).
|
||||
ai_quality = ai_insights.get('content_quality_insights', {})
|
||||
depth_indicators = ai_quality.get('content_depth_indicators', {})
|
||||
if depth_indicators:
|
||||
depth_flags = [
|
||||
depth_indicators.get('has_specific_data_points', False),
|
||||
depth_indicators.get('has_examples_or_illustrations', False),
|
||||
depth_indicators.get('has_actionable_takeaways', False),
|
||||
]
|
||||
depth_score = 40 + (sum(depth_flags) * 20) # 40 baseline + 20 per true flag = 40-100
|
||||
else:
|
||||
depth_score = 50
|
||||
|
||||
category_scores = {
|
||||
'structure': non_ai_results.get('content_structure', {}).get('structure_score', 0),
|
||||
'keywords': self._calculate_keyword_score(non_ai_results.get('keyword_analysis', {})),
|
||||
'readability': non_ai_results.get('readability_analysis', {}).get('readability_score', 0),
|
||||
'quality': non_ai_results.get('content_quality', {}).get('content_depth_score', 0),
|
||||
'headings': non_ai_results.get('heading_structure', {}).get('heading_hierarchy_score', 0),
|
||||
'ai_insights': ai_insights.get('content_quality_insights', {}).get('engagement_score', 0)
|
||||
'ai_insights': depth_score
|
||||
}
|
||||
|
||||
# Calculate overall score
|
||||
@@ -758,6 +922,14 @@ class BlogContentSEOAnalyzer:
|
||||
"""Compile actionable recommendations from all sources"""
|
||||
recommendations = []
|
||||
|
||||
# Metadata-related keywords to filter out (handled by metadata generator)
|
||||
metadata_keywords = ['meta description', 'title tag', 'og tag', 'open graph',
|
||||
'twitter card', 'json-ld', 'schema markup', 'structured data markup']
|
||||
|
||||
def _is_metadata_rec(rec_text: str) -> bool:
|
||||
rec_lower = rec_text.lower()
|
||||
return any(kw in rec_lower for kw in metadata_keywords)
|
||||
|
||||
# Structure recommendations
|
||||
structure_recs = non_ai_results.get('content_structure', {}).get('recommendations', [])
|
||||
for rec in structure_recs:
|
||||
@@ -788,9 +960,10 @@ class BlogContentSEOAnalyzer:
|
||||
'impact': 'Improves user engagement and comprehension'
|
||||
})
|
||||
|
||||
# AI insights recommendations
|
||||
# AI insights recommendations (filter out metadata-related recs)
|
||||
ai_recs = ai_insights.get('content_quality_insights', {}).get('improvement_suggestions', [])
|
||||
for rec in ai_recs:
|
||||
if not _is_metadata_rec(rec):
|
||||
recommendations.append({
|
||||
'category': 'Content Quality',
|
||||
'priority': 'Medium',
|
||||
@@ -798,6 +971,28 @@ class BlogContentSEOAnalyzer:
|
||||
'impact': 'Enhances content value and engagement'
|
||||
})
|
||||
|
||||
# SEO improvement recommendations (filter metadata recs)
|
||||
seo_recs = ai_insights.get('seo_optimization_insights', {}).get('seo_improvements', [])
|
||||
for rec in seo_recs:
|
||||
if not _is_metadata_rec(rec):
|
||||
recommendations.append({
|
||||
'category': 'SEO',
|
||||
'priority': 'Medium',
|
||||
'recommendation': rec,
|
||||
'impact': 'Improves search engine optimization'
|
||||
})
|
||||
|
||||
# Content strengths as informational (lower priority)
|
||||
content_strengths = ai_insights.get('content_strengths', {})
|
||||
strong_sections = content_strengths.get('strongest_sections', [])
|
||||
if strong_sections:
|
||||
recommendations.append({
|
||||
'category': 'Strengths',
|
||||
'priority': 'Low',
|
||||
'recommendation': f"Strongest sections: {', '.join(strong_sections[:3])}. Consider expanding these areas further.",
|
||||
'impact': 'Leverages existing content strengths'
|
||||
})
|
||||
|
||||
return recommendations
|
||||
|
||||
def _create_visualization_data(self, category_scores: Dict[str, int], non_ai_results: Dict[str, Any]) -> Dict[str, Any]:
|
||||
@@ -851,7 +1046,7 @@ class BlogContentSEOAnalyzer:
|
||||
'weakest_category': weakest_category[0],
|
||||
'key_strengths': self._identify_key_strengths(category_scores),
|
||||
'key_weaknesses': self._identify_key_weaknesses(category_scores),
|
||||
'ai_summary': ai_insights.get('content_quality_insights', {}).get('value_proposition', '')
|
||||
'ai_summary': ai_insights.get('content_quality_insights', {}).get('value_proposition', 'Content analysis completed.')
|
||||
}
|
||||
|
||||
def _identify_key_strengths(self, category_scores: Dict[str, int]) -> List[str]:
|
||||
|
||||
@@ -84,14 +84,14 @@ class BlogSEOMetadataGenerator:
|
||||
raise e
|
||||
|
||||
def _extract_keywords_from_research(self, research_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Extract keywords and context from research data"""
|
||||
"""Extract keywords and context from research data, including competitor analysis and content gaps."""
|
||||
try:
|
||||
keyword_analysis = research_data.get('keyword_analysis', {})
|
||||
|
||||
# Handle both 'semantic' and 'semantic_keywords' field names
|
||||
semantic_keywords = keyword_analysis.get('semantic', []) or keyword_analysis.get('semantic_keywords', [])
|
||||
|
||||
return {
|
||||
result = {
|
||||
'primary_keywords': keyword_analysis.get('primary', []),
|
||||
'long_tail_keywords': keyword_analysis.get('long_tail', []),
|
||||
'semantic_keywords': semantic_keywords,
|
||||
@@ -100,6 +100,30 @@ class BlogSEOMetadataGenerator:
|
||||
'target_audience': research_data.get('target_audience', 'general'),
|
||||
'industry': research_data.get('industry', 'general')
|
||||
}
|
||||
|
||||
# Extract competitor analysis context
|
||||
competitor_analysis = research_data.get('competitor_analysis', {})
|
||||
if competitor_analysis:
|
||||
result['content_gaps'] = competitor_analysis.get('content_gaps', [])
|
||||
result['industry_leaders'] = competitor_analysis.get('industry_leaders', [])
|
||||
result['opportunities'] = competitor_analysis.get('opportunities', [])
|
||||
result['competitive_advantages'] = competitor_analysis.get('competitive_advantages', [])
|
||||
else:
|
||||
result['content_gaps'] = []
|
||||
result['industry_leaders'] = []
|
||||
result['opportunities'] = []
|
||||
result['competitive_advantages'] = []
|
||||
|
||||
# Extract search queries
|
||||
search_queries = research_data.get('search_queries', [])
|
||||
result['search_queries'] = search_queries if isinstance(search_queries, list) else []
|
||||
|
||||
# Extract suggested angles
|
||||
suggested_angles = research_data.get('suggested_angles', [])
|
||||
result['suggested_angles'] = suggested_angles if isinstance(suggested_angles, list) else []
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to extract keywords from research: {e}")
|
||||
return {
|
||||
@@ -109,7 +133,13 @@ class BlogSEOMetadataGenerator:
|
||||
'all_keywords': [],
|
||||
'search_intent': 'informational',
|
||||
'target_audience': 'general',
|
||||
'industry': 'general'
|
||||
'industry': 'general',
|
||||
'content_gaps': [],
|
||||
'industry_leaders': [],
|
||||
'opportunities': [],
|
||||
'competitive_advantages': [],
|
||||
'search_queries': [],
|
||||
'suggested_angles': []
|
||||
}
|
||||
|
||||
async def _generate_core_metadata(
|
||||
@@ -194,18 +224,20 @@ class BlogSEOMetadataGenerator:
|
||||
# Check if we got a valid response
|
||||
if not ai_response or not isinstance(ai_response, dict):
|
||||
logger.error("Core metadata generation failed: Invalid response from LLM")
|
||||
# Return fallback response
|
||||
primary_keywords = ', '.join(keywords_data.get('primary_keywords', ['content']))
|
||||
# Return fallback response using content-derived values
|
||||
primary_kw = keywords_data.get('primary_keywords', ['content'])
|
||||
primary_kw_first = primary_kw[0] if primary_kw else 'content'
|
||||
word_count = len(blog_content.split())
|
||||
slug = re.sub(r'[^a-z0-9]+', '-', blog_title.lower())[:50].strip('-')
|
||||
return {
|
||||
'seo_title': blog_title,
|
||||
'meta_description': f'Learn about {primary_keywords.split(", ")[0] if primary_keywords else "this topic"}.',
|
||||
'url_slug': blog_title.lower().replace(' ', '-').replace(':', '').replace(',', '')[:50],
|
||||
'blog_tags': primary_keywords.split(', ') if primary_keywords else ['content'],
|
||||
'blog_categories': ['Content Marketing', 'Technology'],
|
||||
'social_hashtags': ['#content', '#marketing', '#technology'],
|
||||
'meta_description': f'Discover insights about {primary_kw_first}. Comprehensive guide with practical tips and expert analysis.',
|
||||
'url_slug': slug,
|
||||
'blog_tags': primary_kw[:5] if isinstance(primary_kw, list) else [primary_kw_first],
|
||||
'blog_categories': [primary_kw_first.title(), 'Guide'],
|
||||
'social_hashtags': [f'#{primary_kw_first.replace(" ", "")}', '#guide', '#tips'],
|
||||
'reading_time': max(1, word_count // 200),
|
||||
'focus_keyword': primary_keywords.split(', ')[0] if primary_keywords else 'content'
|
||||
'focus_keyword': primary_kw_first
|
||||
}
|
||||
|
||||
logger.info(f"Core metadata generation completed. Response keys: {list(ai_response.keys())}")
|
||||
@@ -302,36 +334,41 @@ class BlogSEOMetadataGenerator:
|
||||
# Check if we got a valid response
|
||||
if not ai_response or not isinstance(ai_response, dict) or not ai_response.get('open_graph') or not ai_response.get('twitter_card') or not ai_response.get('json_ld_schema'):
|
||||
logger.error("Social metadata generation failed: Invalid or empty response from LLM")
|
||||
# Return fallback response
|
||||
# Return fallback response using content-derived values
|
||||
primary_kw = keywords_data.get('primary_keywords', ['content'])
|
||||
primary_kw_first = primary_kw[0] if primary_kw else 'content'
|
||||
slug = re.sub(r'[^a-z0-9]+', '-', blog_title.lower())[:50].strip('-')
|
||||
word_count = len(blog_content.split())
|
||||
current_date = datetime.now().isoformat()
|
||||
return {
|
||||
'open_graph': {
|
||||
'title': blog_title,
|
||||
'description': f'Learn about {keywords_data.get("primary_keywords", ["this topic"])[0] if keywords_data.get("primary_keywords") else "this topic"}.',
|
||||
'image': 'https://example.com/image.jpg',
|
||||
'description': f'Discover insights about {primary_kw_first}. Comprehensive guide with practical tips.',
|
||||
'image': '',
|
||||
'type': 'article',
|
||||
'site_name': 'Your Website',
|
||||
'url': 'https://example.com/blog'
|
||||
'site_name': '',
|
||||
'url': f'https://example.com/blog/{slug}'
|
||||
},
|
||||
'twitter_card': {
|
||||
'card': 'summary_large_image',
|
||||
'title': blog_title,
|
||||
'description': f'Learn about {keywords_data.get("primary_keywords", ["this topic"])[0] if keywords_data.get("primary_keywords") else "this topic"}.',
|
||||
'image': 'https://example.com/image.jpg',
|
||||
'site': '@yourwebsite',
|
||||
'creator': '@author'
|
||||
'description': f'Explore our guide on {primary_kw_first}.',
|
||||
'image': '',
|
||||
'site': '',
|
||||
'creator': ''
|
||||
},
|
||||
'json_ld_schema': {
|
||||
'@context': 'https://schema.org',
|
||||
'@type': 'Article',
|
||||
'headline': blog_title,
|
||||
'description': f'Learn about {keywords_data.get("primary_keywords", ["this topic"])[0] if keywords_data.get("primary_keywords") else "this topic"}.',
|
||||
'author': {'@type': 'Person', 'name': 'Author Name'},
|
||||
'publisher': {'@type': 'Organization', 'name': 'Your Website'},
|
||||
'datePublished': '2025-01-01T00:00:00Z',
|
||||
'dateModified': '2025-01-01T00:00:00Z',
|
||||
'mainEntityOfPage': 'https://example.com/blog',
|
||||
'keywords': keywords_data.get('primary_keywords', ['content']),
|
||||
'wordCount': len(blog_content.split())
|
||||
'description': f'Comprehensive guide about {primary_kw_first}.',
|
||||
'author': {'@type': 'Person', 'name': ''},
|
||||
'publisher': {'@type': 'Organization', 'name': ''},
|
||||
'datePublished': current_date,
|
||||
'dateModified': current_date,
|
||||
'mainEntityOfPage': f'https://example.com/blog/{slug}',
|
||||
'keywords': primary_kw[:5] if isinstance(primary_kw, list) else [primary_kw_first],
|
||||
'wordCount': word_count
|
||||
}
|
||||
}
|
||||
|
||||
@@ -408,21 +445,53 @@ OUTLINE STRUCTURE:
|
||||
- Content hierarchy: Well-structured with {len(outline)} main sections
|
||||
"""
|
||||
|
||||
# Extract SEO analysis insights
|
||||
# Extract SEO analysis insights with weakness-aware guidance
|
||||
seo_context = ""
|
||||
if seo_analysis:
|
||||
overall_score = seo_analysis.get('overall_score', seo_analysis.get('seo_score', 0))
|
||||
category_scores = seo_analysis.get('category_scores', {})
|
||||
applied_recs = seo_analysis.get('applied_recommendations', [])
|
||||
applied_recs = seo_analysis.get('applied_recommendations') or []
|
||||
|
||||
# Build weakness-specific guidance for metadata
|
||||
weakness_guidance = []
|
||||
kw_score = category_scores.get('keywords', category_scores.get('Keywords', 0))
|
||||
if kw_score < 70:
|
||||
weakness_guidance.append("Keyword optimization is weak — ensure title and description prominently feature primary keywords")
|
||||
read_score = category_scores.get('readability', category_scores.get('Readability', 0))
|
||||
if read_score < 70:
|
||||
weakness_guidance.append("Readability needs improvement — use clear, accessible language in the meta description")
|
||||
struct_score = category_scores.get('structure', category_scores.get('Structure', 0))
|
||||
if struct_score < 70:
|
||||
weakness_guidance.append("Content structure needs improvement — the title should clearly signal the content structure")
|
||||
|
||||
seo_context = f"""
|
||||
SEO ANALYSIS RESULTS:
|
||||
- Overall SEO Score: {overall_score}/100
|
||||
- Category Scores: Structure {category_scores.get('structure', category_scores.get('Structure', 0))}, Keywords {category_scores.get('keywords', category_scores.get('Keywords', 0))}, Readability {category_scores.get('readability', category_scores.get('Readability', 0))}
|
||||
- Category Scores: Structure {struct_score}, Keywords {kw_score}, Readability {read_score}
|
||||
- Applied Recommendations: {len(applied_recs)} SEO optimizations have been applied
|
||||
- Content Quality: Optimized for search engines with keyword focus
|
||||
{f"- WEAKNESS GUIDANCE: {'; '.join(weakness_guidance)}" if weakness_guidance else ""}
|
||||
"""
|
||||
|
||||
# Build research context block
|
||||
research_block = ""
|
||||
content_gaps = keywords_data.get('content_gaps', [])
|
||||
competitive_advantages = keywords_data.get('competitive_advantages', [])
|
||||
search_queries = keywords_data.get('search_queries', [])
|
||||
suggested_angles = keywords_data.get('suggested_angles', [])
|
||||
industry_leaders = keywords_data.get('industry_leaders', [])
|
||||
|
||||
if content_gaps:
|
||||
research_block += f"\nCONTENT GAPS (from competitor analysis): {', '.join(content_gaps[:5])}"
|
||||
if competitive_advantages:
|
||||
research_block += f"\nOUR KEY DIFFERENTIATORS: {', '.join(competitive_advantages[:3])}"
|
||||
if search_queries:
|
||||
research_block += f"\nORIGINAL SEARCH QUERIES: {', '.join(search_queries[:5])}"
|
||||
if suggested_angles:
|
||||
research_block += f"\nCONTENT ANGLES: {', '.join(suggested_angles[:3])}"
|
||||
if industry_leaders:
|
||||
research_block += f"\nINDUSTRY LEADERS: {', '.join(industry_leaders[:3])}"
|
||||
|
||||
# Get more content context (key sections instead of just first 1000 chars)
|
||||
content_preview = self._extract_content_highlights(blog_content)
|
||||
|
||||
@@ -443,6 +512,7 @@ SEMANTIC KEYWORDS: {semantic_keywords}
|
||||
SEARCH INTENT: {search_intent}
|
||||
TARGET AUDIENCE: {target_audience}
|
||||
INDUSTRY: {industry}
|
||||
{research_block}
|
||||
|
||||
{seo_context}
|
||||
|
||||
@@ -525,6 +595,18 @@ Generate metadata that is personalized, compelling, and SEO-optimized.
|
||||
overall_score = seo_analysis.get('overall_score', seo_analysis.get('seo_score', 0))
|
||||
seo_context = f"\nSEO SCORE: {overall_score}/100 (optimized content)\n"
|
||||
|
||||
# Build research context for social metadata
|
||||
research_block = ""
|
||||
content_gaps = keywords_data.get('content_gaps', [])
|
||||
competitive_advantages = keywords_data.get('competitive_advantages', [])
|
||||
search_queries = keywords_data.get('search_queries', [])
|
||||
if content_gaps:
|
||||
research_block += f"\nCONTENT GAPS: {', '.join(content_gaps[:3])}"
|
||||
if competitive_advantages:
|
||||
research_block += f"\nDIFFERENTIATORS: {', '.join(competitive_advantages[:3])}"
|
||||
if search_queries:
|
||||
research_block += f"\nSEARCH QUERIES: {', '.join(search_queries[:4])}"
|
||||
|
||||
content_preview = self._extract_content_highlights(blog_content, 1500)
|
||||
|
||||
prompt = f"""
|
||||
@@ -539,6 +621,7 @@ KEYWORDS: {primary_keywords}
|
||||
TARGET AUDIENCE: {target_audience}
|
||||
INDUSTRY: {industry}
|
||||
CURRENT DATE: {current_date}
|
||||
{research_block}
|
||||
|
||||
=== GENERATION REQUIREMENTS ===
|
||||
|
||||
@@ -555,16 +638,16 @@ CURRENT DATE: {current_date}
|
||||
- title: 70 chars max, optimized for Twitter audience
|
||||
- description: 200 chars max with relevant hashtags inline
|
||||
- image: Match Open Graph image
|
||||
- site: @yourwebsite (placeholder, user should update)
|
||||
- creator: @author (placeholder, user should update)
|
||||
- site: Leave empty string (user will add their Twitter handle)
|
||||
- creator: Leave empty string (user will add author Twitter handle)
|
||||
|
||||
3. JSON-LD SCHEMA (Article):
|
||||
- @context: "https://schema.org"
|
||||
- @type: "Article"
|
||||
- headline: Article title (optimized)
|
||||
- description: Article description (150-200 chars)
|
||||
- author: {{"@type": "Person", "name": "Author Name"}} (placeholder)
|
||||
- publisher: {{"@type": "Organization", "name": "Site Name", "logo": {{"@type": "ImageObject", "url": "logo-url"}}}}
|
||||
- author: {{"@type": "Person", "name": ""}} (leave empty, user will add author name)
|
||||
- publisher: {{"@type": "Organization", "name": ""}} (leave empty, user will add site name)
|
||||
- datePublished: {current_date}
|
||||
- dateModified: {current_date}
|
||||
- mainEntityOfPage: {{"@type": "WebPage", "@id": "canonical-url"}}
|
||||
@@ -633,35 +716,109 @@ Make it engaging, personalized for {target_audience}, and optimized for {industr
|
||||
raise e
|
||||
|
||||
def _calculate_optimization_score(self, core_metadata: Dict[str, Any], social_metadata: Dict[str, Any]) -> int:
|
||||
"""Calculate overall optimization score for the generated metadata"""
|
||||
"""Calculate metadata quality score based on content relevance and adherence to best practices.
|
||||
|
||||
Unlike the old completeness-based score (which just checked field existence),
|
||||
this assigns quality-weighted points based on how well each field is optimized.
|
||||
"""
|
||||
try:
|
||||
score = 0
|
||||
|
||||
# Check core metadata completeness
|
||||
if core_metadata.get('seo_title'):
|
||||
# Title quality (0-15): Length in 50-60 chars is optimal
|
||||
seo_title = core_metadata.get('seo_title', '')
|
||||
if seo_title:
|
||||
title_len = len(seo_title)
|
||||
if 50 <= title_len <= 60:
|
||||
score += 15
|
||||
if core_metadata.get('meta_description'):
|
||||
score += 15
|
||||
if core_metadata.get('url_slug'):
|
||||
elif 40 <= title_len <= 70:
|
||||
score += 10
|
||||
if core_metadata.get('blog_tags'):
|
||||
score += 10
|
||||
if core_metadata.get('blog_categories'):
|
||||
score += 10
|
||||
if core_metadata.get('social_hashtags'):
|
||||
score += 10
|
||||
if core_metadata.get('focus_keyword'):
|
||||
score += 10
|
||||
|
||||
# Check social metadata completeness
|
||||
if social_metadata.get('open_graph'):
|
||||
score += 10
|
||||
if social_metadata.get('twitter_card'):
|
||||
score += 5
|
||||
if social_metadata.get('json_ld_schema'):
|
||||
elif title_len > 0:
|
||||
score += 5
|
||||
|
||||
return min(score, 100) # Cap at 100
|
||||
# Meta description quality (0-15): Length in 150-160 chars is optimal, has CTA
|
||||
meta_desc = core_metadata.get('meta_description', '')
|
||||
if meta_desc:
|
||||
desc_len = len(meta_desc)
|
||||
desc_lower = meta_desc.lower()
|
||||
has_cta = any(phrase in desc_lower for phrase in ['learn', 'discover', 'find', 'get', 'explore', 'how to', 'why', 'tips', 'guide', 'try', 'start'])
|
||||
if 150 <= desc_len <= 160 and has_cta:
|
||||
score += 15
|
||||
elif 120 <= desc_len <= 170:
|
||||
score += 10 if has_cta else 7
|
||||
elif desc_len > 0:
|
||||
score += 4
|
||||
|
||||
# URL slug quality (0-10): Short, keyword-rich, no stop words
|
||||
url_slug = core_metadata.get('url_slug', '')
|
||||
if url_slug:
|
||||
slug_parts = url_slug.strip('/').split('/')
|
||||
slug_words = slug_parts[-1].split('-') if slug_parts else []
|
||||
if 2 <= len(slug_words) <= 5:
|
||||
score += 10
|
||||
elif len(slug_words) > 0:
|
||||
score += 5
|
||||
|
||||
# Tags and categories quality (0-20)
|
||||
blog_tags = core_metadata.get('blog_tags', [])
|
||||
blog_categories = core_metadata.get('blog_categories', [])
|
||||
if blog_tags and len(blog_tags) >= 3:
|
||||
score += 10
|
||||
elif blog_tags:
|
||||
score += 5
|
||||
if blog_categories and len(blog_categories) >= 1:
|
||||
score += 10
|
||||
elif blog_categories:
|
||||
score += 5
|
||||
|
||||
# Social hashtags (0-10): Relevant and non-spammy
|
||||
social_hashtags = core_metadata.get('social_hashtags', [])
|
||||
if social_hashtags and 3 <= len(social_hashtags) <= 8:
|
||||
score += 10
|
||||
elif social_hashtags:
|
||||
score += 5
|
||||
|
||||
# Focus keyword (0-10): Present and relevant
|
||||
focus_keyword = core_metadata.get('focus_keyword', '')
|
||||
if focus_keyword and seo_title and focus_keyword.lower() in seo_title.lower():
|
||||
score += 10
|
||||
elif focus_keyword:
|
||||
score += 4
|
||||
|
||||
# Open Graph quality (0-10): Has title, description, correct type
|
||||
og = social_metadata.get('open_graph', {})
|
||||
if og:
|
||||
og_score = 0
|
||||
if og.get('title') and len(og.get('title', '')) > 10:
|
||||
og_score += 4
|
||||
if og.get('description') and 100 <= len(og.get('description', '')) <= 200:
|
||||
og_score += 4
|
||||
if og.get('type') == 'article':
|
||||
og_score += 2
|
||||
score += og_score
|
||||
|
||||
# Twitter Card quality (0-5)
|
||||
twitter = social_metadata.get('twitter_card', {})
|
||||
if twitter:
|
||||
tw_score = 0
|
||||
if twitter.get('title') and len(twitter.get('title', '')) > 10:
|
||||
tw_score += 3
|
||||
if twitter.get('card') == 'summary_large_image':
|
||||
tw_score += 2
|
||||
score += tw_score
|
||||
|
||||
# JSON-LD quality (0-5): Has headline, description, datePublished
|
||||
json_ld = social_metadata.get('json_ld_schema', {})
|
||||
if json_ld:
|
||||
jl_score = 0
|
||||
if json_ld.get('headline'):
|
||||
jl_score += 2
|
||||
if json_ld.get('description'):
|
||||
jl_score += 2
|
||||
if json_ld.get('datePublished'):
|
||||
jl_score += 1
|
||||
score += jl_score
|
||||
|
||||
return min(score, 100)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to calculate optimization score: {e}")
|
||||
|
||||
@@ -2,6 +2,13 @@
|
||||
|
||||
Applies actionable SEO recommendations to existing blog content using the
|
||||
provider-agnostic `llm_text_gen` dispatcher. Ensures GPT_PROVIDER parity.
|
||||
|
||||
Key design principles:
|
||||
- Make TARGETED edits, not full rewrites
|
||||
- Preserve existing content structure and factual claims
|
||||
- Only modify sections that have applicable recommendations
|
||||
- Never fabricate statistics, case studies, or citations
|
||||
- Ground changes in research sources when available
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
@@ -15,7 +22,7 @@ logger = get_service_logger("blog_seo_recommendation_applier")
|
||||
|
||||
|
||||
class BlogSEORecommendationApplier:
|
||||
"""Apply actionable SEO recommendations to blog content."""
|
||||
"""Apply actionable SEO recommendations to blog content with targeted edits."""
|
||||
|
||||
def __init__(self):
|
||||
logger.debug("Initialized BlogSEORecommendationApplier")
|
||||
@@ -27,6 +34,7 @@ class BlogSEORecommendationApplier:
|
||||
raise ValueError("user_id is required for subscription checking. Please provide Clerk user ID.")
|
||||
|
||||
title = payload.get("title", "Untitled Blog")
|
||||
introduction = payload.get("introduction") or ""
|
||||
sections: List[Dict[str, Any]] = payload.get("sections", [])
|
||||
outline = payload.get("outline", [])
|
||||
research = payload.get("research", {})
|
||||
@@ -34,6 +42,7 @@ class BlogSEORecommendationApplier:
|
||||
persona = payload.get("persona", {})
|
||||
tone = payload.get("tone")
|
||||
audience = payload.get("audience")
|
||||
competitive_advantage = payload.get("competitive_advantage", "")
|
||||
|
||||
if not sections:
|
||||
return {"success": False, "error": "No sections provided for recommendation application"}
|
||||
@@ -42,21 +51,28 @@ class BlogSEORecommendationApplier:
|
||||
logger.warning("apply_recommendations called without recommendations")
|
||||
return {"success": True, "title": title, "sections": sections, "applied": []}
|
||||
|
||||
# Determine which sections actually need changes based on recommendations
|
||||
sections_to_edit = self._identify_affected_sections(sections, recommendations)
|
||||
|
||||
prompt = self._build_prompt(
|
||||
title=title,
|
||||
introduction=introduction,
|
||||
sections=sections,
|
||||
sections_to_edit=sections_to_edit,
|
||||
outline=outline,
|
||||
research=research,
|
||||
recommendations=recommendations,
|
||||
persona=persona,
|
||||
tone=tone,
|
||||
audience=audience,
|
||||
competitive_advantage=competitive_advantage,
|
||||
)
|
||||
|
||||
schema = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"title": {"type": "string"},
|
||||
"introduction": {"type": "string"},
|
||||
"sections": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
@@ -84,14 +100,14 @@ class BlogSEORecommendationApplier:
|
||||
"required": ["sections"],
|
||||
}
|
||||
|
||||
logger.info("Applying SEO recommendations via llm_text_gen")
|
||||
logger.info("Applying SEO recommendations via llm_text_gen (targeted edit mode)")
|
||||
|
||||
result = await asyncio.to_thread(
|
||||
llm_text_gen,
|
||||
prompt,
|
||||
None,
|
||||
schema,
|
||||
user_id, # Pass user_id for subscription checking
|
||||
user_id,
|
||||
max_tokens=8192,
|
||||
)
|
||||
|
||||
@@ -103,7 +119,12 @@ class BlogSEORecommendationApplier:
|
||||
raw_sections = result.get("sections", []) or []
|
||||
normalized_sections: List[Dict[str, Any]] = []
|
||||
|
||||
# Build lookup table from updated sections using their identifiers
|
||||
if len(raw_sections) != len(sections):
|
||||
logger.warning(
|
||||
f"LLM returned {len(raw_sections)} sections but {len(sections)} were sent. "
|
||||
"Extra sections will be ignored; missing sections fall back to original content."
|
||||
)
|
||||
|
||||
updated_map: Dict[str, Dict[str, Any]] = {}
|
||||
for updated in raw_sections:
|
||||
section_id = str(
|
||||
@@ -146,7 +167,6 @@ class BlogSEORecommendationApplier:
|
||||
mapped = updated_map.get(fallback_id)
|
||||
|
||||
if not mapped and raw_sections:
|
||||
# Fall back to positional match if identifier lookup failed
|
||||
candidate = raw_sections[index] if index < len(raw_sections) else {}
|
||||
heading = (
|
||||
candidate.get("heading")
|
||||
@@ -166,7 +186,6 @@ class BlogSEORecommendationApplier:
|
||||
}
|
||||
|
||||
if not mapped:
|
||||
# Fallback to original content if nothing else available
|
||||
mapped = {
|
||||
"id": fallback_id,
|
||||
"heading": original.get("heading") or original.get("title") or f"Section {index + 1}",
|
||||
@@ -180,43 +199,147 @@ class BlogSEORecommendationApplier:
|
||||
|
||||
logger.info("SEO recommendations applied successfully")
|
||||
|
||||
updated_introduction = result.get("introduction") or ""
|
||||
if updated_introduction and updated_introduction != introduction:
|
||||
logger.info(f"Introduction updated: {len(updated_introduction)} chars")
|
||||
elif not updated_introduction:
|
||||
updated_introduction = introduction
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"title": result.get("title", title),
|
||||
"introduction": updated_introduction,
|
||||
"sections": normalized_sections,
|
||||
"applied": applied,
|
||||
}
|
||||
|
||||
def _identify_affected_sections(self, sections: List[Dict[str, Any]], recommendations: List[Dict[str, Any]]) -> List[str]:
|
||||
"""Identify which section IDs are likely affected by the recommendations.
|
||||
|
||||
Maps recommendation categories to section headings for targeted editing.
|
||||
Returns a list of section IDs that should be edited.
|
||||
"""
|
||||
affected_ids = set()
|
||||
|
||||
for rec in recommendations:
|
||||
category = (rec.get("category") or "").lower()
|
||||
rec_text = (rec.get("recommendation") or "").lower()
|
||||
|
||||
# Structure recommendations affect first/last sections or all sections
|
||||
if category == "structure":
|
||||
if sections:
|
||||
affected_ids.add(str(sections[0].get("id", "section_1")))
|
||||
affected_ids.add(str(sections[-1].get("id", f"section_{len(sections)}")))
|
||||
# "Add more sections" or "too many sections" affects all
|
||||
if "more section" in rec_text or "combine" in rec_text or "flow" in rec_text:
|
||||
for s in sections:
|
||||
affected_ids.add(str(s.get("id", "")))
|
||||
continue
|
||||
|
||||
# Keyword recommendations affect all sections (keywords should be spread)
|
||||
if category == "keywords":
|
||||
for s in sections:
|
||||
affected_ids.add(str(s.get("id", "")))
|
||||
continue
|
||||
|
||||
# Readability affects all sections
|
||||
if category == "readability":
|
||||
for s in sections:
|
||||
affected_ids.add(str(s.get("id", "")))
|
||||
continue
|
||||
|
||||
# Content quality — try to match recommendation to specific section headings
|
||||
if category in ("content quality", "content", "seo"):
|
||||
heading_keywords = {
|
||||
s.get("heading", "").lower(): str(s.get("id", ""))
|
||||
for s in sections
|
||||
}
|
||||
matched = False
|
||||
for heading_lower, section_id in heading_keywords.items():
|
||||
rec_words = rec_text.split()
|
||||
if any(word in heading_lower for word in rec_words if len(word) > 3):
|
||||
affected_ids.add(section_id)
|
||||
matched = True
|
||||
if not matched:
|
||||
# Affect first and last sections (intro/conclusion) as common targets
|
||||
if sections:
|
||||
affected_ids.add(str(sections[0].get("id", "section_1")))
|
||||
affected_ids.add(str(sections[-1].get("id", f"section_{len(sections)}")))
|
||||
|
||||
# Filter out empty IDs and return
|
||||
return [sid for sid in affected_ids if sid]
|
||||
|
||||
def _build_prompt(
|
||||
self,
|
||||
*,
|
||||
title: str,
|
||||
introduction: str,
|
||||
sections: List[Dict[str, Any]],
|
||||
sections_to_edit: List[str],
|
||||
outline: List[Dict[str, Any]],
|
||||
research: Dict[str, Any],
|
||||
recommendations: List[Dict[str, Any]],
|
||||
persona: Dict[str, Any],
|
||||
tone: str | None,
|
||||
audience: str | None,
|
||||
competitive_advantage: str = "",
|
||||
) -> str:
|
||||
"""Construct prompt for applying recommendations."""
|
||||
"""Construct prompt for applying targeted recommendations."""
|
||||
|
||||
sections_str = []
|
||||
# Build research context block
|
||||
research_block = ""
|
||||
keyword_analysis = research.get("keyword_analysis", {}) if research else {}
|
||||
primary_keywords = ", ".join(keyword_analysis.get("primary", [])[:8]) or "None"
|
||||
competitor_analysis = research.get("competitor_analysis", {}) if research else {}
|
||||
search_queries = research.get("search_queries", []) if research else []
|
||||
suggested_angles = research.get("suggested_angles", []) if research else []
|
||||
content_gaps = competitor_analysis.get("content_gaps", []) if competitor_analysis else []
|
||||
competitive_advantages = competitor_analysis.get("competitive_advantages", []) if competitor_analysis else []
|
||||
|
||||
research_block += f"\nPRIMARY KEYWORDS: {primary_keywords}"
|
||||
if content_gaps:
|
||||
research_block += f"\nCONTENT GAPS (address these in your edits): {', '.join(content_gaps[:5])}"
|
||||
if competitive_advantages:
|
||||
research_block += f"\nKEY DIFFERENTIATORS (emphasize these): {', '.join(competitive_advantages[:3])}"
|
||||
if competitive_advantage:
|
||||
research_block += f"\nPRIMARY ADVANTAGE: {competitive_advantage}"
|
||||
if search_queries:
|
||||
research_block += f"\nTARGET SEARCH QUERIES: {', '.join(search_queries[:5])}"
|
||||
if suggested_angles:
|
||||
research_block += f"\nCONTENT ANGLES: {', '.join(suggested_angles[:3])}"
|
||||
|
||||
# Build per-section content with edit markers
|
||||
sections_content = []
|
||||
for section in sections:
|
||||
sections_str.append(
|
||||
f"ID: {section.get('id', 'section')}, Heading: {section.get('heading', 'Untitled')}\n"
|
||||
f"Current Content:\n{section.get('content', '')}\n"
|
||||
)
|
||||
section_id = str(section.get("id", "section"))
|
||||
heading = section.get("heading", "Untitled")
|
||||
content = section.get("content", "")
|
||||
needs_edit = section_id in sections_to_edit
|
||||
|
||||
outline_str = "\n".join(
|
||||
[
|
||||
f"- {item.get('heading', 'Section')} (Target words: {item.get('target_words', 'N/A')})"
|
||||
for item in outline
|
||||
]
|
||||
)
|
||||
section_text = f"--- SECTION (ID: {section_id}, Heading: \"{heading}\")"
|
||||
if needs_edit:
|
||||
section_text += " [NEEDS EDITS based on recommendations]"
|
||||
else:
|
||||
section_text += " [KEEP AS-IS - no changes needed]"
|
||||
section_text += f" ---\n{content}\n"
|
||||
sections_content.append(section_text)
|
||||
|
||||
research_summary = research.get("keyword_analysis", {}) if research else {}
|
||||
primary_keywords = ", ".join(research_summary.get("primary", [])[:10]) or "None"
|
||||
sections_str = "\n\n".join(sections_content)
|
||||
|
||||
# Build outline with subheadings and key points
|
||||
outline_parts = []
|
||||
for item in outline:
|
||||
heading = item.get("heading", "Section")
|
||||
target_words = item.get("target_words", "N/A")
|
||||
subheadings = item.get("subheadings", [])
|
||||
key_points = item.get("key_points", [])
|
||||
line = f"- {heading} (Target: {target_words} words)"
|
||||
if subheadings:
|
||||
line += f" | Subheadings: {', '.join(subheadings[:4])}"
|
||||
if key_points:
|
||||
line += f" | Key points: {', '.join(key_points[:4])}"
|
||||
outline_parts.append(line)
|
||||
outline_str = "\n".join(outline_parts) if outline_parts else "No outline supplied"
|
||||
|
||||
recommendations_str = []
|
||||
for rec in recommendations:
|
||||
@@ -229,7 +352,7 @@ class BlogSEORecommendationApplier:
|
||||
persona_str = (
|
||||
f"Persona: {persona}\n"
|
||||
if persona
|
||||
else "Persona: (not provided)\n"
|
||||
else ""
|
||||
)
|
||||
|
||||
style_guidance = []
|
||||
@@ -239,36 +362,47 @@ class BlogSEORecommendationApplier:
|
||||
style_guidance.append(f"Target audience: {audience}")
|
||||
style_str = "\n".join(style_guidance) if style_guidance else "Maintain current tone and audience alignment."
|
||||
|
||||
prompt = f"""
|
||||
You are an expert SEO content strategist. Update the blog content to apply the actionable recommendations.
|
||||
intro_text = introduction if introduction else "(No introduction currently — write one ONLY if a recommendation specifically asks for it)"
|
||||
|
||||
Current Title: {title}
|
||||
prompt = f"""You are a careful SEO content editor making TARGETED edits to an existing blog post. Your job is to apply specific SEO recommendations with PRECISION — not to rewrite the entire post.
|
||||
|
||||
Primary Keywords (for context): {primary_keywords}
|
||||
CRITICAL RULES — YOU MUST FOLLOW THESE:
|
||||
1. PRESERVE existing content. Only make MINIMAL, targeted changes to address specific recommendations. Do NOT rewrite sections that are working well.
|
||||
2. NEVER fabricate statistics, case studies, expert quotes, research data, or specific numbers unless they are explicitly stated in the research context below.
|
||||
3. NEVER add content that contradicts or goes beyond what the research sources support.
|
||||
4. KEEP the same emotional tone and writing style as the original content.
|
||||
5. Return EXACTLY the same number of sections with EXACTLY the same IDs. Do NOT add, remove, or rename sections.
|
||||
6. For sections marked [KEEP AS-IS], return the content UNCHANGED — copy it verbatim.
|
||||
7. For sections marked [NEEDS EDITS], make ONLY the specific changes needed to address the applicable recommendations.
|
||||
8. Do NOT add introductions, conclusions, or case studies unless a recommendation EXPLICITLY asks for one.
|
||||
|
||||
Outline Overview:
|
||||
{outline_str or 'No outline supplied'}
|
||||
{research_block}
|
||||
|
||||
Existing Sections:
|
||||
{''.join(sections_str)}
|
||||
PLANNED OUTLINE STRUCTURE:
|
||||
{outline_str}
|
||||
|
||||
Actionable Recommendations to Apply:
|
||||
CURRENT TITLE: {title}
|
||||
|
||||
CURRENT INTRODUCTION:
|
||||
{intro_text}
|
||||
|
||||
CURRENT SECTIONS:
|
||||
{sections_str}
|
||||
|
||||
RECOMMENDATIONS TO APPLY:
|
||||
{''.join(recommendations_str)}
|
||||
{persona_str}{style_str}
|
||||
|
||||
{persona_str}
|
||||
{style_str}
|
||||
|
||||
Instructions:
|
||||
1. Carefully apply the recommendations while preserving factual accuracy and research alignment.
|
||||
2. Keep section identifiers (IDs) unchanged so the frontend can map updates correctly.
|
||||
3. Improve clarity, flow, and SEO optimization per the guidance.
|
||||
4. Return updated sections in the requested JSON format.
|
||||
5. Provide a short summary of which recommendations were addressed.
|
||||
INSTRUCTIONS:
|
||||
- For sections marked [KEEP AS-IS]: Copy the content EXACTLY as provided. Do not change a single word.
|
||||
- For sections marked [NEEDS EDITS]: Make the MINIMUM changes needed to address the recommendations. If a recommendation says "add transition words", add 2-3 transitions — do not rewrite the paragraph. If it says "use more varied vocabulary", replace 2-3 repetitive words — do not rewrite the section.
|
||||
- If a recommendation asks for an introduction and none exists, write a brief 2-3 sentence introduction that naturally leads into the first section. Do NOT fabricate hooks or statistics.
|
||||
- If a recommendation asks for a conclusion, append 2-3 sentences summarizing key takeaways to the LAST section. Do NOT fabricate conclusions that don't follow from the actual content.
|
||||
- Return ALL sections, including the ones you did NOT change.
|
||||
- Provide a summary of which recommendations you addressed and what specific changes you made.
|
||||
"""
|
||||
|
||||
return prompt
|
||||
|
||||
|
||||
__all__ = ["BlogSEORecommendationApplier"]
|
||||
|
||||
|
||||
|
||||
@@ -36,6 +36,8 @@ from models.podcast_models import PodcastProject
|
||||
from models.research_models import ResearchProject
|
||||
# Video Studio models
|
||||
from models.video_models import VideoGenerationTask
|
||||
# YouTube Creator task models
|
||||
from models.youtube_task_models import YouTubeVideoTask
|
||||
# Bing Analytics models
|
||||
from models.bing_analytics_models import Base as BingAnalyticsBase
|
||||
|
||||
|
||||
@@ -47,6 +47,10 @@ class GSCBrainstormService:
|
||||
if not site_url:
|
||||
sites = self.gsc_service.get_site_list(user_id)
|
||||
if not sites:
|
||||
logger.info(f"No GSC sites found for user {user_id} — falling back to AI-only brainstorm")
|
||||
fallback = self._generate_ai_only_brainstorm(user_id, keywords, None, None, None)
|
||||
if fallback:
|
||||
return fallback
|
||||
return {
|
||||
"error": "No GSC sites found. Make sure your site is verified in Google Search Console.",
|
||||
"content_opportunities": [],
|
||||
@@ -70,6 +74,10 @@ class GSCBrainstormService:
|
||||
)
|
||||
|
||||
if "error" in analytics:
|
||||
logger.info(f"GSC analytics error for user {user_id}: {analytics.get('error')} — falling back to AI-only brainstorm")
|
||||
fallback = self._generate_ai_only_brainstorm(user_id, keywords, site_url, start_date, end_date)
|
||||
if fallback:
|
||||
return fallback
|
||||
return {
|
||||
"error": analytics.get("error", "Failed to fetch GSC data"),
|
||||
"content_opportunities": [],
|
||||
@@ -88,6 +96,10 @@ class GSCBrainstormService:
|
||||
pages_data = self._parse_page_rows(page_rows)
|
||||
|
||||
if not keywords_data:
|
||||
logger.info(f"No GSC keyword data for user {user_id} — falling back to AI-only brainstorm")
|
||||
fallback = self._generate_ai_only_brainstorm(user_id, keywords, site_url, start_date, end_date)
|
||||
if fallback:
|
||||
return fallback
|
||||
return {
|
||||
"error": "No keyword data available for the selected period. This usually means your site is new to GSC or hasn't received search traffic yet.",
|
||||
"content_opportunities": [],
|
||||
@@ -110,6 +122,10 @@ class GSCBrainstormService:
|
||||
logger.info(f"After topic filter: {len(keywords_data)} keywords, {len(pages_data)} pages")
|
||||
|
||||
if not keywords_data:
|
||||
logger.info(f"No GSC keywords matched topic '{keywords}' for user {user_id} — falling back to AI-only brainstorm")
|
||||
fallback = self._generate_ai_only_brainstorm(user_id, keywords, site_url, start_date, end_date)
|
||||
if fallback:
|
||||
return fallback
|
||||
return {
|
||||
"error": "No GSC keywords matched your topic. Try a broader research topic or check your GSC data.",
|
||||
"content_opportunities": [],
|
||||
@@ -155,6 +171,128 @@ class GSCBrainstormService:
|
||||
"summary": summary,
|
||||
}
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# AI-only fallback (when GSC has no data)
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
def _generate_ai_only_brainstorm(
    self,
    user_id: str,
    keywords: str,
    site_url: Optional[str],
    start_date: Optional[str],
    end_date: Optional[str],
) -> Optional[Dict[str, Any]]:
    """
    Build a brainstorm result from the LLM alone, for use when GSC has no data.

    The returned dict has the same top-level keys as a regular brainstorm
    result: the GSC-derived lists are empty, the summary metrics are zero,
    and ``ai_recommendations`` holds the parsed LLM response.

    Returns ``None`` when the LLM returns nothing, when the response cannot
    be parsed, or when any exception is raised along the way (the exception
    is logged as a warning and not propagated).
    """
    try:
        prompt = f"""You are an expert content strategist helping a blog writer brainstorm topic ideas.

The user is interested in writing about: "{keywords}"

Since they are a new or early-stage website, there is no Google Search Console data available yet.
Generate compelling blog post ideas they can write RIGHT NOW to start building traffic.

For each suggestion include:
1. A specific, compelling blog post TITLE (not a vague topic)
2. The primary keyword it should target
3. Why this topic will perform well (search demand, competition level, timing)
4. The recommended content format (how-to, listicle, comparison, pillar page, etc.)
5. Estimated difficulty level (Easy / Medium / Hard)

Return your response in this EXACT JSON format (no markdown, no code fences):
{{
"immediate_opportunities": [
{{
"title": "Specific Blog Post Title",
"keyword": "primary target keyword",
"reason": "Why this will perform well",
"format": "How-To Guide | Listicle | Comparison | Pillar Page | etc.",
"estimated_impact": "Beginner-friendly traffic opportunity"
}}
],
"content_strategy": [
{{
"title": "Pillar Content Title",
"keyword": "target keyword",
"reason": "Strategic importance for building topical authority",
"format": "Pillar Page | Ultimate Guide | Resource",
"estimated_impact": "Foundation for long-term organic growth"
}}
],
"long_term_strategy": [
{{
"title": "Authority Building Title",
"keyword": "target keyword",
"reason": "Establishes expertise and captures high-intent traffic over time",
"format": "Research-Backed Analysis | Expert Roundup | Original Study",
"estimated_impact": "Compound traffic growth over 6-12 months"
}}
]
}}

IMPORTANT:
- Provide 3-5 items in each category
- All suggestions MUST relate to the user's interest in "{keywords}"
- Titles should be specific, compelling, and SEO-aware
- Prioritize topics with clear search intent and realistic ranking potential for a new site
- Include a mix of easy wins (long-tail, low competition) and strategic pillar content
- For estimated_impact, describe the opportunity type (not click numbers since we lack data)"""

        system_prompt = (
            "You are an expert content strategist specializing in SEO and blog topic generation. "
            "You help new websites identify high-potential content topics even without search console data. "
            "You always respond with valid JSON matching the requested format exactly."
        )

        llm_output = llm_text_gen(
            prompt=prompt,
            system_prompt=system_prompt,
            user_id=user_id,
            flow_type="gsc_brainstorm_fallback",
        )
        # Nothing came back from the model: report "no fallback available".
        if not llm_output:
            return None

        recommendations = self._parse_ai_response(llm_output)
        # An unparseable / empty response is treated the same as no response.
        if not recommendations:
            return None

        # No GSC data exists on this path, so every metric is zeroed out and
        # the optional inputs are normalised to empty strings.
        date_range = {
            "start": start_date or "",
            "end": end_date or "",
        }
        keyword_distribution = {
            "positions_1_3": 0,
            "positions_4_10": 0,
            "positions_11_20": 0,
            "positions_21_plus": 0,
        }
        summary = {
            "site_url": site_url or "",
            "date_range": date_range,
            "total_keywords_analyzed": 0,
            "total_impressions": 0,
            "total_clicks": 0,
            "avg_ctr": 0,
            "avg_position": 0,
            "ctr_vs_benchmark": 0,
            "health_score": 0,
            "keyword_distribution": keyword_distribution,
            "top_keywords": [],
            "top_pages": [],
            "note": "AI-generated suggestions based on your topic. No GSC data was available — these are strategic recommendations, not data-driven insights."
        }
        return {
            "content_opportunities": [],
            "keyword_gaps": [],
            "quick_wins": [],
            "page_opportunities": [],
            "ai_recommendations": recommendations,
            "summary": summary,
        }
    except Exception as e:
        # Best-effort fallback: a failure here must not break the caller,
        # which falls back to its own error payload when None is returned.
        logger.warning(f"AI-only brainstorm fallback failed for user {user_id}: {e}")

    return None
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# Data parsing helpers
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
@@ -188,7 +188,6 @@ class GSCService:
|
||||
|
||||
with sqlite3.connect(db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
# Check if table exists first to avoid error on fresh DB
|
||||
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='gsc_credentials'")
|
||||
if not cursor.fetchone():
|
||||
return None
|
||||
@@ -204,7 +203,6 @@ class GSCService:
|
||||
|
||||
credentials_data = json.loads(result[0])
|
||||
|
||||
# Check for required fields, but allow connection without refresh token
|
||||
required_fields = ['token_uri', 'client_id', 'client_secret']
|
||||
missing_fields = [field for field in required_fields if not credentials_data.get(field)]
|
||||
|
||||
@@ -214,7 +212,6 @@ class GSCService:
|
||||
|
||||
credentials = Credentials.from_authorized_user_info(credentials_data, self.scopes)
|
||||
|
||||
# Refresh token if needed and possible
|
||||
if credentials.expired:
|
||||
if credentials.refresh_token:
|
||||
try:
|
||||
@@ -222,9 +219,11 @@ class GSCService:
|
||||
self.save_user_credentials(user_id, credentials)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to refresh GSC token for user {user_id}: {e}")
|
||||
self.clear_incomplete_credentials(user_id)
|
||||
return None
|
||||
else:
|
||||
logger.warning(f"GSC token expired for user {user_id} but no refresh token available - user needs to re-authorize")
|
||||
self.clear_incomplete_credentials(user_id)
|
||||
return None
|
||||
|
||||
return credentials
|
||||
@@ -288,7 +287,6 @@ class GSCService:
|
||||
try:
|
||||
logger.info(f"Handling GSC OAuth callback with state: {state[:20]}...")
|
||||
|
||||
# Extract user_id from state
|
||||
if ':' not in state:
|
||||
logger.error(f"Invalid GSC state format: {state}")
|
||||
return False
|
||||
@@ -300,17 +298,19 @@ class GSCService:
|
||||
logger.error(f"User database not found for user {user_id}")
|
||||
return False
|
||||
|
||||
# Verify state in user's DB (but don't delete yet — delete after successful token exchange)
|
||||
# Verify state in user's DB (best effort — if missing, attempt code exchange anyway)
|
||||
state_valid = False
|
||||
try:
|
||||
with sqlite3.connect(db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute('SELECT user_id FROM gsc_oauth_states WHERE state = ?', (state,))
|
||||
result = cursor.fetchone()
|
||||
state_valid = cursor.fetchone() is not None
|
||||
except Exception as state_err:
|
||||
logger.warning(f"State verification query failed, proceeding anyway: {state_err}")
|
||||
|
||||
if not result:
|
||||
logger.error(f"Invalid or expired GSC OAuth state for user {user_id}")
|
||||
return False
|
||||
if not state_valid:
|
||||
logger.warning(f"GSC OAuth state not found in DB for user {user_id} — will attempt code exchange without state verification")
|
||||
|
||||
# Exchange code for credentials
|
||||
if not self.client_config:
|
||||
logger.error("Cannot handle callback: Client configuration not loaded")
|
||||
return False
|
||||
@@ -325,7 +325,12 @@ class GSCService:
|
||||
flow.fetch_token(code=authorization_code)
|
||||
credentials = flow.credentials
|
||||
|
||||
# State consumed successfully — clean up
|
||||
if not credentials or not credentials.token:
|
||||
logger.error(f"Token exchange returned empty credentials for user {user_id}")
|
||||
return False
|
||||
|
||||
# Clean up state if it was valid
|
||||
if state_valid:
|
||||
try:
|
||||
with sqlite3.connect(db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
@@ -334,11 +339,15 @@ class GSCService:
|
||||
except Exception as cleanup_err:
|
||||
logger.warning(f"Failed to clean up OAuth state: {cleanup_err}")
|
||||
|
||||
# Save credentials
|
||||
return self.save_user_credentials(user_id, credentials)
|
||||
result = self.save_user_credentials(user_id, credentials)
|
||||
if result:
|
||||
logger.info(f"GSC OAuth callback succeeded for user {user_id} (state_valid={state_valid})")
|
||||
else:
|
||||
logger.error(f"GSC OAuth callback: token exchange succeeded but failed to save credentials for user {user_id}")
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error handling GSC OAuth callback: {e}")
|
||||
logger.error(f"Error handling GSC OAuth callback for user {user_id if 'user_id' in dir() else 'unknown'}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
@@ -726,6 +735,8 @@ class GSCService:
|
||||
with sqlite3.connect(db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute('DELETE FROM gsc_credentials WHERE user_id = ?', (user_id,))
|
||||
cursor.execute('DELETE FROM gsc_data_cache WHERE user_id = ?', (user_id,))
|
||||
cursor.execute('DELETE FROM gsc_oauth_states WHERE user_id = ?', (user_id,))
|
||||
conn.commit()
|
||||
|
||||
logger.info(f"Cleared incomplete GSC credentials for user: {user_id}")
|
||||
|
||||
@@ -47,7 +47,10 @@ class WixAuthService:
|
||||
'code_verifier': code_verifier,
|
||||
}
|
||||
token_url = f'{self.base_url}/oauth2/token'
|
||||
logger.info(f"Wix token exchange: client_id={self.client_id}, redirect_uri={self.redirect_uri}, code_verifier_prefix={code_verifier[:10]}...")
|
||||
response = requests.post(token_url, headers=headers, data=data)
|
||||
if response.status_code != 200:
|
||||
logger.error(f"Wix token exchange failed: {response.status_code} {response.text}")
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
@@ -66,9 +69,17 @@ class WixAuthService:
|
||||
def get_site_info(self, access_token: str) -> Dict[str, Any]:
|
||||
headers = {
|
||||
'Authorization': f'Bearer {access_token}',
|
||||
'Content-Type': 'application/json'
|
||||
'Content-Type': 'application/json',
|
||||
}
|
||||
if self.client_id:
|
||||
headers['wix-client-id'] = self.client_id
|
||||
response = requests.get(f"{self.base_url}/sites/v1/site", headers=headers)
|
||||
if response.status_code == 404:
|
||||
logger.warning("Wix site info not found (404) — user may not have a published site or token lacks sites scope")
|
||||
return {"_no_site": True, "error": "No Wix site found for this account"}
|
||||
if response.status_code == 401:
|
||||
logger.warning("Wix site info request unauthorized (401) — token expired or invalid")
|
||||
return {"_auth_failed": True, "error": "Token expired or invalid — reconnect required"}
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
|
||||
@@ -55,19 +55,20 @@ def get_wix_headers(
|
||||
if token.startswith('OauthNG.JWS.'):
|
||||
# Wix OAuth token - use Bearer prefix
|
||||
headers['Authorization'] = f'Bearer {token}'
|
||||
logger.debug(f"Using Wix OAuth token with Bearer prefix (OauthNG.JWS. format detected)")
|
||||
else:
|
||||
# Count dots - JWT has exactly 2 dots
|
||||
dot_count = token.count('.')
|
||||
|
||||
if dot_count == 2 and len(token) < 500:
|
||||
# Likely OAuth JWT token - use Bearer prefix
|
||||
headers['Authorization'] = f'Bearer {token}'
|
||||
logger.debug(f"Using OAuth Bearer token (JWT format detected)")
|
||||
else:
|
||||
# Likely API key - use directly without Bearer prefix
|
||||
logger.debug("Using Wix OAuth token with Bearer prefix (OauthNG.JWS. format detected)")
|
||||
elif token.startswith('IST.'):
|
||||
# Wix Headless API key - send as-is, no Bearer
|
||||
headers['Authorization'] = token
|
||||
logger.debug(f"Using API key for authorization (non-JWT format detected)")
|
||||
logger.debug("Using Wix API key for authorization (IST. format detected)")
|
||||
else:
|
||||
# Standard JWT has exactly 2 dots separating header.payload.signature
|
||||
dot_count = token.count('.')
|
||||
if dot_count == 2:
|
||||
headers['Authorization'] = f'Bearer {token}'
|
||||
logger.debug("Using OAuth Bearer token (JWT format: 2 dots detected)")
|
||||
else:
|
||||
headers['Authorization'] = token
|
||||
logger.debug("Using token as-is (non-JWT format detected)")
|
||||
|
||||
if client_id:
|
||||
headers['wix-client-id'] = client_id
|
||||
@@ -125,8 +126,10 @@ def should_use_api_key(access_token: Optional[str] = None) -> bool:
|
||||
access_token = str(access_token)
|
||||
|
||||
token = access_token.strip()
|
||||
if token.count('.') != 2 or len(token) > 500:
|
||||
return True
|
||||
|
||||
if token.startswith('OauthNG.JWS.'):
|
||||
return False
|
||||
if token.startswith('IST.'):
|
||||
return True
|
||||
# Standard JWT has exactly 2 dots
|
||||
return token.count('.') != 2
|
||||
|
||||
|
||||
@@ -2,60 +2,28 @@ from typing import Any, Dict, List, Optional
|
||||
import requests
|
||||
from loguru import logger
|
||||
|
||||
from .retry import wix_api_call_with_retry, WixAPIError
|
||||
from .auth_utils import get_wix_headers
|
||||
|
||||
|
||||
class WixBlogService:
|
||||
"""Service for Wix Blog API operations with retry logic and error handling."""
|
||||
|
||||
def __init__(self, base_url: str, client_id: Optional[str]):
|
||||
self.base_url = base_url
|
||||
self.client_id = client_id
|
||||
|
||||
def headers(self, access_token: str, extra: Optional[Dict[str, str]] = None) -> Dict[str, str]:
|
||||
h: Dict[str, str] = {
|
||||
'Content-Type': 'application/json',
|
||||
}
|
||||
|
||||
# Support both OAuth tokens and API keys
|
||||
# API keys don't use 'Bearer' prefix
|
||||
# Ensure access_token is a string (defensive check)
|
||||
if access_token:
|
||||
# Normalize token to string if needed
|
||||
if not isinstance(access_token, str):
|
||||
from .utils import normalize_token_string
|
||||
normalized = normalize_token_string(access_token)
|
||||
if normalized:
|
||||
access_token = normalized
|
||||
else:
|
||||
access_token = str(access_token)
|
||||
|
||||
token = access_token.strip()
|
||||
if token:
|
||||
# CRITICAL: Wix OAuth tokens can have format "OauthNG.JWS.xxx.yyy.zzz"
|
||||
# These should use "Bearer" prefix even though they have more than 2 dots
|
||||
if token.startswith('OauthNG.JWS.'):
|
||||
# Wix OAuth token - use Bearer prefix
|
||||
h['Authorization'] = f'Bearer {token}'
|
||||
logger.debug("Using Wix OAuth token with Bearer prefix (OauthNG.JWS. format detected)")
|
||||
elif '.' not in token or len(token) > 500:
|
||||
# Likely an API key - use directly without Bearer prefix
|
||||
h['Authorization'] = token
|
||||
logger.debug("Using API key for authorization")
|
||||
else:
|
||||
# Standard JWT OAuth token (xxx.yyy.zzz format) - use Bearer prefix
|
||||
h['Authorization'] = f'Bearer {token}'
|
||||
logger.debug("Using OAuth Bearer token for authorization")
|
||||
|
||||
if self.client_id:
|
||||
h['wix-client-id'] = self.client_id
|
||||
if extra:
|
||||
h.update(extra)
|
||||
return h
|
||||
"""Build headers with automatic token type detection."""
|
||||
return get_wix_headers(access_token, client_id=self.client_id, extra=extra)
|
||||
|
||||
def create_draft_post(self, access_token: str, payload: Dict[str, Any], extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
|
||||
"""Create draft post with consolidated logging"""
|
||||
"""Create draft post with retry logic and consolidated logging."""
|
||||
from .logger import wix_logger
|
||||
import json
|
||||
import traceback as tb
|
||||
|
||||
# Build payload summary for logging
|
||||
# Build payload summary for logging (safe, no sensitive data)
|
||||
payload_summary = {}
|
||||
if 'draftPost' in payload:
|
||||
dp = payload['draftPost']
|
||||
@@ -66,64 +34,114 @@ class WixBlogService:
|
||||
}
|
||||
|
||||
request_headers = self.headers(access_token, extra_headers)
|
||||
logger.debug(f"Wix API request headers: {list(request_headers.keys())}")
|
||||
if 'wix-site-id' in request_headers:
|
||||
logger.info(f"Wix API call includes wix-site-id: {request_headers['wix-site-id'][:8]}...")
|
||||
else:
|
||||
logger.warning("Wix API call MISSING wix-site-id header — this may fail for multi-site tokens")
|
||||
|
||||
url = f"{self.base_url}/blog/v3/draft-posts"
|
||||
|
||||
try:
|
||||
response = requests.post(f"{self.base_url}/blog/v3/draft-posts", headers=request_headers, json=payload)
|
||||
except TypeError as e:
|
||||
logger.error(f"TypeError during requests.post in create_draft_post: {e}")
|
||||
logger.error(f"Traceback: {tb.format_exc()}")
|
||||
logger.error(f"access_token type: {type(access_token)}")
|
||||
logger.error(f"payload type: {type(payload)}, keys: {list(payload.keys()) if isinstance(payload, dict) else 'N/A'}")
|
||||
result = wix_api_call_with_retry('POST', url, request_headers, json_payload=payload, max_attempts=3)
|
||||
wix_logger.log_api_call("POST", "/blog/v3/draft-posts", 200, payload_summary, None)
|
||||
return result
|
||||
except WixAPIError as e:
|
||||
wix_logger.log_api_call("POST", "/blog/v3/draft-posts", e.status_code or 500, payload_summary, e.response_body)
|
||||
logger.error(f"Wix create_draft_post failed after retries: HTTP {e.status_code} - {e.response_body}")
|
||||
raise
|
||||
except Exception as e:
|
||||
wix_logger.log_api_call("POST", "/blog/v3/draft-posts", 500, payload_summary, str(e)[:200])
|
||||
logger.error(f"Unexpected error in create_draft_post: {e}")
|
||||
raise
|
||||
|
||||
# Consolidated error logging
|
||||
error_body = None
|
||||
if response.status_code >= 400:
|
||||
try:
|
||||
error_body = response.json()
|
||||
except:
|
||||
error_body = {'message': response.text[:200]}
|
||||
|
||||
wix_logger.log_api_call("POST", "/blog/v3/draft-posts", response.status_code, payload_summary, error_body)
|
||||
|
||||
if response.status_code >= 400:
|
||||
# Only show detailed error info for debugging
|
||||
if response.status_code == 500:
|
||||
logger.debug(f" Full error: {json.dumps(error_body, indent=2) if isinstance(error_body, dict) else error_body}")
|
||||
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
def publish_draft(self, access_token: str, draft_post_id: str, extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
|
||||
response = requests.post(f"{self.base_url}/blog/v3/draft-posts/{draft_post_id}/publish", headers=self.headers(access_token, extra_headers))
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
"""Publish a draft post with retry logic."""
|
||||
url = f"{self.base_url}/blog/v3/draft-posts/{draft_post_id}/publish"
|
||||
headers = self.headers(access_token, extra_headers)
|
||||
|
||||
try:
|
||||
return wix_api_call_with_retry('POST', url, headers, max_attempts=3)
|
||||
except WixAPIError as e:
|
||||
logger.error(f"Wix publish_draft failed: HTTP {e.status_code} - {e.response_body}")
|
||||
raise
|
||||
|
||||
def list_categories(self, access_token: str, extra_headers: Optional[Dict[str, str]] = None) -> List[Dict[str, Any]]:
|
||||
response = requests.get(f"{self.base_url}/blog/v3/categories", headers=self.headers(access_token, extra_headers))
|
||||
response.raise_for_status()
|
||||
return response.json().get('categories', [])
|
||||
"""List blog categories with retry logic."""
|
||||
url = f"{self.base_url}/blog/v3/categories"
|
||||
headers = self.headers(access_token, extra_headers)
|
||||
|
||||
def create_category(self, access_token: str, label: str, description: Optional[str] = None, language: Optional[str] = None, extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
|
||||
try:
|
||||
result = wix_api_call_with_retry('GET', url, headers, max_attempts=3)
|
||||
return result.get('categories', [])
|
||||
except WixAPIError as e:
|
||||
logger.error(f"Wix list_categories failed: HTTP {e.status_code}")
|
||||
raise
|
||||
|
||||
def create_category(self, access_token: str, label: str, description: Optional[str] = None,
|
||||
language: Optional[str] = None, extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
|
||||
"""Create a blog category with retry logic."""
|
||||
url = f"{self.base_url}/blog/v3/categories"
|
||||
headers = self.headers(access_token, extra_headers)
|
||||
payload: Dict[str, Any] = {'category': {'label': label}, 'fieldsets': ['URL']}
|
||||
if description:
|
||||
payload['category']['description'] = description
|
||||
if language:
|
||||
payload['category']['language'] = language
|
||||
response = requests.post(f"{self.base_url}/blog/v3/categories", headers=self.headers(access_token, extra_headers), json=payload)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
try:
|
||||
return wix_api_call_with_retry('POST', url, headers, json_payload=payload, max_attempts=3)
|
||||
except WixAPIError as e:
|
||||
logger.error(f"Wix create_category failed: HTTP {e.status_code}")
|
||||
raise
|
||||
|
||||
def list_tags(self, access_token: str, extra_headers: Optional[Dict[str, str]] = None) -> List[Dict[str, Any]]:
|
||||
response = requests.get(f"{self.base_url}/blog/v3/tags", headers=self.headers(access_token, extra_headers))
|
||||
response.raise_for_status()
|
||||
return response.json().get('tags', [])
|
||||
"""List blog tags with retry logic."""
|
||||
url = f"{self.base_url}/blog/v3/tags"
|
||||
headers = self.headers(access_token, extra_headers)
|
||||
|
||||
def create_tag(self, access_token: str, label: str, language: Optional[str] = None, extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
|
||||
payload: Dict[str, Any] = {'label': label, 'fieldsets': ['URL']}
|
||||
try:
|
||||
result = wix_api_call_with_retry('GET', url, headers, max_attempts=3)
|
||||
return result.get('tags', [])
|
||||
except WixAPIError as e:
|
||||
logger.error(f"Wix list_tags failed: HTTP {e.status_code}")
|
||||
raise
|
||||
|
||||
def create_tag(self, access_token: str, label: str, language: Optional[str] = None,
|
||||
extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
|
||||
"""Create a blog tag with retry logic."""
|
||||
url = f"{self.base_url}/blog/v3/tags"
|
||||
headers = self.headers(access_token, extra_headers)
|
||||
payload: Dict[str, Any] = {'tag': {'label': label}, 'fieldsets': ['URL']}
|
||||
if language:
|
||||
payload['language'] = language
|
||||
response = requests.post(f"{self.base_url}/blog/v3/tags", headers=self.headers(access_token, extra_headers), json=payload)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
payload['tag']['language'] = language
|
||||
|
||||
try:
|
||||
return wix_api_call_with_retry('POST', url, headers, json_payload=payload, max_attempts=3)
|
||||
except WixAPIError as e:
|
||||
logger.error(f"Wix create_tag failed: HTTP {e.status_code}")
|
||||
raise
|
||||
|
||||
def get_draft_post(self, access_token: str, draft_post_id: str,
                   extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
    """Fetch one draft post by its ID.

    Issues a GET to ``/blog/v3/draft-posts/{draft_post_id}`` through
    ``wix_api_call_with_retry`` (called with ``max_attempts=3``) and returns
    whatever that helper returns.

    Raises:
        WixAPIError: re-raised unchanged after its HTTP status is logged.
    """
    endpoint = f"{self.base_url}/blog/v3/draft-posts/{draft_post_id}"
    request_headers = self.headers(access_token, extra_headers)

    try:
        draft = wix_api_call_with_retry('GET', endpoint, request_headers, max_attempts=3)
    except WixAPIError as e:
        logger.error(f"Wix get_draft_post failed: HTTP {e.status_code}")
        raise
    return draft
|
||||
|
||||
def update_draft_post(self, access_token: str, draft_post_id: str, payload: Dict[str, Any],
                      extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
    """Send an update for an existing draft post.

    Issues a PUT with ``payload`` as the JSON body to
    ``/blog/v3/draft-posts/{draft_post_id}`` through
    ``wix_api_call_with_retry`` (called with ``max_attempts=3``) and returns
    whatever that helper returns.

    Raises:
        WixAPIError: re-raised unchanged after its HTTP status is logged.
    """
    endpoint = f"{self.base_url}/blog/v3/draft-posts/{draft_post_id}"
    request_headers = self.headers(access_token, extra_headers)

    try:
        updated = wix_api_call_with_retry('PUT', endpoint, request_headers, json_payload=payload, max_attempts=3)
    except WixAPIError as e:
        logger.error(f"Wix update_draft_post failed: HTTP {e.status_code}")
        raise
    return updated
|
||||
|
||||
@@ -5,6 +5,7 @@ Handles blog post creation, validation, and publishing to Wix.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
import requests
|
||||
@@ -170,6 +171,16 @@ def validate_ricos_content(ricos_content: Dict[str, Any]) -> Dict[str, Any]:
|
||||
return ricos_content
|
||||
|
||||
|
||||
_UUID_RE = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.IGNORECASE)
|
||||
|
||||
def _looks_like_uuid(value: str) -> bool:
|
||||
try:
|
||||
uuid.UUID(value)
|
||||
return True
|
||||
except (ValueError, AttributeError):
|
||||
return bool(_UUID_RE.match(value))
|
||||
|
||||
|
||||
def validate_payload_no_none(obj, path=""):
|
||||
"""Recursively validate that no None values exist in the payload"""
|
||||
if obj is None:
|
||||
@@ -193,6 +204,7 @@ def create_blog_post(
|
||||
tag_ids: List[str] = None,
|
||||
publish: bool = True,
|
||||
seo_metadata: Dict[str, Any] = None,
|
||||
site_id: str = None,
|
||||
import_image_func = None,
|
||||
lookup_categories_func = None,
|
||||
lookup_tags_func = None,
|
||||
@@ -220,111 +232,51 @@ def create_blog_post(
|
||||
Returns:
|
||||
Created blog post information
|
||||
"""
|
||||
if not member_id:
|
||||
raise ValueError("memberId is required for third-party apps creating blog posts")
|
||||
# ===== PRE-FLIGHT VALIDATION =====
|
||||
errors = []
|
||||
warnings = []
|
||||
|
||||
# Ensure access_token is a string (handle cases where it might be int, dict, or other type)
|
||||
# Use normalize_token_string to handle various token formats (dict with accessToken.value, etc.)
|
||||
if not member_id:
|
||||
errors.append("memberId is required for third-party apps creating blog posts")
|
||||
|
||||
title_clean = str(title).strip() if title else ""
|
||||
if not title_clean:
|
||||
errors.append("Title is required")
|
||||
elif len(title_clean) > 200:
|
||||
errors.append(f"Title is too long ({len(title_clean)} chars, max 200)")
|
||||
|
||||
# Ensure access_token is a string
|
||||
normalized_token = normalize_token_string(access_token)
|
||||
if not normalized_token:
|
||||
raise ValueError("access_token is required and must be a valid string or token object")
|
||||
errors.append("access_token is required and must be a valid string or token object")
|
||||
else:
|
||||
access_token = normalized_token.strip()
|
||||
if not access_token:
|
||||
raise ValueError("access_token cannot be empty")
|
||||
errors.append("access_token cannot be empty")
|
||||
|
||||
# BACK TO BASICS MODE: Try simplest possible structure FIRST
|
||||
# Since posting worked before Ricos/SEO, let's test with absolute minimum
|
||||
BACK_TO_BASICS_MODE = False # Disabled: full Ricos conversion now produces valid output
|
||||
content_clean = str(content).strip() if content else ""
|
||||
if not content_clean:
|
||||
logger.warning("Content was empty, using default text")
|
||||
content = "This is a post from ALwrity."
|
||||
elif len(content_clean) > 100000:
|
||||
errors.append(f"Content is too long ({len(content_clean)} chars, max 100,000)")
|
||||
|
||||
if errors:
|
||||
raise ValueError(f"Wix publish validation failed: {'; '.join(errors)}")
|
||||
|
||||
wix_logger.reset()
|
||||
wix_logger.log_operation_start("Blog Post Creation", title=title[:50] if title else None, member_id=member_id[:20] if member_id else None)
|
||||
|
||||
if BACK_TO_BASICS_MODE:
|
||||
logger.info("🔧 Wix: BACK TO BASICS MODE - Testing minimal structure")
|
||||
|
||||
# Import auth utilities for proper token handling
|
||||
from .auth_utils import get_wix_headers
|
||||
|
||||
# Create absolute minimal Ricos structure
|
||||
minimal_ricos = {
|
||||
'nodes': [{
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'PARAGRAPH',
|
||||
'nodes': [{
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'nodes': [],
|
||||
'textData': {
|
||||
'text': (content[:500] if content else "This is a post from ALwrity.").strip(),
|
||||
'decorations': []
|
||||
}
|
||||
}]
|
||||
}]
|
||||
}
|
||||
|
||||
# Extract wix-site-id from token if possible
|
||||
extra_headers = {}
|
||||
try:
|
||||
token_str = str(access_token)
|
||||
if token_str and token_str.startswith('OauthNG.JWS.'):
|
||||
import jwt
|
||||
import json
|
||||
jwt_part = token_str[12:]
|
||||
payload = jwt.decode(jwt_part, options={"verify_signature": False, "verify_aud": False})
|
||||
data_payload = payload.get('data', {})
|
||||
if isinstance(data_payload, str):
|
||||
try:
|
||||
data_payload = json.loads(data_payload)
|
||||
except:
|
||||
pass
|
||||
instance_data = data_payload.get('instance', {})
|
||||
meta_site_id = instance_data.get('metaSiteId')
|
||||
if isinstance(meta_site_id, str) and meta_site_id:
|
||||
extra_headers['wix-site-id'] = meta_site_id
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Build minimal payload
|
||||
minimal_blog_data = {
|
||||
'draftPost': {
|
||||
'title': str(title).strip() if title else "Untitled",
|
||||
'memberId': str(member_id).strip(),
|
||||
'richContent': minimal_ricos
|
||||
},
|
||||
'publish': False,
|
||||
'fieldsets': ['URL']
|
||||
}
|
||||
|
||||
try:
|
||||
from .blog import WixBlogService
|
||||
blog_service_test = WixBlogService('https://www.wixapis.com', None)
|
||||
result = blog_service_test.create_draft_post(access_token, minimal_blog_data, extra_headers if extra_headers else None)
|
||||
logger.success("✅✅✅ Wix: BACK TO BASICS SUCCEEDED! Issue is with Ricos/SEO structure")
|
||||
wix_logger.log_operation_result("Back to Basics Test", True, result)
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Wix: BACK TO BASICS FAILED - {str(e)[:100]}")
|
||||
logger.error(" ⚠️ Issue is NOT with Ricos/SEO - likely permissions/token")
|
||||
wix_logger.add_error(f"Back to Basics: {str(e)[:100]}")
|
||||
|
||||
# Import auth utilities for proper token handling
|
||||
from .auth_utils import get_wix_headers
|
||||
|
||||
# Headers for blog post creation (use user's OAuth token)
|
||||
headers = get_wix_headers(access_token)
|
||||
|
||||
# Build valid Ricos rich content
|
||||
# Ensure content is not empty
|
||||
if not content or not content.strip():
|
||||
content = "This is a post from ALwrity."
|
||||
logger.warning("⚠️ Content was empty, using default text")
|
||||
|
||||
# Quick token/permission check (only log if issues found)
|
||||
has_blog_scope = None
|
||||
meta_site_id = None
|
||||
try:
|
||||
from .utils import decode_wix_token
|
||||
import json
|
||||
from .utils import decode_wix_token, extract_meta_from_token
|
||||
token_data = decode_wix_token(access_token)
|
||||
if 'scope' in token_data:
|
||||
scopes = token_data.get('scope')
|
||||
@@ -332,70 +284,93 @@ def create_blog_post(
|
||||
scope_list = scopes.split(',') if ',' in scopes else [scopes]
|
||||
has_blog_scope = any('BLOG' in s.upper() for s in scope_list)
|
||||
if not has_blog_scope:
|
||||
logger.error("❌ Wix: Token missing BLOG scopes - verify OAuth app permissions")
|
||||
if 'data' in token_data:
|
||||
data = token_data.get('data')
|
||||
if isinstance(data, str):
|
||||
try:
|
||||
data = json.loads(data)
|
||||
except:
|
||||
pass
|
||||
if isinstance(data, dict) and 'instance' in data:
|
||||
instance = data.get('instance', {})
|
||||
meta_site_id = instance.get('metaSiteId')
|
||||
logger.error("Wix: Token missing BLOG scopes - verify OAuth app permissions")
|
||||
meta_info = extract_meta_from_token(access_token)
|
||||
meta_site_id = meta_info.get('metaSiteId')
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Add wix-site-id to headers for all API calls (categories, tags, draft post)
|
||||
resolved_site_id = site_id or meta_site_id or os.getenv('WIX_SITE_ID')
|
||||
if resolved_site_id:
|
||||
headers['wix-site-id'] = resolved_site_id
|
||||
logger.info(f"Using wix-site-id: {resolved_site_id[:8]}... (source: {'param' if site_id else 'token' if meta_site_id else 'env'})")
|
||||
else:
|
||||
token_str = str(access_token)
|
||||
if token_str.startswith('IST.'):
|
||||
logger.error("IST. API key requires WIX_SITE_ID environment variable or site_id parameter.")
|
||||
else:
|
||||
logger.warning("No wix-site-id found — API calls may fail if token requires it")
|
||||
|
||||
# Quick permission test (only log failures)
|
||||
try:
|
||||
test_headers = get_wix_headers(access_token)
|
||||
import requests
|
||||
test_response = requests.get(f"{base_url}/blog/v3/categories", headers=test_headers, timeout=5)
|
||||
if test_response.status_code == 403:
|
||||
logger.error("❌ Wix: Permission denied - OAuth app missing BLOG.CREATE-DRAFT")
|
||||
logger.error("Wix: Permission denied - OAuth app missing BLOG.CREATE-DRAFT")
|
||||
elif test_response.status_code == 401:
|
||||
logger.error("❌ Wix: Unauthorized - token may be expired")
|
||||
logger.error("Wix: Unauthorized - token may be expired")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Safely get token length (access_token is already validated as string above)
|
||||
token_length = len(access_token) if access_token else 0
|
||||
wix_logger.log_token_info(token_length, has_blog_scope, meta_site_id)
|
||||
|
||||
# Convert markdown to Ricos
|
||||
# PRIMARY: Use Wix Ricos Documents API for best formatting support (tables, complex markdown, etc.)
|
||||
# FALLBACK: Use custom parser if Wix API fails (no length limit, handles tables natively)
|
||||
has_table = bool(re.search(r'^\|.*\|', content, re.MULTILINE))
|
||||
|
||||
# Pre-check: Wix Ricos API has a 10,000 character limit for HTML input.
|
||||
# Estimate HTML length from markdown (~1.4x expansion) to avoid silent truncation.
|
||||
# If HTML would exceed limit, skip Wix API and use custom parser.
|
||||
use_wix_api = True
|
||||
MAX_HTML_LIMIT = 9800
|
||||
estimated_html_len = len(content) * 1.4
|
||||
if estimated_html_len > MAX_HTML_LIMIT:
|
||||
logger.warning(f"Content too long for Wix Ricos API (est. HTML: {estimated_html_len:.0f} > {MAX_HTML_LIMIT}) — using custom parser")
|
||||
use_wix_api = False
|
||||
|
||||
ricos_content = None
|
||||
if use_wix_api:
|
||||
try:
|
||||
logger.info("Converting markdown via Wix Ricos Documents API...")
|
||||
ricos_content = convert_via_wix_api(content, access_token, base_url)
|
||||
logger.info(f"Wix API conversion succeeded: {len(ricos_content.get('nodes', []))} nodes")
|
||||
except Exception as e:
|
||||
logger.warning(f"Wix API conversion failed, falling back to custom parser: {e}")
|
||||
|
||||
# If markdown had tables and Wix API didn't produce TABLE nodes, fall back to custom parser
|
||||
if has_table and ricos_content:
|
||||
node_types = [n.get('type', '') for n in ricos_content.get('nodes', [])]
|
||||
if 'TABLE' not in node_types:
|
||||
logger.info("Markdown had tables but Wix API produced no TABLE nodes — using custom parser for table support")
|
||||
ricos_content = None
|
||||
|
||||
if not ricos_content or not isinstance(ricos_content, dict) or 'nodes' not in ricos_content:
|
||||
logger.info("Using custom markdown parser for Ricos conversion")
|
||||
ricos_content = convert_content_to_ricos(content, None)
|
||||
|
||||
nodes_count = len(ricos_content.get('nodes', []))
|
||||
wix_logger.log_ricos_conversion(nodes_count)
|
||||
|
||||
# Validate Ricos content structure
|
||||
# Per Wix Blog API documentation: richContent should ONLY contain 'nodes'
|
||||
# The example in docs shows: { nodes: [...] } - no type, id, metadata, or documentStyle
|
||||
if not isinstance(ricos_content, dict):
|
||||
logger.error(f"❌ richContent is not a dict: {type(ricos_content)}")
|
||||
logger.error(f"richContent is not a dict: {type(ricos_content)}")
|
||||
raise ValueError("richContent must be a dictionary object")
|
||||
|
||||
if 'nodes' not in ricos_content or not isinstance(ricos_content['nodes'], list):
|
||||
logger.error(f"❌ richContent.nodes is missing or not a list: {ricos_content.get('nodes', 'MISSING')}")
|
||||
logger.error(f"richContent.nodes is missing or not a list: {ricos_content.get('nodes', 'MISSING')}")
|
||||
raise ValueError("richContent must contain a 'nodes' array")
|
||||
|
||||
# Remove type and id fields (not expected by Blog API)
|
||||
# NOTE: metadata is optional - Wix UPDATE endpoint example shows it, but CREATE example doesn't
|
||||
# We'll keep it minimal (nodes only) for CREATE to match the recipe example
|
||||
fields_to_remove = ['type', 'id']
|
||||
for field in fields_to_remove:
|
||||
# Remove top-level fields not expected by Blog API CREATE endpoint
|
||||
# (Wix API converter may include type, id, metadata, documentStyle — strip them)
|
||||
for field in ['type', 'id', 'metadata', 'documentStyle']:
|
||||
if field in ricos_content:
|
||||
logger.debug(f"Removing '{field}' field from richContent (Blog API doesn't expect this)")
|
||||
logger.debug(f"Removing '{field}' from richContent for Blog API compatibility")
|
||||
del ricos_content[field]
|
||||
|
||||
# Remove metadata and documentStyle - Blog API CREATE endpoint example shows only 'nodes'
|
||||
# (UPDATE endpoint shows metadata, but we're using CREATE)
|
||||
if 'metadata' in ricos_content:
|
||||
logger.debug("Removing 'metadata' from richContent (CREATE endpoint expects only 'nodes')")
|
||||
del ricos_content['metadata']
|
||||
if 'documentStyle' in ricos_content:
|
||||
logger.debug("Removing 'documentStyle' from richContent (CREATE endpoint expects only 'nodes')")
|
||||
del ricos_content['documentStyle']
|
||||
|
||||
# Ensure we only have 'nodes' in richContent for CREATE endpoint
|
||||
ricos_content = {'nodes': ricos_content['nodes']}
|
||||
|
||||
@@ -470,55 +445,62 @@ def create_blog_post(
|
||||
if cover_image_url and import_image_func:
|
||||
try:
|
||||
media_id = import_image_func(access_token, cover_image_url, f'Cover: {title}')
|
||||
# Ensure media_id is a string and not None
|
||||
if media_id and isinstance(media_id, str):
|
||||
# import_image_to_wix now returns Optional[str] — None means failure
|
||||
if media_id and isinstance(media_id, str) and media_id.strip():
|
||||
blog_data['draftPost']['media'] = {
|
||||
'wixMedia': {
|
||||
'image': {'id': str(media_id).strip()}
|
||||
'image': {'id': media_id.strip()}
|
||||
},
|
||||
'displayed': True,
|
||||
'custom': True
|
||||
}
|
||||
logger.info(f"Cover image imported: {media_id[:16]}...")
|
||||
else:
|
||||
logger.warning(f"Invalid media_id type or value: {type(media_id)}, skipping media")
|
||||
logger.warning(f"Cover image import returned no valid media_id (type={type(media_id)}). Continuing without cover image.")
|
||||
warnings.append("Cover image could not be imported — post published without cover image.")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to import cover image: {e}")
|
||||
logger.warning(f"Cover image import failed (non-fatal): {e}. Continuing without cover image.")
|
||||
warnings.append(f"Cover image import failed: {str(e)[:100]}")
|
||||
|
||||
# Handle categories - can be either IDs (list of strings) or names (for lookup)
|
||||
category_ids_to_use = None
|
||||
if category_ids:
|
||||
# Check if these are IDs (UUIDs) or names
|
||||
if isinstance(category_ids, list) and len(category_ids) > 0:
|
||||
# Assume IDs if first item looks like UUID (has hyphens and is long)
|
||||
# Use proper UUID detection instead of fragile heuristic
|
||||
first_item = str(category_ids[0])
|
||||
if '-' in first_item and len(first_item) > 30:
|
||||
if _looks_like_uuid(first_item):
|
||||
category_ids_to_use = category_ids
|
||||
elif lookup_categories_func:
|
||||
# These are names, need to lookup/create
|
||||
extra_headers = {}
|
||||
if 'wix-site-id' in headers:
|
||||
extra_headers['wix-site-id'] = headers['wix-site-id']
|
||||
if resolved_site_id:
|
||||
extra_headers['wix-site-id'] = resolved_site_id
|
||||
category_ids_to_use = lookup_categories_func(
|
||||
access_token, category_ids, extra_headers if extra_headers else None
|
||||
)
|
||||
if not category_ids_to_use:
|
||||
warnings.append(f"Categories could not be created ({len(category_ids)} requested) — OAuth app may lack BLOG.CREATE-DRAFT scope.")
|
||||
|
||||
# Handle tags - can be either IDs (list of strings) or names (for lookup)
|
||||
tag_ids_to_use = None
|
||||
if tag_ids:
|
||||
# Check if these are IDs (UUIDs) or names
|
||||
if isinstance(tag_ids, list) and len(tag_ids) > 0:
|
||||
# Assume IDs if first item looks like UUID (has hyphens and is long)
|
||||
# Use proper UUID detection instead of fragile heuristic
|
||||
first_item = str(tag_ids[0])
|
||||
if '-' in first_item and len(first_item) > 30:
|
||||
if _looks_like_uuid(first_item):
|
||||
tag_ids_to_use = tag_ids
|
||||
elif lookup_tags_func:
|
||||
# These are names, need to lookup/create
|
||||
extra_headers = {}
|
||||
if 'wix-site-id' in headers:
|
||||
extra_headers['wix-site-id'] = headers['wix-site-id']
|
||||
if resolved_site_id:
|
||||
extra_headers['wix-site-id'] = resolved_site_id
|
||||
tag_ids_to_use = lookup_tags_func(
|
||||
access_token, tag_ids, extra_headers if extra_headers else None
|
||||
)
|
||||
if not tag_ids_to_use:
|
||||
warnings.append(f"Tags could not be created ({len(tag_ids)} requested) — OAuth app may lack BLOG scope for tag management.")
|
||||
|
||||
# Add categories if we have IDs (must be non-empty list of strings)
|
||||
# CRITICAL: Wix API rejects empty arrays or arrays with None/empty strings
|
||||
@@ -558,34 +540,21 @@ def create_blog_post(
|
||||
logger.debug("No SEO metadata provided to create_blog_post")
|
||||
|
||||
try:
|
||||
# Extract wix-site-id from token if possible
|
||||
extra_headers = {}
|
||||
try:
|
||||
token_str = str(access_token)
|
||||
if token_str and token_str.startswith('OauthNG.JWS.'):
|
||||
import jwt
|
||||
import json
|
||||
jwt_part = token_str[12:]
|
||||
payload = jwt.decode(jwt_part, options={"verify_signature": False, "verify_aud": False})
|
||||
data_payload = payload.get('data', {})
|
||||
if isinstance(data_payload, str):
|
||||
try:
|
||||
data_payload = json.loads(data_payload)
|
||||
except:
|
||||
pass
|
||||
instance_data = data_payload.get('instance', {})
|
||||
meta_site_id = instance_data.get('metaSiteId')
|
||||
if isinstance(meta_site_id, str) and meta_site_id:
|
||||
extra_headers['wix-site-id'] = meta_site_id
|
||||
except Exception:
|
||||
pass
|
||||
# Use wix-site-id already resolved earlier
|
||||
extra_headers_final = {}
|
||||
wix_site_id = resolved_site_id
|
||||
if wix_site_id:
|
||||
extra_headers_final['wix-site-id'] = wix_site_id
|
||||
logger.info(f"Using wix-site-id for draft post: {wix_site_id[:8]}...")
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract wix-site-id from token: {e}")
|
||||
|
||||
try:
|
||||
# Validate payload structure before sending
|
||||
draft_post = blog_data.get('draftPost', {})
|
||||
if not isinstance(draft_post, dict):
|
||||
raise ValueError("draftPost must be a dict object")
|
||||
|
||||
# Validate richContent structure
|
||||
if 'richContent' in draft_post:
|
||||
rc = draft_post['richContent']
|
||||
if not isinstance(rc, dict):
|
||||
@@ -596,7 +565,6 @@ def create_blog_post(
|
||||
raise ValueError(f"richContent.nodes must be a list, got {type(rc['nodes'])}")
|
||||
logger.debug(f"✅ richContent validation passed: {len(rc.get('nodes', []))} nodes")
|
||||
|
||||
# Validate seoData structure if present
|
||||
if 'seoData' in draft_post:
|
||||
seo = draft_post['seoData']
|
||||
if not isinstance(seo, dict):
|
||||
@@ -607,8 +575,6 @@ def create_blog_post(
|
||||
raise ValueError(f"seoData.settings must be a dict, got {type(seo.get('settings'))}")
|
||||
logger.debug(f"✅ seoData validation passed: {len(seo.get('tags', []))} tags")
|
||||
|
||||
# Final validation: Ensure no None values in any nested objects
|
||||
# Wix API rejects None values and expects proper types
|
||||
try:
|
||||
validate_payload_no_none(blog_data, "blog_data")
|
||||
logger.debug("✅ Payload validation passed: No None values found")
|
||||
@@ -616,12 +582,10 @@ def create_blog_post(
|
||||
logger.error(f"❌ Payload validation failed: {e}")
|
||||
raise
|
||||
|
||||
# Log payload summary
|
||||
logger.debug(f"Payload: draftPost keys={list(draft_post.keys())}, "
|
||||
f"nodes={len(draft_post.get('richContent', {}).get('nodes', []))}, "
|
||||
f"has_seo={'seoData' in draft_post}")
|
||||
|
||||
# Final deep validation: Serialize and deserialize to catch any JSON-serialization issues
|
||||
try:
|
||||
import json
|
||||
json.dumps(blog_data, ensure_ascii=False)
|
||||
@@ -629,7 +593,6 @@ def create_blog_post(
|
||||
logger.error(f"❌ Payload JSON serialization failed: {e}")
|
||||
raise ValueError(f"Payload contains non-serializable data: {e}")
|
||||
|
||||
# Clean up None values that Wix API would reject
|
||||
rc = blog_data['draftPost']['richContent']
|
||||
for field in ['documentStyle', 'metadata']:
|
||||
if field in rc and (rc[field] is None or rc[field] == "" or not isinstance(rc[field], dict)):
|
||||
@@ -638,14 +601,17 @@ def create_blog_post(
|
||||
logger.info(f"📤 Publishing to Wix: title='{blog_data['draftPost'].get('title', '')}', "
|
||||
f"nodes={len(rc.get('nodes', []))}")
|
||||
|
||||
result = blog_service.create_draft_post(access_token, blog_data, extra_headers or None)
|
||||
result = blog_service.create_draft_post(access_token, blog_data, extra_headers_final or None)
|
||||
|
||||
# Log success
|
||||
draft_post = result.get('draftPost', {})
|
||||
post_id = draft_post.get('id', 'N/A')
|
||||
wix_logger.log_operation_result("Create Draft Post", True, result)
|
||||
logger.success(f"✅ Wix: Blog post created - ID: {post_id}")
|
||||
|
||||
if warnings:
|
||||
result['_warnings'] = warnings
|
||||
logger.info(f"Publish completed with {len(warnings)} warnings: {'; '.join(warnings)}")
|
||||
|
||||
return result
|
||||
except TypeError as e:
|
||||
import traceback
|
||||
|
||||
@@ -5,79 +5,71 @@ from typing import Any, Dict, List
|
||||
|
||||
def parse_markdown_inline(text: str) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Parse inline markdown formatting (bold, italic, links) into Ricos text nodes.
|
||||
Parse inline markdown formatting (bold, italic, links, code, strikethrough) into Ricos text nodes.
|
||||
Returns a list of text nodes with decorations.
|
||||
Handles: **bold**, *italic*, [links](url), `code`, and combinations.
|
||||
Handles: **bold**, *italic*, [links](url), `code`, ~strikethrough~, and combinations.
|
||||
"""
|
||||
if not text:
|
||||
return [{
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
|
||||
'nodes': [],
|
||||
'textData': {'text': '', 'decorations': []}
|
||||
}]
|
||||
|
||||
nodes = []
|
||||
|
||||
# Process text character by character to handle nested/adjacent formatting
|
||||
# This is more robust than regex for complex cases
|
||||
i = 0
|
||||
current_text = ''
|
||||
current_decorations = []
|
||||
|
||||
while i < len(text):
|
||||
# Check for bold **text** (must come before single * check)
|
||||
if i < len(text) - 1 and text[i:i+2] == '**':
|
||||
# Save any accumulated text
|
||||
def flush_text():
|
||||
nonlocal current_text
|
||||
if current_text:
|
||||
nodes.append({
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
|
||||
'textData': {
|
||||
'text': current_text,
|
||||
'decorations': current_decorations.copy()
|
||||
}
|
||||
'nodes': [],
|
||||
'textData': {'text': current_text, 'decorations': []}
|
||||
})
|
||||
current_text = ''
|
||||
|
||||
# Find closing **
|
||||
while i < len(text):
|
||||
# Bold **text**
|
||||
if i < len(text) - 1 and text[i:i+2] == '**':
|
||||
flush_text()
|
||||
end_bold = text.find('**', i + 2)
|
||||
if end_bold != -1:
|
||||
bold_text = text[i + 2:end_bold]
|
||||
# Recursively parse the bold text for nested formatting
|
||||
bold_nodes = parse_markdown_inline(bold_text)
|
||||
# Add BOLD decoration to all text nodes within
|
||||
# Per Wix API: decorations are objects with 'type' field, not strings
|
||||
for node in bold_nodes:
|
||||
if node['type'] == 'TEXT':
|
||||
node_decorations = node['textData'].get('decorations', []).copy()
|
||||
# Check if BOLD decoration already exists
|
||||
has_bold = any(d.get('type') == 'BOLD' for d in node_decorations if isinstance(d, dict))
|
||||
if not has_bold:
|
||||
node_decorations.append({'type': 'BOLD'})
|
||||
node['textData']['decorations'] = node_decorations
|
||||
decs = node['textData'].get('decorations', []).copy()
|
||||
if not any(d.get('type') == 'BOLD' for d in decs if isinstance(d, dict)):
|
||||
decs.append({'type': 'BOLD'})
|
||||
node['textData']['decorations'] = decs
|
||||
nodes.append(node)
|
||||
i = end_bold + 2
|
||||
continue
|
||||
|
||||
# Check for link [text](url)
|
||||
elif text[i] == '[':
|
||||
# Save any accumulated text
|
||||
if current_text:
|
||||
nodes.append({
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
|
||||
'textData': {
|
||||
'text': current_text,
|
||||
'decorations': current_decorations.copy()
|
||||
}
|
||||
})
|
||||
current_text = ''
|
||||
current_decorations = []
|
||||
# Strikethrough ~text~
|
||||
elif text[i] == '~':
|
||||
flush_text()
|
||||
end_strike = text.find('~', i + 1)
|
||||
if end_strike != -1:
|
||||
strike_text = text[i + 1:end_strike]
|
||||
strike_nodes = parse_markdown_inline(strike_text)
|
||||
for node in strike_nodes:
|
||||
if node['type'] == 'TEXT':
|
||||
decs = node['textData'].get('decorations', []).copy()
|
||||
if not any(d.get('type') == 'STRIKETHROUGH' for d in decs if isinstance(d, dict)):
|
||||
decs.append({'type': 'STRIKETHROUGH'})
|
||||
node['textData']['decorations'] = decs
|
||||
nodes.append(node)
|
||||
i = end_strike + 1
|
||||
continue
|
||||
|
||||
# Find matching ]
|
||||
# Link [text](url)
|
||||
elif text[i] == '[':
|
||||
flush_text()
|
||||
link_end = text.find(']', i)
|
||||
if link_end != -1 and link_end < len(text) - 1 and text[link_end + 1] == '(':
|
||||
link_text = text[i + 1:link_end]
|
||||
@@ -85,12 +77,10 @@ def parse_markdown_inline(text: str) -> List[Dict[str, Any]]:
|
||||
url_end = text.find(')', url_start)
|
||||
if url_end != -1:
|
||||
url = text[url_start:url_end]
|
||||
# Per Wix API: Links are decorations on TEXT nodes, not separate node types
|
||||
# Create TEXT node with LINK decoration
|
||||
nodes.append({
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
|
||||
'nodes': [],
|
||||
'textData': {
|
||||
'text': link_text,
|
||||
'decorations': [{
|
||||
@@ -98,7 +88,7 @@ def parse_markdown_inline(text: str) -> List[Dict[str, Any]]:
|
||||
'linkData': {
|
||||
'link': {
|
||||
'url': url,
|
||||
'target': 'BLANK' # Wix API uses 'BLANK', not '_blank'
|
||||
'target': 'BLANK'
|
||||
}
|
||||
}
|
||||
}]
|
||||
@@ -107,33 +97,17 @@ def parse_markdown_inline(text: str) -> List[Dict[str, Any]]:
|
||||
i = url_end + 1
|
||||
continue
|
||||
|
||||
# Check for code `text`
|
||||
# Inline code `text`
|
||||
elif text[i] == '`':
|
||||
# Save any accumulated text
|
||||
if current_text:
|
||||
nodes.append({
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
|
||||
'textData': {
|
||||
'text': current_text,
|
||||
'decorations': current_decorations.copy()
|
||||
}
|
||||
})
|
||||
current_text = ''
|
||||
current_decorations = []
|
||||
|
||||
# Find closing `
|
||||
flush_text()
|
||||
code_end = text.find('`', i + 1)
|
||||
if code_end != -1:
|
||||
code_text = text[i + 1:code_end]
|
||||
# Per Wix API: CODE is not a valid decoration type, but we'll keep the structure
|
||||
# Note: Wix uses CODE_BLOCK nodes for code, not CODE decorations
|
||||
# For inline code, we'll just use plain text for now
|
||||
# Wix doesn't have a CODE decoration, but we can preserve the text
|
||||
nodes.append({
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
|
||||
'nodes': [],
|
||||
'textData': {
|
||||
'text': code_text,
|
||||
'decorations': [] # CODE is not a valid decoration in Wix API
|
||||
@@ -142,39 +116,21 @@ def parse_markdown_inline(text: str) -> List[Dict[str, Any]]:
|
||||
i = code_end + 1
|
||||
continue
|
||||
|
||||
# Check for italic *text* (only if not part of **)
|
||||
# Italic *text* (must come after ** check)
|
||||
elif text[i] == '*' and (i == 0 or text[i-1] != '*') and (i == len(text) - 1 or text[i+1] != '*'):
|
||||
# Save any accumulated text
|
||||
if current_text:
|
||||
nodes.append({
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
|
||||
'textData': {
|
||||
'text': current_text,
|
||||
'decorations': current_decorations.copy()
|
||||
}
|
||||
})
|
||||
current_text = ''
|
||||
current_decorations = []
|
||||
|
||||
# Find closing * (but not **)
|
||||
flush_text()
|
||||
italic_end = text.find('*', i + 1)
|
||||
if italic_end != -1:
|
||||
# Make sure it's not part of **
|
||||
if italic_end == len(text) - 1 or text[italic_end + 1] != '*':
|
||||
italic_text = text[i + 1:italic_end]
|
||||
italic_nodes = parse_markdown_inline(italic_text)
|
||||
# Add ITALIC decoration
|
||||
# Per Wix API: decorations are objects with 'type' field
|
||||
for node in italic_nodes:
|
||||
if node['type'] == 'TEXT':
|
||||
node_decorations = node['textData'].get('decorations', []).copy()
|
||||
# Check if ITALIC decoration already exists
|
||||
has_italic = any(d.get('type') == 'ITALIC' for d in node_decorations if isinstance(d, dict))
|
||||
if not has_italic:
|
||||
node_decorations.append({'type': 'ITALIC'})
|
||||
node['textData']['decorations'] = node_decorations
|
||||
decs = node['textData'].get('decorations', []).copy()
|
||||
if not any(d.get('type') == 'ITALIC' for d in decs if isinstance(d, dict)):
|
||||
decs.append({'type': 'ITALIC'})
|
||||
node['textData']['decorations'] = decs
|
||||
nodes.append(node)
|
||||
i = italic_end + 1
|
||||
continue
|
||||
@@ -183,58 +139,241 @@ def parse_markdown_inline(text: str) -> List[Dict[str, Any]]:
|
||||
current_text += text[i]
|
||||
i += 1
|
||||
|
||||
# Add any remaining text
|
||||
if current_text:
|
||||
nodes.append({
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
|
||||
'textData': {
|
||||
'text': current_text,
|
||||
'decorations': current_decorations.copy()
|
||||
}
|
||||
})
|
||||
flush_text()
|
||||
|
||||
# If no nodes created, return single plain text node
|
||||
if not nodes:
|
||||
nodes.append({
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
|
||||
'textData': {
|
||||
'text': text,
|
||||
'decorations': []
|
||||
}
|
||||
'nodes': [],
|
||||
'textData': {'text': text, 'decorations': []}
|
||||
})
|
||||
|
||||
return nodes
|
||||
|
||||
|
||||
def _make_code_block_node(code_text: str, language: str = '') -> Dict[str, Any]:
|
||||
"""Create a Ricos CODE_BLOCK node."""
|
||||
lines = code_text.split('\n')
|
||||
text_nodes = []
|
||||
for line in lines:
|
||||
text_nodes.append({
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'nodes': [],
|
||||
'textData': {'text': line, 'decorations': []}
|
||||
})
|
||||
|
||||
return {
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'CODE_BLOCK',
|
||||
'nodes': text_nodes,
|
||||
'codeBlockData': {
|
||||
'language': language or 'text',
|
||||
'textWrap': True
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _make_horizontal_rule_node() -> Dict[str, Any]:
|
||||
"""Create a Ricos DIVIDER node."""
|
||||
return {
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'DIVIDER',
|
||||
'nodes': [],
|
||||
'dividerData': {
|
||||
'type': 'LINE',
|
||||
'lineStyle': {
|
||||
'width': 'LARGE',
|
||||
'alignment': 'CENTER'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _parse_markdown_table(lines: List[str], start_idx: int) -> tuple:
    """Parse the markdown table that starts at ``lines[start_idx]``.

    Consecutive lines are consumed until a blank line or a line without a
    pipe is reached. A markdown table looks like::

        | Header 1 | Header 2 |
        |:---------|---------:|
        | Cell 1   | Cell 2   |

    The separator row is only recognised directly after the header row, and
    every one of its cells must be dashes with optional colons (``---``,
    ``:---``, ``---:``, ``:---:``). Later rows are always data, so a body row
    of empty or dash-only cells is kept instead of being taken for a second
    separator.

    Args:
        lines: All lines of the document.
        start_idx: Index of the first table line (the header row).

    Returns:
        ``(rows, alignments, next_idx)``. ``rows`` is a list of rows, each a
        list of stripped cell strings, without the separator row.
        ``alignments`` has one entry per separator cell: ``'left'``,
        ``'center'``, ``'right'`` or ``None``; with no separator it is a list
        of ``None`` as wide as the first row (width 1 when there are no
        rows). ``next_idx`` is the index of the first line not consumed.
    """
    rows: List[List[str]] = []
    alignments = None
    i = start_idx

    while i < len(lines):
        line = lines[i].strip()
        if not line or '|' not in line:
            break

        # Remove at most one framing pipe per side; str.strip('|') would also
        # swallow the pipes that delimit empty leading or trailing cells.
        if line.startswith('|'):
            line = line[1:]
        if line.endswith('|'):
            line = line[:-1]
        cells = [cell.strip() for cell in line.split('|')]

        is_separator = (
            alignments is None
            and len(rows) == 1
            and all(re.fullmatch(r':?-+:?', cell) for cell in cells)
        )
        if is_separator:
            alignments = []
            for cell in cells:
                if cell.startswith(':') and cell.endswith(':'):
                    alignments.append('center')
                elif cell.endswith(':'):
                    alignments.append('right')
                elif cell.startswith(':'):
                    alignments.append('left')
                else:
                    alignments.append(None)
        else:
            rows.append(cells)
        i += 1

    if not alignments:
        alignments = [None] * (len(rows[0]) if rows else 1)
    return rows, alignments, i
|
||||
|
||||
|
||||
def _make_table_node(header_row: List[str], body_rows: List[List[str]], alignments: List) -> Dict[str, Any]:
    """Assemble a Ricos TABLE node from parsed markdown table rows.

    Each cell's text goes through ``parse_markdown_inline`` and is wrapped as
    TABLE_CELL -> PARAGRAPH -> TEXT nodes. Header-row text nodes get a BOLD
    decoration unless they already have one, and header cells get a heavier
    top border.

    Args:
        header_row: Cell texts of the header row.
        body_rows: Cell texts of the remaining rows.
        alignments: Per-column alignment (``'left'``, ``'center'``,
            ``'right'`` or ``None``); entries past the end are treated as
            unset.

    Returns:
        A dict of type ``'TABLE'`` whose ``nodes`` are TABLE_ROW nodes and
        whose ``tableData`` records the column count, row count and header
        row index (``-1`` when ``header_row`` is empty).
    """
    all_rows = [header_row] + body_rows
    table_rows = []

    for row_idx, row_cells in enumerate(all_rows):
        is_header = row_idx == 0
        cell_nodes = []

        for col_idx, cell_text in enumerate(row_cells):
            text_nodes = parse_markdown_inline(cell_text)

            if is_header:
                # Make header text bold without duplicating an existing BOLD.
                for node in text_nodes:
                    if node.get('type') != 'TEXT':
                        continue
                    decs = node['textData'].get('decorations', [])
                    already_bold = any(
                        isinstance(d, dict) and d.get('type') == 'BOLD' for d in decs
                    )
                    if not already_bold:
                        bold_decs = decs.copy()
                        bold_decs.append({'type': 'BOLD'})
                        node['textData']['decorations'] = bold_decs

            paragraph_id = str(uuid.uuid4())
            if text_nodes:
                paragraph_children = text_nodes
            else:
                # A cell always needs one TEXT child; a blank cell becomes a space.
                paragraph_children = [{
                    'id': str(uuid.uuid4()),
                    'type': 'TEXT',
                    'nodes': [],
                    'textData': {'text': cell_text or ' ', 'decorations': []}
                }]
            paragraph_node = {
                'id': paragraph_id,
                'type': 'PARAGRAPH',
                'nodes': paragraph_children,
            }

            cell_style = {'verticalAlign': 'top'}
            if is_header:
                cell_style['borderWidth'] = {'top': 2, 'bottom': 1, 'left': 1, 'right': 1}
            has_alignment = bool(alignments) and col_idx < len(alignments) and bool(alignments[col_idx])
            if has_alignment:
                cell_style['textAlign'] = alignments[col_idx]

            cell_nodes.append({
                'id': str(uuid.uuid4()),
                'type': 'TABLE_CELL',
                'nodes': [paragraph_node],
                'tableCellData': {'style': cell_style},
            })

        table_rows.append({
            'id': str(uuid.uuid4()),
            'type': 'TABLE_ROW',
            'nodes': cell_nodes,
        })

    if all_rows:
        num_cols = max(len(row) for row in all_rows)
    else:
        num_cols = 1

    table_data = {
        'cols': num_cols,
        'rows': len(table_rows),
        'headerRow': 0 if header_row else -1,
    }
    return {
        'id': str(uuid.uuid4()),
        'type': 'TABLE',
        'nodes': table_rows,
        'tableData': table_data,
    }
|
||||
|
||||
|
||||
def convert_content_to_ricos(content: str, images: List[str] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Convert markdown content into valid Ricos JSON format.
|
||||
Supports headings, paragraphs, lists, bold, italic, links, and images.
|
||||
|
||||
Supports:
|
||||
- Headings (# to ######)
|
||||
- Paragraphs with inline formatting
|
||||
- Unordered lists (-, *)
|
||||
- Ordered lists (1., 2.)
|
||||
- Blockquotes (>)
|
||||
- Code blocks (```language ... ```)
|
||||
- Inline images (![alt](url))
|
||||
- Horizontal rules (---, ***, ___)
|
||||
- Tables (| Header | Header |)
|
||||
"""
|
||||
if not content:
|
||||
content = "This is a post from ALwrity."
|
||||
|
||||
nodes = []
|
||||
lines = content.split('\n')
|
||||
|
||||
i = 0
|
||||
while i < len(lines):
|
||||
line = lines[i].strip()
|
||||
|
||||
if not line:
|
||||
while i < len(lines):
|
||||
line = lines[i]
|
||||
stripped = line.strip()
|
||||
|
||||
if not stripped:
|
||||
i += 1
|
||||
continue
|
||||
|
||||
node_id = str(uuid.uuid4())
|
||||
|
||||
# Check for headings
|
||||
if line.startswith('#'):
|
||||
level = len(line) - len(line.lstrip('#'))
|
||||
heading_text = line.lstrip('# ').strip()
|
||||
# Code blocks (```language ... ```)
|
||||
if stripped.startswith('```'):
|
||||
language = stripped[3:].strip() or ''
|
||||
code_lines = []
|
||||
i += 1
|
||||
while i < len(lines):
|
||||
if lines[i].strip() == '```':
|
||||
i += 1
|
||||
break
|
||||
code_lines.append(lines[i])
|
||||
i += 1
|
||||
code_text = '\n'.join(code_lines)
|
||||
if code_text.strip():
|
||||
nodes.append(_make_code_block_node(code_text, language))
|
||||
continue
|
||||
|
||||
# Horizontal rules
|
||||
if re.match(r'^(---+|\*\*\*|___+)$', stripped):
|
||||
nodes.append(_make_horizontal_rule_node())
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Markdown tables (lines starting with |)
|
||||
if stripped.startswith('|') and i + 1 < len(lines) and '|' in lines[i + 1]:
|
||||
table_rows, alignments, next_idx = _parse_markdown_table(lines, i)
|
||||
if table_rows and len(table_rows) >= 1:
|
||||
header_row = table_rows[0]
|
||||
body_rows = table_rows[1:] if len(table_rows) > 1 else []
|
||||
nodes.append(_make_table_node(header_row, body_rows, alignments))
|
||||
i = next_idx
|
||||
continue
|
||||
|
||||
# Headings
|
||||
if stripped.startswith('#'):
|
||||
level = len(stripped) - len(stripped.lstrip('#'))
|
||||
heading_text = stripped.lstrip('# ').strip()
|
||||
text_nodes = parse_markdown_inline(heading_text)
|
||||
nodes.append({
|
||||
'id': node_id,
|
||||
@@ -243,42 +382,37 @@ def convert_content_to_ricos(content: str, images: List[str] = None) -> Dict[str
|
||||
'headingData': {'level': min(level, 6)}
|
||||
})
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Check for blockquotes
|
||||
elif line.startswith('>'):
|
||||
quote_text = line.lstrip('> ').strip()
|
||||
# Continue reading consecutive blockquote lines
|
||||
quote_lines = [quote_text]
|
||||
# Blockquotes
|
||||
if stripped.startswith('>'):
|
||||
quote_lines = [stripped.lstrip('> ').strip()]
|
||||
i += 1
|
||||
while i < len(lines) and lines[i].strip().startswith('>'):
|
||||
quote_lines.append(lines[i].strip().lstrip('> ').strip())
|
||||
i += 1
|
||||
quote_content = ' '.join(quote_lines)
|
||||
text_nodes = parse_markdown_inline(quote_content)
|
||||
# CRITICAL: TEXT nodes must be wrapped in PARAGRAPH nodes within BLOCKQUOTE
|
||||
# Wix API: omit empty data objects, don't include them as {}
|
||||
paragraph_node = {
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'PARAGRAPH',
|
||||
'nodes': text_nodes,
|
||||
}
|
||||
blockquote_node = {
|
||||
nodes.append({
|
||||
'id': node_id,
|
||||
'type': 'BLOCKQUOTE',
|
||||
'nodes': [paragraph_node],
|
||||
}
|
||||
nodes.append(blockquote_node)
|
||||
})
|
||||
continue
|
||||
|
||||
# Check for unordered lists (handle both '- ' and '* ' markers)
|
||||
elif (line.startswith('- ') or line.startswith('* ') or
|
||||
(line.startswith('-') and len(line) > 1 and line[1] != '-') or
|
||||
(line.startswith('*') and len(line) > 1 and line[1] != '*')):
|
||||
# Unordered lists (including task lists)
|
||||
if (stripped.startswith('- ') or stripped.startswith('* ') or
|
||||
(stripped.startswith('-') and len(stripped) > 1 and stripped[1] != '-') or
|
||||
(stripped.startswith('*') and len(stripped) > 1 and stripped[1] != '*')):
|
||||
list_items = []
|
||||
list_marker = '- ' if line.startswith('-') else '* '
|
||||
# Process list items
|
||||
|
||||
while i < len(lines):
|
||||
current_line = lines[i].strip()
|
||||
# Check if this is a list item
|
||||
is_list_item = (current_line.startswith('- ') or current_line.startswith('* ') or
|
||||
(current_line.startswith('-') and len(current_line) > 1 and current_line[1] != '-') or
|
||||
(current_line.startswith('*') and len(current_line) > 1 and current_line[1] != '*'))
|
||||
@@ -286,12 +420,9 @@ def convert_content_to_ricos(content: str, images: List[str] = None) -> Dict[str
|
||||
if not is_list_item:
|
||||
break
|
||||
|
||||
# Extract item text (handle both '- ' and '-item' formats)
|
||||
if current_line.startswith('- ') or current_line.startswith('* '):
|
||||
item_text = current_line[2:].strip()
|
||||
elif current_line.startswith('-'):
|
||||
item_text = current_line[1:].strip()
|
||||
elif current_line.startswith('*'):
|
||||
elif current_line.startswith('-') or current_line.startswith('*'):
|
||||
item_text = current_line[1:].strip()
|
||||
else:
|
||||
item_text = current_line
|
||||
@@ -302,52 +433,48 @@ def convert_content_to_ricos(content: str, images: List[str] = None) -> Dict[str
|
||||
# Check for nested items (indented with 2+ spaces)
|
||||
while i < len(lines):
|
||||
next_line = lines[i]
|
||||
# Must be indented and be a list marker
|
||||
if next_line.startswith(' ') and (next_line.strip().startswith('- ') or
|
||||
next_line.strip().startswith('* ') or
|
||||
(next_line.strip().startswith('-') and len(next_line.strip()) > 1) or
|
||||
(next_line.strip().startswith('*') and len(next_line.strip()) > 1)):
|
||||
if (next_line.startswith(' ') and
|
||||
(next_line.strip().startswith('- ') or next_line.strip().startswith('* '))):
|
||||
nested_text = next_line.strip()
|
||||
if nested_text.startswith('- ') or nested_text.startswith('* '):
|
||||
nested_text = nested_text[2:].strip()
|
||||
elif nested_text.startswith('-'):
|
||||
nested_text = nested_text[1:].strip()
|
||||
elif nested_text.startswith('*'):
|
||||
elif nested_text.startswith('-') or nested_text.startswith('*'):
|
||||
nested_text = nested_text[1:].strip()
|
||||
list_items.append(nested_text)
|
||||
i += 1
|
||||
else:
|
||||
break
|
||||
|
||||
# Build list items with proper formatting
|
||||
# CRITICAL: TEXT nodes must be wrapped in PARAGRAPH nodes within LIST_ITEM
|
||||
# NOTE: LIST_ITEM nodes do NOT have a data field per Wix API schema
|
||||
# Wix API: omit empty data objects, don't include them as {}
|
||||
list_node_items = []
|
||||
for item_text in list_items:
|
||||
item_node_id = str(uuid.uuid4())
|
||||
# Detect task list items: "- [ ] task" or "- [x] task"
|
||||
task_match = re.match(r'^\[([ xX])\]\s*(.*)', item_text)
|
||||
if task_match:
|
||||
checked = task_match.group(1).lower() == 'x'
|
||||
prefix = '☑ ' if checked else '☐ '
|
||||
text_nodes = parse_markdown_inline(prefix + task_match.group(2))
|
||||
else:
|
||||
text_nodes = parse_markdown_inline(item_text)
|
||||
paragraph_node = {
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'PARAGRAPH',
|
||||
'nodes': text_nodes,
|
||||
}
|
||||
list_item_node = {
|
||||
'id': item_node_id,
|
||||
list_node_items.append({
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'LIST_ITEM',
|
||||
'nodes': [paragraph_node]
|
||||
}
|
||||
list_node_items.append(list_item_node)
|
||||
})
|
||||
|
||||
bulleted_list_node = {
|
||||
nodes.append({
|
||||
'id': node_id,
|
||||
'type': 'BULLETED_LIST',
|
||||
'nodes': list_node_items,
|
||||
}
|
||||
nodes.append(bulleted_list_node)
|
||||
})
|
||||
continue
|
||||
|
||||
# Check for ordered lists
|
||||
elif re.match(r'^\d+\.\s+', line):
|
||||
# Ordered lists
|
||||
if re.match(r'^\d+\.\s+', stripped):
|
||||
list_items = []
|
||||
while i < len(lines) and re.match(r'^\d+\.\s+', lines[i].strip()):
|
||||
item_text = re.sub(r'^\d+\.\s+', '', lines[i].strip())
|
||||
@@ -359,35 +486,30 @@ def convert_content_to_ricos(content: str, images: List[str] = None) -> Dict[str
|
||||
list_items.append(nested_text)
|
||||
i += 1
|
||||
|
||||
# CRITICAL: TEXT nodes must be wrapped in PARAGRAPH nodes within LIST_ITEM
|
||||
# NOTE: LIST_ITEM nodes do NOT have a data field per Wix API schema
|
||||
# Wix API: omit empty data objects, don't include them as {}
|
||||
list_node_items = []
|
||||
for item_text in list_items:
|
||||
item_node_id = str(uuid.uuid4())
|
||||
text_nodes = parse_markdown_inline(item_text)
|
||||
paragraph_node = {
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'PARAGRAPH',
|
||||
'nodes': text_nodes,
|
||||
}
|
||||
list_item_node = {
|
||||
'id': item_node_id,
|
||||
list_node_items.append({
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'LIST_ITEM',
|
||||
'nodes': [paragraph_node]
|
||||
}
|
||||
list_node_items.append(list_item_node)
|
||||
})
|
||||
|
||||
ordered_list_node = {
|
||||
nodes.append({
|
||||
'id': node_id,
|
||||
'type': 'ORDERED_LIST',
|
||||
'nodes': list_node_items,
|
||||
}
|
||||
nodes.append(ordered_list_node)
|
||||
})
|
||||
continue
|
||||
|
||||
# Check for images
|
||||
elif line.startswith('!['):
|
||||
img_match = re.match(r'!\[([^\]]*)\]\(([^)]+)\)', line)
|
||||
# Images
|
||||
if stripped.startswith('!['):
|
||||
img_match = re.match(r'!\[([^\]]*)\]\(([^)]+)\)', stripped)
|
||||
if img_match:
|
||||
alt_text = img_match.group(1)
|
||||
img_url = img_match.group(2)
|
||||
@@ -407,11 +529,10 @@ def convert_content_to_ricos(content: str, images: List[str] = None) -> Dict[str
|
||||
}
|
||||
})
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Regular paragraph
|
||||
else:
|
||||
# Collect consecutive non-empty lines as paragraph content
|
||||
para_lines = [line]
|
||||
para_lines = [stripped]
|
||||
i += 1
|
||||
while i < len(lines):
|
||||
next_line = lines[i].strip()
|
||||
@@ -423,6 +544,9 @@ def convert_content_to_ricos(content: str, images: List[str] = None) -> Dict[str
|
||||
next_line.startswith('* ') or
|
||||
next_line.startswith('>') or
|
||||
next_line.startswith('![') or
|
||||
next_line.startswith('```') or
|
||||
next_line.startswith('|') or
|
||||
re.match(r'^(---+|\*\*\*|___+)$', next_line) or
|
||||
re.match(r'^\d+\.\s+', next_line)):
|
||||
break
|
||||
para_lines.append(next_line)
|
||||
@@ -431,38 +555,27 @@ def convert_content_to_ricos(content: str, images: List[str] = None) -> Dict[str
|
||||
para_text = ' '.join(para_lines)
|
||||
text_nodes = parse_markdown_inline(para_text)
|
||||
|
||||
# Only add paragraph if there are text nodes
|
||||
if text_nodes:
|
||||
paragraph_node = {
|
||||
nodes.append({
|
||||
'id': node_id,
|
||||
'type': 'PARAGRAPH',
|
||||
'nodes': text_nodes,
|
||||
}
|
||||
nodes.append(paragraph_node)
|
||||
})
|
||||
|
||||
# Ensure at least one node exists
|
||||
# Wix API: omit empty data objects, don't include them as {}
|
||||
if not nodes:
|
||||
fallback_paragraph = {
|
||||
nodes.append({
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'PARAGRAPH',
|
||||
'nodes': [{
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
|
||||
'nodes': [],
|
||||
'textData': {
|
||||
'text': content[:500] if content else "This is a post from ALwrity.",
|
||||
'decorations': []
|
||||
}
|
||||
}],
|
||||
}
|
||||
nodes.append(fallback_paragraph)
|
||||
|
||||
# Per Wix Blog API documentation: richContent should ONLY contain 'nodes'
|
||||
# Do NOT include 'type', 'id', 'metadata', or 'documentStyle' at root level
|
||||
# These fields are for Ricos Document format, but Blog API expects just the nodes structure
|
||||
return {
|
||||
'nodes': nodes
|
||||
}
|
||||
|
||||
})
|
||||
|
||||
return {'nodes': nodes}
|
||||
|
||||
@@ -75,7 +75,10 @@ class WixLogger:
|
||||
logger.debug(f" Payload: {', '.join(parts)}")
|
||||
|
||||
if error_body and status_code >= 400:
|
||||
if isinstance(error_body, dict):
|
||||
error_msg = error_body.get('message', 'Unknown error')
|
||||
else:
|
||||
error_msg = str(error_body)
|
||||
logger.error(f" Error: {error_msg}")
|
||||
if status_code == 500:
|
||||
logger.error(" ⚠️ Internal server error - check Wix API status")
|
||||
|
||||
@@ -1,31 +1,124 @@
|
||||
from typing import Any, Dict
|
||||
from typing import Any, Dict, Optional
|
||||
import requests
|
||||
from urllib.parse import urlparse
|
||||
from loguru import logger
|
||||
|
||||
from .retry import wix_api_call_with_retry, WixAPIError
|
||||
|
||||
|
||||
def _is_valid_image_url(url: str) -> bool:
    """Check whether ``url`` looks like a publicly reachable image URL for Wix import.

    A URL is rejected when it:
      * is empty or not a string,
      * is a ``data:`` URI or uses any scheme other than http/https,
      * cannot be parsed or has no host component,
      * points at ``localhost``, ``example.com`` or a subdomain of it,
      * uses an IP literal that is not globally routable (loopback, private,
        link-local, unspecified and similar ranges).

    Args:
        url: Candidate image URL; surrounding whitespace is ignored.

    Returns:
        True when none of the rejection rules apply, otherwise False. The
        function never raises for malformed input.
    """
    if not url or not isinstance(url, str):
        return False
    url = url.strip()
    if url.startswith('data:'):
        return False

    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse raises on malformed netlocs such as an unclosed IPv6 bracket.
        return False
    if parsed.scheme not in ('http', 'https'):
        return False

    host = parsed.hostname or ''
    if not host:
        # e.g. "http:///image.png" has a scheme and a path but no host.
        return False
    if host in ('localhost', '127.0.0.1', 'example.com') or host.endswith('.example.com'):
        return False

    # Local import keeps this block self-contained; ipaddress is stdlib.
    import ipaddress
    try:
        address = ipaddress.ip_address(host)
    except ValueError:
        # Not an IP literal, so the host is a DNS name and is accepted.
        return True
    return address.is_global
|
||||
|
||||
|
||||
class WixMediaService:
    """Service for Wix Media Manager operations with retry logic and error handling."""

    def __init__(self, base_url: str):
        """Store the API base URL that the media endpoints are appended to."""
        self.base_url = base_url

    def import_image(self, access_token: str, image_url: str, display_name: str,
                     client_id: Optional[str] = None, site_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
        """
        Import external image to Wix Media Manager.

        Official endpoint: https://www.wixapis.com/site-media/v1/files/import
        Reference: https://dev.wix.com/docs/rest/assets/media/media-manager/files/import-file

        Args:
            access_token: Valid access token
            image_url: URL of the image to import
            display_name: Display name for the image
            client_id: Optional Wix client ID for wix-client-id header
            site_id: Optional Wix metaSiteId for wix-site-id header; when
                omitted it is looked up in the access token

        Returns:
            Media result dict with 'file' key, or None when the URL is
            rejected up front or the response lacks ``file.id``

        Raises:
            WixAPIError: On non-retryable failure or after retries exhausted
        """
        if not _is_valid_image_url(image_url):
            # str() because the validator also rejects None and non-string
            # values, which cannot be sliced.
            logger.warning(f"Skipping image import — URL not valid for Wix: {str(image_url)[:80]}...")
            return None

        # The validator judged the stripped form, so send that same form.
        image_url = image_url.strip()

        logger.info(f"Importing image to Wix: url={image_url[:80]}..., display_name={display_name}")
        headers = {
            'Authorization': f'Bearer {access_token}',
            'Content-Type': 'application/json',
        }
        if client_id:
            headers['wix-client-id'] = client_id
        if not site_id:
            from .utils import extract_meta_from_token
            meta_info = extract_meta_from_token(access_token)
            site_id = meta_info.get('metaSiteId')
        if site_id:
            headers['wix-site-id'] = site_id
        payload = {
            'url': image_url,
            'mediaType': 'IMAGE',
            'displayName': display_name,
        }
        # Correct endpoint per Wix API documentation
        endpoint = f"{self.base_url}/site-media/v1/files/import"

        try:
            result = wix_api_call_with_retry(
                'POST', endpoint, headers, json_payload=payload, max_attempts=2
            )
            if result and 'file' in result and 'id' in result['file']:
                logger.info(f"Image imported successfully: {result['file']['id'][:16]}...")
                return result
            logger.warning(f"Image import returned unexpected structure: {list(result.keys()) if isinstance(result, dict) else type(result)}")
            return None
        except WixAPIError as e:
            # Log a status-specific hint, then let the caller decide.
            if e.status_code == 403:
                logger.error("Image import forbidden (403): OAuth app may lack MEDIA.SITE_MEDIA_FILES_IMPORT scope")
            elif e.status_code == 400:
                logger.error(f"Image import bad request (400): {e.response_body}")
            elif e.status_code == 404:
                logger.error("Image import endpoint not found (404) — Wix Media API may not be available for this site")
            else:
                logger.error(f"Image import failed after retries: HTTP {e.status_code} - {e.response_body}")
            raise
        except Exception as e:
            logger.error(f"Unexpected error importing image: {e}")
            raise

    def get_image_url(self, access_token: str, media_id: str) -> Optional[str]:
        """
        Get public URL for a Wix media item.

        Best-effort lookup: any failure is logged as a warning and reported
        as None rather than raised.

        Args:
            access_token: Valid access token
            media_id: Wix media ID

        Returns:
            Public URL string, or None
        """
        url = f"{self.base_url}/site-media/v1/files/{media_id}"
        headers = {
            'Authorization': f'Bearer {access_token}',
            'Content-Type': 'application/json',
        }

        try:
            result = wix_api_call_with_retry('GET', url, headers, max_attempts=2)
            if result and 'file' in result:
                return result['file'].get('url')
            return None
        except Exception as e:
            logger.warning(f"Failed to get image URL for {media_id}: {e}")
            return None
|
||||
|
||||
168
backend/services/integrations/wix/retry.py
Normal file
168
backend/services/integrations/wix/retry.py
Normal file
@@ -0,0 +1,168 @@
|
||||
"""
|
||||
Retry utilities for Wix API calls with exponential backoff.
|
||||
|
||||
Production-grade retry logic that respects Wix rate limits and handles
|
||||
transient failures gracefully.
|
||||
"""
|
||||
|
||||
import time
|
||||
import random
|
||||
from typing import Callable, TypeVar, Optional
|
||||
from loguru import logger
|
||||
|
||||
T = TypeVar('T')
|
||||
|
||||
|
||||
class WixAPIError(Exception):
    """Wix API failure that carries the HTTP status code and response body.

    ``status_code`` is None when the failure has no HTTP status attached.
    """

    def __init__(self, message: str, status_code: Optional[int] = None, response_body: Optional[str] = None):
        """Record the message plus the optional HTTP status and body excerpt."""
        super().__init__(message)
        self.response_body = response_body
        self.status_code = status_code

    def is_retryable(self) -> bool:
        """Return True when repeating the request may succeed.

        Errors without a status code are treated as retryable. Otherwise only
        429 (rate limit), 500 (sometimes transient) and the gateway errors
        502/503/504 qualify.
        """
        code = self.status_code
        if code is not None:
            return code in (429, 500, 502, 503, 504)
        return True

    def is_rate_limit(self) -> bool:
        """Return True when the status code is 429."""
        return self.status_code == 429
|
||||
|
||||
|
||||
def _backoff_delay(attempt: int, base_delay: float, max_delay: float) -> float:
    """Return the jittered exponential-backoff wait, in seconds, after ``attempt``.

    The raw delay doubles per attempt starting at ``base_delay`` and is capped
    at ``max_delay``. Jitter of +/-25% is applied to avoid a thundering herd,
    and the result is never below 100 ms.
    """
    delay = min(base_delay * (2 ** (attempt - 1)), max_delay)
    jitter = delay * 0.25
    return max(0.1, delay + random.uniform(-jitter, jitter))


def with_retry(
    fn: Callable[[], T],
    max_attempts: int = 3,
    base_delay: float = 1.0,
    max_delay: float = 30.0,
    retryable_exceptions: tuple = (Exception,),
    operation_name: str = "Wix API call"
) -> T:
    """
    Execute a function with exponential backoff retry logic.

    ``WixAPIError`` is handled first: a non-retryable one (per
    ``is_retryable()``) is re-raised immediately, and a rate-limit error waits
    at least two seconds. Any other exception matching
    ``retryable_exceptions`` is retried with plain backoff. Exceptions
    matching neither clause propagate unchanged.

    Args:
        fn: Function to execute (should make the API call)
        max_attempts: Maximum number of attempts (default: 3)
        base_delay: Initial delay in seconds (default: 1.0)
        max_delay: Maximum delay in seconds (default: 30.0)
        retryable_exceptions: Tuple of exception types to retry on
        operation_name: Name for logging

    Returns:
        Result of fn()

    Raises:
        WixAPIError: If all retries are exhausted. The last ``WixAPIError`` is
            re-raised as is; any other last exception is wrapped and chained
            as ``__cause__``.
        Exception: If a non-retryable exception occurs
    """
    last_exception = None

    for attempt in range(1, max_attempts + 1):
        try:
            return fn()
        except WixAPIError as e:
            last_exception = e
            if attempt >= max_attempts:
                break
            if not e.is_retryable():
                logger.warning(f"{operation_name}: non-retryable error (HTTP {e.status_code}), failing fast")
                raise

            actual_delay = _backoff_delay(attempt, base_delay, max_delay)
            if e.is_rate_limit():
                # For rate limits, use a longer base delay
                actual_delay = max(actual_delay, 2.0)
                logger.warning(f"{operation_name}: rate limited (429), waiting {actual_delay:.1f}s before retry {attempt + 1}/{max_attempts}")
            else:
                logger.warning(f"{operation_name}: attempt {attempt}/{max_attempts} failed (HTTP {e.status_code}), waiting {actual_delay:.1f}s before retry")

            time.sleep(actual_delay)

        except retryable_exceptions as e:
            last_exception = e
            if attempt >= max_attempts:
                break

            actual_delay = _backoff_delay(attempt, base_delay, max_delay)
            logger.warning(f"{operation_name}: attempt {attempt}/{max_attempts} failed ({type(e).__name__}), waiting {actual_delay:.1f}s before retry")
            time.sleep(actual_delay)

    # All retries exhausted
    if isinstance(last_exception, WixAPIError):
        raise last_exception
    if last_exception is not None:
        # Chain the original error so its traceback survives the wrapping.
        raise WixAPIError(
            f"{operation_name}: failed after {max_attempts} attempts: {last_exception}"
        ) from last_exception

    raise WixAPIError(f"{operation_name}: failed after {max_attempts} attempts")
|
||||
|
||||
|
||||
def wix_api_call_with_retry(
    method: str,
    url: str,
    headers: dict,
    json_payload: Optional[dict] = None,
    max_attempts: int = 3
) -> dict:
    """
    Convenience wrapper for making Wix API calls with retry logic.

    Each request uses a 30 second timeout. Responses with status >= 400
    become a ``WixAPIError`` carrying the status code and the first 500
    characters of the body, so ``with_retry`` can decide whether to retry.

    Args:
        method: HTTP method ('GET', 'POST', 'PUT' or 'DELETE', any case)
        url: Full API URL
        headers: Request headers
        json_payload: Optional JSON payload for POST/PUT
        max_attempts: Maximum retry attempts

    Returns:
        Parsed JSON response, or an empty dict when a successful response has
        no body

    Raises:
        ValueError: If ``method`` is not supported. Raised before any request
            is made, so a caller bug is not retried.
        WixAPIError: On failure after retries, or when a successful response
            carries a body that is not valid JSON
    """
    verb = method.upper()
    if verb not in ('GET', 'POST', 'PUT', 'DELETE'):
        raise ValueError(f"Unsupported HTTP method: {method}")

    import requests

    def _body_excerpt(resp) -> str:
        """Return up to 500 characters of the response body for error context."""
        try:
            return resp.text[:500]
        except Exception:
            return str(resp.content)[:500]

    def _call():
        request_kwargs = {'headers': headers, 'timeout': 30}
        if verb in ('POST', 'PUT'):
            request_kwargs['json'] = json_payload
        resp = requests.request(verb, url, **request_kwargs)

        if resp.status_code >= 400:
            raise WixAPIError(
                f"Wix API {method} {url} failed: HTTP {resp.status_code}",
                status_code=resp.status_code,
                response_body=_body_excerpt(resp)
            )

        if not resp.content:
            # A bodiless success (e.g. 204) is not an error and must not be
            # retried, since that would repeat the request's side effects.
            return {}
        try:
            return resp.json()
        except ValueError as exc:
            # The 2xx status makes this error non-retryable in with_retry,
            # so the already-successful request is not sent again.
            raise WixAPIError(
                f"Wix API {method} {url} returned a non-JSON body: HTTP {resp.status_code}",
                status_code=resp.status_code,
                response_body=_body_excerpt(resp)
            ) from exc

    return with_retry(
        _call,
        max_attempts=max_attempts,
        operation_name=f"Wix {method} {url.split('/')[-1]}"
    )
|
||||
@@ -26,10 +26,6 @@ def build_seo_data(seo_metadata: Dict[str, Any], default_title: str = None) -> O
|
||||
Wix seoData object with settings.keywords and tags array, or None if empty
|
||||
"""
|
||||
seo_data = {
|
||||
'settings': {
|
||||
'keywords': [],
|
||||
'preventAutoRedirect': False # Required by Wix API schema
|
||||
},
|
||||
'tags': []
|
||||
}
|
||||
|
||||
@@ -77,11 +73,7 @@ def build_seo_data(seo_metadata: Dict[str, Any], default_title: str = None) -> O
|
||||
# Keep main keyword + next 4 most important
|
||||
keywords_list = keywords_list[:5]
|
||||
|
||||
seo_data['settings']['keywords'] = keywords_list
|
||||
|
||||
# Validate keywords list is not empty (or ensure at least one keyword exists)
|
||||
if not seo_data['settings']['keywords']:
|
||||
logger.warning("No keywords found in SEO metadata, adding empty keywords array")
|
||||
seo_data['settings'] = {'keywords': keywords_list}
|
||||
|
||||
# Build tags array (meta tags, Open Graph, etc.)
|
||||
tags_list = []
|
||||
|
||||
@@ -85,24 +85,45 @@ def decode_wix_token(access_token: str) -> Dict[str, Any]:
|
||||
if token_str.startswith('OauthNG.JWS.'):
|
||||
jwt_part = token_str[12:]
|
||||
return jwt.decode(jwt_part, options={"verify_signature": False, "verify_aud": False})
|
||||
if token_str.startswith('IST.'):
|
||||
jwt_part = token_str[4:]
|
||||
return jwt.decode(jwt_part, options={"verify_signature": False, "verify_aud": False})
|
||||
return jwt.decode(token_str, options={"verify_signature": False, "verify_aud": False})
|
||||
|
||||
|
||||
def _extract_data_payload(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Return the ``data`` claim of a decoded token payload as a dict.

    The claim may already be a dict or may be a JSON-encoded string. Anything
    that does not end up as a dict (claim missing, null, unparseable JSON, or
    JSON that is not an object) yields an empty dict, so callers can chain
    ``.get()`` safely.

    Args:
        payload: Decoded token payload.

    Returns:
        The ``data`` claim as a dict, or ``{}``.
    """
    data_payload = payload.get('data', {})
    if isinstance(data_payload, str):
        try:
            data_payload = json.loads(data_payload)
        except (ValueError, RecursionError):
            # json.loads reports bad JSON as JSONDecodeError, a ValueError
            # subclass; extreme nesting can hit the recursion limit.
            data_payload = {}
    return data_payload if isinstance(data_payload, dict) else {}
|
||||
|
||||
|
||||
def extract_meta_from_token(access_token: str) -> Dict[str, Optional[str]]:
    """Pull site and member identifiers out of a Wix access token.

    The values come from the ``instance`` object of the token's data payload.
    When ``metaSiteId`` is missing there, two fallbacks are tried in order:
    the data payload's ``tenant.id`` (skipped for ``IST.`` tokens), then a
    top-level ``metaSiteId`` or ``site_id`` claim.

    Args:
        access_token: Raw access token.

    Returns:
        Dict with keys ``siteMemberId``, ``metaSiteId`` and ``permissions``.
        All three are None when the token cannot be decoded or any other
        error occurs.
    """
    try:
        payload = decode_wix_token(access_token)
        data_payload = _extract_data_payload(payload)
        instance = (data_payload or {}).get('instance', {}) or {}

        meta = {
            'siteMemberId': instance.get('siteMemberId'),
            'metaSiteId': instance.get('metaSiteId'),
            'permissions': instance.get('permissions'),
        }

        # Only fall back to tenant.id for OAuth tokens (not IST. API keys)
        # IST. tokens have tenant.id = account_id, which is NOT the site metaSiteId
        is_api_key = str(access_token).startswith('IST.')
        if not (meta.get('metaSiteId') or is_api_key):
            tenant_id = (data_payload.get('tenant', {}) or {}).get('id')
            if tenant_id:
                meta['metaSiteId'] = tenant_id

        if not meta.get('metaSiteId'):
            top_level_id = payload.get('metaSiteId') or payload.get('site_id')
            if top_level_id:
                meta['metaSiteId'] = top_level_id

        return meta
    except Exception:
        # Best-effort lookup: an undecodable token yields empty metadata.
        return {'siteMemberId': None, 'metaSiteId': None, 'permissions': None}
|
||||
|
||||
|
||||
@@ -86,185 +86,6 @@ class StrategyArchitectAgent(SIFBaseAgent):
|
||||
logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
|
||||
return []
|
||||
|
||||
class ContentGuardianAgent(SIFBaseAgent):
|
||||
"""Agent for preventing cannibalization and ensuring content originality."""
|
||||
|
||||
CANNIBALIZATION_THRESHOLD = 0.85 # Similarity threshold for cannibalization warning
|
||||
ORIGINALITY_THRESHOLD = 0.75 # Minimum originality score
|
||||
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService, sif_service: Any = None):
|
||||
super().__init__(intelligence_service)
|
||||
self.sif_service = sif_service
|
||||
|
||||
async def check_cannibalization(self, new_draft: str) -> Dict[str, Any]:
    """Check if a new draft competes semantically with existing pages.

    The draft is searched against the intelligence index (top hit only) and
    the hit's ``score`` is compared with ``CANNIBALIZATION_THRESHOLD``.

    Args:
        new_draft: Draft text; ``None``/empty/short (<50 stripped chars) drafts
            are reported as "too short" instead of being searched.

    Returns:
        A dict that always carries ``warning`` (bool) plus, depending on the
        path, ``uniqueness_score``, ``reason``, ``error`` or the details of the
        conflicting document.
    """
    # The length is only used for logging; guard it so a None draft reaches the
    # "too short" branch below instead of raising before the try block.
    self._log_agent_operation(
        "Checking for semantic cannibalization",
        draft_length=len(new_draft or ""),
    )

    try:
        if not self.intelligence.is_initialized():
            logger.error(f"[{self.__class__.__name__}] Intelligence service not initialized")
            return {"warning": False, "error": "Service not initialized"}

        if not new_draft or len(new_draft.strip()) < 50:
            logger.warning(f"[{self.__class__.__name__}] Draft too short for meaningful analysis")
            return {"warning": False, "reason": "Draft too short"}

        results = await self.intelligence.search(new_draft, limit=1)

        if not results:
            logger.info(f"[{self.__class__.__name__}] No similar content found - draft is unique")
            return {"warning": False, "uniqueness_score": 1.0}

        top_result = results[0]
        similarity_score = top_result.get('score', 0.0)

        logger.debug(f"[{self.__class__.__name__}] Top similarity score: {similarity_score:.4f}")

        if similarity_score > self.CANNIBALIZATION_THRESHOLD:
            warning_data = {
                "warning": True,
                "similar_to": top_result.get('id', 'unknown'),
                "score": similarity_score,
                "threshold": self.CANNIBALIZATION_THRESHOLD,
                "recommendation": "Consider revising the draft to target a different angle or merge with existing content"
            }
            logger.warning(f"[{self.__class__.__name__}] Cannibalization detected: {warning_data}")
            return warning_data

        logger.info(f"[{self.__class__.__name__}] No cannibalization detected. Draft is sufficiently unique.")
        return {"warning": False, "uniqueness_score": 1.0 - similarity_score}

    except Exception as e:
        # Best-effort tool: failures are reported in the result, not raised.
        logger.error(f"[{self.__class__.__name__}] Failed to check cannibalization: {e}")
        logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
        return {"warning": False, "error": str(e)}
|
||||
|
||||
async def verify_originality(self, text: str, competitor_index: Any) -> Dict[str, Any]:
    """Verify originality against competitor content index.

    NOTE: this is still a stub. ``competitor_index`` is accepted but never
    read, and the returned score is a fixed placeholder.

    Args:
        text: Content to check; ``None``/empty/short (<50 stripped chars) text
            yields an ``originality_score`` of 0.0 with a ``reason``.
        competitor_index: Reserved for the future cross-index search.

    Returns:
        Dict with ``originality_score`` and either placeholder metadata,
        a ``reason`` or an ``error`` message.
    """
    # Guard the logging-only length so a None text falls through to the
    # "too short" branch instead of raising outside the try block.
    self._log_agent_operation(
        "Verifying originality against competitors",
        text_length=len(text or ""),
    )

    try:
        if not text or len(text.strip()) < 50:
            logger.warning(f"[{self.__class__.__name__}] Text too short for meaningful originality check")
            return {"originality_score": 0.0, "reason": "Text too short"}

        # STUB: Implement cross-index search against competitor content.
        # This would search the text against a competitor-specific index.

        logger.info(f"[{self.__class__.__name__}] Originality verification stub completed")
        return {
            "originality_score": 0.95,  # Placeholder
            "confidence": 0.8,
            "method": "semantic_comparison",
            "notes": "Competitor index integration pending"
        }

    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] Failed to verify originality: {e}")
        logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
        return {"originality_score": 0.0, "error": str(e)}
|
||||
|
||||
async def style_enforcer(self, text: str, style_guidelines: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """
    Tool: Ensures content adheres to brand voice and style guidelines.

    When no guidelines are passed and ``self.sif_service`` is set, the method
    searches the intelligence index for a ``website_analysis`` document and
    derives tone/style from its ``full_report``.  The checks themselves are
    simple heuristics (contractions in formal text, average sentence length).

    Args:
        text: Content to evaluate.
        style_guidelines: Optional guidelines; only ``tone`` is inspected here.

    Returns:
        Dict with ``compliance_score``, ``issues``, ``is_compliant`` and
        ``guidelines_source`` ("provided", "sif_index" or "none"); or
        ``{"error": ...}`` if the evaluation itself fails.
    """
    # Length is only logged; guard it so None reaches the "No text" branch.
    self._log_agent_operation("Enforcing style guidelines", text_length=len(text or ""))

    try:
        if not text:
            return {"compliance_score": 0.0, "issues": ["No text provided"]}

        # Track where the guidelines really came from.  Deriving this at the
        # end from `style_guidelines` is wrong because the SIF lookup below
        # overwrites that variable.
        guidelines_source = "provided" if style_guidelines else "none"

        # 1. Fetch Style Guidelines from SIF if not provided
        if not style_guidelines and self.sif_service:
            try:
                # We assume the most relevant 'website_analysis' doc contains the guidelines
                results = await self.intelligence.search("website analysis brand voice style", limit=1)
                if results:
                    import json
                    res = results[0]
                    metadata_str = res.get('object')
                    metadata = json.loads(metadata_str) if isinstance(metadata_str, str) else (metadata_str or res)

                    if metadata.get('type') == 'website_analysis':
                        report = metadata.get('full_report', {})
                        style_guidelines = {
                            "tone": report.get('brand_analysis', {}).get('brand_voice', 'neutral'),
                            "style_patterns": report.get('style_patterns', {}),
                            "writing_style": report.get('writing_style', {})
                        }
                        guidelines_source = "sif_index"
                        logger.info(f"[{self.__class__.__name__}] Retrieved style guidelines from SIF: {style_guidelines.get('tone')}")
            except Exception as e:
                # Guideline lookup is optional; continue with heuristics only.
                logger.warning(f"[{self.__class__.__name__}] Failed to retrieve style guidelines from SIF: {e}")

        issues = []
        score = 1.0

        # Basic Heuristic Checks (Placeholder for LLM-based style analysis)

        # 1. Tone Check: formal/professional tone should avoid contractions.
        # str(... or '') tolerates a missing or non-string tone value.
        tone = str(style_guidelines.get('tone') or '').lower() if style_guidelines else ''
        if 'formal' in tone or 'professional' in tone:
            contractions = ["can't", "won't", "don't", "it's"]
            lowered = text.lower()
            found_contractions = [c for c in contractions if c in lowered]
            if found_contractions:
                issues.append(f"Found contractions in formal text: {', '.join(found_contractions[:3])}...")
                score -= 0.1

        # 2. Length/Sentence Structure (simple metric).  Only non-empty
        # fragments count as sentences; otherwise the empty piece after the
        # final '.' inflates the divisor and hides overly long sentences.
        sentences = [s for s in text.split('.') if s.strip()]
        avg_len = sum(len(s.split()) for s in sentences) / max(1, len(sentences))
        if avg_len > 25:
            issues.append("Average sentence length is too high (>25 words). Consider shortening.")
            score -= 0.1

        return {
            "compliance_score": max(0.0, score),
            "issues": issues,
            "is_compliant": score > 0.8,
            "guidelines_source": guidelines_source
        }

    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] Style enforcement failed: {e}")
        return {"error": str(e)}
|
||||
|
||||
async def safety_filter(self, text: str) -> Dict[str, Any]:
    """
    Tool: Flags potentially harmful, offensive, or sensitive content.

    A small keyword blocklist is matched against the lower-cased text using
    word boundaries, so keywords are found at the start/end of the text and
    next to punctuation, but not inside longer words ("skills", "adulthood").

    Args:
        text: Content to screen.

    Returns:
        Dict with ``is_safe``, ``flags`` (matched keywords in blocklist order),
        ``safety_score`` and ``action``; or ``{"error": ...}`` on failure.
    """
    # Length is only logged; guard it so a None text is handled (and reported)
    # inside the try block rather than raising here.
    self._log_agent_operation("Running safety filter", text_length=len(text or ""))

    try:
        import re  # local import keeps this tool self-contained

        # Basic Keyword Blocklist (Placeholder for LLM/Safety Model)
        # In production, this should call a dedicated safety API (e.g., OpenAI Moderation, Llama Guard)
        unsafe_keywords = [
            "hate", "kill", "murder", "attack", "destroy",  # Violent
            "scam", "fraud", "steal",  # Illegal
            "explicit", "adult"  # NSFW
        ]

        text_lower = text.lower()

        # One pass over the text; \b gives real word boundaries, unlike the
        # former " keyword " check that required a space on both sides.
        pattern = re.compile(
            r"\b(?:" + "|".join(re.escape(k) for k in unsafe_keywords) + r")\b"
        )
        hits = set(pattern.findall(text_lower))
        found_flags = [keyword for keyword in unsafe_keywords if keyword in hits]

        is_safe = not found_flags

        return {
            "is_safe": is_safe,
            "flags": found_flags,
            "safety_score": 1.0 if is_safe else 0.0,
            "action": "approve" if is_safe else "flag_for_review"
        }

    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] Safety filter failed: {e}")
        return {"error": str(e)}
|
||||
|
||||
class LinkGraphAgent(SIFBaseAgent):
|
||||
"""
|
||||
Agent for internal link suggestions, graph management, and authority analysis.
|
||||
|
||||
@@ -40,6 +40,7 @@ from .specialized_agents import (
|
||||
)
|
||||
|
||||
from .trend_surfer_agent import TrendSurferAgent
|
||||
from .content_gap_radar_agent import ContentGapRadarAgent
|
||||
|
||||
# Agent Orchestrator
|
||||
from .agent_orchestrator import (
|
||||
@@ -67,6 +68,7 @@ __all__ = [
|
||||
'SEOOptimizationAgent',
|
||||
'SocialAmplificationAgent',
|
||||
'TrendSurferAgent',
|
||||
'ContentGapRadarAgent',
|
||||
'ALwrityAgentOrchestrator',
|
||||
'orchestration_service'
|
||||
]
|
||||
|
||||
@@ -230,7 +230,7 @@ class ALwrityAgentOrchestrator:
|
||||
# Content Guardian Agent
|
||||
if enabled_by_key.get("content_guardian", True):
|
||||
try:
|
||||
from services.intelligence.sif_agents import ContentGuardianAgent
|
||||
from services.intelligence.agents.specialized.content_guardian import ContentGuardianAgent
|
||||
from services.intelligence.txtai_service import TxtaiIntelligenceService
|
||||
|
||||
# Initialize intelligence service if not already available
|
||||
@@ -248,6 +248,19 @@ class ALwrityAgentOrchestrator:
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize ContentGuardianAgent: {e}")
|
||||
|
||||
# Content Gap Radar Agent
|
||||
if enabled_by_key.get("content_gap_radar", True):
|
||||
try:
|
||||
from services.intelligence.agents import ContentGapRadarAgent
|
||||
from services.intelligence.txtai_service import TxtaiIntelligenceService
|
||||
intel_service = TxtaiIntelligenceService(self.user_id)
|
||||
self.content_gap_radar_agent = ContentGapRadarAgent(intel_service, self.user_id)
|
||||
self.agents['content_gap_radar'] = self.content_gap_radar_agent
|
||||
initialized_agents.append("Content Gap Radar")
|
||||
logger.info(f"Initialized ContentGapRadarAgent for user {self.user_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize ContentGapRadarAgent: {e}")
|
||||
|
||||
logger.info(f"Created {len(self.agents)} specialized agents for user {self.user_id}")
|
||||
|
||||
# Log initialization activity
|
||||
@@ -449,7 +462,8 @@ class ALwrityAgentOrchestrator:
|
||||
"competitor": ["Competitor monitoring", "Threat analysis", "Response generation", "Strategy execution"],
|
||||
"seo": ["SEO auditing", "Issue prioritization", "Auto-fixing", "Strategy generation"],
|
||||
"social": ["Social monitoring", "Content adaptation", "Engagement optimization", "Distribution management"],
|
||||
"trend": ["Trend detection", "Opportunity analysis", "Content angle generation"]
|
||||
"trend": ["Trend detection", "Opportunity analysis", "Content angle generation"],
|
||||
"content_gap_radar": ["Content gap detection", "SERP opportunity scoring", "Competitor content deep-dive", "ROI-based topic prioritization", "Content brief generation"]
|
||||
}
|
||||
|
||||
# Service class for agent orchestration
|
||||
|
||||
466
backend/services/intelligence/agents/content_gap_radar_agent.py
Normal file
466
backend/services/intelligence/agents/content_gap_radar_agent.py
Normal file
@@ -0,0 +1,466 @@
|
||||
"""
|
||||
Content Gap Radar Agent
|
||||
|
||||
Scores and prioritizes content opportunities by combining SIF semantic gap analysis,
|
||||
SERP ranking presence (Google CSE), competitor content deep-dive (Exa), and trend
|
||||
momentum into a single ROI score per topic.
|
||||
|
||||
Phase 3 of the Content Gap Radar feature.
|
||||
"""
|
||||
|
||||
import traceback
|
||||
from typing import List, Dict, Any, Optional
|
||||
from loguru import logger
|
||||
|
||||
from services.intelligence.agents.specialized import SIFBaseAgent
|
||||
from services.intelligence.agents.specialized.strategy_architect import StrategyArchitectAgent
|
||||
from services.intelligence.agents.trend_surfer_agent import TrendSurferAgent
|
||||
from services.intelligence.agents.core_agent_framework import TaskProposal
|
||||
from services.intelligence.txtai_service import TxtaiIntelligenceService
|
||||
from services.seo_tools.serp_gap_service import SerpGapService
|
||||
from services.seo_tools.competitor_content_service import CompetitorContentService
|
||||
|
||||
|
||||
class ContentGapRadarAgent(SIFBaseAgent):
|
||||
"""
|
||||
Agent that scores and prioritizes content opportunities by combining
|
||||
SIF semantic gap analysis, SERP ranking presence, Exa competitor content,
|
||||
and trend momentum into a single ROI score.
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    intelligence_service: TxtaiIntelligenceService,
    user_id: str,
    **kwargs,
):
    """Wire up the radar agent and the collaborators it scores with.

    Args:
        intelligence_service: Shared intelligence service, forwarded to the
            base class and to the internal ``StrategyArchitectAgent``.
        user_id: Owner of this agent instance.
        **kwargs: Extra framework arguments passed through to the base class.
    """
    super().__init__(
        intelligence_service,
        user_id,
        agent_type="content_gap_radar",
        **kwargs,
    )
    self.user_id = user_id

    # Data sources combined by analyze().
    self.serp_service = SerpGapService()
    self.competitor_content_service = CompetitorContentService()
    self.strategy_architect = StrategyArchitectAgent(intelligence_service, user_id)
|
||||
|
||||
async def analyze(
    self,
    competitor_domains: List[str],
    competitor_indices: Optional[List[Any]] = None,
    topics: Optional[List[str]] = None,
    bypass_cache: bool = False,
) -> Dict[str, Any]:
    """
    Run the full content gap radar pipeline.

    Steps: (1) derive topics from SIF semantic gaps when none are given,
    (2) fetch SERP gap data per topic, (3) fetch Exa competitor content for
    the first six topics, (4) fetch trend signals, (5) score every topic via
    ``_score_topic`` and (6) return the topics sorted by ROI with a summary.

    Args:
        competitor_domains: Known competitor domains.
        competitor_indices: SIF index positions for competitor docs.
        topics: Optional explicit topic list (derived from SIF if omitted).
        bypass_cache: Forwarded to the SERP and Exa services.

    Returns:
        ``{"gaps": [...], "summary": {...}}``; on failure the lists are empty
        and an ``error`` message is included instead of raising.
    """
    self._log_agent_operation(
        "Running content gap radar",
        competitor_count=len(competitor_domains),
        topics_provided=bool(topics),
    )

    tag = self.__class__.__name__
    try:
        sif_gaps = []

        # Step 1: no explicit topics -> use the first 12 SIF semantic gaps.
        if not topics:
            sif_gaps = await self.strategy_architect.find_semantic_gaps(
                competitor_indices or []
            )
            topics = [gap["topic"] for gap in sif_gaps[:12]]
            logger.info(f"[{tag}] Derived {len(topics)} topics from SIF gaps")

        if not topics:
            logger.info(f"[{tag}] No topics to analyze")
            return {"gaps": [], "summary": {}}

        # Topics were supplied by the caller: still try to load SIF data so
        # scoring can use it, but treat a failure here as non-fatal.
        if not sif_gaps:
            try:
                sif_gaps = await self.strategy_architect.find_semantic_gaps(
                    competitor_indices or []
                )
            except Exception as e:
                logger.warning(f"[{tag}] SIF gap fetch failed (non-fatal): {e}")
                sif_gaps = []

        # Lookup tables keyed by topic for cross-referencing.
        sif_map = {gap["topic"]: gap for gap in sif_gaps}

        # Step 2: SERP gap analysis
        serp_data = await self.serp_service.analyze_topic_gaps(
            topics, competitor_domains, bypass_cache=bypass_cache
        )
        serp_map = {entry["topic"]: entry for entry in serp_data.get("gaps", [])}

        # Step 3: Exa deep-dive (top 6 topics — paid API)
        exa_data = await self.competitor_content_service.deep_dive(
            topics[:6], competitor_domains, bypass_cache=bypass_cache
        )
        exa_map = {entry["topic"]: entry for entry in exa_data.get("results", [])}

        # Step 4: Trend momentum data
        trend_signals = await TrendSurferAgent(
            self.intelligence, self.user_id
        ).surf_trends()

        # Step 5: Score each topic, best ROI first.
        ranked = sorted(
            (
                self._score_topic(
                    topic=topic,
                    sif_map=sif_map,
                    serp_map=serp_map,
                    exa_map=exa_map,
                    trend_signals=trend_signals,
                )
                for topic in topics
            ),
            key=lambda item: item["roi_score"],
            reverse=True,
        )

        # Step 6: Summary
        tally = {
            level: sum(1 for item in ranked if item["priority"] == level)
            for level in ("high", "medium", "low")
        }

        logger.info(
            f"[{tag}] Scored {len(ranked)} gaps: "
            f"{tally['high']} high, {tally['medium']} medium, {tally['low']} low"
        )

        return {
            "gaps": ranked,
            "summary": {
                "total_topics_analyzed": len(topics),
                "high_priority": tally["high"],
                "medium_priority": tally["medium"],
                "low_priority": tally["low"],
            },
        }

    except Exception as e:
        logger.error(f"[{tag}] Content gap radar failed: {e}")
        logger.error(f"[{tag}] Full traceback: {traceback.format_exc()}")
        return {"gaps": [], "summary": {}, "error": str(e)}
|
||||
|
||||
async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
    """
    Propose high-ROI content tasks from gap radar analysis.

    Reads ``onboarding_data.competitor_focus.top_competitor_domains`` from
    ``context``, runs ``analyze`` and turns the three best high/medium
    priority gaps into ``TaskProposal`` objects.

    Args:
        context: Workflow context; missing or ``None`` sections are treated
            as empty.

    Returns:
        Up to three proposals; an empty list when there are no competitor
        domains or the analysis fails.
    """
    proposals = []

    # `or {}` / `or []` also covers keys that are present but set to None,
    # which `.get(key, default)` alone would pass through and crash on.
    onboarding = (context or {}).get("onboarding_data") or {}
    competitor_focus = onboarding.get("competitor_focus") or {}
    competitor_domains = competitor_focus.get("top_competitor_domains") or []

    if not competitor_domains:
        logger.info(f"[{self.__class__.__name__}] No competitor domains in context, skipping")
        return proposals

    try:
        result = await self.analyze(
            competitor_domains=competitor_domains,
            competitor_indices=[],
        )
    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] propose_daily_tasks failed: {e}")
        return proposals

    actionable = [
        gap for gap in result.get("gaps", [])
        if gap["priority"] in ("high", "medium")
    ]
    actionable.sort(key=lambda gap: gap["roi_score"], reverse=True)

    for gap in actionable[:3]:
        pillar_id = self._action_to_pillar(gap["recommended_action"])
        writes_new_content = pillar_id == "generate"

        # Keep the title in line with the action: "optimize" recommendations
        # are routed to the SEO dashboard, so "Write about" would mislead.
        if writes_new_content:
            title = f"Write about: {gap['topic']}"
            action_url = "/blog-writer"
        else:
            title = f"Optimize content for: {gap['topic']}"
            action_url = "/seo-dashboard#content-gap-radar"

        scoring = gap["scoring"]
        proposals.append(TaskProposal(
            title=title,
            description=gap["recommended_action"],
            pillar_id=pillar_id,
            priority=gap["priority"],
            estimated_time=60 if writes_new_content else 30,
            source_agent="ContentGapRadarAgent",
            reasoning=(
                f"Content gap with {scoring['gap_size']:.0%} gap size, "
                f"{scoring['volume']:.0%} volume, "
                f"{scoring['trend']:.0%} trend momentum, "
                f"ROI {gap['roi_score']:.0%}"
            ),
            action_type="navigate",
            action_url=action_url,
            context_data={"gap": gap},
        ))

    return proposals
|
||||
|
||||
@staticmethod
def _action_to_pillar(recommended_action: str) -> str:
    """Map a recommended action to a workflow pillar id.

    Actions mentioning "optimize" (case-insensitive) map to ``"analyze"``;
    everything else maps to ``"generate"``.
    """
    return "analyze" if "optimize" in recommended_action.lower() else "generate"
|
||||
|
||||
def _score_topic(
    self,
    topic: str,
    sif_map: Dict[str, Any],
    serp_map: Dict[str, Any],
    exa_map: Dict[str, Any],
    trend_signals: List[Any],
) -> Dict[str, Any]:
    """Score a single topic with the ROI formula.

    ROI = (gap_size × volume × trend × intent) × (1 - 0.3 × competition)

    * gap_size – SIF ``coverage_delta`` (0.5 when the topic has no SIF entry)
    * volume – share of checked domains with competitors ranking, capped at 1
    * trend – best matching trend signal (see ``_match_trend_score``)
    * intent – keyword-based intent score (see ``_classify_intent``)
    * competition – Exa ``total_results`` / 10, capped at 1

    Returns:
        Dict with the topic, rounded ROI, priority bucket, recommended action,
        the individual factors and the raw evidence (``None`` where absent).
    """
    sif_entry = sif_map.get(topic, {})
    serp_entry = serp_map.get(topic, {})
    exa_entry = exa_map.get(topic, {})

    gap_size = sif_entry.get("coverage_delta", 0.5)

    ranking_competitors = serp_entry.get("competitor_count", 0)
    domains_checked = max(serp_entry.get("total_domains_checked", 1), 1)
    volume = min(ranking_competitors / domains_checked, 1.0)

    trend_score = self._match_trend_score(topic, trend_signals)
    intent = self._classify_intent(topic)

    # Existing competitor content acts as a penalty of at most 30 %.
    competition = min(exa_entry.get("total_results", 0) / 10.0, 1.0)
    roi = (gap_size * volume * trend_score * intent) * (1 - 0.3 * competition)

    priority = "high" if roi >= 0.6 else "medium" if roi >= 0.3 else "low"

    serp_evidence = None
    if serp_entry:
        serp_evidence = {
            "competitors_found": serp_entry.get("competitors_found", []),
            "competitor_count": ranking_competitors,
            "domains_with_content": serp_entry.get("domains_with_content", []),
        }

    return {
        "topic": topic,
        "roi_score": round(roi, 3),
        "priority": priority,
        "recommended_action": self._recommend_action(gap_size, competition, intent),
        "scoring": {
            "gap_size": round(gap_size, 3),
            "volume": round(volume, 3),
            "trend": round(trend_score, 3),
            "intent": round(intent, 3),
            "competition": round(competition, 3),
        },
        "sif_gap": sif_entry or None,
        "serp_evidence": serp_evidence,
        "competitor_content": exa_entry or None,
    }
|
||||
|
||||
def _match_trend_score(self, topic: str, signals: List[Dict[str, Any]]) -> float:
    """Return the trend momentum for ``topic`` from the given trend signals.

    Each signal's ``topic``, ``headline`` and ``suggested_angle`` are joined
    and compared case-insensitively with the topic.  A full substring match
    contributes the signal's ``impact_score`` (default 0.5); a word overlap
    contributes the overlapping fraction of topic words times that impact.
    The result is the best contribution, never lower than 0.5.
    """
    if not signals:
        return 0.5

    needle = topic.lower()
    needle_words = set(needle.split())
    word_total = max(len(needle_words), 1)

    top = 0.0
    for entry in signals:
        weight = entry.get("impact_score", 0.5)
        pieces = [
            entry.get("topic", ""),
            entry.get("headline", ""),
            entry.get("suggested_angle", ""),
        ]
        haystack = " ".join(piece for piece in pieces if piece).lower()

        if needle in haystack:
            top = max(top, weight)

        shared = len(needle_words & set(haystack.split()))
        if shared > 0:
            top = max(top, (shared / word_total) * weight)

    return max(top, 0.5)
|
||||
|
||||
def _classify_intent(self, topic: str) -> float:
    """
    Classify topic intent with keyword heuristics (no LLM call is made here).

    Terms are matched as whole words or phrases, optionally followed by a
    common inflection suffix ("reviews", "cheaper", "buying").  Matching raw
    substrings would misclassify unrelated words that merely contain a term,
    e.g. "laptop" (top), "democracy" (demo), "border" (order), "ideal" (deal).

    Returns:
        0.9 for transactional topics, 0.7 for commercial topics and 0.4 for
        everything else (informational default).
    """
    import re  # local import keeps the method self-contained

    topic_lower = topic.lower()

    commercial_terms = (
        "best", "top", "review", "vs", "comparison", "alternative",
        "versus", "pricing", "cost", "price", "cheap",
        "affordable", "discount", "coupon", "deal", "buy",
    )
    transactional_terms = (
        "buy", "purchase", "order", "subscribe", "sign up",
        "download", "get started", "free trial", "demo",
    )

    def mentions(terms) -> bool:
        # Lookarounds instead of \b so a trailing "." ("vs.") still matches.
        alternatives = "|".join(re.escape(term) for term in terms)
        pattern = (
            rf"(?<![a-z0-9])(?:{alternatives})"
            r"(?:s|es|ed|er|est|ing)?(?![a-z0-9])"
        )
        return re.search(pattern, topic_lower) is not None

    # Transactional intent outranks commercial intent when both are present.
    if mentions(transactional_terms):
        return 0.9
    if mentions(commercial_terms):
        return 0.7
    return 0.4  # Informational default
|
||||
|
||||
def _recommend_action(
    self, gap_size: float, competition: float, intent: float
) -> str:
    """Pick a recommended action from the scoring profile.

    Rules are evaluated top-down and the first match wins: large gap with low
    competition, significant gap with strong intent, high competition, small
    gap, and finally a generic blog-post recommendation.
    """
    if gap_size > 0.7 and competition < 0.3:
        return "Create comprehensive pillar page — large gap, low competition"
    if gap_size > 0.5 and intent > 0.6:
        return "Create high-conversion content — significant gap, strong intent"
    if competition > 0.7:
        return "Create differentiated content — high competition requires unique angle"
    if gap_size < 0.3:
        return "Optimize existing content — incremental gap, update current pages"
    return "Create targeted blog post — moderate opportunity"
|
||||
|
||||
async def generate_content_brief(
    self,
    topic: str,
    recommended_action: str,
    scoring: Optional[Dict[str, float]] = None,
    serp_evidence: Optional[Dict[str, Any]] = None,
    sif_gap: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Generate a structured content brief from a gap item.

    Asks the LLM (``self._generate_llm_response``) for title options, outline
    sections, target keywords and a writing angle.  A template brief is used
    when the call fails or returns no JSON object, and it also fills any key
    the LLM response leaves out.

    Args:
        topic: Topic of the gap.
        recommended_action: Action text produced by the scoring step.
        scoring: Optional factor scores; missing or ``None`` values default to 0.5.
        serp_evidence: Optional SERP data; up to three ``competitors_found``
            entries are quoted in the prompt.
        sif_gap: Optional SIF gap entry used for prompt context.

    Returns:
        Dict with ``topic``, ``recommended_action``, ``brief`` and ``scoring``.
    """
    import json as _json

    def _metric(source, key, default):
        # Treat absent and None values alike so the formatting below cannot
        # fail on a partially filled dict.
        value = (source or {}).get(key)
        return default if value is None else value

    gap_size = _metric(scoring, "gap_size", 0.5)
    volume = _metric(scoring, "volume", 0.5)
    trend = _metric(scoring, "trend", 0.5)
    intent = _metric(scoring, "intent", 0.5)
    competition = _metric(scoring, "competition", 0.5)
    word_count = 800 if competition > 0.7 else 1200 if gap_size > 0.5 else 600

    serp_context = ""
    if serp_evidence and serp_evidence.get("competitors_found"):
        snippets = [
            f"- {c.get('title') or ''}: {(c.get('snippet') or '')[:100]}"
            for c in serp_evidence["competitors_found"][:3]
        ]
        serp_context = "Competitor content already ranking:\n" + "\n".join(snippets)

    sif_context = ""
    if sif_gap:
        sif_context = (
            f"SIF coverage delta: {_metric(sif_gap, 'coverage_delta', 0):.2%}, "
            f"confidence: {_metric(sif_gap, 'confidence', 0):.2%}"
        )

    prompt = f"""You are a senior content strategist. Create a detailed content brief for the topic below.

TOPIC: {topic}
RECOMMENDED ACTION: {recommended_action}
{serp_context}
{sif_context}

Scoring profile:
- Gap size: {gap_size:.0%}
- Search volume: {volume:.0%}
- Trend momentum: {trend:.0%}
- Intent score: {intent:.0%}
- Competition level: {competition:.0%}
- Target word count: {word_count}

Return a JSON object with these exact keys:
{{
"titles": ["Title option 1", "Title option 2", "Title option 3"],
"outline": [
{{"heading": "Section heading", "key_points": ["point 1", "point 2", "point 3"]}}
],
"keywords": ["keyword1", "keyword2", "keyword3", "keyword4", "keyword5"],
"angle": "A single paragraph describing the strategic writing angle",
"word_count": {word_count}
}}

Generate 4-6 outline sections. Only return valid JSON, no other text."""

    # Template brief: the fallback when the LLM fails, and the source of
    # defaults for keys a partial LLM answer leaves out.
    topic_words = topic.split()
    template = {
        "titles": [
            f"The Ultimate Guide to {topic}",
            f"{topic}: Strategies That Actually Work",
            f"Why {topic} Matters More Than Ever",
        ],
        "outline": [
            {"heading": f"Introduction to {topic}", "key_points": ["Context and importance", "What this guide covers"]},
            {"heading": "Why This Matters", "key_points": ["Current landscape", "Key challenges and opportunities"]},
            {"heading": "Key Strategies", "key_points": ["Strategy 1 with examples", "Strategy 2 with implementation tips", "Strategy 3 for advanced practitioners"]},
            {"heading": "Common Pitfalls to Avoid", "key_points": ["Mistake 1 and how to avoid it", "Mistake 2 and how to avoid it"]},
            {"heading": "Measuring Success", "key_points": ["Key metrics to track", "Tools and methods for measurement"]},
            {"heading": "Conclusion & Next Steps", "key_points": ["Summary of key takeaways", "Actionable next steps"]},
        ],
        # Multi-word topics add their last word; single words get generic terms.
        "keywords": [topic, topic_words[-1]] if len(topic_words) > 1 else [topic, "guide", "strategy"],
        "angle": f"Create comprehensive, actionable content about {topic} that fills the gap identified in competitor analysis. Focus on providing unique insights and practical implementation guidance.",
        "word_count": word_count,
    }

    brief = template
    try:
        response = await self._generate_llm_response(prompt)
        # Models often wrap JSON in prose; keep only the outermost {...} span.
        start = response.find("{")
        end = response.rfind("}") + 1
        if start < 0 or end <= start:
            raise ValueError("No JSON found in LLM response")
        parsed = _json.loads(response[start:end])
        if not isinstance(parsed, dict):
            raise ValueError("LLM response JSON is not an object")
        brief = {
            **template,
            **{key: value for key, value in parsed.items() if value is not None},
        }
    except Exception as e:
        logger.warning(
            f"[{self.__class__.__name__}] LLM brief generation failed, using template: {e}"
        )
        brief = template

    return {
        "topic": topic,
        "recommended_action": recommended_action,
        "brief": brief,
        "scoring": scoring,
    }
|
||||
@@ -144,25 +144,25 @@ class CompetitorResponseAgent(BaseALwrityAgent):
|
||||
proposals.append(TaskProposal(
|
||||
title="Review Competitor Content",
|
||||
description=f"SIF found {competitor_count} competitor pages. Review for gap opportunities.",
|
||||
pillar_id="create",
|
||||
pillar_id="analyze",
|
||||
priority="high",
|
||||
estimated_time=45,
|
||||
source_agent="CompetitorResponseAgent",
|
||||
reasoning="SIF-detected competitor activity presents content gap opportunities.",
|
||||
action_type="navigate",
|
||||
action_url="/content-planning-dashboard"
|
||||
action_url="/seo-dashboard"
|
||||
))
|
||||
else:
|
||||
proposals.append(TaskProposal(
|
||||
title="Research Competitor Topics",
|
||||
description="Search for competitor content in your niche to identify coverage gaps.",
|
||||
pillar_id="create",
|
||||
pillar_id="analyze",
|
||||
priority="medium",
|
||||
estimated_time=30,
|
||||
source_agent="CompetitorResponseAgent",
|
||||
reasoning="Understanding competitor positioning improves content strategy.",
|
||||
action_type="navigate",
|
||||
action_url="/content-planning-dashboard"
|
||||
action_url="/seo-dashboard"
|
||||
))
|
||||
|
||||
return proposals
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
"""
|
||||
Content Guardian Agent implementation.
|
||||
Content Guardian Agent — ALwrity's committee watchdog.
|
||||
Audits committee proposals, evaluates agent behaviour, flags coverage gaps,
|
||||
and alerts the user when agents need correction.
|
||||
"""
|
||||
import json
|
||||
import traceback
|
||||
import asyncio
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
@@ -8,59 +13,414 @@ from .base import SIFBaseAgent, TXTAI_AVAILABLE, Agent
|
||||
from services.intelligence.agents.core_agent_framework import TaskProposal
|
||||
from services.intelligence.txtai_service import TxtaiIntelligenceService
|
||||
|
||||
# ── known committee agents for critique ──────────────────────────
|
||||
KNOWN_AGENTS = {
|
||||
"ContentStrategyAgent": {"label": "Content Strategy", "short": "Strategy", "pillar_focus": "plan"},
|
||||
"StrategyArchitectAgent": {"label": "Strategy Architect", "short": "Architect", "pillar_focus": "plan"},
|
||||
"SEOOptimizationAgent": {"label": "SEO Optimization", "short": "SEO", "pillar_focus": "analyze"},
|
||||
"SocialAmplificationAgent":{"label": "Social Amplification","short": "Social", "pillar_focus": "engage"},
|
||||
"CompetitorResponseAgent": {"label": "Competitor Response", "short": "Competitor", "pillar_focus": "analyze"},
|
||||
"ContentGapRadarAgent": {"label": "Content Gap Radar", "short": "Gap Radar", "pillar_focus": "generate"},
|
||||
}
|
||||
|
||||
PILLAR_IDS = {"plan", "generate", "publish", "analyze", "engage", "remarket"}
|
||||
COMMITTEE_CYCLE_WINDOW_DAYS = 30
|
||||
|
||||
|
||||
class ContentGuardianAgent(SIFBaseAgent):
|
||||
"""Agent for monitoring brand consistency and quality."""
|
||||
"""Committee watchdog — audits proposals, critiques agents, flags faults, alerts users."""
|
||||
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, **kwargs):
|
||||
# Pass kwargs to superclass to handle 'task' and other framework arguments
|
||||
CANNIBALIZATION_THRESHOLD = 0.85
|
||||
ORIGINALITY_THRESHOLD = 0.75
|
||||
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, sif_service: Any = None, **kwargs):
|
||||
super().__init__(intelligence_service, user_id, agent_type="content_guardian", **kwargs)
|
||||
self.sif_service = sif_service
|
||||
|
||||
# ── existing utilities ────────────────────────────────────────
|
||||
async def _create_txtai_agent(self):
|
||||
"""Create a specialized txtai Agent for content review."""
|
||||
if not TXTAI_AVAILABLE or Agent is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
_llm_for_agent = getattr(self.llm, "llm", self.llm)
|
||||
return Agent(
|
||||
tools=[
|
||||
{
|
||||
"name": "brand_voice_checker",
|
||||
"description": "Checks content against brand voice guidelines",
|
||||
"target": self._check_brand_voice
|
||||
}
|
||||
],
|
||||
llm=_llm_for_agent,
|
||||
max_iterations=3
|
||||
)
|
||||
tools=[{"name": "brand_voice_checker", "description": "Checks content against brand voice guidelines", "target": self._check_brand_voice}],
|
||||
llm=_llm_for_agent, max_iterations=3)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create txtai agent for ContentGuardian: {e}")
|
||||
raise e
|
||||
logger.error(f"Failed to create txtai agent for ContentGuardian: {e}"); raise e
|
||||
|
||||
def _check_brand_voice(self, content: str) -> Dict[str, Any]:
|
||||
"""Tool to check brand voice consistency."""
|
||||
# This would use semantic search to compare against brand guidelines
|
||||
return {
|
||||
"consistent": True,
|
||||
"score": 0.95,
|
||||
"notes": "Content aligns with professional/authoritative tone."
|
||||
}
|
||||
return {"consistent": True, "score": 0.95, "notes": "Content aligns with professional/authoritative tone."}
|
||||
|
||||
async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
|
||||
"""Propose quality assurance tasks."""
|
||||
proposals = []
|
||||
return [TaskProposal(title="Audit Old Content", description="Review top performing posts from >6 months ago for updates.", pillar_id="create", priority="low", estimated_time=30, source_agent="ContentGuardianAgent", reasoning="Maintains content relevance and authority.", action_type="navigate", action_url="/content-planning-dashboard")]
|
||||
|
||||
# 1. Content Freshness Audit
|
||||
proposals.append(TaskProposal(
|
||||
title="Audit Old Content",
|
||||
description="Review top performing posts from >6 months ago for updates.",
|
||||
pillar_id="create",
|
||||
priority="low",
|
||||
estimated_time=30,
|
||||
source_agent="ContentGuardianAgent",
|
||||
reasoning="Maintains content relevance and authority.",
|
||||
action_type="navigate",
|
||||
action_url="/content-planning-dashboard"
|
||||
))
|
||||
async def perform_site_audit(self, website_url: str) -> Dict[str, Any]:
    """Audit indexed content for ``website_url`` and summarise the findings.

    Searches ``self.intelligence`` for up to 10 entries, then runs
    ``assess_content_quality``, ``style_enforcer`` and ``safety_filter`` on
    every entry whose text is at least 50 characters long.

    Args:
        website_url: URL used in the search query and echoed in the report.

    Returns:
        A dict with ``website_url``, ``audit_timestamp``,
        ``total_pages_crawled`` and the ``content_quality``,
        ``brand_voice_consistency``, ``safety_issues`` and
        ``cannibalization_issues`` sections. The four sections stay ``None``
        when the search returns nothing. If anything raises, a dict with
        ``website_url``, ``error`` and ``audit_timestamp`` is returned.
    """
    self._log_agent_operation("Performing site audit", website_url=website_url)

    def _average(values):
        # max(..., 1) keeps the division safe when every entry was skipped.
        return round(sum(values) / max(len(values), 1), 4)

    try:
        hits = await self.intelligence.search(
            f"website content analysis {website_url}", limit=10
        )
        report: Dict[str, Any] = {
            "website_url": website_url,
            "audit_timestamp": datetime.utcnow().isoformat(),
            "total_pages_crawled": len(hits),
            "content_quality": None,
            "brand_voice_consistency": None,
            "safety_issues": None,
            "cannibalization_issues": None,
        }
        if not hits:
            return report

        quality_scores = []
        style_scores = []
        flagged_pages = []
        for hit in hits:
            # Fall back to the entry id when the hit carries no text.
            body = hit.get("text", "") or hit.get("id", "")
            if len(body) >= 50:
                quality_report = await self.assess_content_quality(
                    {"description": body, "title": website_url}
                )
                quality_scores.append(quality_report.get("score", 0.0))

                style_report = await self.style_enforcer(body)
                style_scores.append(style_report.get("compliance_score", 0.0))

                safety_report = await self.safety_filter(body)
                if not safety_report.get("is_safe", True):
                    flagged_pages.append(safety_report.get("flags", []))

        report["content_quality"] = {
            "score": _average(quality_scores),
            "pages_analyzed": len(quality_scores),
        }
        report["brand_voice_consistency"] = {
            "compliance_score": _average(style_scores),
            "pages_checked": len(style_scores),
        }
        report["safety_issues"] = {
            "has_issues": len(flagged_pages) > 0,
            "flagged_pages": len(flagged_pages),
        }
        report["cannibalization_issues"] = await self.check_cannibalization(website_url)
        return report
    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] Site audit failed: {e}")
        return {
            "website_url": website_url,
            "error": str(e),
            "audit_timestamp": datetime.utcnow().isoformat(),
        }
|
||||
|
||||
return proposals
|
||||
async def assess_content_quality(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
    """Score content from its style-compliance and safety checks.

    Uses ``website_data['description']``, falling back to
    ``website_data['title']``. The score is the style compliance score
    (0.8 when the style check reports none), halved when the safety filter
    asks for review.

    Returns:
        ``{"score": 0.5, "reason": ...}`` when there is no text; otherwise
        the score plus both sub-reports and the analysed text length. If
        anything raises, ``{"score": 0.0, "error": ...}``.
    """
    self._log_agent_operation("Assessing content quality")
    try:
        content = website_data.get('description', '') or website_data.get('title', '')
        if content:
            style_report = await self.style_enforcer(content)
            safety_report = await self.safety_filter(content)
            score = style_report.get('compliance_score', 0.8)
            needs_review = safety_report.get('action') == 'flag_for_review'
            return {
                "score": score * 0.5 if needs_review else score,
                "style_analysis": style_report,
                "safety_analysis": safety_report,
                "analyzed_text_length": len(content),
            }
        return {"score": 0.5, "reason": "No content to analyze"}
    except Exception as e:
        return {"score": 0.0, "error": str(e)}
|
||||
|
||||
async def check_cannibalization(self, new_draft: str) -> Dict[str, Any]:
    """Warn when ``new_draft`` is too similar to already-indexed content.

    Runs a single-result search on ``self.intelligence`` and compares the
    top score with ``CANNIBALIZATION_THRESHOLD``.

    Returns:
        A dict whose ``warning`` flag is ``True`` only when the top match
        scores above the threshold. Otherwise it carries a
        ``uniqueness_score``, a ``reason`` (draft shorter than 50 stripped
        characters) or an ``error`` (service not ready or an exception).
    """
    self._log_agent_operation(
        "Checking for semantic cannibalization", draft_length=len(new_draft)
    )
    try:
        service_ready = await self._ensure_intelligence_ready()
        if not service_ready:
            return {"warning": False, "error": "Service not initialized"}

        if not new_draft or len(new_draft.strip()) < 50:
            return {"warning": False, "reason": "Draft too short"}

        matches = await self.intelligence.search(new_draft, limit=1)
        if not matches:
            return {"warning": False, "uniqueness_score": 1.0}

        closest = matches[0]
        similarity = closest.get('score', 0.0)
        if similarity > self.CANNIBALIZATION_THRESHOLD:
            return {
                "warning": True,
                "similar_to": closest.get('id', 'unknown'),
                "score": similarity,
                "threshold": self.CANNIBALIZATION_THRESHOLD,
                "recommendation": "Consider revising the draft to target a different angle or merge with existing content",
            }
        return {"warning": False, "uniqueness_score": 1.0 - similarity}
    except Exception as e:
        return {"warning": False, "error": str(e)}
|
||||
|
||||
async def verify_originality(self, text: str, competitor_index: Any) -> Dict[str, Any]:
    """Estimate how original ``text`` is relative to competitor content.

    When ``competitor_index`` exposes a ``search`` attribute it is queried
    directly (limit 5, awaited if it returns a coroutine). Otherwise the
    user's own index is searched (limit 10) and only hits whose metadata
    ``type`` or ``source`` mentions "competitor" are kept.

    Returns:
        A dict with ``originality_score`` (1 minus the clamped top match
        score), ``confidence``, ``method`` and match details. Text shorter
        than 50 stripped characters yields a score of 0.0 with a ``reason``.
        Any exception yields a score of 0.0 with an ``error``.
    """
    self._log_agent_operation(
        "Verifying originality against competitors", text_length=len(text)
    )
    try:
        if not text or len(text.strip()) < 50:
            return {"originality_score": 0.0, "reason": "Text too short"}

        query = text.strip()
        has_dedicated_index = competitor_index is not None and hasattr(competitor_index, "search")
        if has_dedicated_index:
            method = "competitor_index_search"
            found = competitor_index.search(query, limit=5)
            if asyncio.iscoroutine(found):
                found = await found
            matches = found or []
        else:
            method = "user_index_competitor_filter"
            matches = []
            hits = await self.intelligence.search(query, limit=10)
            for hit in hits or []:
                payload = hit.get("object")
                if isinstance(payload, dict):
                    meta = payload
                elif isinstance(payload, str):
                    # Metadata may arrive JSON-encoded; unparsable text counts as empty.
                    try:
                        meta = json.loads(payload)
                    except Exception:
                        meta = {}
                else:
                    meta = {}
                mentions_competitor = any(
                    "competitor" in str(meta.get(field, "")).lower()
                    for field in ("type", "source")
                )
                if mentions_competitor:
                    matches.append(hit)

        if not matches:
            return {
                "originality_score": 1.0,
                "confidence": 0.6,
                "method": method,
                "notes": "No competitor overlap detected",
            }

        closest = max(matches, key=lambda item: float(item.get("score", 0.0)))
        similarity = max(0.0, min(1.0, float(closest.get("score", 0.0))))
        originality = max(0.0, round(1.0 - similarity, 4))
        # Confidence grows with the number of matches considered, capped at five.
        confidence = round(min(1.0, 0.55 + (min(len(matches), 5) * 0.07)), 3)
        return {
            "originality_score": originality,
            "confidence": confidence,
            "method": method,
            "warning": originality < self.ORIGINALITY_THRESHOLD,
            "threshold": self.ORIGINALITY_THRESHOLD,
            "top_competitor_match": {"id": closest.get("id"), "score": round(similarity, 4)},
            "matches_evaluated": len(matches),
        }
    except Exception as e:
        return {"originality_score": 0.0, "error": str(e)}
|
||||
|
||||
async def style_enforcer(self, text: str, style_guidelines: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Check ``text`` against style guidelines and report a compliance score.

    When no guidelines are passed and ``self.sif_service`` is set, one
    ``website_analysis`` entry is looked up in ``self.intelligence`` and its
    brand voice, style patterns and writing style become the guidelines.
    The lookup is best-effort: a failure is logged and the check continues
    without guidelines.

    Two heuristics each subtract 0.1 from a starting score of 1.0:
    contractions in text whose tone contains "formal" or "professional",
    and an average sentence length above 25 words.

    Args:
        text: Content to check.
        style_guidelines: Optional guidelines dict; only ``tone`` is read here.

    Returns:
        A dict with ``compliance_score``, ``issues``, ``is_compliant``
        (score above 0.8) and ``guidelines_source``. The source is
        ``"sif_index"`` only when guidelines were actually loaded from the
        index, and ``"provided"`` otherwise. Empty text yields a score of
        0.0. Any unexpected exception yields ``{"error": ...}``.
    """
    self._log_agent_operation("Enforcing style guidelines", text_length=len(text or ""))
    try:
        if not text:
            return {"compliance_score": 0.0, "issues": ["No text provided"]}

        # Record where the guidelines came from at the moment they are set.
        # Deriving it afterwards from `style_guidelines` inverted the label.
        guidelines_source = "provided"
        if not style_guidelines and self.sif_service:
            try:
                hits = await self.intelligence.search("website analysis brand voice style", limit=1)
                if hits:
                    raw_meta = hits[0].get('object')
                    meta = json.loads(raw_meta) if isinstance(raw_meta, str) else (raw_meta or hits[0])
                    if meta.get('type') == 'website_analysis':
                        report = meta.get('full_report', {})
                        style_guidelines = {
                            "tone": report.get('brand_analysis', {}).get('brand_voice', 'neutral'),
                            "style_patterns": report.get('style_patterns', {}),
                            "writing_style": report.get('writing_style', {}),
                        }
                        guidelines_source = "sif_index"
            except Exception as lookup_error:
                # Best-effort lookup: keep going, but leave a trace.
                logger.warning(
                    f"[{self.__class__.__name__}] Could not load style guidelines from index: {lookup_error}"
                )

        issues = []
        score = 1.0

        # The tone may be missing or a non-string value in stored metadata.
        tone = str((style_guidelines or {}).get('tone') or '').lower()
        if 'formal' in tone or 'professional' in tone:
            lowered = text.lower()
            found = [c for c in ["can't", "won't", "don't", "it's"] if c in lowered]
            if found:
                issues.append(f"Found contractions in formal text: {', '.join(found[:3])}...")
                score -= 0.1

        # Count only non-blank segments so the empty piece after a final
        # period does not deflate the average.
        sentences = [s for s in text.split('.') if s.strip()]
        avg = sum(len(s.split()) for s in sentences) / max(1, len(sentences))
        if avg > 25:
            issues.append("Average sentence length is too high (>25 words). Consider shortening.")
            score -= 0.1

        return {
            "compliance_score": max(0.0, score),
            "issues": issues,
            "is_compliant": score > 0.8,
            "guidelines_source": guidelines_source,
        }
    except Exception as e:
        return {"error": str(e)}
|
||||
|
||||
async def safety_filter(self, text: str) -> Dict[str, Any]:
    """Flag ``text`` that contains any keyword from a fixed blocklist.

    Keywords are matched case-insensitively as whole words, so a keyword at
    the start or end of the text or next to punctuation ("scam.") is caught,
    while a longer word containing it ("skill") is not.

    Returns:
        A dict with ``is_safe``, ``flags`` (matched keywords, in blocklist
        order), ``safety_score`` (1.0 or 0.0) and ``action`` (``"approve"``
        or ``"flag_for_review"``). Any exception yields ``{"error": ...}``.
    """
    import re  # local import: the file's import block is outside this view

    self._log_agent_operation("Running safety filter", text_length=len(text or ""))
    try:
        keywords = ["hate", "kill", "murder", "attack", "destroy", "scam", "fraud", "steal", "explicit", "adult"]
        lowered = (text or "").lower()
        # \b anchors replace the old " keyword " test, which required a
        # literal space on both sides.
        found = [k for k in keywords if re.search(rf"\b{re.escape(k)}\b", lowered)]
        is_safe = not found
        return {
            "is_safe": is_safe,
            "flags": found,
            "safety_score": 1.0 if is_safe else 0.0,
            "action": "approve" if is_safe else "flag_for_review",
        }
    except Exception as e:
        return {"error": str(e)}
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
# COMMITTEE WATCHDOG — the core audit entry point
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
async def audit_committee(self, proposals: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Audit a batch of committee proposals and return a structured report.

    Args:
        proposals: list of dicts with at minimum
            agent, title, pillar_id, priority, reasoning, accepted, valid.
            A missing or empty ``agent`` is grouped under ``"unknown"``.

    Returns:
        A dict with ``health_score``, ``verdict``, ``agent_critiques``,
        ``coverage_gaps``, ``overstuffed_pillars``, ``overlaps``, ``alerts``
        and ``audit_timestamp``. The same keys are present when
        ``proposals`` is empty, in which case the health score is 0.
    """
    if not proposals:
        # Same key set as the full report so consumers need no special case.
        return {
            "health_score": 0,
            "verdict": "No proposals received from any agent",
            "agent_critiques": [],
            "coverage_gaps": [],
            "overstuffed_pillars": [],
            "overlaps": [],
            "alerts": [],
            "audit_timestamp": datetime.utcnow().isoformat(),
        }

    by_agent: Dict[str, List[Dict]] = {}
    for p in proposals:
        # Normalise to str: an explicit None (or other non-string) agent would
        # otherwise become a dict key that sorted() cannot compare with str keys.
        agent_key = str(p.get("agent") or "unknown")
        by_agent.setdefault(agent_key, []).append(p)

    # 1. Critique each agent, in agent-name order.
    agent_critiques = [
        self._critique_agent(agent_name, agent_props)
        for agent_name, agent_props in sorted(by_agent.items())
    ]

    # 2. Coverage check
    coverage_gaps = self._find_coverage_gaps(proposals)
    overstuffed = self._find_overstuffed_pillars(proposals)

    # 3. Overlap detection
    overlaps = self._find_overlaps(proposals)

    # 4. Overall health score
    health_score = self._compute_health_score(agent_critiques, coverage_gaps, overlaps)

    # 5. Generate actionable alerts
    alerts = self._generate_alerts(agent_critiques, coverage_gaps, overlaps)

    verdict = self._verdict_text(health_score, agent_critiques, coverage_gaps)

    return {
        "health_score": health_score,
        "verdict": verdict,
        "agent_critiques": agent_critiques,
        "coverage_gaps": coverage_gaps,
        "overstuffed_pillars": overstuffed,
        "overlaps": overlaps,
        "alerts": alerts,
        "audit_timestamp": datetime.utcnow().isoformat(),
    }
|
||||
|
||||
# ── agent critique ────────────────────────────────────────────
|
||||
def _critique_agent(self, agent_name: str, proposals: List[Dict]) -> Dict[str, Any]:
    """Score one agent's proposals and list the issues found.

    Each proposal is checked for weak reasoning (``_reasoning_score`` below
    0.5), low priority on the agent's own focus pillar, and a pillar other
    than the agent's focus. Agents missing from ``KNOWN_AGENTS`` have no
    focus pillar, so only the reasoning and rejection checks apply.

    Returns:
        A dict with the agent identity, a 0-100 ``score``, a ``health``
        label ("good" at 80+, "warning" at 50+, else "failing"), proposal
        counts, the rounded ``acceptance_rate``, ``issues`` and a one-line
        ``summary``.
    """
    fallback_profile = {"label": agent_name, "short": agent_name[:6], "pillar_focus": None}
    profile = KNOWN_AGENTS.get(agent_name, fallback_profile)
    focus = profile["pillar_focus"]

    total = len(proposals)
    accepted = len([p for p in proposals if p.get("accepted")])
    rejected = total - accepted
    acceptance_rate = accepted / total if total > 0 else 0

    weak_reasoning = []
    poor_priority = []
    off_pillar = []
    for proposal in proposals:
        title = proposal.get("title", "")

        # Reasoning quality
        reasoning = (proposal.get("reasoning") or "").strip()
        quality = self._reasoning_score(reasoning)
        if quality < 0.5:
            weak_reasoning.append({"title": title, "reasoning": reasoning, "score": quality})

        # Priority appropriateness: low priority on the agent's own pillar.
        priority = (proposal.get("priority") or "").lower()
        if focus and priority == "low" and proposal.get("pillar_id") == focus:
            poor_priority.append({
                "title": title,
                "pillar": proposal.get("pillar_id", ""),
                "priority": priority,
                "note": f"Pillar '{focus}' is {profile['label']}'s core — low priority seems wrong",
            })

        # Pillar relevance: proposal filed under a different pillar.
        if focus and proposal.get("pillar_id") and proposal["pillar_id"] != focus:
            off_pillar.append({
                "title": title,
                "proposed_pillar": proposal.get("pillar_id", ""),
                "expected_pillar": focus,
                "note": f"'{profile['label']}' proposed for '{proposal['pillar_id']}' pillar but typically operates in '{focus}'",
            })

    issues = []
    if weak_reasoning:
        issues.append({
            "type": "weak_reasoning",
            "severity": "warning",
            "count": len(weak_reasoning),
            "summary": f"{len(weak_reasoning)} proposal(s) with vague or empty reasoning",
            "details": weak_reasoning,
            "action_label": "Improve reasoning",
            "action_url": None,
        })
    if poor_priority:
        issues.append({
            "type": "poor_priority",
            "severity": "warning",
            "count": len(poor_priority),
            "summary": f"{len(poor_priority)} proposal(s) under-prioritised for core pillar",
            "details": poor_priority,
            "action_label": "Review priorities",
            "action_url": None,
        })
    if off_pillar:
        issues.append({
            "type": "off_pillar",
            "severity": "info",
            "count": len(off_pillar),
            "summary": f"{len(off_pillar)} proposal(s) outside usual pillar",
            "details": off_pillar,
            "action_label": "Review pillar assignment",
            "action_url": None,
        })
    if rejected > 0:
        rejection_details = [
            {"title": p.get("title", ""), "reason": p.get("rejected_reason", "no reason")}
            for p in proposals
            if not p.get("accepted")
        ]
        issues.append({
            "type": "rejected_proposals",
            "severity": "error" if acceptance_rate < 0.3 else "warning",
            "count": rejected,
            "summary": f"{rejected} proposal(s) rejected by committee",
            "details": rejection_details,
            "action_label": "Review rejections",
            "action_url": None,
        })

    # Agent score (0-100): start from 100 and subtract penalties.
    score = 100 - 15 * len(weak_reasoning) - 10 * len(poor_priority)
    if acceptance_rate < 0.3:
        score -= 20
    if acceptance_rate == 0:
        score = max(0, score - 30)
    score = max(0, min(100, score))

    if score >= 80:
        health = "good"
    elif score >= 50:
        health = "warning"
    else:
        health = "failing"

    return {
        "agent": agent_name,
        "label": profile["label"],
        "short": profile["short"],
        "score": score,
        "health": health,
        "total_proposals": total,
        "accepted": accepted,
        "rejected": rejected,
        "acceptance_rate": round(acceptance_rate, 2),
        "issues": issues,
        "summary": self._agent_summary(health, score, accepted, total, weak_reasoning, poor_priority),
    }
|
||||
|
||||
# ── reasoning quality ─────────────────────────────────────────
|
||||
def _reasoning_score(self, reasoning: str) -> float:
|
||||
if not reasoning or len(reasoning) < 10:
|
||||
return 0.0
|
||||
# Short = weak
|
||||
if len(reasoning) < 25:
|
||||
return 0.2
|
||||
if len(reasoning) < 50:
|
||||
return 0.4
|
||||
# Has specifics
|
||||
specifics = ["because", "since", "based on", "data", "metric", "trend", "observed",
|
||||
"target", "audience", "competitor", "gap", "opportunity", "improve",
|
||||
"increase", "reduce", "goal", "kpi", "score", "result"]
|
||||
found = sum(1 for s in specifics if s in reasoning.lower())
|
||||
base = min(1.0, 0.4 + found * 0.1)
|
||||
# Length bonus
|
||||
if len(reasoning) > 100:
|
||||
base = min(1.0, base + 0.15)
|
||||
return min(1.0, base)
|
||||
|
||||
# ── coverage ──────────────────────────────────────────────────
|
||||
def _find_coverage_gaps(self, proposals: List[Dict]) -> List[Dict]:
    """List every pillar in ``PILLAR_IDS`` that no proposal targets.

    Returns:
        One warning entry per uncovered pillar, in sorted pillar order.
    """
    pillar_ids = (proposal.get("pillar_id") for proposal in proposals)
    covered = {pid for pid in pillar_ids if pid and pid in PILLAR_IDS}
    return [
        {
            "pillar_id": pid,
            "severity": "warning",
            "summary": f"Pillar '{pid}' has no proposals from any agent",
            "action_label": "Add task",
            "action_url": None,
        }
        for pid in sorted(PILLAR_IDS)
        if pid not in covered
    ]
|
||||
|
||||
def _find_overstuffed_pillars(self, proposals: List[Dict]) -> List[Dict]:
    """Report pillars that receive more than half of all proposals.

    Only pillar ids present in ``PILLAR_IDS`` are counted, but the share is
    taken over every proposal in the batch.

    Returns:
        One info entry per over-represented pillar, in sorted pillar order.
    """
    total = len(proposals)
    tally: Dict[str, int] = {}
    for proposal in proposals:
        pid = proposal.get("pillar_id")
        if not pid or pid not in PILLAR_IDS:
            continue
        tally[pid] = tally.get(pid, 0) + 1

    report = []
    for pid in sorted(tally):
        count = tally[pid]
        if total > 0 and count / total > 0.5:
            report.append({
                "pillar_id": pid,
                "count": count,
                "total": total,
                "severity": "info",
                "summary": f"Pillar '{pid}' has {count}/{total} proposals ({count/total*100:.0f}%) — may be over-represented",
                "action_label": None,
                "action_url": None,
            })
    return report
|
||||
|
||||
# ── overlap detection ─────────────────────────────────────────
|
||||
def _find_overlaps(self, proposals: List[Dict]) -> List[Dict]:
|
||||
overlaps = []
|
||||
by_title: Dict[str, List[Dict]] = {}
|
||||
for p in proposals:
|
||||
t = (p.get("title") or "").strip().lower()
|
||||
by_title.setdefault(t, []).append(p)
|
||||
for title, dups in by_title.items():
|
||||
if len(dups) > 1 and title:
|
||||
agents = [d.get("agent","?") for d in dups]
|
||||
overlaps.append({"title": dups[0].get("title",""), "pillar": dups[0].get("pillar_id",""),
|
||||
"agents": agents, "count": len(dups),
|
||||
"severity": "warning",
|
||||
"summary": f"{len(dups)} agents proposed '{dups[0].get('title','')}': {', '.join(agents)}",
|
||||
"action_label": "Resolve conflict", "action_url": None})
|
||||
return overlaps
|
||||
|
||||
# ── health ────────────────────────────────────────────────────
|
||||
def _compute_health_score(self, critiques: List[Dict], gaps: List[Dict], overlaps: List[Dict]) -> int:
|
||||
score = 100
|
||||
for c in critiques:
|
||||
if c["health"] == "failing": score -= 15
|
||||
elif c["health"] == "warning": score -= 8
|
||||
score -= len(gaps) * 10
|
||||
score -= len(overlaps) * 5
|
||||
return max(0, min(100, score))
|
||||
|
||||
def _verdict_text(self, health: int, critiques: List[Dict], gaps: List[Dict]) -> str:
|
||||
if health >= 90:
|
||||
return "Committee is performing well — all agents submitting quality proposals with good coverage."
|
||||
failing = [c for c in critiques if c["health"] == "failing"]
|
||||
warning = [c for c in critiques if c["health"] == "warning"]
|
||||
parts = []
|
||||
if failing:
|
||||
parts.append(f"{len(failing)} agent(s) need attention: {', '.join(c['label'] for c in failing)}")
|
||||
if warning:
|
||||
parts.append(f"{len(warning)} agent(s) showing issues: {', '.join(c['label'] for c in warning)}")
|
||||
if gaps:
|
||||
parts.append(f"Missing coverage: {', '.join(g['pillar_id'] for g in gaps)}")
|
||||
if not parts:
|
||||
parts.append("Minor issues detected — monitoring.")
|
||||
return " — ".join(parts)
|
||||
|
||||
def _agent_summary(self, health: str, score: int, accepted: int, total: int, weak: List, poor: List) -> str:
|
||||
if health == "failing":
|
||||
return f"Score {score}/100 — {accepted}/{total} accepted, {len(weak)} weak reasoning, {len(poor)} under-prioritised"
|
||||
if health == "warning":
|
||||
return f"Score {score}/100 — {accepted}/{total} accepted, {len(weak)} weak reasoning"
|
||||
return f"Score {score}/100 — {accepted}/{total} accepted"
|
||||
|
||||
# ── alerts ────────────────────────────────────────────────────
|
||||
def _generate_alerts(self, critiques: List[Dict], gaps: List[Dict], overlaps: List[Dict]) -> List[Dict]:
|
||||
alerts = []
|
||||
for c in critiques:
|
||||
if c["health"] == "failing":
|
||||
alerts.append({
|
||||
"type": "agent_failing", "severity": "error",
|
||||
"agent": c["agent"], "label": c["label"],
|
||||
"title": f"{c['label']} needs attention",
|
||||
"message": c["summary"],
|
||||
"cta_path": None,
|
||||
})
|
||||
for issue in c.get("issues", []):
|
||||
if issue["type"] == "weak_reasoning" and issue["count"] >= 3:
|
||||
alerts.append({
|
||||
"type": "weak_reasoning", "severity": "warning",
|
||||
"agent": c["agent"], "label": c["label"],
|
||||
"title": f"{c['label']}: {issue['count']} proposals with weak reasoning",
|
||||
"message": issue["summary"],
|
||||
"cta_path": None,
|
||||
})
|
||||
for g in gaps:
|
||||
alerts.append({
|
||||
"type": "coverage_gap", "severity": "warning",
|
||||
"agent": None, "label": None,
|
||||
"title": f"Coverage gap: pillar '{g['pillar_id']}'",
|
||||
"message": g["summary"],
|
||||
"cta_path": None,
|
||||
})
|
||||
for o in overlaps:
|
||||
alerts.append({
|
||||
"type": "proposal_overlap", "severity": "warning",
|
||||
"agent": None, "label": None,
|
||||
"title": f"Duplicate proposal: '{o['title']}'",
|
||||
"message": o["summary"],
|
||||
"cta_path": None,
|
||||
})
|
||||
return alerts
|
||||
|
||||
@@ -294,21 +294,95 @@ class ContentStrategyAgent(BaseALwrityAgent):
|
||||
|
||||
async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
|
||||
"""
|
||||
Propose strategic tasks based on content analysis.
|
||||
Propose strategic tasks based on user onboarding context.
|
||||
Derives content pillars, industry, and competitor info to
|
||||
generate personalized daily content suggestions.
|
||||
"""
|
||||
proposals = []
|
||||
|
||||
# 1. Content Refresh
|
||||
onboarding = context.get("onboarding_data", {})
|
||||
if not isinstance(onboarding, dict):
|
||||
return proposals
|
||||
|
||||
# Extract user profile hints from onboarding data
|
||||
industry = ""
|
||||
content_pillars = []
|
||||
competitor_domains = []
|
||||
try:
|
||||
cp = onboarding.get("core_persona") or {}
|
||||
if isinstance(cp, dict):
|
||||
industry = str(cp.get("industry") or cp.get("company_type") or "")
|
||||
step2 = onboarding.get("step2_summary") or onboarding.get("industry_context") or {}
|
||||
if isinstance(step2, dict):
|
||||
content_pillars = (
|
||||
step2.get("content_pillars")
|
||||
or step2.get("topics")
|
||||
or onboarding.get("content_pillars")
|
||||
or []
|
||||
)
|
||||
cf = onboarding.get("competitor_focus") or {}
|
||||
if isinstance(cf, dict):
|
||||
competitor_domains = cf.get("top_competitor_domains") or []
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Task 1: Create content for a key pillar (generate)
|
||||
if content_pillars:
|
||||
pillar_topic = content_pillars[0] if isinstance(content_pillars[0], str) else (
|
||||
content_pillars[0].get("topic") or content_pillars[0].get("name") or "your audience"
|
||||
)
|
||||
proposals.append(TaskProposal(
|
||||
title="Refresh 'SEO Basics'",
|
||||
description="Update your SEO basics guide with 2024 trends.",
|
||||
pillar_id="create",
|
||||
title=f"Create content for '{pillar_topic}'",
|
||||
description=f"Write a blog post or social content around your {pillar_topic} content pillar.",
|
||||
pillar_id="generate",
|
||||
priority="high",
|
||||
estimated_time=45,
|
||||
source_agent="ContentStrategyAgent",
|
||||
reasoning="Declining traffic and outdated references.",
|
||||
reasoning=f"'{pillar_topic}' is a core content pillar in your strategy. Regular publishing keeps your topical authority growing.",
|
||||
action_type="navigate",
|
||||
action_url="/content-planning-dashboard"
|
||||
action_url="/blog-writer",
|
||||
context_data={"pillar_topic": pillar_topic, "industry": industry},
|
||||
))
|
||||
else:
|
||||
proposals.append(TaskProposal(
|
||||
title="Define your content pillars",
|
||||
description="Set up your core content topics to get personalized daily suggestions.",
|
||||
pillar_id="plan",
|
||||
priority="high",
|
||||
estimated_time=20,
|
||||
source_agent="ContentStrategyAgent",
|
||||
reasoning="Content pillars drive every other task in your workflow. Defining them unlocks the full agent committee.",
|
||||
action_type="navigate",
|
||||
action_url="/content-planning-dashboard",
|
||||
))
|
||||
|
||||
# Task 2: Competitor content review (analyze)
|
||||
if competitor_domains:
|
||||
domain = competitor_domains[0]
|
||||
proposals.append(TaskProposal(
|
||||
title=f"Review competitor: {domain}",
|
||||
description=f"Analyze recently published content from {domain} to find gaps and opportunities.",
|
||||
pillar_id="analyze",
|
||||
priority="medium",
|
||||
estimated_time=25,
|
||||
source_agent="ContentStrategyAgent",
|
||||
reasoning=f"{domain} is your top tracked competitor. Regular reviews help you stay ahead of their content strategy moves.",
|
||||
action_type="navigate",
|
||||
action_url="/seo-dashboard",
|
||||
context_data={"competitor_domain": domain},
|
||||
))
|
||||
|
||||
# Task 3: Content audit (analyze) — always suggested
|
||||
proposals.append(TaskProposal(
|
||||
title="Quick content performance audit",
|
||||
description="Review your top 3 pieces from last month. Identify what worked and what to update.",
|
||||
pillar_id="analyze",
|
||||
priority="medium",
|
||||
estimated_time=20,
|
||||
source_agent="ContentStrategyAgent",
|
||||
reasoning="Regular audits surface declining pages that need refreshing and winning formats to double down on.",
|
||||
action_type="navigate",
|
||||
action_url="/content-planning-dashboard",
|
||||
))
|
||||
|
||||
return proposals
|
||||
|
||||
@@ -168,25 +168,25 @@ class SEOOptimizationAgent(BaseALwrityAgent):
|
||||
proposals.append(TaskProposal(
|
||||
title="Review SEO Issues",
|
||||
description=f"SIF indexed content suggests {issues_found} areas that may need SEO attention.",
|
||||
pillar_id="distribute",
|
||||
pillar_id="analyze",
|
||||
priority="high",
|
||||
estimated_time=30,
|
||||
source_agent="SEOOptimizationAgent",
|
||||
reasoning="Addressing SEO gaps improves organic visibility.",
|
||||
action_type="navigate",
|
||||
action_url="/content-planning-dashboard"
|
||||
action_url="/seo-dashboard"
|
||||
))
|
||||
else:
|
||||
proposals.append(TaskProposal(
|
||||
title="Run SEO Audit",
|
||||
description="Perform a comprehensive SEO audit to identify optimization opportunities.",
|
||||
pillar_id="distribute",
|
||||
pillar_id="analyze",
|
||||
priority="medium",
|
||||
estimated_time=15,
|
||||
source_agent="SEOOptimizationAgent",
|
||||
reasoning="Regular audits prevent SEO degradation.",
|
||||
action_type="navigate",
|
||||
action_url="/content-planning-dashboard"
|
||||
action_url="/seo-dashboard"
|
||||
))
|
||||
|
||||
return proposals
|
||||
|
||||
@@ -126,21 +126,85 @@ class SocialAmplificationAgent(BaseALwrityAgent):
|
||||
|
||||
async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
|
||||
"""
|
||||
Propose social media tasks.
|
||||
Propose social media tasks based on user's onboarding context.
|
||||
Derives platforms and content types from user data.
|
||||
"""
|
||||
proposals = []
|
||||
|
||||
# 1. Social Post Creation
|
||||
onboarding = context.get("onboarding_data", {})
|
||||
if not isinstance(onboarding, dict):
|
||||
return proposals
|
||||
|
||||
# Extract selected platforms from onboarding step 5
|
||||
selected_platforms = []
|
||||
try:
|
||||
step5 = onboarding.get("step5_summary") or onboarding.get("distribution_channels") or {}
|
||||
if isinstance(step5, dict):
|
||||
sp = step5.get("selected_platforms") or step5.get("platforms") or []
|
||||
selected_platforms = [p for p in sp if isinstance(p, str)]
|
||||
if not selected_platforms:
|
||||
# Fallback: check top-level keys
|
||||
for key in ("selected_platforms", "platforms", "social_platforms"):
|
||||
val = onboarding.get(key)
|
||||
if isinstance(val, list):
|
||||
selected_platforms = [p for p in val if isinstance(p, str)]
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
platform_urls = {
|
||||
"linkedin": "/linkedin-writer",
|
||||
"facebook": "/facebook-writer",
|
||||
"twitter": "/linkedin-writer", # no dedicated twitter writer, use linkedin as fallback
|
||||
"instagram": "/linkedin-writer",
|
||||
"tiktok": "/linkedin-writer",
|
||||
"youtube": "/linkedin-writer",
|
||||
}
|
||||
|
||||
target_platforms = [p for p in selected_platforms if p.lower() in platform_urls]
|
||||
if not target_platforms:
|
||||
# No known platforms configured — generic engage task
|
||||
proposals.append(TaskProposal(
|
||||
title="Create LinkedIn Thread",
|
||||
description="Summarize your latest blog post into a 5-tweet thread.",
|
||||
pillar_id="distribute",
|
||||
title="Share content on social media",
|
||||
description="Promote your latest published piece across your social channels.",
|
||||
pillar_id="engage",
|
||||
priority="medium",
|
||||
estimated_time=20,
|
||||
source_agent="SocialAmplificationAgent",
|
||||
reasoning="Repurpose existing content.",
|
||||
reasoning="Social distribution drives referral traffic and builds audience engagement.",
|
||||
action_type="navigate",
|
||||
action_url="/content-planning-dashboard"
|
||||
action_url="/linkedin-writer",
|
||||
))
|
||||
return proposals
|
||||
|
||||
platform = target_platforms[0]
|
||||
platform_label = platform.capitalize()
|
||||
proposals.append(TaskProposal(
|
||||
title=f"Share content on {platform_label}",
|
||||
description=f"Adapt and publish your latest content as a {platform_label} post to drive engagement.",
|
||||
pillar_id="engage",
|
||||
priority="medium",
|
||||
estimated_time=20,
|
||||
source_agent="SocialAmplificationAgent",
|
||||
reasoning=f"Consistent {platform_label} posting maintains audience engagement and extends content reach.",
|
||||
action_type="navigate",
|
||||
action_url=platform_urls[platform.lower()],
|
||||
context_data={"platform": platform.lower()},
|
||||
))
|
||||
|
||||
if len(target_platforms) > 1:
|
||||
platform2 = target_platforms[1]
|
||||
proposals.append(TaskProposal(
|
||||
title=f"Cross-post to {platform2.capitalize()}",
|
||||
description=f"Repurpose your latest content for your {platform2.capitalize()} audience.",
|
||||
pillar_id="engage",
|
||||
priority="low",
|
||||
estimated_time=15,
|
||||
source_agent="SocialAmplificationAgent",
|
||||
reasoning=f"Cross-posting to {platform2.capitalize()} increases reach without additional content creation cost.",
|
||||
action_type="navigate",
|
||||
action_url=platform_urls[platform2.lower()],
|
||||
context_data={"platform": platform2.lower()},
|
||||
))
|
||||
|
||||
return proposals
|
||||
|
||||
@@ -587,334 +587,6 @@ class StrategyArchitectAgent(SIFBaseAgent):
|
||||
|
||||
return samples
|
||||
|
||||
class ContentGuardianAgent(SIFBaseAgent):
|
||||
"""Agent for preventing cannibalization and ensuring content originality."""
|
||||
|
||||
CANNIBALIZATION_THRESHOLD = 0.85 # Similarity threshold for cannibalization warning
|
||||
ORIGINALITY_THRESHOLD = 0.75 # Minimum originality score
|
||||
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, sif_service: Any = None):
    """Create a content-guardian agent for one user.

    Args:
        intelligence_service: Search backend forwarded to the base agent.
        user_id: Identifier of the user, forwarded to the base agent.
        sif_service: Optional SIF service handle. ``style_enforcer`` only
            looks up brand-voice guidelines when this is truthy.
    """
    super().__init__(
        intelligence_service,
        user_id,
        agent_type="content_guardian",
    )
    self.sif_service = sif_service
|
||||
|
||||
async def perform_site_audit(self, website_url: str) -> Dict[str, Any]:
    """Audit the indexed content of a website and summarise the findings.

    Searches the intelligence index for content related to ``website_url``,
    scores every hit that has at least 50 characters of text, and averages
    the per-page results.

    Args:
        website_url: URL used both as the search term and as the report key.

    Returns:
        A dict with ``website_url``, ``audit_timestamp``,
        ``total_pages_crawled`` and the four assessment sections
        (``content_quality``, ``brand_voice_consistency``, ``safety_issues``,
        ``cannibalization_issues``). The sections stay ``None`` when nothing
        is indexed. On failure the dict carries ``error`` instead.
    """
    self._log_agent_operation("Performing site audit", website_url=website_url)
    try:
        # Search the user's SIF index for website content
        results = await self.intelligence.search(
            f"website content analysis {website_url}", limit=10
        )

        audit: Dict[str, Any] = {
            "website_url": website_url,
            "audit_timestamp": datetime.utcnow().isoformat(),
            # `results` may be None; count it as zero pages instead of raising.
            "total_pages_crawled": len(results or []),
            "content_quality": None,
            "brand_voice_consistency": None,
            "safety_issues": None,
            "cannibalization_issues": None,
        }

        if not results:
            logger.warning(f"[{self.__class__.__name__}] No indexed content found for {website_url}")
            return audit

        quality_scores = []
        style_scores = []
        safety_flags = []

        for result in results:
            text = result.get("text", "") or result.get("id", "")
            if len(text) < 50:
                continue

            quality = await self.assess_content_quality({"description": text, "title": website_url})
            quality_scores.append(quality.get("score", 0.0))

            # assess_content_quality already ran the style and safety checks on
            # this exact text and returns them; reuse them so each page is not
            # analysed (and the index not searched) twice. Fall back to a
            # direct call only when the assessment did not include them.
            style = quality.get("style_analysis")
            if style is None:
                style = await self.style_enforcer(text)
            style_scores.append(style.get("compliance_score", 0.0))

            safety = quality.get("safety_analysis")
            if safety is None:
                safety = await self.safety_filter(text)
            if not safety.get("is_safe", True):
                safety_flags.append(safety.get("flags", []))

        audit["content_quality"] = {
            "score": round(sum(quality_scores) / max(len(quality_scores), 1), 4),
            "pages_analyzed": len(quality_scores),
        }
        audit["brand_voice_consistency"] = {
            "compliance_score": round(sum(style_scores) / max(len(style_scores), 1), 4),
            "pages_checked": len(style_scores),
        }
        audit["safety_issues"] = {
            "has_issues": len(safety_flags) > 0,
            "flagged_pages": len(safety_flags),
        }

        # NOTE(review): check_cannibalization treats its argument as draft text
        # and rejects anything under 50 characters, so passing a bare URL will
        # usually yield "Draft too short" - confirm this is the intended input.
        cannibalization = await self.check_cannibalization(website_url)
        audit["cannibalization_issues"] = cannibalization

        logger.info(
            f"[{self.__class__.__name__}] Site audit complete for {website_url}: "
            f"quality={audit['content_quality']['score']}, "
            f"brand_voice={audit['brand_voice_consistency']['compliance_score']}"
        )
        return audit

    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] Site audit failed for {website_url}: {e}")
        return {
            "website_url": website_url,
            "error": str(e),
            "audit_timestamp": datetime.utcnow().isoformat(),
        }
|
||||
|
||||
async def assess_content_quality(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
    """Score a piece of website content by combining style and safety checks.

    The text analysed is ``website_data['description']``, or the ``title``
    when the description is empty. The style compliance score is the base
    (0.8 when the style result has none) and is halved when the safety
    filter asks for review.

    Returns:
        ``score`` plus both sub-analyses and the analysed text length;
        ``{"score": 0.5, "reason": ...}`` when there is no text; or
        ``{"score": 0.0, "error": ...}`` if anything raises.
    """
    self._log_agent_operation("Assessing content quality")
    try:
        sample = website_data.get('description', '') or website_data.get('title', '')
        if not sample:
            return {"score": 0.5, "reason": "No content to analyze"}

        # Style first, then safety - same order as the checks are reported.
        style_report = await self.style_enforcer(sample)
        safety_report = await self.safety_filter(sample)

        needs_review = safety_report.get('action') == 'flag_for_review'
        style_score = style_report.get('compliance_score', 0.8)
        final_score = style_score * 0.5 if needs_review else style_score

        return {
            "score": final_score,
            "style_analysis": style_report,
            "safety_analysis": safety_report,
            "analyzed_text_length": len(sample)
        }
    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] Quality assessment failed: {e}")
        return {"score": 0.0, "error": str(e)}
|
||||
|
||||
async def check_cannibalization(self, new_draft: str) -> Dict[str, Any]:
    """Check whether a draft is semantically too close to already-indexed content.

    Runs a single-result search with the draft as the query and compares the
    top hit's ``score`` against ``CANNIBALIZATION_THRESHOLD``.

    Args:
        new_draft: Draft text; must have at least 50 non-whitespace-trimmed
            characters to be analysed.

    Returns:
        ``{"warning": True, ...}`` with the matching id, score, threshold and
        a recommendation when the top score exceeds the threshold; otherwise
        ``{"warning": False, ...}`` with either a ``uniqueness_score``, a
        ``reason`` (draft too short) or an ``error``.
    """
    # Guard the length for the log call: the check below explicitly allows a
    # falsy draft, so a None draft must not raise before we get there.
    self._log_agent_operation("Checking for semantic cannibalization", draft_length=len(new_draft or ""))

    try:
        if not await self._ensure_intelligence_ready():
            logger.error(f"[{self.__class__.__name__}] Intelligence service not initialized")
            return {"warning": False, "error": "Service not initialized"}

        if not new_draft or len(new_draft.strip()) < 50:
            logger.warning(f"[{self.__class__.__name__}] Draft too short for meaningful analysis")
            return {"warning": False, "reason": "Draft too short"}

        results = await self.intelligence.search(new_draft, limit=1)

        if not results:
            logger.info(f"[{self.__class__.__name__}] No similar content found - draft is unique")
            return {"warning": False, "uniqueness_score": 1.0}

        top_result = results[0]
        similarity_score = top_result.get('score', 0.0)

        logger.debug(f"[{self.__class__.__name__}] Top similarity score: {similarity_score:.4f}")

        if similarity_score > self.CANNIBALIZATION_THRESHOLD:
            warning_data = {
                "warning": True,
                "similar_to": top_result.get('id', 'unknown'),
                "score": similarity_score,
                "threshold": self.CANNIBALIZATION_THRESHOLD,
                "recommendation": "Consider revising the draft to target a different angle or merge with existing content"
            }
            logger.warning(f"[{self.__class__.__name__}] Cannibalization detected: {warning_data}")
            return warning_data

        logger.info(f"[{self.__class__.__name__}] No cannibalization detected. Draft is sufficiently unique.")
        return {"warning": False, "uniqueness_score": 1.0 - similarity_score}

    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] Failed to check cannibalization: {e}")
        logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
        return {"warning": False, "error": str(e)}
|
||||
|
||||
async def verify_originality(self, text: str, competitor_index: Any) -> Dict[str, Any]:
    """Estimate how original ``text`` is compared with competitor content.

    When ``competitor_index`` exposes a ``search`` method it is queried
    directly (sync or awaitable results are both accepted). Otherwise the
    user's own index is searched and only hits whose metadata ``type`` or
    ``source`` mentions "competitor" are kept.

    Args:
        text: Content to check; needs at least 50 characters after stripping.
        competitor_index: Optional object with ``search(query, limit=...)``.

    Returns:
        A dict with ``originality_score`` (1 minus the best competitor
        similarity, clamped to 0..1), ``confidence``, ``method`` and, when
        matches exist, ``warning``, ``threshold``, ``top_competitor_match``
        and ``matches_evaluated``. Too-short input returns a ``reason``;
        failures return ``error`` with ``originality_score`` 0.0.
    """
    # Local imports keep the method self-contained; style_enforcer in this
    # class imports json the same way.
    import inspect
    import json

    # Guard the length for the log call so a None text reaches the
    # "too short" branch below instead of raising here.
    self._log_agent_operation("Verifying originality against competitors", text_length=len(text or ""))

    try:
        if not text or len(text.strip()) < 50:
            logger.warning(f"[{self.__class__.__name__}] Text too short for meaningful originality check")
            return {"originality_score": 0.0, "reason": "Text too short"}

        query = text.strip()
        competitor_results = []
        method = "user_index_competitor_filter"

        if competitor_index is not None and hasattr(competitor_index, "search"):
            method = "competitor_index_search"
            raw_results = competitor_index.search(query, limit=5)
            # isawaitable also covers futures/tasks and custom awaitables,
            # not only coroutine objects.
            if inspect.isawaitable(raw_results):
                raw_results = await raw_results
            competitor_results = raw_results or []
        else:
            raw_results = await self.intelligence.search(query, limit=10)
            for result in raw_results or []:
                metadata_raw = result.get("object")
                metadata = metadata_raw if isinstance(metadata_raw, dict) else {}
                if not metadata and isinstance(metadata_raw, str):
                    try:
                        metadata = json.loads(metadata_raw)
                    except Exception:
                        # Unparseable metadata is treated as "no metadata".
                        metadata = {}

                doc_type = str((metadata or {}).get("type", "")).lower()
                source = str((metadata or {}).get("source", "")).lower()
                if "competitor" in doc_type or "competitor" in source:
                    competitor_results.append(result)

        if not competitor_results:
            return {
                "originality_score": 1.0,
                "confidence": 0.6,
                "method": method,
                "notes": "No competitor overlap detected in available index"
            }

        top_match = max(competitor_results, key=lambda item: float(item.get("score", 0.0)))
        top_score = max(0.0, min(1.0, float(top_match.get("score", 0.0))))
        originality_score = max(0.0, round(1.0 - top_score, 4))
        # Confidence grows with the number of matches, capped at five.
        confidence = round(min(1.0, 0.55 + (min(len(competitor_results), 5) * 0.07)), 3)
        warning = originality_score < self.ORIGINALITY_THRESHOLD

        return {
            "originality_score": originality_score,
            "confidence": confidence,
            "method": method,
            "warning": warning,
            "threshold": self.ORIGINALITY_THRESHOLD,
            "top_competitor_match": {
                "id": top_match.get("id"),
                "score": round(top_score, 4)
            },
            "matches_evaluated": len(competitor_results)
        }

    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] Failed to verify originality: {e}")
        logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
        return {"originality_score": 0.0, "error": str(e)}
|
||||
|
||||
async def style_enforcer(self, text: str, style_guidelines: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Tool: check content against brand voice and style guidelines.

    When no guidelines are passed and a SIF service is configured, the index
    is searched for a ``website_analysis`` document and its brand voice is
    used. Two heuristics are then applied, each costing 0.1: contractions in
    formal/professional tone, and an average sentence length above 25 words.

    Args:
        text: Content to evaluate.
        style_guidelines: Optional guidelines; only ``tone`` is read here.

    Returns:
        ``compliance_score`` (0..1), ``issues``, ``is_compliant`` and
        ``guidelines_source`` ("provided", "sif_index" or "none").
        Empty text returns a zero score; failures return ``{"error": ...}``.
    """
    self._log_agent_operation("Enforcing style guidelines", text_length=len(text or ""))

    try:
        if not text:
            return {"compliance_score": 0.0, "issues": ["No text provided"]}

        # Record where the guidelines really came from. Deriving this at the
        # end from `style_guidelines` reported SIF-fetched guidelines as
        # "provided" and missing ones as "sif_index".
        guidelines_source = "provided" if style_guidelines else "none"

        # 1. Fetch Style Guidelines from SIF if not provided
        if not style_guidelines and self.sif_service:
            try:
                # We assume the most relevant 'website_analysis' doc contains the guidelines
                results = await self.intelligence.search("website analysis brand voice style", limit=1)
                if results:
                    import json
                    res = results[0]
                    metadata_str = res.get('object')
                    metadata = json.loads(metadata_str) if isinstance(metadata_str, str) else (metadata_str or res)

                    if metadata.get('type') == 'website_analysis':
                        report = metadata.get('full_report', {})
                        style_guidelines = {
                            "tone": report.get('brand_analysis', {}).get('brand_voice', 'neutral'),
                            "style_patterns": report.get('style_patterns', {}),
                            "writing_style": report.get('writing_style', {})
                        }
                        guidelines_source = "sif_index"
                        logger.info(f"[{self.__class__.__name__}] Retrieved style guidelines from SIF: {style_guidelines.get('tone')}")
            except Exception as e:
                # Best effort: fall through to the heuristics without guidelines.
                logger.warning(f"[{self.__class__.__name__}] Failed to retrieve style guidelines from SIF: {e}")

        issues = []
        score = 1.0

        # Basic Heuristic Checks (Placeholder for LLM-based style analysis)

        # 1. Tone check: formal/professional text should avoid contractions.
        # str(... or '') tolerates a missing or None tone.
        tone = str(style_guidelines.get('tone') or '').lower() if style_guidelines else ''
        if 'formal' in tone or 'professional' in tone:
            contractions = ["can't", "won't", "don't", "it's"]
            lowered = text.lower()
            found_contractions = [c for c in contractions if c in lowered]
            if found_contractions:
                issues.append(f"Found contractions in formal text: {', '.join(found_contractions[:3])}...")
                score -= 0.1

        # 2. Sentence length. Only non-empty segments count as sentences;
        # otherwise the empty piece after a final period deflates the average.
        sentences = [s for s in text.split('.') if s.strip()]
        avg_len = sum(len(s.split()) for s in sentences) / max(1, len(sentences))
        if avg_len > 25:
            issues.append("Average sentence length is too high (>25 words). Consider shortening.")
            score -= 0.1

        return {
            "compliance_score": max(0.0, score),
            "issues": issues,
            "is_compliant": score > 0.8,
            "guidelines_source": guidelines_source
        }

    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] Style enforcement failed: {e}")
        return {"error": str(e)}
|
||||
|
||||
async def safety_filter(self, text: str) -> Dict[str, Any]:
    """Tool: flag potentially harmful, offensive, or sensitive content.

    Matches a fixed keyword blocklist as whole words, case-insensitively.

    Args:
        text: Content to scan.

    Returns:
        ``is_safe``, ``flags`` (matched keywords in blocklist order),
        ``safety_score`` (1.0 or 0.0) and ``action`` ("approve" or
        "flag_for_review"); ``{"error": ...}`` if scanning fails.
    """
    import re

    self._log_agent_operation("Running safety filter", text_length=len(text or ""))

    try:
        # Basic Keyword Blocklist (Placeholder for LLM/Safety Model)
        # In production, this should call a dedicated safety API (e.g., OpenAI Moderation, Llama Guard)
        unsafe_keywords = [
            "hate", "kill", "murder", "attack", "destroy",  # Violent
            "scam", "fraud", "steal",  # Illegal
            "explicit", "adult"  # NSFW
        ]

        text_lower = (text or "").lower()

        # \b matches at string edges and next to punctuation. The previous
        # " keyword " test missed "scam." or a keyword opening the text.
        found_flags = [
            keyword
            for keyword in unsafe_keywords
            if re.search(rf"\b{re.escape(keyword)}\b", text_lower)
        ]

        is_safe = not found_flags

        return {
            "is_safe": is_safe,
            "flags": found_flags,
            "safety_score": 1.0 if is_safe else 0.0,
            "action": "approve" if is_safe else "flag_for_review"
        }

    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] Safety filter failed: {e}")
        return {"error": str(e)}
|
||||
|
||||
class LinkGraphAgent(SIFBaseAgent):
|
||||
"""
|
||||
|
||||
@@ -709,6 +709,47 @@ class SIFIntegrationService:
|
||||
if themes:
|
||||
text_content += f"Augmented Themes: {', '.join(themes[:5])}. "
|
||||
|
||||
freshness = adv_insights.get('freshness', {})
|
||||
if freshness:
|
||||
text_content += (f"Content Freshness Score: {freshness.get('freshness_score', 'N/A')}. "
|
||||
f"Publishing Velocity: {freshness.get('publishing_velocity', 0)}/week. "
|
||||
f"Trend: {freshness.get('publishing_trend', 'unknown')}. "
|
||||
f"Last 30d: {freshness.get('publishing_recency', {}).get('last_30d', 0)} pages. ")
|
||||
|
||||
link_health = adv_insights.get('link_health', {})
|
||||
if link_health and 'error' not in link_health:
|
||||
text_content += (f"Internal Links: {link_health.get('internal_link_count', 0)}. "
|
||||
f"External Links: {link_health.get('external_link_count', 0)}. "
|
||||
f"Nofollow: {link_health.get('nofollow_link_count', 0)}. "
|
||||
f"Avg Links/Page: {link_health.get('avg_links_per_page', 0)}. ")
|
||||
|
||||
redirects = adv_insights.get('redirect_audit', {})
|
||||
if redirects and 'error' not in redirects:
|
||||
text_content += (f"Redirects: {redirects.get('total_redirects', 0)} total, "
|
||||
f"{redirects.get('multi_hop_chains', 0)} multi-hop. ")
|
||||
|
||||
image_seo = adv_insights.get('image_seo', {})
|
||||
if image_seo and 'error' not in image_seo:
|
||||
text_content += (f"Images: {image_seo.get('total_images', 0)} total, "
|
||||
f"Alt Coverage: {image_seo.get('alt_coverage_percentage', 0)}%. ")
|
||||
|
||||
url_struct = adv_insights.get('url_structure', {})
|
||||
if url_struct:
|
||||
text_content += (f"URL Structure: {url_struct.get('total_urls_analyzed', 0)} URLs, "
|
||||
f"Avg Depth: {url_struct.get('directory_depth', {}).get('average_depth', 0)}. "
|
||||
f"Params: {url_struct.get('parameter_usage', {}).get('percentage_with_params', 0)}%. ")
|
||||
|
||||
robots = adv_insights.get('robots_txt', {})
|
||||
if robots and robots.get('success'):
|
||||
text_content += (f"Robots.txt: {robots.get('total_directives', 0)} directives, "
|
||||
f"Compliance: {robots.get('compliance_score', 0)}/100. "
|
||||
f"Issues: {len(robots.get('issues', []))}. ")
|
||||
|
||||
budget = adv_insights.get('crawl_budget', {})
|
||||
if budget and budget.get('success'):
|
||||
text_content += (f"Crawl Budget: {budget.get('pages_crawled', 0)} crawled of {budget.get('sitemap_total_urls', 0)} URLs. "
|
||||
f"Waste: {budget.get('waste_percentage', 0)}%. "
|
||||
f"Score: {budget.get('optimization_score', 0)}. ")
|
||||
# Add Technical SEO overview
|
||||
tech_audit = dashboard_data.get('technical_seo_audit', {})
|
||||
if tech_audit:
|
||||
|
||||
@@ -17,13 +17,13 @@ from .content_generator_prompts import (
|
||||
VideoScriptGenerator
|
||||
)
|
||||
|
||||
# Import new image generation services
|
||||
# Import image generation services
|
||||
from .image_generation import (
|
||||
LinkedInImageGenerator,
|
||||
LinkedInImageEditor,
|
||||
LinkedInImageStorage
|
||||
)
|
||||
from .image_prompts import LinkedInPromptGenerator
|
||||
from .carousel import LinkedInCarouselPDFRenderer
|
||||
|
||||
__all__ = [
|
||||
# Content Generation
|
||||
@@ -42,9 +42,10 @@ __all__ = [
|
||||
|
||||
# Image Generation Services
|
||||
'LinkedInImageGenerator',
|
||||
'LinkedInImageEditor',
|
||||
'LinkedInImageStorage',
|
||||
'LinkedInPromptGenerator'
|
||||
'LinkedInPromptGenerator',
|
||||
# Carousel Rendering
|
||||
'LinkedInCarouselPDFRenderer',
|
||||
]
|
||||
|
||||
# Version information
|
||||
|
||||
3
backend/services/linkedin/carousel/__init__.py
Normal file
3
backend/services/linkedin/carousel/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .carousel_renderer import LinkedInCarouselPDFRenderer
|
||||
|
||||
__all__ = ['LinkedInCarouselPDFRenderer']
|
||||
336
backend/services/linkedin/carousel/carousel_renderer.py
Normal file
336
backend/services/linkedin/carousel/carousel_renderer.py
Normal file
@@ -0,0 +1,336 @@
|
||||
"""
|
||||
LinkedIn Carousel PDF Renderer
|
||||
|
||||
Renders text-based carousel slides into visually appealing PNG images
|
||||
and composes them into a LinkedIn-compatible PDF document (1.91:1 ratio).
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, List, Optional
|
||||
from PIL import Image, ImageDraw, ImageFont, ImageFilter
|
||||
from reportlab.lib.pagesizes import landscape
|
||||
from reportlab.lib.units import mm
|
||||
from reportlab.platypus import SimpleDocTemplate, Image as RLImage, PageBreak
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LinkedInCarouselPDFRenderer:
|
||||
|
||||
COLOR_SCHEMES = {
|
||||
'professional': {
|
||||
'background_start': (25, 55, 109),
|
||||
'background_end': (41, 128, 185),
|
||||
'title_color': (255, 255, 255),
|
||||
'content_color': (236, 240, 241),
|
||||
'accent_color': (52, 152, 219),
|
||||
},
|
||||
'creative': {
|
||||
'background_start': (142, 68, 173),
|
||||
'background_end': (231, 76, 60),
|
||||
'title_color': (255, 255, 255),
|
||||
'content_color': (245, 245, 245),
|
||||
'accent_color': (241, 196, 15),
|
||||
},
|
||||
'industry': {
|
||||
'background_start': (39, 174, 96),
|
||||
'background_end': (44, 62, 80),
|
||||
'title_color': (255, 255, 255),
|
||||
'content_color': (236, 240, 241),
|
||||
'accent_color': (46, 204, 113),
|
||||
},
|
||||
'dark': {
|
||||
'background_start': (20, 20, 30),
|
||||
'background_end': (60, 60, 80),
|
||||
'title_color': (255, 255, 255),
|
||||
'content_color': (200, 200, 210),
|
||||
'accent_color': (100, 200, 255),
|
||||
},
|
||||
'minimal': {
|
||||
'background_start': (245, 245, 250),
|
||||
'background_end': (255, 255, 255),
|
||||
'title_color': (44, 62, 80),
|
||||
'content_color': (80, 80, 90),
|
||||
'accent_color': (52, 152, 219),
|
||||
},
|
||||
}
|
||||
|
||||
def __init__(self, output_dir: Optional[str] = None):
    """Set slide geometry, upload limits and the output directory.

    Args:
        output_dir: Directory for rendered slides and PDFs. Falls back to
            ``data/media/linkedin_carousels`` when omitted or empty.
    """
    # 1200x627 px is the 1.91:1 landscape format named in the module docstring.
    self.slide_width = 1200
    self.slide_height = 627
    self.slide_aspect_ratio = "1.91:1"
    # Limits enforced/reported by render_carousel_to_pdf.
    self.max_file_size_bytes = 100 * 1024 * 1024
    self.max_slides = 300
    self.output_dir = output_dir or "data/media/linkedin_carousels"
|
||||
|
||||
async def render_carousel_to_pdf(
    self,
    carousel_data: Dict[str, Any],
    color_scheme: str = 'professional',
    user_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Render a carousel to slide images and assemble them into one PDF.

    Args:
        carousel_data: Dict with ``slides`` (required, non-empty) and the
            optional ``title``, ``cover_slide`` and ``cta_slide`` entries.
        color_scheme: Key into ``COLOR_SCHEMES``.
        user_id: Accepted for API compatibility; not used by this method.

    Returns:
        On success: ``success``, ``pdf_bytes``, ``pdf_path`` and a
        ``metadata`` dict (slide count, timing, size, dimensions).
        On any failure: ``{'success': False, 'error': <message>}``.
    """
    import uuid

    start_time = datetime.now()
    image_paths: List[str] = []

    try:
        # Inside the try so a filesystem error is reported like any other
        # failure instead of escaping as an exception.
        os.makedirs(self.output_dir, exist_ok=True)

        slides = carousel_data.get('slides', [])
        if not slides:
            return {'success': False, 'error': 'No slides to render'}

        title = carousel_data.get('title', 'LinkedIn Carousel')
        cover_slide = carousel_data.get('cover_slide')
        cta_slide = carousel_data.get('cta_slide')
        total_slides = len(slides) + (1 if cover_slide else 0) + (1 if cta_slide else 0)

        if total_slides > self.max_slides:
            error = f'Too many slides: {total_slides} exceeds max {self.max_slides}'
            return {'success': False, 'error': error}

        # A timestamp alone repeats for renders started in the same second,
        # letting them overwrite each other's files; add a random suffix.
        session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"

        if cover_slide:
            path = self._render_slide(
                slide=cover_slide, slide_number=0, session_id=session_id,
                color_scheme=color_scheme, is_cover=True, carousel_title=title,
            )
            if path:
                image_paths.append(path)

        for i, slide in enumerate(slides):
            path = self._render_slide(
                slide=slide, slide_number=i + 1, session_id=session_id,
                color_scheme=color_scheme, is_cover=False,
            )
            if path:
                image_paths.append(path)

        if cta_slide:
            path = self._render_slide(
                slide=cta_slide, slide_number=len(slides) + 1, session_id=session_id,
                color_scheme=color_scheme, is_cta=True,
            )
            if path:
                image_paths.append(path)

        if not image_paths:
            return {'success': False, 'error': 'No slide images generated'}

        pdf_filename = f"linkedin_carousel_{session_id}.pdf"
        pdf_path = os.path.join(self.output_dir, pdf_filename)
        pdf_bytes = self._compose_pdf(image_paths, pdf_path)

        file_size = len(pdf_bytes)
        if file_size > self.max_file_size_bytes:
            # Oversize output is only reported; the PDF is still returned.
            logger.warning("PDF size %.2f MB exceeds max %.2f MB",
                           file_size / (1024 * 1024), self.max_file_size_bytes / (1024 * 1024))

        generation_time = (datetime.now() - start_time).total_seconds()

        return {
            'success': True,
            'pdf_bytes': pdf_bytes,
            'pdf_path': pdf_path,
            'metadata': {
                'slide_count': len(image_paths),
                'generation_time': generation_time,
                'file_size': file_size,
                'file_size_mb': round(file_size / (1024 * 1024), 2),
                'dimensions': f'{self.slide_width}x{self.slide_height}',
                'aspect_ratio': self.slide_aspect_ratio,
            }
        }

    except Exception as e:
        logger.error("Error rendering carousel PDF: %s", str(e))
        return {'success': False, 'error': f'Carousel PDF rendering failed: {str(e)}'}

    finally:
        # The per-slide PNGs are only inputs to the PDF and are not exposed
        # in the result, so remove them rather than letting them accumulate.
        for path in image_paths:
            try:
                os.remove(path)
            except OSError as cleanup_error:
                logger.debug("Could not remove temporary slide image %s: %s", path, cleanup_error)
|
||||
|
||||
def _render_slide(
    self,
    slide: Dict[str, Any],
    slide_number: int,
    session_id: str,
    color_scheme: str = 'professional',
    is_cover: bool = False,
    is_cta: bool = False,
    carousel_title: str = '',
) -> Optional[str]:
    """Draw one slide and save it as a PNG in the output directory.

    Three layouts exist: cover (centred title, optional subtitle, swipe
    hint), call-to-action (title, body, rounded button) and the default
    content layout (slide number, title, body, optional visual elements).
    An unknown ``color_scheme`` falls back to ``'professional'``.

    Returns:
        The saved file path, or ``None`` when rendering raised (the error
        is logged).
    """
    try:
        palette = self.COLOR_SCHEMES.get(color_scheme, self.COLOR_SCHEMES['professional'])
        width = self.slide_width
        height = self.slide_height
        mid_x = width // 2

        canvas = Image.new('RGB', (width, height))
        pen = ImageDraw.Draw(canvas)

        # Background first, then a 6px accent bar along the bottom edge.
        self._draw_gradient(pen, palette)
        pen.rectangle([0, height - 6, width, height], fill=palette['accent_color'])

        if is_cover:
            heading = carousel_title or slide.get('title', '')
            self._draw_centered_text(pen, heading, (mid_x, 180), palette['title_color'],
                                     font_size=42, max_width=width - 160)

            tagline = slide.get('content', '')
            if tagline:
                self._draw_centered_text(pen, tagline, (mid_x, 320), palette['content_color'],
                                         font_size=24, max_width=width - 200, max_lines=3)

            self._draw_centered_text(pen, "Swipe to explore →", (mid_x, 480),
                                     palette['accent_color'], font_size=18)
        elif is_cta:
            self._draw_text(pen, slide.get('title', ''), (60, 160), palette['title_color'],
                            font_size=36, max_width=width - 120, max_lines=2)

            body = slide.get('content', '')
            if body:
                self._draw_text(pen, body, (60, 260), palette['content_color'],
                                font_size=22, max_width=width - 120, max_lines=6)

            # 400x55 button centred horizontally, label centred on it.
            button_left = mid_x - 200
            button_top = 440
            pen.rounded_rectangle([button_left, button_top, button_left + 400, button_top + 55],
                                  radius=27, fill=palette['accent_color'])
            self._draw_centered_text(pen, "Share Your Thoughts →",
                                     (mid_x, button_top + 27), (255, 255, 255), font_size=22)
        else:
            # Slide number in the top-right corner.
            self._draw_text(pen, str(slide_number), (width - 50, 20),
                            palette['accent_color'], font_size=16)

            heading = slide.get('title', '')
            if heading:
                self._draw_text(pen, heading, (60, 50), palette['title_color'],
                                font_size=30, max_width=width - 120, max_lines=2)

            body = slide.get('content', '')
            if body:
                self._draw_text(pen, body, (60, 145), palette['content_color'],
                                font_size=20, max_width=width - 120, max_lines=10)

            extras = slide.get('visual_elements', [])
            if extras:
                self._draw_visual_elements(pen, extras, palette)

        target = os.path.join(self.output_dir, f"slide_{session_id}_{slide_number:03d}.png")
        canvas.save(target, 'PNG', optimize=True)
        return target

    except Exception as e:
        logger.error("Error rendering slide %d: %s", slide_number, str(e))
        return None
|
||||
|
||||
def _draw_gradient(self, draw: ImageDraw.Draw, colors: Dict):
    """Fill the slide with a vertical gradient, one horizontal line per row.

    Row ``y`` is coloured by linear interpolation from
    ``colors['background_start']`` (top) towards
    ``colors['background_end']``, using ``y / slide_height`` as the weight
    and truncating each channel to an int.
    """
    r0, g0, b0 = colors['background_start']
    r1, g1, b1 = colors['background_end']
    channel_pairs = ((r0, r1), (g0, g1), (b0, b1))

    height = self.slide_height
    right_edge = self.slide_width
    for row in range(height):
        weight = row / height
        shade = tuple(int(lo + (hi - lo) * weight) for lo, hi in channel_pairs)
        draw.line([(0, row), (right_edge, row)], fill=shade)
|
||||
|
||||
def _wrap_lines(self, draw, text, font, max_width: int = None, max_lines: int = None) -> List[str]:
    """Greedily word-wrap ``text`` to ``max_width`` pixels, capped at ``max_lines``.

    Width is measured with ``draw.textbbox``. When the cap is reached while
    words remain, the last kept line has its final three characters replaced
    by "...". A missing ``max_width`` means no wrapping; a ``None`` text
    yields no lines.
    """
    lines: List[str] = []
    current = ""
    # str(text or "") lets a None/non-string title degrade to "draw nothing"
    # instead of raising and dropping the whole slide.
    for word in str(text or "").split():
        candidate = f"{current} {word}".strip()
        box = draw.textbbox((0, 0), candidate, font=font)
        too_wide = bool(max_width) and (box[2] - box[0]) > max_width
        # Only break when the line already holds something; a single word
        # wider than max_width is kept on its own line.
        if too_wide and current:
            lines.append(current)
            if max_lines and len(lines) >= max_lines:
                lines[-1] = lines[-1][:-3] + "..."
                break
            current = word
        else:
            current = candidate
    if current and (not max_lines or len(lines) < max_lines):
        lines.append(current)
    return lines

def _draw_text(self, draw: ImageDraw.Draw, text: str, position: tuple, color: tuple,
               font_size: int = 20, max_width: int = None, max_lines: int = None, bold: bool = False):
    """Draw left-aligned, word-wrapped text with its top-left at ``position``.

    Lines are spaced ``int(font_size * 1.4)`` pixels apart.
    """
    font = self._get_font(font_size, bold)
    x, y = position
    line_height = int(font_size * 1.4)
    for i, line in enumerate(self._wrap_lines(draw, text, font, max_width, max_lines)):
        draw.text((x, y + i * line_height), line, fill=color, font=font)

def _draw_centered_text(self, draw: ImageDraw.Draw, text: str, center: tuple, color: tuple,
                        font_size: int = 20, max_width: int = None, max_lines: int = None, bold: bool = False):
    """Draw word-wrapped text centred on ``center`` both horizontally and vertically.

    Each line is centred on its own measured width; the block of lines is
    centred vertically using ``int(font_size * 1.4)`` as the line height.
    """
    font = self._get_font(font_size, bold)
    cx, cy = center
    lines = self._wrap_lines(draw, text, font, max_width, max_lines)

    line_height = int(font_size * 1.4)
    start_y = cy - (len(lines) * line_height) // 2

    for i, line in enumerate(lines):
        box = draw.textbbox((0, 0), line, font=font)
        line_width = box[2] - box[0]
        draw.text((cx - line_width // 2, start_y + i * line_height), line, fill=color, font=font)
|
||||
|
||||
def _draw_visual_elements(self, draw: ImageDraw.Draw, elements: List[str], colors: Dict):
    """Draw up to four labelled accent dots in a row near the bottom edge.

    Each entry gets a 12px dot and its label, truncated to 25 characters,
    at 280px intervals starting at x=60, 60px above the bottom of the slide.
    Entries beyond the fourth are ignored.
    """
    shown = elements[:4]
    if not shown:
        return

    # The font is the same for every label, so load it once rather than on
    # each loop iteration.
    font = self._get_font(12, False)
    y_start = self.slide_height - 60
    x_start = 60
    for i, element in enumerate(shown):
        cx = x_start + i * 280
        draw.ellipse([cx, y_start, cx + 12, y_start + 12], fill=colors['accent_color'])
        # str() tolerates non-string entries, which cannot be sliced as text.
        draw.text((cx + 20, y_start - 2), str(element)[:25], fill=colors['content_color'], font=font)
|
||||
|
||||
def _get_font(self, size: int, bold: bool = False):
    """Return a font of the requested size, trying Arial, then DejaVu Sans.

    If neither TrueType family can be loaded, Pillow's built-in default
    font is used as a last resort.

    Args:
        size: Font size passed to Pillow.
        bold: Select the bold face of the family.
    """
    candidates = (
        "arialbd.ttf" if bold else "arial.ttf",
        "DejaVuSans-Bold.ttf" if bold else "DejaVuSans.ttf",
    )
    for font_file in candidates:
        try:
            return ImageFont.truetype(font_file, size)
        except (IOError, OSError):
            continue

    # A bare load_default() ignores `size`, so on hosts without either font
    # family every heading and body line renders at one small fixed size.
    # Newer Pillow accepts a size here; older versions raise TypeError on
    # the keyword, in which case we keep the previous behaviour.
    try:
        return ImageFont.load_default(size=size)
    except TypeError:
        return ImageFont.load_default()
|
||||
|
||||
def _compose_pdf(self, image_paths: List[str], output_path: str) -> bytes:
    """Write a PDF with one slide image per page and return its bytes.

    The page size equals the slide size; each image is drawn 2 units
    smaller in both directions inside a padding-free frame.

    Args:
        image_paths: Slide image files, in page order.
        output_path: Where the PDF is written before being read back.
    """
    from reportlab.platypus import BaseDocTemplate, Frame, PageTemplate
    from reportlab.lib.pagesizes import landscape

    page_size = (self.slide_width, self.slide_height)
    # Leave 1pt margin to avoid ReportLab frame size issues
    margin = 1
    inner_width = self.slide_width - 2 * margin
    inner_height = self.slide_height - 2 * margin

    slide_frame = Frame(
        margin, margin, inner_width, inner_height, id="slide_frame",
        leftPadding=0, rightPadding=0, topPadding=0, bottomPadding=0,
    )
    slide_template = PageTemplate(id="slide", frames=[slide_frame], pagesize=page_size)
    document = BaseDocTemplate(output_path, pagesize=page_size)
    document.addPageTemplates([slide_template])

    # One image per page; a page break follows every image except the last.
    flowables = []
    last_index = len(image_paths) - 1
    for index, image_file in enumerate(image_paths):
        flowables.append(RLImage(image_file, width=inner_width, height=inner_height))
        if index != last_index:
            flowables.append(PageBreak())

    document.build(flowables)

    with open(output_path, 'rb') as pdf_file:
        return pdf_file.read()
|
||||
@@ -2,6 +2,7 @@
|
||||
Content Generator for LinkedIn Content Generation
|
||||
|
||||
Handles the main content generation logic for posts and articles.
|
||||
Uses llm_text_gen for provider-agnostic LLM access (respects GPT_PROVIDER).
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
@@ -21,6 +22,7 @@ from services.linkedin.content_generator_prompts import (
|
||||
CarouselGenerator,
|
||||
VideoScriptGenerator
|
||||
)
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from services.persona_analysis_service import PersonaAnalysisService
|
||||
import time
|
||||
|
||||
@@ -28,11 +30,9 @@ import time
|
||||
class ContentGenerator:
|
||||
"""Handles content generation for all LinkedIn content types."""
|
||||
|
||||
def __init__(self, citation_manager=None, quality_analyzer=None, gemini_grounded=None, fallback_provider=None):
|
||||
def __init__(self, citation_manager=None, quality_analyzer=None):
|
||||
self.citation_manager = citation_manager
|
||||
self.quality_analyzer = quality_analyzer
|
||||
self.gemini_grounded = gemini_grounded
|
||||
self.fallback_provider = fallback_provider
|
||||
|
||||
# Persona caching
|
||||
self._persona_cache: Dict[str, Dict[str, Any]] = {}
|
||||
@@ -105,22 +105,24 @@ class ContentGenerator:
|
||||
del self._cache_timestamps[key]
|
||||
logger.info(f"Cleared persona cache for user {user_id}")
|
||||
|
||||
def _transform_gemini_sources(self, gemini_sources):
|
||||
"""Transform Gemini sources to ResearchSource format."""
|
||||
transformed_sources = []
|
||||
for source in gemini_sources:
|
||||
transformed_source = ResearchSource(
|
||||
title=source.get('title', 'Unknown Source'),
|
||||
url=source.get('url', ''),
|
||||
content=f"Source from {source.get('title', 'Unknown')}",
|
||||
relevance_score=0.8, # Default relevance score
|
||||
credibility_score=0.7, # Default credibility score
|
||||
domain_authority=0.6, # Default domain authority
|
||||
source_type=source.get('type', 'web'),
|
||||
publication_date=datetime.now().strftime('%Y-%m-%d')
|
||||
)
|
||||
transformed_sources.append(transformed_source)
|
||||
return transformed_sources
|
||||
def _build_research_context(self, research_sources: List) -> str:
|
||||
"""Build research context string from research sources for prompt injection."""
|
||||
if not research_sources:
|
||||
return ""
|
||||
|
||||
context_parts = ["\n\nRESEARCH CONTEXT (use this information to ground your content with facts and data):"]
|
||||
for i, source in enumerate(research_sources[:5], 1): # Limit to top 5 sources
|
||||
title = getattr(source, 'title', f'Source {i}')
|
||||
url = getattr(source, 'url', '')
|
||||
content = getattr(source, 'content', '')
|
||||
context_parts.append(f"\n{i}. {title}")
|
||||
if url:
|
||||
context_parts.append(f" URL: {url}")
|
||||
if content:
|
||||
context_parts.append(f" Key insight: {content[:300]}")
|
||||
|
||||
context_parts.append("\nInstructions: Use the research above to include specific data points, statistics, and factual claims in your content. Cite sources where appropriate.")
|
||||
return "\n".join(context_parts)
|
||||
|
||||
async def generate_post(
|
||||
self,
|
||||
@@ -155,21 +157,12 @@ class ContentGenerator:
|
||||
logger.info(f" - First research source: {research_sources[0] if research_sources else 'None'}")
|
||||
logger.info(f" - Research sources types: {[type(s) for s in research_sources[:3]]}")
|
||||
|
||||
# Step 3: Add citations if requested - POST METHOD
|
||||
# Step 3: Add citations if requested
|
||||
citations = []
|
||||
source_list = None
|
||||
final_research_sources = research_sources # Default to passed research_sources
|
||||
final_research_sources = research_sources
|
||||
|
||||
# Use sources and citations from content_result if available (from Gemini grounding)
|
||||
if content_result.get('citations') and content_result.get('sources'):
|
||||
logger.info(f"Using citations and sources from Gemini grounding: {len(content_result['citations'])} citations, {len(content_result['sources'])} sources")
|
||||
citations = content_result['citations']
|
||||
# Transform Gemini sources to ResearchSource format
|
||||
gemini_sources = self._transform_gemini_sources(content_result['sources'])
|
||||
source_list = self.citation_manager.generate_source_list(gemini_sources) if self.citation_manager else None
|
||||
# Use transformed sources for the response
|
||||
final_research_sources = gemini_sources
|
||||
elif request.include_citations and research_sources and self.citation_manager:
|
||||
if request.include_citations and research_sources and self.citation_manager:
|
||||
try:
|
||||
logger.info(f"Processing citations for content length: {len(content_result['content'])}")
|
||||
citations = self.citation_manager.extract_citations(content_result['content'])
|
||||
@@ -224,7 +217,7 @@ class ContentGenerator:
|
||||
data=post_content,
|
||||
research_sources=final_research_sources, # Use final_research_sources
|
||||
generation_metadata={
|
||||
'model_used': 'gemini-2.0-flash-001',
|
||||
'model_used': 'llm_text_gen',
|
||||
'generation_time': generation_time,
|
||||
'research_time': research_time,
|
||||
'grounding_enabled': grounding_enabled
|
||||
@@ -251,21 +244,12 @@ class ContentGenerator:
|
||||
try:
|
||||
start_time = datetime.now()
|
||||
|
||||
# Step 3: Add citations if requested - ARTICLE METHOD
|
||||
# Step 3: Add citations if requested
|
||||
citations = []
|
||||
source_list = None
|
||||
final_research_sources = research_sources # Default to passed research_sources
|
||||
final_research_sources = research_sources
|
||||
|
||||
# Use sources and citations from content_result if available (from Gemini grounding)
|
||||
if content_result.get('citations') and content_result.get('sources'):
|
||||
logger.info(f"Using citations and sources from Gemini grounding: {len(content_result['citations'])} citations, {len(content_result['sources'])} sources")
|
||||
citations = content_result['citations']
|
||||
# Transform Gemini sources to ResearchSource format
|
||||
gemini_sources = self._transform_gemini_sources(content_result['sources'])
|
||||
source_list = self.citation_manager.generate_source_list(gemini_sources) if self.citation_manager else None
|
||||
# Use transformed sources for the response
|
||||
final_research_sources = gemini_sources
|
||||
elif request.include_citations and research_sources and self.citation_manager:
|
||||
if request.include_citations and research_sources and self.citation_manager:
|
||||
try:
|
||||
citations = self.citation_manager.extract_citations(content_result['content'])
|
||||
source_list = self.citation_manager.generate_source_list(research_sources)
|
||||
@@ -317,7 +301,7 @@ class ContentGenerator:
|
||||
data=article_content,
|
||||
research_sources=final_research_sources, # Use final_research_sources
|
||||
generation_metadata={
|
||||
'model_used': 'gemini-2.0-flash-001',
|
||||
'model_used': 'llm_text_gen',
|
||||
'generation_time': generation_time,
|
||||
'research_time': research_time,
|
||||
'grounding_enabled': grounding_enabled
|
||||
@@ -386,7 +370,7 @@ class ContentGenerator:
|
||||
'alternative_responses': content_result.get('alternative_responses', []),
|
||||
'tone_analysis': content_result.get('tone_analysis'),
|
||||
'generation_metadata': {
|
||||
'model_used': 'gemini-2.0-flash-001',
|
||||
'model_used': 'llm_text_gen',
|
||||
'generation_time': generation_time,
|
||||
'research_time': research_time,
|
||||
'grounding_enabled': grounding_enabled
|
||||
@@ -402,19 +386,14 @@ class ContentGenerator:
|
||||
}
|
||||
|
||||
# Grounded content generation methods
|
||||
async def generate_grounded_post_content(self, request, research_sources: List) -> Dict[str, Any]:
|
||||
"""Generate grounded post content using the enhanced Gemini provider with native grounding."""
|
||||
async def generate_grounded_post_content(self, request, research_sources: List, user_id: str = None) -> Dict[str, Any]:
|
||||
"""Generate post content using provider-agnostic llm_text_gen."""
|
||||
try:
|
||||
if not self.gemini_grounded:
|
||||
logger.error("Gemini Grounded Provider not available - cannot generate content without AI provider")
|
||||
raise Exception("Gemini Grounded Provider not available - cannot generate content without AI provider")
|
||||
|
||||
# Build the prompt for grounded generation using persona if available (DB vs session override)
|
||||
user_id = int(getattr(request, "user_id", 0) or 0)
|
||||
persona_data = self._get_cached_persona_data(user_id, 'linkedin')
|
||||
# Build the prompt using persona if available
|
||||
uid = int(getattr(request, "user_id", 0) or 0)
|
||||
persona_data = self._get_cached_persona_data(uid, 'linkedin')
|
||||
if getattr(request, 'persona_override', None):
|
||||
try:
|
||||
# Merge shallowly: override core and platform adaptation parts
|
||||
override = request.persona_override
|
||||
if persona_data:
|
||||
core = persona_data.get('core_persona', {})
|
||||
@@ -431,61 +410,40 @@ class ContentGenerator:
|
||||
pass
|
||||
prompt = PostPromptBuilder.build_post_prompt(request, persona=persona_data)
|
||||
|
||||
# Generate grounded content using native Google Search grounding
|
||||
result = await self.gemini_grounded.generate_grounded_content(
|
||||
# Inject research context into prompt
|
||||
research_context = self._build_research_context(research_sources)
|
||||
if research_context:
|
||||
prompt += research_context
|
||||
|
||||
# Generate content using provider-agnostic gateway
|
||||
raw_response = llm_text_gen(
|
||||
prompt=prompt,
|
||||
content_type="linkedin_post",
|
||||
temperature=0.7,
|
||||
max_tokens=request.max_length
|
||||
user_id=user_id,
|
||||
flow_type="linkedin_post",
|
||||
max_tokens=request.max_length,
|
||||
temperature=0.7
|
||||
)
|
||||
|
||||
return result
|
||||
content_text = raw_response if isinstance(raw_response, str) else str(raw_response or "")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating grounded post content: {str(e)}")
|
||||
logger.info("Attempting fallback to standard content generation...")
|
||||
|
||||
# Fallback to standard content generation without grounding
|
||||
try:
|
||||
if not self.fallback_provider:
|
||||
raise Exception("No fallback provider available")
|
||||
|
||||
# Build a simpler prompt for fallback generation
|
||||
prompt = PostPromptBuilder.build_post_prompt(request)
|
||||
|
||||
# Generate content using fallback provider (it's a dict with functions)
|
||||
if 'generate_text' in self.fallback_provider:
|
||||
result = await self.fallback_provider['generate_text'](
|
||||
prompt=prompt,
|
||||
temperature=0.7,
|
||||
max_tokens=request.max_length
|
||||
)
|
||||
else:
|
||||
raise Exception("Fallback provider doesn't have generate_text method")
|
||||
|
||||
# Return result in the expected format
|
||||
return {
|
||||
'content': result.get('content', '') if isinstance(result, dict) else str(result),
|
||||
'content': content_text,
|
||||
'sources': [],
|
||||
'citations': [],
|
||||
'grounding_enabled': False,
|
||||
'fallback_used': True
|
||||
'grounding_enabled': bool(research_sources),
|
||||
'fallback_used': False
|
||||
}
|
||||
|
||||
except Exception as fallback_error:
|
||||
logger.error(f"Fallback generation also failed: {str(fallback_error)}")
|
||||
raise Exception(f"Failed to generate content: {str(e)}. Fallback also failed: {str(fallback_error)}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating post content: {str(e)}")
|
||||
raise Exception(f"Failed to generate LinkedIn post: {str(e)}")
|
||||
|
||||
async def generate_grounded_article_content(self, request, research_sources: List) -> Dict[str, Any]:
|
||||
"""Generate grounded article content using the enhanced Gemini provider with native grounding."""
|
||||
async def generate_grounded_article_content(self, request, research_sources: List, user_id: str = None) -> Dict[str, Any]:
|
||||
"""Generate article content using provider-agnostic llm_text_gen."""
|
||||
try:
|
||||
if not self.gemini_grounded:
|
||||
logger.error("Gemini Grounded Provider not available - cannot generate content without AI provider")
|
||||
raise Exception("Gemini Grounded Provider not available - cannot generate content without AI provider")
|
||||
|
||||
# Build the prompt for grounded generation using persona if available (DB vs session override)
|
||||
user_id = int(getattr(request, "user_id", 0) or 0)
|
||||
persona_data = self._get_cached_persona_data(user_id, 'linkedin')
|
||||
# Build the prompt using persona if available
|
||||
uid = int(getattr(request, "user_id", 0) or 0)
|
||||
persona_data = self._get_cached_persona_data(uid, 'linkedin')
|
||||
if getattr(request, 'persona_override', None):
|
||||
try:
|
||||
override = request.persona_override
|
||||
@@ -504,88 +462,146 @@ class ContentGenerator:
|
||||
pass
|
||||
prompt = ArticlePromptBuilder.build_article_prompt(request, persona=persona_data)
|
||||
|
||||
# Generate grounded content using native Google Search grounding
|
||||
result = await self.gemini_grounded.generate_grounded_content(
|
||||
# Inject research context into prompt
|
||||
research_context = self._build_research_context(research_sources)
|
||||
if research_context:
|
||||
prompt += research_context
|
||||
|
||||
# Generate content using provider-agnostic gateway
|
||||
raw_response = llm_text_gen(
|
||||
prompt=prompt,
|
||||
content_type="linkedin_article",
|
||||
temperature=0.7,
|
||||
max_tokens=request.word_count * 10 # Approximate character count
|
||||
user_id=user_id,
|
||||
flow_type="linkedin_article",
|
||||
max_tokens=request.word_count * 10,
|
||||
temperature=0.7
|
||||
)
|
||||
|
||||
return result
|
||||
content_text = raw_response if isinstance(raw_response, str) else str(raw_response or "")
|
||||
|
||||
# Extract title from article content (first markdown heading or first line)
|
||||
title = ""
|
||||
for line in content_text.split('\n'):
|
||||
stripped = line.strip()
|
||||
if stripped.startswith('# '):
|
||||
title = stripped[2:].strip()
|
||||
break
|
||||
if not title:
|
||||
for line in content_text.split('\n'):
|
||||
stripped = line.strip()
|
||||
if stripped:
|
||||
title = stripped[:100].strip()
|
||||
break
|
||||
if not title:
|
||||
title = request.topic or "LinkedIn Article"
|
||||
|
||||
return {
|
||||
'content': content_text,
|
||||
'title': title,
|
||||
'sources': [],
|
||||
'citations': [],
|
||||
'grounding_enabled': bool(research_sources),
|
||||
'fallback_used': False
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating grounded article content: {str(e)}")
|
||||
raise Exception(f"Failed to generate grounded article content: {str(e)}")
|
||||
logger.error(f"Error generating article content: {str(e)}")
|
||||
raise Exception(f"Failed to generate LinkedIn article: {str(e)}")
|
||||
|
||||
async def generate_grounded_carousel_content(self, request, research_sources: List) -> Dict[str, Any]:
|
||||
"""Generate grounded carousel content using the enhanced Gemini provider with native grounding."""
|
||||
async def generate_grounded_carousel_content(self, request, research_sources: List, user_id: str = None) -> Dict[str, Any]:
|
||||
"""Generate carousel content using provider-agnostic llm_text_gen."""
|
||||
try:
|
||||
if not self.gemini_grounded:
|
||||
logger.error("Gemini Grounded Provider not available - cannot generate content without AI provider")
|
||||
raise Exception("Gemini Grounded Provider not available - cannot generate content without AI provider")
|
||||
|
||||
# Build the prompt for grounded generation using the new prompt builder
|
||||
prompt = CarouselPromptBuilder.build_carousel_prompt(request)
|
||||
|
||||
# Generate grounded content using native Google Search grounding
|
||||
result = await self.gemini_grounded.generate_grounded_content(
|
||||
# Inject research context into prompt
|
||||
research_context = self._build_research_context(research_sources)
|
||||
if research_context:
|
||||
prompt += research_context
|
||||
|
||||
# Generate content using provider-agnostic gateway
|
||||
raw_response = llm_text_gen(
|
||||
prompt=prompt,
|
||||
content_type="linkedin_carousel",
|
||||
temperature=0.7,
|
||||
max_tokens=2000
|
||||
user_id=user_id,
|
||||
flow_type="linkedin_carousel",
|
||||
max_tokens=2000,
|
||||
temperature=0.7
|
||||
)
|
||||
|
||||
return result
|
||||
content_text = raw_response if isinstance(raw_response, str) else str(raw_response or "")
|
||||
|
||||
return {
|
||||
'content': content_text,
|
||||
'sources': [],
|
||||
'citations': [],
|
||||
'grounding_enabled': bool(research_sources),
|
||||
'fallback_used': False
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating grounded carousel content: {str(e)}")
|
||||
raise Exception(f"Failed to generate grounded carousel content: {str(e)}")
|
||||
logger.error(f"Error generating carousel content: {str(e)}")
|
||||
raise Exception(f"Failed to generate LinkedIn carousel: {str(e)}")
|
||||
|
||||
async def generate_grounded_video_script_content(self, request, research_sources: List) -> Dict[str, Any]:
|
||||
"""Generate grounded video script content using the enhanced Gemini provider with native grounding."""
|
||||
async def generate_grounded_video_script_content(self, request, research_sources: List, user_id: str = None) -> Dict[str, Any]:
|
||||
"""Generate video script content using provider-agnostic llm_text_gen."""
|
||||
try:
|
||||
if not self.gemini_grounded:
|
||||
logger.error("Gemini Grounded Provider not available - cannot generate content without AI provider")
|
||||
raise Exception("Gemini Grounded Provider not available - cannot generate content without AI provider")
|
||||
|
||||
# Build the prompt for grounded generation using the new prompt builder
|
||||
prompt = VideoScriptPromptBuilder.build_video_script_prompt(request)
|
||||
|
||||
# Generate grounded content using native Google Search grounding
|
||||
result = await self.gemini_grounded.generate_grounded_content(
|
||||
# Inject research context into prompt
|
||||
research_context = self._build_research_context(research_sources)
|
||||
if research_context:
|
||||
prompt += research_context
|
||||
|
||||
# Generate content using provider-agnostic gateway
|
||||
raw_response = llm_text_gen(
|
||||
prompt=prompt,
|
||||
content_type="linkedin_video_script",
|
||||
temperature=0.7,
|
||||
max_tokens=1500
|
||||
user_id=user_id,
|
||||
flow_type="linkedin_video_script",
|
||||
max_tokens=1500,
|
||||
temperature=0.7
|
||||
)
|
||||
|
||||
return result
|
||||
content_text = raw_response if isinstance(raw_response, str) else str(raw_response or "")
|
||||
|
||||
return {
|
||||
'content': content_text,
|
||||
'sources': [],
|
||||
'citations': [],
|
||||
'grounding_enabled': bool(research_sources),
|
||||
'fallback_used': False
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating grounded video script content: {str(e)}")
|
||||
raise Exception(f"Failed to generate grounded video script content: {str(e)}")
|
||||
logger.error(f"Error generating video script content: {str(e)}")
|
||||
raise Exception(f"Failed to generate LinkedIn video script: {str(e)}")
|
||||
|
||||
async def generate_grounded_comment_response(self, request, research_sources: List) -> Dict[str, Any]:
|
||||
"""Generate grounded comment response using the enhanced Gemini provider with native grounding."""
|
||||
async def generate_grounded_comment_response(self, request, research_sources: List, user_id: str = None) -> Dict[str, Any]:
|
||||
"""Generate comment response using provider-agnostic llm_text_gen."""
|
||||
try:
|
||||
if not self.gemini_grounded:
|
||||
logger.error("Gemini Grounded Provider not available - cannot generate content without AI provider")
|
||||
raise Exception("Gemini Grounded Provider not available - cannot generate content without AI provider")
|
||||
|
||||
# Build the prompt for grounded generation using the new prompt builder
|
||||
prompt = CommentResponsePromptBuilder.build_comment_response_prompt(request)
|
||||
|
||||
# Generate grounded content using native Google Search grounding
|
||||
result = await self.gemini_grounded.generate_grounded_content(
|
||||
# Inject research context into prompt
|
||||
research_context = self._build_research_context(research_sources)
|
||||
if research_context:
|
||||
prompt += research_context
|
||||
|
||||
# Generate content using provider-agnostic gateway
|
||||
raw_response = llm_text_gen(
|
||||
prompt=prompt,
|
||||
content_type="linkedin_comment_response",
|
||||
temperature=0.7,
|
||||
max_tokens=2000
|
||||
user_id=user_id,
|
||||
flow_type="linkedin_comment_response",
|
||||
max_tokens=2000,
|
||||
temperature=0.7
|
||||
)
|
||||
|
||||
return result
|
||||
content_text = raw_response if isinstance(raw_response, str) else str(raw_response or "")
|
||||
|
||||
return {
|
||||
'content': content_text,
|
||||
'sources': [],
|
||||
'citations': [],
|
||||
'grounding_enabled': bool(research_sources),
|
||||
'fallback_used': False
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating grounded comment response: {str(e)}")
|
||||
raise Exception(f"Failed to generate grounded comment response: {str(e)}")
|
||||
logger.error(f"Error generating comment response: {str(e)}")
|
||||
raise Exception(f"Failed to generate LinkedIn comment response: {str(e)}")
|
||||
|
||||
@@ -96,7 +96,7 @@ class CarouselGenerator:
|
||||
'data': carousel_content,
|
||||
'research_sources': research_sources,
|
||||
'generation_metadata': {
|
||||
'model_used': 'gemini-2.0-flash-001',
|
||||
'model_used': 'llm_text_gen',
|
||||
'generation_time': generation_time,
|
||||
'research_time': research_time,
|
||||
'grounding_enabled': grounding_enabled
|
||||
|
||||
@@ -81,7 +81,7 @@ class VideoScriptGenerator:
|
||||
'data': video_script,
|
||||
'research_sources': research_sources,
|
||||
'generation_metadata': {
|
||||
'model_used': 'gemini-2.0-flash-001',
|
||||
'model_used': 'llm_text_gen',
|
||||
'generation_time': generation_time,
|
||||
'research_time': research_time,
|
||||
'grounding_enabled': grounding_enabled
|
||||
|
||||
@@ -2,17 +2,15 @@
|
||||
LinkedIn Image Generation Package
|
||||
|
||||
This package provides AI-powered image generation capabilities for LinkedIn content
|
||||
using Google's Gemini API. It includes image generation, editing, storage, and
|
||||
management services optimized for professional business use.
|
||||
using the common llm_providers infrastructure. It includes image generation, storage,
|
||||
and management services optimized for professional business use.
|
||||
"""
|
||||
|
||||
from .linkedin_image_generator import LinkedInImageGenerator
|
||||
from .linkedin_image_editor import LinkedInImageEditor
|
||||
from .linkedin_image_storage import LinkedInImageStorage
|
||||
|
||||
__all__ = [
|
||||
'LinkedInImageGenerator',
|
||||
'LinkedInImageEditor',
|
||||
'LinkedInImageStorage'
|
||||
]
|
||||
|
||||
|
||||
@@ -1,530 +0,0 @@
|
||||
"""
|
||||
LinkedIn Image Editor Service
|
||||
|
||||
This service handles image editing capabilities for LinkedIn content using Gemini's
|
||||
conversational editing features. It provides professional image refinement and
|
||||
optimization specifically for LinkedIn use cases.
|
||||
"""
|
||||
|
||||
import os
|
||||
import base64
|
||||
from typing import Dict, Any, Optional, List
|
||||
from datetime import datetime
|
||||
from PIL import Image, ImageEnhance, ImageFilter
|
||||
from io import BytesIO
|
||||
from loguru import logger
|
||||
|
||||
# Import existing infrastructure
|
||||
from ...onboarding.api_key_manager import APIKeyManager
|
||||
|
||||
|
||||
class LinkedInImageEditor:
|
||||
"""
|
||||
Handles LinkedIn image editing and refinement using Gemini's capabilities.
|
||||
|
||||
This service provides both AI-powered editing through Gemini and traditional
|
||||
image processing for LinkedIn-specific optimizations.
|
||||
"""
|
||||
|
||||
def __init__(self, api_key_manager: Optional[APIKeyManager] = None):
|
||||
"""
|
||||
Initialize the LinkedIn Image Editor.
|
||||
|
||||
Args:
|
||||
api_key_manager: API key manager for Gemini authentication
|
||||
"""
|
||||
self.api_key_manager = api_key_manager or APIKeyManager()
|
||||
self.model = "gemini-2.5-flash-image-preview"
|
||||
|
||||
# LinkedIn-specific editing parameters
|
||||
self.enhancement_factors = {
|
||||
'brightness': 1.1, # Slightly brighter for mobile viewing
|
||||
'contrast': 1.05, # Subtle contrast enhancement
|
||||
'sharpness': 1.2, # Enhanced sharpness for clarity
|
||||
'saturation': 1.05 # Slight saturation boost
|
||||
}
|
||||
|
||||
logger.info("LinkedIn Image Editor initialized")
|
||||
|
||||
async def edit_image_conversationally(
|
||||
self,
|
||||
base_image: bytes,
|
||||
edit_prompt: str,
|
||||
content_context: Dict[str, Any]
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Edit image using Gemini's conversational editing capabilities.
|
||||
|
||||
Args:
|
||||
base_image: Base image data in bytes
|
||||
edit_prompt: Natural language description of desired edits
|
||||
content_context: LinkedIn content context for optimization
|
||||
|
||||
Returns:
|
||||
Dict containing edited image result and metadata
|
||||
"""
|
||||
try:
|
||||
start_time = datetime.now()
|
||||
logger.info(f"Starting conversational image editing: {edit_prompt[:100]}...")
|
||||
|
||||
# Enhance edit prompt for LinkedIn optimization
|
||||
enhanced_prompt = self._enhance_edit_prompt_for_linkedin(
|
||||
edit_prompt, content_context
|
||||
)
|
||||
|
||||
# TODO: Implement Gemini conversational editing when available
|
||||
# For now, we'll use traditional image processing based on prompt analysis
|
||||
edited_image = await self._apply_traditional_editing(
|
||||
base_image, edit_prompt, content_context
|
||||
)
|
||||
|
||||
if not edited_image.get('success'):
|
||||
return edited_image
|
||||
|
||||
generation_time = (datetime.now() - start_time).total_seconds()
|
||||
|
||||
return {
|
||||
'success': True,
|
||||
'image_data': edited_image['image_data'],
|
||||
'metadata': {
|
||||
'edit_prompt': edit_prompt,
|
||||
'enhanced_prompt': enhanced_prompt,
|
||||
'editing_method': 'traditional_processing',
|
||||
'editing_time': generation_time,
|
||||
'content_context': content_context,
|
||||
'model_used': self.model
|
||||
},
|
||||
'linkedin_optimization': {
|
||||
'mobile_optimized': True,
|
||||
'professional_aesthetic': True,
|
||||
'brand_compliant': True,
|
||||
'engagement_optimized': True
|
||||
}
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in conversational image editing: {str(e)}")
|
||||
return {
|
||||
'success': False,
|
||||
'error': f"Conversational editing failed: {str(e)}",
|
||||
'generation_time': (datetime.now() - start_time).total_seconds() if 'start_time' in locals() else 0
|
||||
}
|
||||
|
||||
async def apply_style_transfer(
|
||||
self,
|
||||
base_image: bytes,
|
||||
style_reference: bytes,
|
||||
content_context: Dict[str, Any]
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Apply style transfer from reference image to base image.
|
||||
|
||||
Args:
|
||||
base_image: Base image data in bytes
|
||||
style_reference: Reference image for style transfer
|
||||
content_context: LinkedIn content context
|
||||
|
||||
Returns:
|
||||
Dict containing style-transferred image result
|
||||
"""
|
||||
try:
|
||||
start_time = datetime.now()
|
||||
logger.info("Starting style transfer for LinkedIn image")
|
||||
|
||||
# TODO: Implement Gemini style transfer when available
|
||||
# For now, return placeholder implementation
|
||||
|
||||
return {
|
||||
'success': False,
|
||||
'error': 'Style transfer not yet implemented - coming in next Gemini API update',
|
||||
'generation_time': (datetime.now() - start_time).total_seconds()
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in style transfer: {str(e)}")
|
||||
return {
|
||||
'success': False,
|
||||
'error': f"Style transfer failed: {str(e)}",
|
||||
'generation_time': (datetime.now() - start_time).total_seconds() if 'start_time' in locals() else 0
|
||||
}
|
||||
|
||||
async def enhance_image_quality(
|
||||
self,
|
||||
image_data: bytes,
|
||||
enhancement_type: str = "linkedin_optimized",
|
||||
content_context: Optional[Dict[str, Any]] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Enhance image quality using traditional image processing.
|
||||
|
||||
Args:
|
||||
image_data: Image data in bytes
|
||||
enhancement_type: Type of enhancement to apply
|
||||
content_context: LinkedIn content context for optimization
|
||||
|
||||
Returns:
|
||||
Dict containing enhanced image result
|
||||
"""
|
||||
try:
|
||||
start_time = datetime.now()
|
||||
logger.info(f"Starting image quality enhancement: {enhancement_type}")
|
||||
|
||||
# Open image for processing
|
||||
image = Image.open(BytesIO(image_data))
|
||||
original_size = image.size
|
||||
|
||||
# Apply LinkedIn-specific enhancements
|
||||
if enhancement_type == "linkedin_optimized":
|
||||
enhanced_image = self._apply_linkedin_enhancements(image, content_context)
|
||||
elif enhancement_type == "professional":
|
||||
enhanced_image = self._apply_professional_enhancements(image)
|
||||
elif enhancement_type == "creative":
|
||||
enhanced_image = self._apply_creative_enhancements(image)
|
||||
else:
|
||||
enhanced_image = self._apply_linkedin_enhancements(image, content_context)
|
||||
|
||||
# Convert back to bytes
|
||||
output_buffer = BytesIO()
|
||||
enhanced_image.save(output_buffer, format=image.format or "PNG", optimize=True)
|
||||
enhanced_data = output_buffer.getvalue()
|
||||
|
||||
enhancement_time = (datetime.now() - start_time).total_seconds()
|
||||
|
||||
return {
|
||||
'success': True,
|
||||
'image_data': enhanced_data,
|
||||
'metadata': {
|
||||
'enhancement_type': enhancement_type,
|
||||
'original_size': original_size,
|
||||
'enhanced_size': enhanced_image.size,
|
||||
'enhancement_time': enhancement_time,
|
||||
'content_context': content_context
|
||||
}
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in image quality enhancement: {str(e)}")
|
||||
return {
|
||||
'success': False,
|
||||
'error': f"Quality enhancement failed: {str(e)}",
|
||||
'generation_time': (datetime.now() - start_time).total_seconds() if 'start_time' in locals() else 0
|
||||
}
|
||||
|
||||
def _enhance_edit_prompt_for_linkedin(
|
||||
self,
|
||||
edit_prompt: str,
|
||||
content_context: Dict[str, Any]
|
||||
) -> str:
|
||||
"""
|
||||
Enhance edit prompt for LinkedIn optimization.
|
||||
|
||||
Args:
|
||||
edit_prompt: Original edit prompt
|
||||
content_context: LinkedIn content context
|
||||
|
||||
Returns:
|
||||
Enhanced edit prompt
|
||||
"""
|
||||
industry = content_context.get('industry', 'business')
|
||||
content_type = content_context.get('content_type', 'post')
|
||||
|
||||
linkedin_edit_enhancements = [
|
||||
f"Maintain professional business aesthetic for {industry} industry",
|
||||
f"Ensure mobile-optimized composition for LinkedIn {content_type}",
|
||||
"Keep professional color scheme and typography",
|
||||
"Maintain brand consistency and visual hierarchy",
|
||||
"Optimize for LinkedIn feed viewing and engagement"
|
||||
]
|
||||
|
||||
enhanced_prompt = f"{edit_prompt}\n\n"
|
||||
enhanced_prompt += "\n".join(linkedin_edit_enhancements)
|
||||
|
||||
return enhanced_prompt
|
||||
|
||||
async def _apply_traditional_editing(
|
||||
self,
|
||||
base_image: bytes,
|
||||
edit_prompt: str,
|
||||
content_context: Dict[str, Any]
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Apply traditional image processing based on edit prompt analysis.
|
||||
|
||||
Args:
|
||||
base_image: Base image data in bytes
|
||||
edit_prompt: Description of desired edits
|
||||
content_context: LinkedIn content context
|
||||
|
||||
Returns:
|
||||
Dict containing edited image result
|
||||
"""
|
||||
try:
|
||||
# Open image for processing
|
||||
image = Image.open(BytesIO(base_image))
|
||||
|
||||
# Analyze edit prompt and apply appropriate processing
|
||||
edit_prompt_lower = edit_prompt.lower()
|
||||
|
||||
if any(word in edit_prompt_lower for word in ['brighter', 'light', 'lighting']):
|
||||
image = self._adjust_brightness(image, 1.2)
|
||||
logger.info("Applied brightness adjustment")
|
||||
|
||||
if any(word in edit_prompt_lower for word in ['sharper', 'sharp', 'clear']):
|
||||
image = self._apply_sharpening(image)
|
||||
logger.info("Applied sharpening")
|
||||
|
||||
if any(word in edit_prompt_lower for word in ['warmer', 'warm', 'color']):
|
||||
image = self._adjust_color_temperature(image, 'warm')
|
||||
logger.info("Applied warm color adjustment")
|
||||
|
||||
if any(word in edit_prompt_lower for word in ['professional', 'business']):
|
||||
image = self._apply_professional_enhancements(image)
|
||||
logger.info("Applied professional enhancements")
|
||||
|
||||
# Convert back to bytes
|
||||
output_buffer = BytesIO()
|
||||
image.save(output_buffer, format=image.format or "PNG", optimize=True)
|
||||
edited_data = output_buffer.getvalue()
|
||||
|
||||
return {
|
||||
'success': True,
|
||||
'image_data': edited_data
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in traditional editing: {str(e)}")
|
||||
return {
|
||||
'success': False,
|
||||
'error': f"Traditional editing failed: {str(e)}"
|
||||
}
|
||||
|
||||
def _apply_linkedin_enhancements(
|
||||
self,
|
||||
image: Image.Image,
|
||||
content_context: Optional[Dict[str, Any]] = None
|
||||
) -> Image.Image:
|
||||
"""
|
||||
Apply LinkedIn-specific image enhancements.
|
||||
|
||||
Args:
|
||||
image: PIL Image object
|
||||
content_context: LinkedIn content context
|
||||
|
||||
Returns:
|
||||
Enhanced image
|
||||
"""
|
||||
try:
|
||||
# Apply standard LinkedIn optimizations
|
||||
image = self._adjust_brightness(image, self.enhancement_factors['brightness'])
|
||||
image = self._adjust_contrast(image, self.enhancement_factors['contrast'])
|
||||
image = self._apply_sharpening(image)
|
||||
image = self._adjust_saturation(image, self.enhancement_factors['saturation'])
|
||||
|
||||
# Ensure professional appearance
|
||||
image = self._ensure_professional_appearance(image, content_context)
|
||||
|
||||
return image
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error applying LinkedIn enhancements: {str(e)}")
|
||||
return image
|
||||
|
||||
def _apply_professional_enhancements(self, image: Image.Image) -> Image.Image:
|
||||
"""
|
||||
Apply professional business aesthetic enhancements.
|
||||
|
||||
Args:
|
||||
image: PIL Image object
|
||||
|
||||
Returns:
|
||||
Enhanced image
|
||||
"""
|
||||
try:
|
||||
# Subtle enhancements for professional appearance
|
||||
image = self._adjust_brightness(image, 1.05)
|
||||
image = self._adjust_contrast(image, 1.03)
|
||||
image = self._apply_sharpening(image)
|
||||
|
||||
return image
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error applying professional enhancements: {str(e)}")
|
||||
return image
|
||||
|
||||
def _apply_creative_enhancements(self, image: Image.Image) -> Image.Image:
|
||||
"""
|
||||
Apply creative and engaging enhancements.
|
||||
|
||||
Args:
|
||||
image: PIL Image object
|
||||
|
||||
Returns:
|
||||
Enhanced image
|
||||
"""
|
||||
try:
|
||||
# More pronounced enhancements for creative appeal
|
||||
image = self._adjust_brightness(image, 1.1)
|
||||
image = self._adjust_contrast(image, 1.08)
|
||||
image = self._adjust_saturation(image, 1.1)
|
||||
image = self._apply_sharpening(image)
|
||||
|
||||
return image
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error applying creative enhancements: {str(e)}")
|
||||
return image
|
||||
|
||||
def _adjust_brightness(self, image: Image.Image, factor: float) -> Image.Image:
|
||||
"""Adjust image brightness."""
|
||||
try:
|
||||
enhancer = ImageEnhance.Brightness(image)
|
||||
return enhancer.enhance(factor)
|
||||
except Exception as e:
|
||||
logger.error(f"Error adjusting brightness: {str(e)}")
|
||||
return image
|
||||
|
||||
def _adjust_contrast(self, image: Image.Image, factor: float) -> Image.Image:
|
||||
"""Adjust image contrast."""
|
||||
try:
|
||||
enhancer = ImageEnhance.Contrast(image)
|
||||
return enhancer.enhance(factor)
|
||||
except Exception as e:
|
||||
logger.error(f"Error adjusting contrast: {str(e)}")
|
||||
return image
|
||||
|
||||
def _adjust_saturation(self, image: Image.Image, factor: float) -> Image.Image:
|
||||
"""Adjust image saturation."""
|
||||
try:
|
||||
enhancer = ImageEnhance.Color(image)
|
||||
return enhancer.enhance(factor)
|
||||
except Exception as e:
|
||||
logger.error(f"Error adjusting saturation: {str(e)}")
|
||||
return image
|
||||
|
||||
def _apply_sharpening(self, image: Image.Image) -> Image.Image:
|
||||
"""Apply image sharpening."""
|
||||
try:
|
||||
# Apply unsharp mask for professional sharpening
|
||||
return image.filter(ImageFilter.UnsharpMask(radius=1, percent=150, threshold=3))
|
||||
except Exception as e:
|
||||
logger.error(f"Error applying sharpening: {str(e)}")
|
||||
return image
|
||||
|
||||
def _adjust_color_temperature(self, image: Image.Image, temperature: str) -> Image.Image:
|
||||
"""Adjust image color temperature."""
|
||||
try:
|
||||
if temperature == 'warm':
|
||||
# Apply warm color adjustment
|
||||
enhancer = ImageEnhance.Color(image)
|
||||
image = enhancer.enhance(1.1)
|
||||
|
||||
# Slight red tint for warmth
|
||||
# This is a simplified approach - more sophisticated color grading could be implemented
|
||||
return image
|
||||
else:
|
||||
return image
|
||||
except Exception as e:
|
||||
logger.error(f"Error adjusting color temperature: {str(e)}")
|
||||
return image
|
||||
|
||||
def _ensure_professional_appearance(
|
||||
self,
|
||||
image: Image.Image,
|
||||
content_context: Optional[Dict[str, Any]] = None
|
||||
) -> Image.Image:
|
||||
"""
|
||||
Ensure image meets professional LinkedIn standards.
|
||||
|
||||
Args:
|
||||
image: PIL Image object
|
||||
content_context: LinkedIn content context
|
||||
|
||||
Returns:
|
||||
Professionally optimized image
|
||||
"""
|
||||
try:
|
||||
# Ensure minimum quality standards
|
||||
if image.mode in ('RGBA', 'LA', 'P'):
|
||||
# Convert to RGB for better compatibility
|
||||
background = Image.new('RGB', image.size, (255, 255, 255))
|
||||
if image.mode == 'P':
|
||||
image = image.convert('RGBA')
|
||||
background.paste(image, mask=image.split()[-1] if image.mode == 'RGBA' else None)
|
||||
image = background
|
||||
|
||||
# Ensure minimum resolution for LinkedIn
|
||||
min_resolution = (1024, 1024)
|
||||
if image.size[0] < min_resolution[0] or image.size[1] < min_resolution[1]:
|
||||
# Resize to minimum resolution while maintaining aspect ratio
|
||||
ratio = max(min_resolution[0] / image.size[0], min_resolution[1] / image.size[1])
|
||||
new_size = (int(image.size[0] * ratio), int(image.size[1] * ratio))
|
||||
image = image.resize(new_size, Image.Resampling.LANCZOS)
|
||||
logger.info(f"Resized image to {new_size} for LinkedIn professional standards")
|
||||
|
||||
return image
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error ensuring professional appearance: {str(e)}")
|
||||
return image
|
||||
|
||||
async def get_editing_suggestions(
|
||||
self,
|
||||
image_data: bytes,
|
||||
content_context: Dict[str, Any]
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Get AI-powered editing suggestions for LinkedIn image.
|
||||
|
||||
Args:
|
||||
image_data: Image data in bytes
|
||||
content_context: LinkedIn content context
|
||||
|
||||
Returns:
|
||||
List of editing suggestions
|
||||
"""
|
||||
try:
|
||||
# Analyze image and provide contextual suggestions
|
||||
suggestions = []
|
||||
|
||||
# Professional enhancement suggestions
|
||||
suggestions.append({
|
||||
'id': 'professional_enhancement',
|
||||
'title': 'Professional Enhancement',
|
||||
'description': 'Apply subtle professional enhancements for business appeal',
|
||||
'prompt': 'Enhance this image with professional business aesthetics',
|
||||
'priority': 'high'
|
||||
})
|
||||
|
||||
# Mobile optimization suggestions
|
||||
suggestions.append({
|
||||
'id': 'mobile_optimization',
|
||||
'title': 'Mobile Optimization',
|
||||
'description': 'Optimize for LinkedIn mobile feed viewing',
|
||||
'prompt': 'Optimize this image for mobile LinkedIn viewing',
|
||||
'priority': 'medium'
|
||||
})
|
||||
|
||||
# Industry-specific suggestions
|
||||
industry = content_context.get('industry', 'business')
|
||||
suggestions.append({
|
||||
'id': 'industry_optimization',
|
||||
'title': f'{industry.title()} Industry Optimization',
|
||||
'description': f'Apply {industry} industry-specific visual enhancements',
|
||||
'prompt': f'Enhance this image with {industry} industry aesthetics',
|
||||
'priority': 'medium'
|
||||
})
|
||||
|
||||
# Engagement optimization suggestions
|
||||
suggestions.append({
|
||||
'id': 'engagement_optimization',
|
||||
'title': 'Engagement Optimization',
|
||||
'description': 'Make this image more engaging for LinkedIn audience',
|
||||
'prompt': 'Make this image more engaging and shareable for LinkedIn',
|
||||
'priority': 'low'
|
||||
})
|
||||
|
||||
return suggestions
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting editing suggestions: {str(e)}")
|
||||
return []
|
||||
@@ -1,8 +1,9 @@
|
||||
"""
|
||||
LinkedIn Image Generator Service
|
||||
|
||||
This service generates LinkedIn-optimized images using Google's Gemini API.
|
||||
It provides professional, business-appropriate imagery for LinkedIn content.
|
||||
This service generates LinkedIn-optimized images using the common
|
||||
llm_providers infrastructure. It provides professional, business-appropriate
|
||||
imagery for LinkedIn content.
|
||||
"""
|
||||
|
||||
import os
|
||||
@@ -17,6 +18,7 @@ from io import BytesIO
|
||||
# Import existing infrastructure
|
||||
from ...onboarding.api_key_manager import APIKeyManager
|
||||
from ...llm_providers.main_image_generation import generate_image
|
||||
from ...llm_providers.main_image_editing import edit_image as common_edit_image
|
||||
|
||||
# Set up logging
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -24,9 +26,9 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
class LinkedInImageGenerator:
|
||||
"""
|
||||
Handles LinkedIn-optimized image generation using Gemini API.
|
||||
Handles LinkedIn-optimized image generation using common infrastructure.
|
||||
|
||||
This service integrates with the existing Gemini provider infrastructure
|
||||
This service integrates with the llm_providers image generation system
|
||||
and provides LinkedIn-specific image optimization, quality assurance,
|
||||
and professional business aesthetics.
|
||||
"""
|
||||
@@ -36,10 +38,9 @@ class LinkedInImageGenerator:
|
||||
Initialize the LinkedIn Image Generator.
|
||||
|
||||
Args:
|
||||
api_key_manager: API key manager for Gemini authentication
|
||||
api_key_manager: API key manager for authentication
|
||||
"""
|
||||
self.api_key_manager = api_key_manager or APIKeyManager()
|
||||
self.model = "gemini-2.5-flash-image-preview"
|
||||
self.default_aspect_ratio = "1:1" # LinkedIn post optimal ratio
|
||||
self.max_retries = 3
|
||||
|
||||
@@ -55,16 +56,18 @@ class LinkedInImageGenerator:
|
||||
prompt: str,
|
||||
content_context: Dict[str, Any],
|
||||
aspect_ratio: str = "1:1",
|
||||
style_preference: str = "professional"
|
||||
style_preference: str = "professional",
|
||||
user_id: Optional[str] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Generate LinkedIn-optimized image using Gemini API.
|
||||
Generate LinkedIn-optimized image using AI provider.
|
||||
|
||||
Args:
|
||||
prompt: User's image generation prompt
|
||||
content_context: LinkedIn content context (topic, industry, content_type)
|
||||
aspect_ratio: Image aspect ratio (1:1, 16:9, 4:3)
|
||||
aspect_ratio: Image aspect ratio (1:1, 16:9, 4:3, 1.91:1, 1:1.25)
|
||||
style_preference: Style preference (professional, creative, industry-specific)
|
||||
user_id: User ID for tenant provider resolution
|
||||
|
||||
Returns:
|
||||
Dict containing generation result, image data, and metadata
|
||||
@@ -78,8 +81,8 @@ class LinkedInImageGenerator:
|
||||
prompt, content_context, style_preference, aspect_ratio
|
||||
)
|
||||
|
||||
# Generate image using existing Gemini infrastructure
|
||||
generation_result = await self._generate_with_gemini(enhanced_prompt, aspect_ratio)
|
||||
# Generate image using tenant-aware provider selection
|
||||
generation_result = await self._generate_with_provider(enhanced_prompt, aspect_ratio, user_id)
|
||||
|
||||
if not generation_result.get('success'):
|
||||
return {
|
||||
@@ -108,7 +111,7 @@ class LinkedInImageGenerator:
|
||||
'aspect_ratio': aspect_ratio,
|
||||
'content_context': content_context,
|
||||
'generation_time': generation_time,
|
||||
'model_used': self.model,
|
||||
'model_used': generation_result.get('model'),
|
||||
'image_format': processed_image['format'],
|
||||
'image_size': processed_image['size'],
|
||||
'resolution': processed_image['resolution']
|
||||
@@ -131,17 +134,19 @@ class LinkedInImageGenerator:
|
||||
|
||||
async def edit_image(
|
||||
self,
|
||||
base_image: bytes,
|
||||
input_image_bytes: bytes,
|
||||
edit_prompt: str,
|
||||
content_context: Dict[str, Any]
|
||||
content_context: Dict[str, Any],
|
||||
user_id: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Edit existing image using Gemini's conversational editing capabilities.
|
||||
Edit existing image using unified image editing infrastructure.
|
||||
|
||||
Args:
|
||||
base_image: Base image data in bytes
|
||||
input_image_bytes: Input image bytes to edit
|
||||
edit_prompt: Description of desired edits
|
||||
content_context: LinkedIn content context for optimization
|
||||
user_id: User ID for tenant provider resolution and subscription checks
|
||||
|
||||
Returns:
|
||||
Dict containing edited image result and metadata
|
||||
@@ -155,18 +160,46 @@ class LinkedInImageGenerator:
|
||||
edit_prompt, content_context
|
||||
)
|
||||
|
||||
# Use Gemini's image editing capabilities
|
||||
# Note: This will be implemented when Gemini's image editing is fully available
|
||||
# For now, we'll return a placeholder implementation
|
||||
# Use unified image editing system.
|
||||
# common_edit_image() handles: provider resolution, pre-flight validation,
|
||||
# generation, and usage tracking — all via user_id.
|
||||
result = common_edit_image(
|
||||
input_image_bytes=input_image_bytes,
|
||||
prompt=enhanced_edit_prompt,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
if result and result.image_bytes:
|
||||
generation_time = (datetime.now() - start_time).total_seconds()
|
||||
logger.info(
|
||||
"LinkedIn image edited successfully via provider=%s model=%s in %.2fs",
|
||||
result.provider, result.model, generation_time,
|
||||
)
|
||||
return {
|
||||
'success': True,
|
||||
'image_data': result.image_bytes,
|
||||
'image_url': None, # not using URL-based retrieval
|
||||
'width': result.width,
|
||||
'height': result.height,
|
||||
'provider': result.provider,
|
||||
'model': result.model,
|
||||
'metadata': {
|
||||
'original_prompt': edit_prompt,
|
||||
'enhanced_prompt': enhanced_edit_prompt,
|
||||
'generation_time': generation_time,
|
||||
'content_context': content_context,
|
||||
},
|
||||
}
|
||||
else:
|
||||
logger.warning("LinkedIn image editing returned no result")
|
||||
return {
|
||||
'success': False,
|
||||
'error': 'Image editing not yet implemented - coming in next Gemini API update',
|
||||
'generation_time': (datetime.now() - start_time).total_seconds()
|
||||
'error': 'Image editing returned no result',
|
||||
'generation_time': (datetime.now() - start_time).total_seconds(),
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in LinkedIn image editing: {str(e)}")
|
||||
logger.error(f"Error in LinkedIn image editing: {str(e)}", exc_info=True)
|
||||
return {
|
||||
'success': False,
|
||||
'error': f"Image editing failed: {str(e)}",
|
||||
@@ -268,13 +301,16 @@ class LinkedInImageGenerator:
|
||||
|
||||
return enhanced_edit_prompt
|
||||
|
||||
async def _generate_with_gemini(self, prompt: str, aspect_ratio: str) -> Dict[str, Any]:
|
||||
async def _generate_with_provider(self, prompt: str, aspect_ratio: str, user_id: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Generate image using unified image generation infrastructure.
|
||||
Provider resolution, pre-flight validation, and usage tracking
|
||||
are all handled by generate_image() from main_image_generation.
|
||||
|
||||
Args:
|
||||
prompt: Enhanced prompt for image generation
|
||||
aspect_ratio: Desired aspect ratio
|
||||
user_id: User ID for tenant provider resolution and subscription checks
|
||||
|
||||
Returns:
|
||||
Generation result from image generation provider
|
||||
@@ -285,26 +321,31 @@ class LinkedInImageGenerator:
|
||||
"1:1": (1024, 1024),
|
||||
"16:9": (1920, 1080),
|
||||
"4:3": (1366, 1024),
|
||||
"9:16": (1080, 1920), # Portrait for stories
|
||||
"9:16": (1080, 1920),
|
||||
"1.91:1": (1200, 627), # LinkedIn recommended landscape
|
||||
"1:1.25": (1080, 1350), # LinkedIn recommended portrait
|
||||
}
|
||||
width, height = aspect_map.get(aspect_ratio, (1024, 1024))
|
||||
|
||||
# Use unified image generation system (defaults to provider based on GPT_PROVIDER)
|
||||
# Delegate to unified image generation system.
|
||||
            # generate_image() handles: provider resolution, pre-flight validation,
|
||||
# model auto-detection, generation, and usage tracking.
|
||||
# We do NOT pass explicit provider or model — let generate_image() resolve
|
||||
# them from tenant config and user defaults.
|
||||
result = generate_image(
|
||||
prompt=prompt,
|
||||
options={
|
||||
"provider": "gemini", # LinkedIn uses Gemini by default
|
||||
"model": self.model if hasattr(self, 'model') else None,
|
||||
"width": width,
|
||||
"height": height,
|
||||
}
|
||||
},
|
||||
user_id=user_id
|
||||
)
|
||||
|
||||
if result and result.image_bytes:
|
||||
return {
|
||||
'success': True,
|
||||
'image_data': result.image_bytes,
|
||||
'image_path': None, # No file path, using bytes directly
|
||||
'image_path': None,
|
||||
'width': result.width,
|
||||
'height': result.height,
|
||||
'provider': result.provider,
|
||||
@@ -487,6 +528,9 @@ class LinkedInImageGenerator:
|
||||
(1.6, 1.8), # 16:9 (landscape)
|
||||
(0.7, 0.8), # 4:3 (portrait)
|
||||
(1.2, 1.4), # 5:4 (landscape)
|
||||
(1.85, 2.0), # 1.91:1 (LinkedIn recommended landscape)
|
||||
(0.6, 0.72), # 1:1.25 (LinkedIn recommended portrait, ~0.8)
|
||||
(0.65, 0.85), # 1:1.25 broader match
|
||||
]
|
||||
|
||||
for min_ratio, max_ratio in suitable_ratios:
|
||||
|
||||
@@ -6,8 +6,10 @@ It provides secure storage, efficient retrieval, and metadata management for gen
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import hashlib
|
||||
import json
|
||||
import shutil
|
||||
from typing import Dict, Any, Optional, List, Tuple
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
@@ -58,6 +60,8 @@ class LinkedInImageStorage:
|
||||
self.max_storage_size_gb = 10 # Maximum storage size in GB
|
||||
self.image_retention_days = 30 # Days to keep images
|
||||
self.max_image_size_mb = 10 # Maximum individual image size in MB
|
||||
self.max_images_per_user = 100 # Maximum images per user
|
||||
self._uuid_pattern = re.compile(r'^[a-f0-9]{16}$')
|
||||
|
||||
logger.info(f"LinkedIn Image Storage initialized at {self.base_storage_path}")
|
||||
|
||||
@@ -102,6 +106,22 @@ class LinkedInImageStorage:
|
||||
try:
|
||||
start_time = datetime.now()
|
||||
|
||||
# Check per-user storage quota
|
||||
if user_id:
|
||||
user_count = await self._count_user_images(user_id)
|
||||
if user_count >= self.max_images_per_user:
|
||||
return {
|
||||
'success': False,
|
||||
'error': f"User image limit ({self.max_images_per_user}) reached. Delete existing images or increase limit."
|
||||
}
|
||||
|
||||
# Check disk space
|
||||
if not await self._check_disk_space(len(image_data)):
|
||||
return {
|
||||
'success': False,
|
||||
'error': "Insufficient disk space for image storage."
|
||||
}
|
||||
|
||||
# Generate unique image ID
|
||||
image_id = self._generate_image_id(image_data, metadata)
|
||||
|
||||
@@ -170,6 +190,9 @@ class LinkedInImageStorage:
|
||||
Dict containing image data and metadata
|
||||
"""
|
||||
try:
|
||||
if not self._validate_image_id(image_id):
|
||||
return {'success': False, 'error': f'Invalid image ID format: {image_id}'}
|
||||
|
||||
# Find image file
|
||||
image_path = await self._find_image_by_id(image_id, user_id)
|
||||
if not image_path:
|
||||
@@ -216,6 +239,9 @@ class LinkedInImageStorage:
|
||||
Dict containing deletion result
|
||||
"""
|
||||
try:
|
||||
if not self._validate_image_id(image_id):
|
||||
return {'success': False, 'error': f'Invalid image ID format: {image_id}'}
|
||||
|
||||
# Find image file
|
||||
image_path = await self._find_image_by_id(image_id, user_id)
|
||||
if not image_path:
|
||||
@@ -418,6 +444,32 @@ class LinkedInImageStorage:
|
||||
'error': f"Failed to get storage stats: {str(e)}"
|
||||
}
|
||||
|
||||
def _validate_image_id(self, image_id: str) -> bool:
|
||||
"""Validate image_id against expected format to prevent path traversal."""
|
||||
return bool(self._uuid_pattern.match(image_id))
|
||||
|
||||
async def _count_user_images(self, user_id: str) -> int:
|
||||
"""Count total images stored for a given user."""
|
||||
try:
|
||||
images_path, _ = self._get_workspace_paths(user_id)
|
||||
count = 0
|
||||
if images_path.exists():
|
||||
for content_dir in images_path.iterdir():
|
||||
if content_dir.is_dir():
|
||||
count += sum(1 for f in content_dir.glob("*.png") if f.is_file())
|
||||
return count
|
||||
except Exception as e:
|
||||
logger.warning(f"Error counting images for user {user_id}: {e}")
|
||||
return 0
|
||||
|
||||
async def _check_disk_space(self, required_bytes: int) -> bool:
|
||||
"""Check if sufficient disk space is available."""
|
||||
try:
|
||||
usage = shutil.disk_usage(self.base_storage_path)
|
||||
return usage.free > required_bytes * 2 # require 2x headroom
|
||||
except Exception:
|
||||
return True # if we can't check, allow the write
|
||||
|
||||
def _generate_image_id(self, image_data: bytes, metadata: Dict[str, Any]) -> str:
|
||||
"""Generate unique image ID based on content and metadata."""
|
||||
# Create hash from image data and key metadata
|
||||
@@ -569,6 +621,9 @@ class LinkedInImageStorage:
|
||||
Returns:
|
||||
Dict containing image metadata if found
|
||||
"""
|
||||
if not self._validate_image_id(image_id):
|
||||
logger.warning(f"Invalid image ID format in metadata request: {image_id}")
|
||||
return None
|
||||
return await self._load_metadata(image_id, user_id)
|
||||
|
||||
async def _load_metadata(self, image_id: str, user_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
LinkedIn Image Prompts Package
|
||||
|
||||
This package provides AI-powered image prompt generation for LinkedIn content
|
||||
using Google's Gemini API. It creates three distinct prompt styles optimized
|
||||
for professional business image generation.
|
||||
using the provider-agnostic llm_text_gen gateway. It creates three distinct
|
||||
prompt styles optimized for professional business image generation.
|
||||
"""
|
||||
|
||||
from .linkedin_prompt_generator import LinkedInPromptGenerator
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
"""
|
||||
LinkedIn Image Prompt Generator Service
|
||||
|
||||
This service generates AI-optimized image prompts for LinkedIn content using Gemini's
|
||||
capabilities. It creates three distinct prompt styles (professional, creative, industry-specific)
|
||||
following best practices for image generation.
|
||||
This service generates AI-optimized image prompts for LinkedIn content using
|
||||
the provider-agnostic llm_text_gen gateway. It creates three distinct prompt
|
||||
styles (professional, creative, industry-specific) following best practices
|
||||
for image generation.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
@@ -13,14 +14,14 @@ from loguru import logger
|
||||
|
||||
# Import existing infrastructure
|
||||
from ...onboarding.api_key_manager import APIKeyManager
|
||||
from ...llm_providers.gemini_provider import gemini_text_response
|
||||
from ...llm_providers.main_text_generation import llm_text_gen
|
||||
|
||||
|
||||
class LinkedInPromptGenerator:
|
||||
"""
|
||||
Generates AI-optimized image prompts for LinkedIn content.
|
||||
|
||||
This service creates three distinct prompt styles following Gemini API best practices:
|
||||
This service creates three distinct prompt styles following best practices:
|
||||
1. Professional Style - Corporate aesthetics, clean lines, business colors
|
||||
2. Creative Style - Engaging visuals, vibrant colors, social media appeal
|
||||
3. Industry-Specific Style - Tailored to specific business sectors
|
||||
@@ -31,10 +32,9 @@ class LinkedInPromptGenerator:
|
||||
Initialize the LinkedIn Prompt Generator.
|
||||
|
||||
Args:
|
||||
api_key_manager: API key manager for Gemini authentication
|
||||
api_key_manager: API key manager for authentication
|
||||
"""
|
||||
self.api_key_manager = api_key_manager or APIKeyManager()
|
||||
self.model = "gemini-2.0-flash-exp"
|
||||
|
||||
# Prompt generation configuration
|
||||
self.max_prompt_length = 500
|
||||
@@ -49,7 +49,8 @@ class LinkedInPromptGenerator:
|
||||
async def generate_three_prompts(
|
||||
self,
|
||||
linkedin_content: Dict[str, Any],
|
||||
aspect_ratio: str = "1:1"
|
||||
aspect_ratio: str = "1:1",
|
||||
user_id: str = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Generate three AI-optimized image prompts for LinkedIn content.
|
||||
@@ -57,6 +58,7 @@ class LinkedInPromptGenerator:
|
||||
Args:
|
||||
linkedin_content: LinkedIn content context (topic, industry, content_type, content)
|
||||
aspect_ratio: Desired image aspect ratio
|
||||
user_id: User ID for subscription checking
|
||||
|
||||
Returns:
|
||||
List of three prompt objects with style, prompt, and description
|
||||
@@ -65,11 +67,11 @@ class LinkedInPromptGenerator:
|
||||
start_time = datetime.now()
|
||||
logger.info(f"Generating image prompts for LinkedIn content: {linkedin_content.get('topic', 'Unknown')}")
|
||||
|
||||
# Generate prompts using Gemini
|
||||
prompts = await self._generate_prompts_with_gemini(linkedin_content, aspect_ratio)
|
||||
# Generate prompts using provider-agnostic gateway
|
||||
prompts = await self._generate_prompts_with_llm(linkedin_content, aspect_ratio, user_id)
|
||||
|
||||
if not prompts or len(prompts) < 3:
|
||||
logger.warning("Gemini prompt generation failed, using fallback prompts")
|
||||
logger.warning("Prompt generation failed, using fallback prompts")
|
||||
prompts = self._get_fallback_prompts(linkedin_content, aspect_ratio)
|
||||
|
||||
# Ensure exactly 3 prompts
|
||||
@@ -92,62 +94,65 @@ class LinkedInPromptGenerator:
|
||||
logger.error(f"Error generating LinkedIn image prompts: {str(e)}")
|
||||
return self._get_fallback_prompts(linkedin_content, aspect_ratio)
|
||||
|
||||
async def _generate_prompts_with_gemini(
|
||||
async def _generate_prompts_with_llm(
|
||||
self,
|
||||
linkedin_content: Dict[str, Any],
|
||||
aspect_ratio: str
|
||||
aspect_ratio: str,
|
||||
user_id: str = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Generate image prompts using Gemini AI.
|
||||
Generate image prompts using provider-agnostic llm_text_gen.
|
||||
|
||||
Args:
|
||||
linkedin_content: LinkedIn content context
|
||||
aspect_ratio: Image aspect ratio
|
||||
user_id: User ID for subscription checking
|
||||
|
||||
Returns:
|
||||
List of generated prompts
|
||||
"""
|
||||
try:
|
||||
# Build the prompt for Gemini
|
||||
gemini_prompt = self._build_gemini_prompt(linkedin_content, aspect_ratio)
|
||||
# Build the prompt
|
||||
prompt = self._build_image_prompt(linkedin_content, aspect_ratio)
|
||||
|
||||
# Generate response using Gemini
|
||||
response = gemini_text_response(
|
||||
prompt=gemini_prompt,
|
||||
temperature=0.7,
|
||||
top_p=0.8,
|
||||
n=1,
|
||||
# Generate response using provider-agnostic gateway
|
||||
response = llm_text_gen(
|
||||
prompt=prompt,
|
||||
system_prompt="You are an expert AI image prompt engineer specializing in LinkedIn content optimization.",
|
||||
user_id=user_id,
|
||||
flow_type="linkedin_image_prompts",
|
||||
max_tokens=1000,
|
||||
system_prompt="You are an expert AI image prompt engineer specializing in LinkedIn content optimization."
|
||||
temperature=0.7
|
||||
)
|
||||
|
||||
if not response:
|
||||
logger.warning("No response from Gemini prompt generation")
|
||||
logger.warning("No response from prompt generation")
|
||||
return []
|
||||
|
||||
# Parse Gemini response into structured prompts
|
||||
prompts = self._parse_gemini_response(response, linkedin_content)
|
||||
# Parse response into structured prompts
|
||||
response_text = response if isinstance(response, str) else str(response or "")
|
||||
prompts = self._parse_llm_response(response_text, linkedin_content)
|
||||
|
||||
return prompts
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in Gemini prompt generation: {str(e)}")
|
||||
logger.error(f"Error in prompt generation: {str(e)}")
|
||||
return []
|
||||
|
||||
def _build_gemini_prompt(
|
||||
def _build_image_prompt(
|
||||
self,
|
||||
linkedin_content: Dict[str, Any],
|
||||
aspect_ratio: str
|
||||
) -> str:
|
||||
"""
|
||||
Build comprehensive prompt for Gemini to generate image prompts.
|
||||
Build comprehensive prompt for LLM to generate image prompts.
|
||||
|
||||
Args:
|
||||
linkedin_content: LinkedIn content context
|
||||
aspect_ratio: Image aspect ratio
|
||||
|
||||
Returns:
|
||||
Formatted prompt for Gemini
|
||||
Formatted prompt for LLM
|
||||
"""
|
||||
topic = linkedin_content.get('topic', 'business')
|
||||
industry = linkedin_content.get('industry', 'business')
|
||||
@@ -428,16 +433,16 @@ class LinkedInPromptGenerator:
|
||||
else:
|
||||
return 'Informational & Awareness'
|
||||
|
||||
def _parse_gemini_response(
|
||||
def _parse_llm_response(
|
||||
self,
|
||||
response: str,
|
||||
linkedin_content: Dict[str, Any]
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Parse Gemini response into structured prompt objects.
|
||||
Parse LLM response into structured prompt objects.
|
||||
|
||||
Args:
|
||||
response: Raw response from Gemini
|
||||
response: Raw response from LLM
|
||||
linkedin_content: LinkedIn content context
|
||||
|
||||
Returns:
|
||||
@@ -462,7 +467,7 @@ class LinkedInPromptGenerator:
|
||||
return self._parse_response_manually(response, linkedin_content)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error parsing Gemini response: {str(e)}")
|
||||
logger.error(f"Error parsing LLM response: {str(e)}")
|
||||
return self._parse_response_manually(response, linkedin_content)
|
||||
|
||||
def _parse_response_manually(
|
||||
@@ -474,7 +479,7 @@ class LinkedInPromptGenerator:
|
||||
Manually parse response if JSON parsing fails.
|
||||
|
||||
Args:
|
||||
response: Raw response from Gemini
|
||||
response: Raw response from LLM
|
||||
linkedin_content: LinkedIn content context
|
||||
|
||||
Returns:
|
||||
|
||||
@@ -2,9 +2,10 @@
|
||||
Research Handler for LinkedIn Content Generation
|
||||
|
||||
Handles research operations and timing for content generation.
|
||||
Uses common Exa/Tavily infrastructure with pre-flight validation.
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
from typing import List, Optional
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
from models.linkedin_models import ResearchSource
|
||||
@@ -21,11 +22,19 @@ class ResearchHandler:
|
||||
request,
|
||||
research_enabled: bool,
|
||||
search_engine: str,
|
||||
max_results: int = 10
|
||||
max_results: int = 10,
|
||||
user_id: Optional[str] = None
|
||||
) -> tuple[List[ResearchSource], float]:
|
||||
"""
|
||||
Conduct research if enabled and return sources with timing.
|
||||
|
||||
Args:
|
||||
request: Generation request object
|
||||
research_enabled: Whether research is enabled
|
||||
search_engine: Search engine to use (exa, tavily)
|
||||
max_results: Maximum number of results
|
||||
user_id: User ID for pre-flight validation and usage tracking
|
||||
|
||||
Returns:
|
||||
Tuple of (research_sources, research_time)
|
||||
"""
|
||||
@@ -33,7 +42,6 @@ class ResearchHandler:
|
||||
research_time = 0
|
||||
|
||||
if research_enabled:
|
||||
# Debug: Log the search engine value being passed
|
||||
logger.info(f"ResearchHandler: search_engine='{search_engine}' (type: {type(search_engine)})")
|
||||
|
||||
research_start = datetime.now()
|
||||
@@ -41,7 +49,8 @@ class ResearchHandler:
|
||||
topic=request.topic,
|
||||
industry=request.industry,
|
||||
search_engine=search_engine,
|
||||
max_results=max_results
|
||||
max_results=max_results,
|
||||
user_id=user_id
|
||||
)
|
||||
research_time = (datetime.now() - research_start).total_seconds()
|
||||
logger.info(f"Research completed in {research_time:.2f}s, found {len(research_sources)} sources")
|
||||
@@ -67,10 +76,5 @@ class ResearchHandler:
|
||||
if not research_enabled or level == 'none':
|
||||
return False
|
||||
|
||||
# For Google native grounding, Gemini returns sources in the generation metadata,
|
||||
# so we should not require pre-fetched research_sources.
|
||||
if engine_str == 'google':
|
||||
return True
|
||||
|
||||
# For other engines, require that research actually returned sources
|
||||
return bool(research_sources)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user