Merge branch 'recover-stash'
This commit is contained in:
@@ -44,7 +44,7 @@ CORE_ROUTER_REGISTRY = [
|
||||
OPTIONAL_ROUTER_REGISTRY = [
|
||||
{"name": "blog_writer", "module": "api.blog_writer.router", "attr": "router", "features": {"all", "blog_writer"}},
|
||||
{"name": "story_writer", "module": "api.story_writer.router", "attr": "router", "features": {"all", "story_writer"}},
|
||||
{"name": "wix", "module": "api.wix_routes", "attr": "router", "features": {"all"}},
|
||||
{"name": "wix", "module": "api.wix_routes", "attr": "router", "features": {"all", "blog_writer"}},
|
||||
{"name": "wix_test", "module": "api.wix_routes", "attr": "qa_router", "features": {"all"}},
|
||||
{"name": "blog_seo_analysis", "module": "api.blog_writer.seo_analysis", "attr": "router", "features": {"all", "blog_writer"}},
|
||||
{"name": "persona", "module": "api.persona_routes", "attr": "router", "features": {"all", "persona"}},
|
||||
|
||||
@@ -9,10 +9,12 @@ from fastapi import APIRouter, HTTPException, Depends
|
||||
from typing import Any, Dict, List, Optional
|
||||
from pydantic import BaseModel, Field
|
||||
from loguru import logger
|
||||
from datetime import datetime
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from sqlalchemy.orm import Session
|
||||
from services.database import get_db as get_db_dependency
|
||||
from utils.text_asset_tracker import save_and_track_text_content
|
||||
from models.content_asset_models import AssetType, AssetSource
|
||||
|
||||
from models.blog_models import (
|
||||
BlogResearchRequest,
|
||||
@@ -36,6 +38,7 @@ from models.blog_models import (
|
||||
from services.blog_writer.blog_service import BlogWriterService
|
||||
from services.blog_writer.seo.blog_seo_recommendation_applier import BlogSEORecommendationApplier
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from services.content_asset_service import ContentAssetService
|
||||
from .task_manager import task_manager
|
||||
from .cache_manager import cache_manager
|
||||
from models.blog_models import MediumBlogGenerateRequest
|
||||
@@ -1260,3 +1263,233 @@ async def save_complete_blog_asset(
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save complete blog asset: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
# ---------------------------------------
|
||||
# Blog Asset API (phase-by-phase saving via ContentAsset)
|
||||
# ---------------------------------------
|
||||
|
||||
|
||||
class BlogAssetCreateRequest(BaseModel):
|
||||
research_keywords: str = Field(..., max_length=2000, description="Research keywords / topic")
|
||||
topic: Optional[str] = Field(default=None, max_length=500)
|
||||
word_count_target: Optional[int] = Field(default=None, ge=100, le=20000)
|
||||
|
||||
|
||||
class BlogAssetUpdateRequest(BaseModel):
|
||||
phase: Optional[str] = Field(default=None, pattern=r"^(research|outline|content|seo|publish)$")
|
||||
topic: Optional[str] = Field(default=None, max_length=500)
|
||||
selected_title: Optional[str] = Field(default=None, max_length=500)
|
||||
word_count_target: Optional[int] = Field(default=None, ge=100, le=20000)
|
||||
research_data: Optional[Dict[str, Any]] = None
|
||||
outline_data: Optional[Dict[str, Any]] = None
|
||||
content_data: Optional[Dict[str, Any]] = None
|
||||
seo_data: Optional[Dict[str, Any]] = None
|
||||
publish_data: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
def _normalize_keywords(kw: str) -> str:
|
||||
"""Normalize keywords for duplicate comparison."""
|
||||
return " ".join(sorted(kw.lower().split()))
|
||||
|
||||
|
||||
@router.post("/asset", response_model=Dict[str, Any])
|
||||
async def create_blog_asset(
|
||||
request: BlogAssetCreateRequest,
|
||||
current_user: Dict[str, Any] = Depends(get_current_user),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""
|
||||
Create a blog ContentAsset on research start.
|
||||
Returns existing asset if duplicate keywords found (unique topics only).
|
||||
"""
|
||||
try:
|
||||
if not current_user:
|
||||
raise HTTPException(status_code=401, detail="Authentication required")
|
||||
user_id = str(current_user.get("id", ""))
|
||||
if not user_id:
|
||||
raise HTTPException(status_code=401, detail="Invalid user ID")
|
||||
|
||||
svc = ContentAssetService(db)
|
||||
normalized_kw = _normalize_keywords(request.research_keywords)
|
||||
|
||||
# Duplicate check — search existing blog assets for matching keywords
|
||||
existing_assets, _ = svc.get_user_assets(
|
||||
user_id=user_id,
|
||||
source_module=AssetSource.BLOG_WRITER,
|
||||
asset_type=AssetType.TEXT,
|
||||
limit=100,
|
||||
)
|
||||
for asset in existing_assets:
|
||||
meta = asset.asset_metadata or {}
|
||||
if meta.get("normalized_keywords") == normalized_kw:
|
||||
logger.info(f"Duplicate blog asset found: {asset.id}, returning existing")
|
||||
return {
|
||||
"success": True,
|
||||
"asset": _asset_to_response(asset),
|
||||
"existing": True,
|
||||
}
|
||||
|
||||
# Create new ContentAsset for this blog
|
||||
title = request.topic or request.research_keywords[:200]
|
||||
asset_metadata = {
|
||||
"phase": "research",
|
||||
"research_keywords": request.research_keywords,
|
||||
"normalized_keywords": normalized_kw,
|
||||
"word_count_target": request.word_count_target,
|
||||
"topic": request.topic,
|
||||
"research_data": None,
|
||||
"outline_data": None,
|
||||
"content_data": None,
|
||||
"seo_data": None,
|
||||
"publish_data": None,
|
||||
}
|
||||
asset = svc.create_asset(
|
||||
user_id=user_id,
|
||||
asset_type=AssetType.TEXT,
|
||||
source_module=AssetSource.BLOG_WRITER,
|
||||
filename=f"blog_{int(datetime.utcnow().timestamp())}.md",
|
||||
file_url=f"/api/blog/content/pending",
|
||||
title=title,
|
||||
description=f"Blog: {title}",
|
||||
tags=["blog", "research"],
|
||||
asset_metadata=asset_metadata,
|
||||
)
|
||||
logger.info(f"✅ Created blog asset: {asset.id}")
|
||||
return {
|
||||
"success": True,
|
||||
"asset": _asset_to_response(asset),
|
||||
"existing": False,
|
||||
}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create blog asset: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.put("/asset/{asset_id}", response_model=Dict[str, Any])
|
||||
async def update_blog_asset(
|
||||
asset_id: int,
|
||||
request: BlogAssetUpdateRequest,
|
||||
current_user: Dict[str, Any] = Depends(get_current_user),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""Update a blog asset's phase, metadata, and tags."""
|
||||
try:
|
||||
if not current_user:
|
||||
raise HTTPException(status_code=401, detail="Authentication required")
|
||||
user_id = str(current_user.get("id", ""))
|
||||
if not user_id:
|
||||
raise HTTPException(status_code=401, detail="Invalid user ID")
|
||||
|
||||
svc = ContentAssetService(db)
|
||||
asset = svc.get_asset_by_id(asset_id, user_id)
|
||||
if not asset:
|
||||
raise HTTPException(status_code=404, detail="Blog asset not found")
|
||||
|
||||
meta = dict(asset.asset_metadata or {})
|
||||
tags = list(asset.tags or [])
|
||||
|
||||
if request.phase is not None:
|
||||
meta["phase"] = request.phase
|
||||
# Update tags to reflect phase
|
||||
new_tags = [t for t in tags if t not in ("research", "outline", "content", "seo", "publish")]
|
||||
new_tags.append(request.phase)
|
||||
if "blog" not in new_tags:
|
||||
new_tags.append("blog")
|
||||
tags = new_tags
|
||||
|
||||
if request.topic is not None:
|
||||
meta["topic"] = request.topic
|
||||
if request.selected_title is not None:
|
||||
meta["selected_title"] = request.selected_title
|
||||
if request.word_count_target is not None:
|
||||
meta["word_count_target"] = request.word_count_target
|
||||
|
||||
for field in ("research_data", "outline_data", "content_data", "seo_data", "publish_data"):
|
||||
val = getattr(request, field, None)
|
||||
if val is not None:
|
||||
meta[field] = val
|
||||
|
||||
if meta.get("selected_title"):
|
||||
new_title = meta["selected_title"]
|
||||
elif meta.get("topic"):
|
||||
new_title = meta["topic"]
|
||||
else:
|
||||
new_title = asset.title or "Blog Post"
|
||||
|
||||
updated = svc.update_asset(
|
||||
asset_id=asset_id,
|
||||
user_id=user_id,
|
||||
title=new_title[:500],
|
||||
tags=tags,
|
||||
asset_metadata=meta,
|
||||
)
|
||||
if not updated:
|
||||
raise HTTPException(status_code=500, detail="Failed to update asset")
|
||||
|
||||
logger.info(f"✅ Updated blog asset {asset_id}: phase={meta.get('phase')}")
|
||||
return {"success": True, "asset": _asset_to_response(updated)}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to update blog asset {asset_id}: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/asset/{asset_id}", response_model=Dict[str, Any])
|
||||
async def get_blog_asset(
|
||||
asset_id: int,
|
||||
current_user: Dict[str, Any] = Depends(get_current_user),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""Get a blog asset with all phase data."""
|
||||
try:
|
||||
if not current_user:
|
||||
raise HTTPException(status_code=401, detail="Authentication required")
|
||||
user_id = str(current_user.get("id", ""))
|
||||
if not user_id:
|
||||
raise HTTPException(status_code=401, detail="Invalid user ID")
|
||||
|
||||
svc = ContentAssetService(db)
|
||||
asset = svc.get_asset_by_id(asset_id, user_id)
|
||||
if not asset:
|
||||
raise HTTPException(status_code=404, detail="Blog asset not found")
|
||||
|
||||
return {"success": True, "asset": _asset_to_response(asset, full=True)}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get blog asset {asset_id}: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
def _asset_to_response(asset: Any, full: bool = False) -> Dict[str, Any]:
|
||||
"""Convert a ContentAsset to a blog asset response dict."""
|
||||
meta = asset.asset_metadata or {}
|
||||
resp: Dict[str, Any] = {
|
||||
"id": asset.id,
|
||||
"title": asset.title,
|
||||
"description": asset.description,
|
||||
"tags": asset.tags or [],
|
||||
"phase": meta.get("phase", "research"),
|
||||
"research_keywords": meta.get("research_keywords"),
|
||||
"topic": meta.get("topic"),
|
||||
"selected_title": meta.get("selected_title"),
|
||||
"word_count_target": meta.get("word_count_target"),
|
||||
"has_research": meta.get("research_data") is not None,
|
||||
"has_outline": meta.get("outline_data") is not None,
|
||||
"has_content": meta.get("content_data") is not None,
|
||||
"has_seo": meta.get("seo_data") is not None,
|
||||
"has_publish": meta.get("publish_data") is not None,
|
||||
"created_at": asset.created_at.isoformat() if asset.created_at else None,
|
||||
"updated_at": asset.updated_at.isoformat() if asset.updated_at else None,
|
||||
}
|
||||
if full:
|
||||
resp["research_data"] = meta.get("research_data")
|
||||
resp["outline_data"] = meta.get("outline_data")
|
||||
resp["content_data"] = meta.get("content_data")
|
||||
resp["seo_data"] = meta.get("seo_data")
|
||||
resp["publish_data"] = meta.get("publish_data")
|
||||
return resp
|
||||
|
||||
@@ -256,7 +256,8 @@ class TaskManager:
|
||||
self.task_storage[task_id]["status"] = "running"
|
||||
self.task_storage[task_id]["progress_messages"] = []
|
||||
|
||||
await self.update_progress(task_id, "📦 Packaging outline and metadata...")
|
||||
await self.update_progress(task_id, "📝 Alwrity is preparing your blog content — this usually takes 20–40 seconds.")
|
||||
await self.update_progress(task_id, "📦 Packaging your outline sections and research data...")
|
||||
|
||||
# Basic guard: respect global target words
|
||||
total_target = int(request.globalTargetWords or 1000)
|
||||
@@ -281,16 +282,22 @@ class TaskManager:
|
||||
# Check if result came from cache
|
||||
cache_hit = getattr(result, 'cache_hit', False)
|
||||
if cache_hit:
|
||||
await self.update_progress(task_id, "⚡ Found cached content - loading instantly!")
|
||||
await self.update_progress(task_id, "⚡ Found existing content in cache — no need to regenerate!")
|
||||
else:
|
||||
await self.update_progress(task_id, "🤖 Generated fresh content with AI...")
|
||||
await self.update_progress(task_id, "✨ Post-processing and assembling sections...")
|
||||
await self.update_progress(task_id, "🧠 AI is writing each section with research-backed insights and natural flow...")
|
||||
await self.update_progress(task_id, "✨ Polishing content — improving structure, readability, and transitions...")
|
||||
|
||||
# Mark completed
|
||||
self.task_storage[task_id]["status"] = "completed"
|
||||
self.task_storage[task_id]["result"] = result.dict()
|
||||
await self.update_progress(task_id, f"✅ Generated {len(result.sections)} sections successfully.")
|
||||
|
||||
section_count = len(result.sections)
|
||||
total_words = sum(getattr(s, 'wordCount', 0) or 0 for s in result.sections)
|
||||
await self.update_progress(
|
||||
task_id,
|
||||
f"✅ Content generation complete! {section_count} sections written ({total_words} words). "
|
||||
"Next up: SEO Analysis to optimize your blog for search engines."
|
||||
)
|
||||
|
||||
# Note: Blog content tracking is handled in the status endpoint
|
||||
# to ensure we have proper database session and user context
|
||||
|
||||
|
||||
@@ -71,7 +71,7 @@ async def detect_hallucinations(request: HallucinationDetectionRequest, current_
|
||||
text=source.get('text', ''),
|
||||
published_date=source.get('publishedDate'),
|
||||
author=source.get('author'),
|
||||
score=source.get('score', 0.5)
|
||||
score=source.get('score') if source.get('score') is not None else 0.5
|
||||
)
|
||||
for source in claim.supporting_sources
|
||||
]
|
||||
@@ -83,7 +83,7 @@ async def detect_hallucinations(request: HallucinationDetectionRequest, current_
|
||||
text=source.get('text', ''),
|
||||
published_date=source.get('publishedDate'),
|
||||
author=source.get('author'),
|
||||
score=source.get('score', 0.5)
|
||||
score=source.get('score') if source.get('score') is not None else 0.5
|
||||
)
|
||||
for source in claim.refuting_sources
|
||||
]
|
||||
@@ -214,7 +214,7 @@ async def verify_claim(request: ClaimVerificationRequest, current_user: Dict[str
|
||||
text=source.get('text', ''),
|
||||
published_date=source.get('publishedDate'),
|
||||
author=source.get('author'),
|
||||
score=source.get('score', 0.5)
|
||||
score=source.get('score') if source.get('score') is not None else 0.5
|
||||
)
|
||||
for source in claim_result.supporting_sources
|
||||
]
|
||||
@@ -226,7 +226,7 @@ async def verify_claim(request: ClaimVerificationRequest, current_user: Dict[str
|
||||
text=source.get('text', ''),
|
||||
published_date=source.get('publishedDate'),
|
||||
author=source.get('author'),
|
||||
score=source.get('score', 0.5)
|
||||
score=source.get('score') if source.get('score') is not None else 0.5
|
||||
)
|
||||
for source in claim_result.refuting_sources
|
||||
]
|
||||
|
||||
@@ -12,6 +12,7 @@ router = APIRouter(prefix="/api/writing-assistant", tags=["writing-assistant"])
|
||||
|
||||
class SuggestRequest(BaseModel):
|
||||
text: str
|
||||
cursor_position: int | None = None
|
||||
|
||||
|
||||
class SourceModel(BaseModel):
|
||||
@@ -32,6 +33,7 @@ class SuggestionModel(BaseModel):
|
||||
class SuggestResponse(BaseModel):
|
||||
success: bool
|
||||
suggestions: List[SuggestionModel]
|
||||
message: str = ""
|
||||
|
||||
|
||||
assistant_service = WritingAssistantService()
|
||||
@@ -41,9 +43,9 @@ assistant_service = WritingAssistantService()
|
||||
async def suggest_endpoint(req: SuggestRequest, current_user: Dict[str, Any] = Depends(get_current_user)) -> SuggestResponse:
|
||||
try:
|
||||
user_id = current_user.get("id")
|
||||
suggestions = await assistant_service.suggest(req.text, user_id=user_id)
|
||||
suggestions = await assistant_service.suggest(req.text, user_id=user_id, cursor_position=req.cursor_position)
|
||||
return SuggestResponse(
|
||||
success=True,
|
||||
success=len(suggestions) > 0,
|
||||
suggestions=[
|
||||
SuggestionModel(
|
||||
text=s.text,
|
||||
|
||||
@@ -679,9 +679,6 @@ if _is_full_mode():
|
||||
if campaign_creator_router:
|
||||
app.include_router(campaign_creator_router)
|
||||
|
||||
# Include content assets router
|
||||
from api.content_assets.router import router as content_assets_router
|
||||
app.include_router(content_assets_router)
|
||||
router_group_status["platform_extensions"] = {
|
||||
"mounted": True,
|
||||
"reason": "Full mode",
|
||||
@@ -692,6 +689,10 @@ else:
|
||||
"reason": "Skipped in feature-only mode",
|
||||
}
|
||||
|
||||
# Include content assets router (always — core utility, not feature-specific)
|
||||
from api.content_assets.router import router as content_assets_router
|
||||
app.include_router(content_assets_router)
|
||||
|
||||
# Include Podcast Maker router (only when podcast feature is enabled)
|
||||
if _is_feature_enabled("podcast") and "all" not in get_enabled_features():
|
||||
from api.podcast.router import router as podcast_router
|
||||
|
||||
@@ -76,12 +76,22 @@ async def handle_gsc_callback(
|
||||
|
||||
success = gsc_service.handle_oauth_callback(code, state)
|
||||
|
||||
# If state verification failed, check if user is already connected
|
||||
# (handles duplicate callbacks where state was consumed by a prior request)
|
||||
if not success:
|
||||
user_id_from_state = state.split(':')[0] if ':' in state else None
|
||||
if user_id_from_state:
|
||||
existing_creds = gsc_service.load_user_credentials(user_id_from_state)
|
||||
if existing_creds:
|
||||
logger.info(f"GSC OAuth state already consumed, but user {user_id_from_state} has valid credentials — treating as success")
|
||||
success = True
|
||||
|
||||
if success:
|
||||
logger.info("GSC OAuth callback handled successfully")
|
||||
|
||||
# Create GSC insights task immediately after successful connection
|
||||
try:
|
||||
from services.database import SessionLocal
|
||||
from services.database import get_session_for_user
|
||||
from services.platform_insights_monitoring_service import create_platform_insights_task
|
||||
|
||||
# Get user_id from state (stored during OAuth flow)
|
||||
@@ -89,23 +99,24 @@ async def handle_gsc_callback(
|
||||
user_id = state.split(':')[0] if ':' in state else None
|
||||
|
||||
if user_id:
|
||||
db = SessionLocal()
|
||||
try:
|
||||
# Create insights task without site_url to avoid API calls
|
||||
# The executor will fetch it when the task runs (weekly)
|
||||
task_result = create_platform_insights_task(
|
||||
user_id=user_id,
|
||||
platform='gsc',
|
||||
site_url=None, # Will be fetched by executor when task runs
|
||||
db=db
|
||||
)
|
||||
|
||||
if task_result.get('success'):
|
||||
logger.info(f"Created GSC insights task for user {user_id}")
|
||||
else:
|
||||
logger.warning(f"Failed to create GSC insights task: {task_result.get('error')}")
|
||||
finally:
|
||||
db.close()
|
||||
db = get_session_for_user(user_id)
|
||||
if db:
|
||||
try:
|
||||
task_result = create_platform_insights_task(
|
||||
user_id=user_id,
|
||||
platform='gsc',
|
||||
site_url=None,
|
||||
db=db
|
||||
)
|
||||
|
||||
if task_result.get('success'):
|
||||
logger.info(f"Created GSC insights task for user {user_id}")
|
||||
else:
|
||||
logger.warning(f"Failed to create GSC insights task: {task_result.get('error')}")
|
||||
finally:
|
||||
db.close()
|
||||
else:
|
||||
logger.warning(f"Could not create DB session for user {user_id}")
|
||||
else:
|
||||
logger.warning(f"Could not extract user_id from state: {state}")
|
||||
except Exception as e:
|
||||
@@ -125,7 +136,10 @@ async def handle_gsc_callback(
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
return HTMLResponse(content=html)
|
||||
return HTMLResponse(
|
||||
content=html,
|
||||
headers={"Cross-Origin-Opener-Policy": "unsafe-none"},
|
||||
)
|
||||
else:
|
||||
logger.error("Failed to handle GSC OAuth callback")
|
||||
html = """
|
||||
@@ -140,7 +154,11 @@ async def handle_gsc_callback(
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
return HTMLResponse(status_code=400, content=html)
|
||||
return HTMLResponse(
|
||||
status_code=400,
|
||||
content=html,
|
||||
headers={"Cross-Origin-Opener-Policy": "unsafe-none"},
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error handling GSC OAuth callback: {e}")
|
||||
@@ -157,7 +175,11 @@ async def handle_gsc_callback(
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
return HTMLResponse(status_code=500, content=html)
|
||||
return HTMLResponse(
|
||||
status_code=500,
|
||||
content=html,
|
||||
headers={"Cross-Origin-Opener-Policy": "unsafe-none"},
|
||||
)
|
||||
|
||||
@router.get("/sites")
|
||||
async def get_gsc_sites(user: dict = Depends(get_current_user)):
|
||||
|
||||
@@ -122,9 +122,6 @@ class MediumBlogGenerator:
|
||||
payload = {
|
||||
"title": req.title,
|
||||
"globalTargetWords": req.globalTargetWords or 1000,
|
||||
"persona": req.persona.dict() if req.persona else None,
|
||||
"tone": req.tone,
|
||||
"audience": req.audience,
|
||||
"sections": [section_block(s) for s in req.sections],
|
||||
}
|
||||
|
||||
@@ -136,7 +133,6 @@ class MediumBlogGenerator:
|
||||
- Industry: {req.persona.industry or 'General'}
|
||||
- Tone: {req.persona.tone or 'Professional'}
|
||||
- Audience: {req.persona.audience or 'General readers'}
|
||||
- Persona ID: {req.persona.persona_id or 'Default'}
|
||||
|
||||
Write content that reflects this persona's expertise and communication style.
|
||||
Use industry-specific terminology and examples where appropriate.
|
||||
@@ -154,40 +150,19 @@ class MediumBlogGenerator:
|
||||
"Return ONLY valid JSON with no markdown formatting or explanations."
|
||||
)
|
||||
|
||||
# Build persona-specific content instructions
|
||||
persona_instructions = ""
|
||||
if req.persona:
|
||||
industry = req.persona.industry or 'General'
|
||||
tone = req.persona.tone or 'Professional'
|
||||
audience = req.persona.audience or 'General readers'
|
||||
|
||||
persona_instructions = f"""
|
||||
PERSONA-DRIVEN CONTENT REQUIREMENTS:
|
||||
- Write as an expert in {industry} industry
|
||||
- Use {tone} tone appropriate for {audience}
|
||||
- Include industry-specific examples and terminology
|
||||
- Demonstrate authority and expertise in the field
|
||||
- Use language that resonates with {audience}
|
||||
- Maintain consistent voice that reflects this persona's expertise
|
||||
"""
|
||||
|
||||
prompt = (
|
||||
f"Write blog content for the following sections. Each section should be {req.globalTargetWords or 1000} words total, distributed across all sections.\n\n"
|
||||
f"Write blog content for the following sections. Total target: {req.globalTargetWords or 1000} words, distributed across all sections.\n\n"
|
||||
f"Blog Title: {req.title}\n\n"
|
||||
"For each section, write engaging content that:\n"
|
||||
"- Follows the key points provided\n"
|
||||
"- Uses the suggested keywords naturally\n"
|
||||
"- Meets the target word count\n"
|
||||
"- Maintains professional tone\n"
|
||||
"- References the provided sources when relevant\n"
|
||||
"- Breaks content into clear paragraphs (2-4 sentences each)\n"
|
||||
"- Uses double line breaks (\\n\\n) between paragraphs for proper formatting\n"
|
||||
"- Uses double line breaks (\\n\\n) between paragraphs\n"
|
||||
"- Starts with an engaging opening paragraph\n"
|
||||
"- Ends with a strong concluding paragraph\n"
|
||||
f"{persona_instructions}\n"
|
||||
"IMPORTANT: Format the 'content' field with proper paragraph breaks using \\n\\n between paragraphs.\n\n"
|
||||
"Return a JSON object with 'title' and 'sections' array. Each section should have 'id', 'heading', 'content', and 'wordCount'.\n\n"
|
||||
f"Sections to write:\n{json.dumps(payload, ensure_ascii=False, indent=2)}"
|
||||
"- Ends with a strong concluding paragraph\n\n"
|
||||
"Return a JSON object with 'title' and 'sections' array. Each section must have 'id', 'heading', 'content', 'wordCount', and 'sources'.\n\n"
|
||||
f"Sections:\n{json.dumps(payload, ensure_ascii=False, indent=2)}"
|
||||
)
|
||||
|
||||
try:
|
||||
@@ -195,7 +170,9 @@ class MediumBlogGenerator:
|
||||
prompt=prompt,
|
||||
json_struct=schema,
|
||||
system_prompt=system,
|
||||
user_id=user_id
|
||||
user_id=user_id,
|
||||
max_tokens=None,
|
||||
temperature=0.3,
|
||||
)
|
||||
except HTTPException:
|
||||
# Re-raise HTTPExceptions (e.g., 429 subscription limit) to preserve error details
|
||||
|
||||
@@ -322,7 +322,7 @@ class ExaResearchProvider(BaseProvider):
|
||||
'text': getattr(result, 'text', ''),
|
||||
'publishedDate': getattr(result, 'publishedDate', ''),
|
||||
'author': getattr(result, 'author', ''),
|
||||
'score': getattr(result, 'score', 0.5),
|
||||
'score': (lambda v: v if v is not None else 0.5)(getattr(result, 'score', 0.5)),
|
||||
})
|
||||
|
||||
# Track usage
|
||||
|
||||
@@ -31,6 +31,7 @@ from models.product_marketing_models import Campaign, CampaignProposal, Campaign
|
||||
from models.product_asset_models import ProductAsset, ProductStyleTemplate, EcommerceExport
|
||||
# Podcast Maker models use SubscriptionBase, but import to ensure models are registered
|
||||
from models.podcast_models import PodcastProject
|
||||
|
||||
# Research models use SubscriptionBase
|
||||
from models.research_models import ResearchProject
|
||||
# Video Studio models
|
||||
|
||||
@@ -2,8 +2,9 @@
|
||||
GSC Brainstorm Service for ALwrity.
|
||||
|
||||
Analyzes Google Search Console data to suggest blog topics the user should write about.
|
||||
Combines rule-based heuristics (high-impression/low-CTR keywords, near-page-1 positions)
|
||||
with LLM-powered strategic recommendations tailored to the user's topic intent.
|
||||
Combines rule-based heuristics with LLM-powered strategic recommendations tailored to
|
||||
the user's topic intent. Designed for non-SEO-experts: every insight includes plain-English
|
||||
explanations of WHY it matters and WHAT to do about it.
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -21,9 +22,10 @@ class GSCBrainstormService:
|
||||
|
||||
Flow:
|
||||
1. Fetch real GSC search analytics (query + page data, 30 days)
|
||||
2. Apply rule-based filters (Content Optimization, Content Enhancement, Keyword Gap)
|
||||
3. Generate LLM-powered strategic recommendations contextualised to the user's keywords
|
||||
4. Return structured results
|
||||
2. Compute derived metrics (CTR benchmarks, estimated traffic uplift, content formats)
|
||||
3. Apply rule-based filters (Quick Wins, Optimization, Enhancement, Rising Stars, Page Issues)
|
||||
4. Generate LLM-powered strategic recommendations contextualised to the user's keywords
|
||||
5. Return structured results with all data exposed for rich frontend display
|
||||
"""
|
||||
|
||||
def __init__(self, gsc_service: GSCService = None):
|
||||
@@ -39,18 +41,8 @@ class GSCBrainstormService:
|
||||
keywords: str,
|
||||
site_url: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Generate blog topic suggestions from the user's GSC data.
|
||||
|
||||
Args:
|
||||
user_id: Clerk user ID (must have GSC connected).
|
||||
keywords: User's 3+ word topic intent (e.g. "content marketing strategy").
|
||||
site_url: Optional site URL; auto-selected from user's first GSC site if omitted.
|
||||
|
||||
Returns:
|
||||
Dict with content_opportunities, keyword_gaps, ai_recommendations, summary.
|
||||
"""
|
||||
self._user_id = user_id
|
||||
|
||||
# 1. Resolve site_url
|
||||
if not site_url:
|
||||
sites = self.gsc_service.get_site_list(user_id)
|
||||
@@ -59,6 +51,8 @@ class GSCBrainstormService:
|
||||
"error": "No GSC sites found. Make sure your site is verified in Google Search Console.",
|
||||
"content_opportunities": [],
|
||||
"keyword_gaps": [],
|
||||
"quick_wins": [],
|
||||
"page_opportunities": [],
|
||||
"ai_recommendations": {},
|
||||
"summary": {},
|
||||
}
|
||||
@@ -80,6 +74,8 @@ class GSCBrainstormService:
|
||||
"error": analytics.get("error", "Failed to fetch GSC data"),
|
||||
"content_opportunities": [],
|
||||
"keyword_gaps": [],
|
||||
"quick_wins": [],
|
||||
"page_opportunities": [],
|
||||
"ai_recommendations": {},
|
||||
"summary": {},
|
||||
}
|
||||
@@ -93,9 +89,11 @@ class GSCBrainstormService:
|
||||
|
||||
if not keywords_data:
|
||||
return {
|
||||
"error": "No keyword data available for the selected period.",
|
||||
"error": "No keyword data available for the selected period. This usually means your site is new to GSC or hasn't received search traffic yet.",
|
||||
"content_opportunities": [],
|
||||
"keyword_gaps": [],
|
||||
"quick_wins": [],
|
||||
"page_opportunities": [],
|
||||
"ai_recommendations": {},
|
||||
"summary": {
|
||||
"site_url": site_url,
|
||||
@@ -107,18 +105,23 @@ class GSCBrainstormService:
|
||||
# 4. Rule-based analysis
|
||||
content_opportunities = self._identify_content_opportunities(keywords_data)
|
||||
keyword_gaps = self._identify_keyword_gaps(keywords_data)
|
||||
quick_wins = self._identify_quick_wins(keywords_data)
|
||||
page_opportunities = self._identify_page_opportunities(pages_data)
|
||||
|
||||
# 5. Summary metrics
|
||||
summary = self._compute_summary(keywords_data, pages_data, site_url, start_date, end_date)
|
||||
|
||||
# 6. AI recommendations (best-effort; don't fail the whole request on LLM error)
|
||||
# 6. AI recommendations
|
||||
ai_recommendations = self._generate_ai_recommendations(
|
||||
keywords_data, pages_data, summary, keywords
|
||||
keywords_data, pages_data, summary, keywords,
|
||||
content_opportunities, quick_wins, keyword_gaps,
|
||||
)
|
||||
|
||||
return {
|
||||
"content_opportunities": content_opportunities,
|
||||
"keyword_gaps": keyword_gaps,
|
||||
"quick_wins": quick_wins,
|
||||
"page_opportunities": page_opportunities,
|
||||
"ai_recommendations": ai_recommendations,
|
||||
"summary": summary,
|
||||
}
|
||||
@@ -168,39 +171,53 @@ class GSCBrainstormService:
|
||||
opportunities: List[Dict[str, Any]] = []
|
||||
|
||||
# Rule 1: Content Optimization — high impressions, low CTR
|
||||
# Meaning: Google is SHOWING your page for this query but people aren't clicking.
|
||||
# The content probably ranks but title/meta/snippet isn't compelling enough.
|
||||
for kw in keywords_data:
|
||||
if kw["impressions"] > 500 and kw["ctr"] < 3:
|
||||
estimated_gain = int(kw["impressions"] * 0.05) - kw["clicks"]
|
||||
opportunities.append({
|
||||
"type": "Content Optimization",
|
||||
"keyword": kw["keyword"],
|
||||
"opportunity": (
|
||||
f"Optimize existing content for '{kw['keyword']}' "
|
||||
f"to improve CTR from {kw['ctr']:.1f}% "
|
||||
f"(position {kw['position']:.1f})"
|
||||
f"Your site appears for '{kw['keyword']}' ({kw['impressions']:,} times/month) "
|
||||
f"but only {kw['ctr']:.1f}% click. Improving your title and meta description "
|
||||
f"could bring ~{max(estimated_gain, 5)} more clicks/month."
|
||||
),
|
||||
"potential_impact": "High",
|
||||
"potential_impact": "High" if kw["impressions"] > 1000 else "Medium",
|
||||
"current_position": kw["position"],
|
||||
"current_ctr": kw["ctr"],
|
||||
"impressions": kw["impressions"],
|
||||
"clicks": kw["clicks"],
|
||||
"estimated_traffic_gain": max(estimated_gain, 5),
|
||||
"priority": "High" if kw["impressions"] > 1000 else "Medium",
|
||||
"suggested_format": GSCBrainstormService._suggest_format(kw["keyword"]),
|
||||
})
|
||||
|
||||
# Rule 2: Content Enhancement — positions 11-20 with decent impressions
|
||||
# Meaning: You're on page 2 of Google. A small content boost could push you to page 1,
|
||||
# where CTR increases dramatically (page 1 gets ~95% of all clicks).
|
||||
for kw in keywords_data:
|
||||
if 10 < kw["position"] <= 20 and kw["impressions"] > 100:
|
||||
estimated_gain = int(kw["impressions"] * 0.08)
|
||||
opportunities.append({
|
||||
"type": "Content Enhancement",
|
||||
"keyword": kw["keyword"],
|
||||
"opportunity": (
|
||||
f"Enhance content for '{kw['keyword']}' to move from "
|
||||
f"position {kw['position']:.1f} to the first page"
|
||||
f"'{kw['keyword']}' ranks #{kw['position']:.0f} (page 2). "
|
||||
f"Moving to page 1 could capture ~{estimated_gain} more clicks/month "
|
||||
f"from {kw['impressions']:,} impressions."
|
||||
),
|
||||
"potential_impact": "Medium",
|
||||
"potential_impact": "High" if kw["impressions"] > 500 else "Medium",
|
||||
"current_position": kw["position"],
|
||||
"current_ctr": kw["ctr"],
|
||||
"impressions": kw["impressions"],
|
||||
"priority": "Medium",
|
||||
"clicks": kw["clicks"],
|
||||
"estimated_traffic_gain": estimated_gain,
|
||||
"priority": "High" if kw["impressions"] > 500 else "Medium",
|
||||
"suggested_format": GSCBrainstormService._suggest_format(kw["keyword"]),
|
||||
})
|
||||
|
||||
# Sort by impressions descending, keep top 10
|
||||
opportunities.sort(key=lambda x: x["impressions"], reverse=True)
|
||||
return opportunities[:10]
|
||||
|
||||
@@ -212,15 +229,111 @@ class GSCBrainstormService:
|
||||
|
||||
for kw in keywords_data:
|
||||
if 4 <= kw["position"] <= 20 and kw["impressions"] >= 50:
|
||||
# Estimate traffic gain if this keyword moved to position 1-3
|
||||
# Position 1 avg CTR ~31%, position 3 ~11%, current position CTR estimate
|
||||
position_1_ctr = 31.0
|
||||
current_ctr = kw["ctr"]
|
||||
estimated_gain = max(int(kw["impressions"] * (position_1_ctr - current_ctr) / 100), 1)
|
||||
|
||||
gaps.append({
|
||||
"keyword": kw["keyword"],
|
||||
"position": kw["position"],
|
||||
"impressions": kw["impressions"],
|
||||
"current_ctr": kw["ctr"],
|
||||
"clicks": kw["clicks"],
|
||||
"estimated_traffic_if_page1": estimated_gain,
|
||||
"gap_from_page1": round(kw["position"] - 3, 1),
|
||||
})
|
||||
|
||||
gaps.sort(key=lambda x: x["impressions"], reverse=True)
|
||||
return gaps[:10]
|
||||
|
||||
@staticmethod
|
||||
def _identify_quick_wins(
|
||||
keywords_data: List[Dict[str, Any]],
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Keywords already on page 1 (positions 4-10) that could reach top 3
|
||||
with minor improvements — the highest-ROI opportunities."""
|
||||
quick_wins: List[Dict[str, Any]] = []
|
||||
|
||||
for kw in keywords_data:
|
||||
if 4 <= kw["position"] <= 10 and kw["impressions"] >= 100:
|
||||
# Position 3 CTR ≈ 11%, position 5 CTR ≈ 6%
|
||||
# Small improvements can yield big traffic gains
|
||||
target_ctr = 11.0 # approximate CTR for position 3
|
||||
estimated_gain = max(int(kw["impressions"] * (target_ctr - kw["ctr"]) / 100), 1)
|
||||
|
||||
quick_wins.append({
|
||||
"keyword": kw["keyword"],
|
||||
"position": kw["position"],
|
||||
"impressions": kw["impressions"],
|
||||
"current_ctr": kw["ctr"],
|
||||
"clicks": kw["clicks"],
|
||||
"estimated_traffic_gain": estimated_gain,
|
||||
"reason": (
|
||||
f"Already on page 1 at position #{kw['position']:.0f}. "
|
||||
f"Optimizing this page could increase CTR from {kw['ctr']:.1f}% "
|
||||
f"to ~{target_ctr:.0f}%, gaining ~{estimated_gain} clicks/month."
|
||||
),
|
||||
})
|
||||
|
||||
quick_wins.sort(key=lambda x: x["estimated_traffic_gain"], reverse=True)
|
||||
return quick_wins[:5]
|
||||
|
||||
@staticmethod
|
||||
def _identify_page_opportunities(
|
||||
pages_data: List[Dict[str, Any]],
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Pages with high impressions but low CTR — the content or meta needs work."""
|
||||
opportunities: List[Dict[str, Any]] = []
|
||||
|
||||
for pg in pages_data:
|
||||
if pg["impressions"] > 300 and pg["ctr"] < 2.0:
|
||||
short_page = pg["page"].rstrip("/").rsplit("/", 1)[-1].replace("-", " ").title()
|
||||
if len(short_page) > 60:
|
||||
short_page = short_page[:57] + "..."
|
||||
opportunities.append({
|
||||
"page": pg["page"],
|
||||
"page_title": short_page,
|
||||
"impressions": pg["impressions"],
|
||||
"clicks": pg["clicks"],
|
||||
"current_ctr": pg["ctr"],
|
||||
"current_position": pg["position"],
|
||||
"reason": (
|
||||
f"This page gets {pg['impressions']:,} impressions but only {pg['ctr']:.1f}% CTR. "
|
||||
f"Reviewing the title and meta description could significantly boost clicks."
|
||||
),
|
||||
})
|
||||
|
||||
opportunities.sort(key=lambda x: x["impressions"], reverse=True)
|
||||
return opportunities[:5]
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# Content format suggestion
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
@staticmethod
|
||||
def _suggest_format(keyword: str) -> str:
|
||||
"""Suggest a content format based on keyword patterns."""
|
||||
kw = keyword.lower()
|
||||
if any(w in kw for w in ["how to", "how do", "guide", "tutorial", "steps"]):
|
||||
return "How-To Guide"
|
||||
if any(w in kw for w in ["vs", "versus", "compare", "comparison", "difference"]):
|
||||
return "Comparison"
|
||||
if any(w in kw for w in ["best", "top", "recommended", "review", "reviews"]):
|
||||
return "Top Picks / Review"
|
||||
if any(w in kw for w in ["what is", "definition", "meaning", "explained"]):
|
||||
return "Explainer"
|
||||
if any(w in kw for w in ["list", "examples", "ideas", "tips", "ways"]):
|
||||
return "Listicle"
|
||||
if any(w in kw for w in ["free", "cheap", "alternative", "budget"]):
|
||||
return "Budget / Alternative"
|
||||
if any(w in kw for w in ["template", "calculator", "tool", "checker"]):
|
||||
return "Tool / Template"
|
||||
if any(w in kw for w in ["2024", "2025", "2026", "trends", "prediction", "future"]):
|
||||
return "Trend Report"
|
||||
return "In-Depth Article"
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# Summary metrics
|
||||
# ------------------------------------------------------------------ #
|
||||
@@ -248,6 +361,16 @@ class GSCBrainstormService:
|
||||
top_keywords = sorted(keywords_data, key=lambda x: x["impressions"], reverse=True)[:5]
|
||||
top_pages = sorted(pages_data, key=lambda x: x["clicks"], reverse=True)[:3]
|
||||
|
||||
# Health score: 0-100 based on how many keywords are on page 1
|
||||
total_kw = len(keywords_data) or 1
|
||||
page1_pct = (pos_1_3 + pos_4_10) / total_kw * 100
|
||||
top3_pct = pos_1_3 / total_kw * 100
|
||||
health_score = round(min(top3_pct * 3 + page1_pct * 0.7, 100), 0)
|
||||
|
||||
# CTR benchmark: industry average is ~3.1% for position 1-10
|
||||
ctr_benchmark = 3.1
|
||||
ctr_vs_benchmark = round(avg_ctr - ctr_benchmark, 2)
|
||||
|
||||
return {
|
||||
"site_url": site_url,
|
||||
"date_range": {"start": start_date, "end": end_date},
|
||||
@@ -256,6 +379,8 @@ class GSCBrainstormService:
|
||||
"total_clicks": total_clicks,
|
||||
"avg_ctr": avg_ctr,
|
||||
"avg_position": avg_position,
|
||||
"ctr_vs_benchmark": ctr_vs_benchmark,
|
||||
"health_score": health_score,
|
||||
"keyword_distribution": {
|
||||
"positions_1_3": pos_1_3,
|
||||
"positions_4_10": pos_4_10,
|
||||
@@ -263,11 +388,22 @@ class GSCBrainstormService:
|
||||
"positions_21_plus": pos_21_plus,
|
||||
},
|
||||
"top_keywords": [
|
||||
{"keyword": kw["keyword"], "impressions": kw["impressions"], "position": kw["position"]}
|
||||
{
|
||||
"keyword": kw["keyword"],
|
||||
"impressions": kw["impressions"],
|
||||
"clicks": kw["clicks"],
|
||||
"position": kw["position"],
|
||||
"ctr": kw["ctr"],
|
||||
}
|
||||
for kw in top_keywords
|
||||
],
|
||||
"top_pages": [
|
||||
{"page": pg["page"], "clicks": pg["clicks"], "impressions": pg["impressions"]}
|
||||
{
|
||||
"page": pg["page"],
|
||||
"clicks": pg["clicks"],
|
||||
"impressions": pg["impressions"],
|
||||
"ctr": pg["ctr"],
|
||||
}
|
||||
for pg in top_pages
|
||||
],
|
||||
}
|
||||
@@ -282,60 +418,110 @@ class GSCBrainstormService:
|
||||
pages_data: List[Dict],
|
||||
summary: Dict,
|
||||
user_keywords: str,
|
||||
content_opportunities: List[Dict],
|
||||
quick_wins: List[Dict],
|
||||
keyword_gaps: List[Dict],
|
||||
) -> Dict[str, Any]:
|
||||
try:
|
||||
top_kw = ", ".join(kw["keyword"] for kw in summary.get("top_keywords", []))
|
||||
top_kw_list = summary.get("top_keywords", [])
|
||||
top_kw_str = "\n".join(
|
||||
f" • {kw['keyword']}: {kw['impressions']:,} impressions, position {kw['position']}, {kw['ctr']:.1f}% CTR"
|
||||
for kw in top_kw_list[:10]
|
||||
)
|
||||
dist = summary.get("keyword_distribution", {})
|
||||
|
||||
prompt = f"""Analyze this Google Search Console data and suggest blog topics the user should write about.
|
||||
opp_str = ""
|
||||
if content_opportunities:
|
||||
opp_str = "\nCONTENT OPPORTUNITIES (rule-based findings):\n" + "\n".join(
|
||||
f" • {o['keyword']}: {o['opportunity']}"
|
||||
for o in content_opportunities[:5]
|
||||
)
|
||||
else:
|
||||
opp_str = "\nNo major content opportunities detected from rule-based analysis."
|
||||
|
||||
USER'S TOPIC INTENT: "{user_keywords}"
|
||||
qw_str = ""
|
||||
if quick_wins:
|
||||
qw_str = "\nQUICK WINS (already on page 1, easy to optimize):\n" + "\n".join(
|
||||
f" • {q['keyword']}: position #{q['position']:.0f}, {q['current_ctr']:.1f}% CTR, est. +{q['estimated_traffic_gain']} clicks/month"
|
||||
for q in quick_wins[:3]
|
||||
)
|
||||
|
||||
SEARCH PERFORMANCE SUMMARY:
|
||||
- Total Keywords Tracked: {summary.get('total_keywords_analyzed', 0)}
|
||||
prompt = f"""You are an expert SEO content strategist analyzing real Google Search Console data for a blog writer.
|
||||
|
||||
The user wants to write about: "{user_keywords}"
|
||||
|
||||
Here is their GSC data for the last 30 days:
|
||||
|
||||
PERFORMANCE OVERVIEW:
|
||||
- Total Keywords: {summary.get('total_keywords_analyzed', 0)}
|
||||
- Total Impressions: {summary.get('total_impressions', 0):,}
|
||||
- Total Clicks: {summary.get('total_clicks', 0):,}
|
||||
- Average CTR: {summary.get('avg_ctr', 0):.2f}%
|
||||
- Average CTR: {summary.get('avg_ctr', 0):.2f}% (industry avg for positions 1-10 is ~3.1%)
|
||||
- Average Position: {summary.get('avg_position', 0):.1f}
|
||||
- SEO Health Score: {summary.get('health_score', 0)}/100
|
||||
|
||||
TOP PERFORMING KEYWORDS:
|
||||
{top_kw}
|
||||
TOP KEYWORDS BY IMPRESSIONS:
|
||||
{top_kw_str}
|
||||
|
||||
KEYWORD POSITION DISTRIBUTION:
|
||||
- Positions 1-3: {dist.get('positions_1_3', 0)}
|
||||
- Positions 4-10: {dist.get('positions_4_10', 0)}
|
||||
- Positions 11-20: {dist.get('positions_11_20', 0)}
|
||||
- Positions 21+: {dist.get('positions_21_plus', 0)}
|
||||
- Position 1-3 (top results): {dist.get('positions_1_3', 0)} keywords
|
||||
- Position 4-10 (page 1): {dist.get('positions_4_10', 0)} keywords
|
||||
- Position 11-20 (page 2): {dist.get('positions_11_20', 0)} keywords
|
||||
- Position 21+ (page 3+): {dist.get('positions_21_plus', 0)} keywords
|
||||
{opp_str}
|
||||
{qw_str}
|
||||
|
||||
Based on this data, provide:
|
||||
Based on this data, provide EXACT blog post suggestions the user should write.
|
||||
|
||||
1. IMMEDIATE TOPIC OPPORTUNITIES (0-30 days):
|
||||
- Specific blog post titles the user should write
|
||||
- Each tied to a keyword opportunity from the data
|
||||
- 3-5 suggestions
|
||||
For each suggestion include:
|
||||
1. A specific, compelling blog post TITLE (not vague topic)
|
||||
2. The keyword it targets and why (based on the data above)
|
||||
3. The recommended content format (how-to, listicle, comparison, etc.)
|
||||
4. Estimated impact (how many more clicks/month they could gain)
|
||||
|
||||
2. CONTENT STRATEGY TOPICS (1-3 months):
|
||||
- New topic clusters to build authority
|
||||
- Content pillar ideas
|
||||
- 3-5 suggestions
|
||||
|
||||
3. LONG-TERM CONTENT VISION (3-12 months):
|
||||
- Market expansion topics
|
||||
- Authority-building content ideas
|
||||
- 3-5 suggestions
|
||||
|
||||
IMPORTANT: Relate every topic suggestion to the user's interest in "{user_keywords}".
|
||||
Return your response in this exact JSON format:
|
||||
Return your response in this EXACT JSON format (no markdown, no code fences):
|
||||
{{
|
||||
"immediate_opportunities": ["topic 1", "topic 2", "topic 3"],
|
||||
"content_strategy": ["strategy 1", "strategy 2", "strategy 3"],
|
||||
"long_term_strategy": ["vision 1", "vision 2", "vision 3"]
|
||||
}}"""
|
||||
"immediate_opportunities": [
|
||||
{{
|
||||
"title": "Specific Blog Post Title Here",
|
||||
"keyword": "target keyword",
|
||||
"reason": "Why this will work based on the data",
|
||||
"format": "How-To Guide | Listicle | Comparison | Explainer | etc.",
|
||||
"estimated_impact": "Estimated X more clicks/month"
|
||||
}}
|
||||
],
|
||||
"content_strategy": [
|
||||
{{
|
||||
"title": "Pillar Content Title",
|
||||
"keyword": "target keyword",
|
||||
"reason": "Strategic reasoning",
|
||||
"format": "Content format",
|
||||
"estimated_impact": "Expected impact"
|
||||
}}
|
||||
],
|
||||
"long_term_strategy": [
|
||||
{{
|
||||
"title": "Authority Building Title",
|
||||
"keyword": "target keyword",
|
||||
"reason": "Long-term reasoning",
|
||||
"format": "Content format",
|
||||
"estimated_impact": "Expected long-term impact"
|
||||
}}
|
||||
]
|
||||
}}
|
||||
|
||||
IMPORTANT:
|
||||
- Provide 3-5 items in each category
|
||||
- Every suggestion MUST relate to the user's interest in "{user_keywords}"
|
||||
- Titles should be specific and compelling, like real blog post headlines
|
||||
- Use the data above to justify each recommendation
|
||||
- Prioritize keywords with high impressions but low CTR or low position"""
|
||||
|
||||
system_prompt = (
|
||||
"You are an enterprise SEO content strategist. Provide specific, data-driven "
|
||||
"blog topic suggestions that will improve the user's search performance. "
|
||||
"Always respond with valid JSON matching the requested format."
|
||||
"You are an expert SEO content strategist. You analyze Google Search Console data "
|
||||
"and provide specific, actionable blog post recommendations that will drive real traffic. "
|
||||
"You always respond with valid JSON matching the requested format. "
|
||||
"Every recommendation must be backed by the data provided."
|
||||
)
|
||||
|
||||
result = llm_text_gen(
|
||||
@@ -350,27 +536,58 @@ Return your response in this exact JSON format:
|
||||
if parsed:
|
||||
return parsed
|
||||
|
||||
return self._fallback_ai_recommendations(keywords_data)
|
||||
return self._fallback_ai_recommendations(keywords_data, content_opportunities, quick_wins)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"GSC brainstorm AI recommendations failed: {e}")
|
||||
return self._fallback_ai_recommendations(keywords_data)
|
||||
return self._fallback_ai_recommendations(keywords_data, content_opportunities, quick_wins)
|
||||
|
||||
@staticmethod
|
||||
def _parse_ai_response(raw: str) -> Optional[Dict[str, List[str]]]:
|
||||
def _parse_ai_response(self, raw: str) -> Optional[Dict[str, Any]]:
|
||||
try:
|
||||
json_start = raw.find("{")
|
||||
json_end = raw.rfind("}") + 1
|
||||
# Strip markdown code fences if present
|
||||
cleaned = raw.strip()
|
||||
if cleaned.startswith("```"):
|
||||
first_newline = cleaned.find("\n")
|
||||
if first_newline != -1:
|
||||
cleaned = cleaned[first_newline + 1:]
|
||||
if cleaned.endswith("```"):
|
||||
cleaned = cleaned[:-3].strip()
|
||||
|
||||
json_start = cleaned.find("{")
|
||||
json_end = cleaned.rfind("}") + 1
|
||||
if json_start == -1 or json_end == 0:
|
||||
return None
|
||||
|
||||
chunk = raw[json_start:json_end]
|
||||
chunk = cleaned[json_start:json_end]
|
||||
parsed = json.loads(chunk)
|
||||
|
||||
def normalize_section(section: Any) -> List[Dict[str, str]]:
|
||||
if not isinstance(section, list):
|
||||
return []
|
||||
result = []
|
||||
for item in section:
|
||||
if isinstance(item, str):
|
||||
result.append({
|
||||
"title": item.split(":")[0].strip() if ":" in item else item[:60],
|
||||
"keyword": "",
|
||||
"reason": item,
|
||||
"format": "",
|
||||
"estimated_impact": "",
|
||||
})
|
||||
elif isinstance(item, dict):
|
||||
result.append({
|
||||
"title": str(item.get("title", "")),
|
||||
"keyword": str(item.get("keyword", "")),
|
||||
"reason": str(item.get("reason", "")),
|
||||
"format": str(item.get("format", "")),
|
||||
"estimated_impact": str(item.get("estimated_impact", "")),
|
||||
})
|
||||
return result
|
||||
|
||||
return {
|
||||
"immediate_opportunities": parsed.get("immediate_opportunities", [])[:5],
|
||||
"content_strategy": parsed.get("content_strategy", [])[:5],
|
||||
"long_term_strategy": parsed.get("long_term_strategy", [])[:5],
|
||||
"immediate_opportunities": normalize_section(parsed.get("immediate_opportunities", []))[:5],
|
||||
"content_strategy": normalize_section(parsed.get("content_strategy", []))[:5],
|
||||
"long_term_strategy": normalize_section(parsed.get("long_term_strategy", []))[:5],
|
||||
}
|
||||
except (json.JSONDecodeError, ValueError) as e:
|
||||
logger.warning(f"Failed to parse AI brainstorm response as JSON: {e}")
|
||||
@@ -379,26 +596,53 @@ Return your response in this exact JSON format:
|
||||
@staticmethod
|
||||
def _fallback_ai_recommendations(
|
||||
keywords_data: List[Dict],
|
||||
content_opportunities: List[Dict],
|
||||
quick_wins: List[Dict],
|
||||
) -> Dict[str, Any]:
|
||||
top_kw = keywords_data[:3] if keywords_data else []
|
||||
immediate = []
|
||||
for kw in top_kw:
|
||||
immediate.append(
|
||||
f"Write a comprehensive guide on '{kw['keyword']}' "
|
||||
f"(currently at position {kw['position']:.1f} with "
|
||||
f"{kw['impressions']} impressions)"
|
||||
)
|
||||
|
||||
# Build from quick wins first (highest ROI)
|
||||
for qw in quick_wins[:2]:
|
||||
immediate.append({
|
||||
"title": f"How to Rank #{int(qw['position'])} for '{qw['keyword']}' — Optimization Guide",
|
||||
"keyword": qw["keyword"],
|
||||
"reason": qw.get("reason", f"Already on page 1 at position {qw['position']:.0f}"),
|
||||
"format": "How-To Guide",
|
||||
"estimated_impact": f"+{qw.get('estimated_traffic_gain', 10)} clicks/month",
|
||||
})
|
||||
|
||||
# Then from content opportunities
|
||||
for opp in content_opportunities[:2]:
|
||||
immediate.append({
|
||||
"title": f"Complete Guide to {opp['keyword'].title()}",
|
||||
"keyword": opp["keyword"],
|
||||
"reason": opp.get("opportunity", f"{opp['impressions']:,} impressions with room to improve"),
|
||||
"format": opp.get("suggested_format", "In-Depth Article"),
|
||||
"estimated_impact": f"+{opp.get('estimated_traffic_gain', 10)} clicks/month",
|
||||
})
|
||||
|
||||
# Fill remaining with top keywords
|
||||
remaining = 5 - len(immediate)
|
||||
for kw in top_kw[:remaining]:
|
||||
immediate.append({
|
||||
"title": f"The Ultimate Guide to {kw['keyword'].title()}",
|
||||
"keyword": kw["keyword"],
|
||||
"reason": f"Top keyword with {kw['impressions']:,} impressions (position {kw['position']:.1f})",
|
||||
"format": "In-Depth Article",
|
||||
"estimated_impact": f"+{max(int(kw['impressions'] * 0.03), 5)} clicks/month",
|
||||
})
|
||||
|
||||
return {
|
||||
"immediate_opportunities": immediate or ["No keyword data available for recommendations"],
|
||||
"immediate_opportunities": immediate or [{"title": "No keyword data available", "keyword": "", "reason": "Connect GSC to get personalized suggestions", "format": "", "estimated_impact": ""}],
|
||||
"content_strategy": [
|
||||
"Develop topic clusters around your top-performing keywords",
|
||||
"Create comparison and vs-style content for competitive terms",
|
||||
"Build FAQ sections targeting question-based queries",
|
||||
{"title": "Topic Cluster: Build Authority Around Your Core Topics", "keyword": "", "reason": "Clustered content ranks higher and captures more long-tail queries", "format": "Pillar Page + Spokes", "estimated_impact": "+50-200 clicks/month over 3 months"},
|
||||
{"title": "Comparison Guide: Your Product vs. Alternatives", "keyword": "", "reason": "Comparison content captures high-intent searchers ready to decide", "format": "Comparison", "estimated_impact": "+20-80 clicks/month"},
|
||||
{"title": "FAQ: Answer What Your Audience Is Asking", "keyword": "", "reason": "FAQs capture featured snippets and voice search queries", "format": "FAQ / Listicle", "estimated_impact": "+30-100 clicks/month"},
|
||||
],
|
||||
"long_term_strategy": [
|
||||
"Build domain authority through pillar content",
|
||||
"Expand into adjacent topic areas",
|
||||
"Develop thought leadership content series",
|
||||
{"title": "Pillar Content: The Definitive Resource in Your Niche", "keyword": "", "reason": "Comprehensive guides become authoritative references that attract backlinks", "format": "Long-Form Guide", "estimated_impact": "+100-500 clicks/month over 6-12 months"},
|
||||
{"title": "Trend Report: What's Next in Your Industry", "keyword": "", "reason": "Forward-looking content captures emerging search demand early", "format": "Trend Report", "estimated_impact": "+50-200 clicks/month"},
|
||||
{"title": "Thought Leadership: Expert Roundup and Insights", "keyword": "", "reason": "Expert content builds E-E-A-T signals that improve overall domain authority", "format": "Expert Roundup", "estimated_impact": "+30-100 clicks/month per piece"},
|
||||
],
|
||||
}
|
||||
@@ -250,10 +250,10 @@ class GSCService:
|
||||
flow = Flow.from_client_config(
|
||||
self.client_config,
|
||||
scopes=self.scopes,
|
||||
redirect_uri=redirect_uri
|
||||
redirect_uri=redirect_uri,
|
||||
autogenerate_code_verifier=False,
|
||||
)
|
||||
|
||||
# Use a custom state that includes user_id for routing the callback to the correct DB
|
||||
|
||||
random_state = secrets.token_urlsafe(32)
|
||||
state = f"{user_id}:{random_state}"
|
||||
|
||||
@@ -300,7 +300,7 @@ class GSCService:
|
||||
logger.error(f"User database not found for user {user_id}")
|
||||
return False
|
||||
|
||||
# Verify state in user's DB
|
||||
# Verify state in user's DB (but don't delete yet — delete after successful token exchange)
|
||||
with sqlite3.connect(db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute('SELECT user_id FROM gsc_oauth_states WHERE state = ?', (state,))
|
||||
@@ -309,10 +309,6 @@ class GSCService:
|
||||
if not result:
|
||||
logger.error(f"Invalid or expired GSC OAuth state for user {user_id}")
|
||||
return False
|
||||
|
||||
# Clean up state
|
||||
cursor.execute('DELETE FROM gsc_oauth_states WHERE state = ?', (state,))
|
||||
conn.commit()
|
||||
|
||||
# Exchange code for credentials
|
||||
if not self.client_config:
|
||||
@@ -322,12 +318,22 @@ class GSCService:
|
||||
flow = Flow.from_client_config(
|
||||
self.client_config,
|
||||
scopes=self.scopes,
|
||||
redirect_uri=os.getenv('GSC_REDIRECT_URI', 'http://localhost:8000/gsc/callback')
|
||||
redirect_uri=os.getenv('GSC_REDIRECT_URI', 'http://localhost:8000/gsc/callback'),
|
||||
autogenerate_code_verifier=False,
|
||||
)
|
||||
|
||||
flow.fetch_token(code=authorization_code)
|
||||
credentials = flow.credentials
|
||||
|
||||
# State consumed successfully — clean up
|
||||
try:
|
||||
with sqlite3.connect(db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute('DELETE FROM gsc_oauth_states WHERE state = ?', (state,))
|
||||
conn.commit()
|
||||
except Exception as cleanup_err:
|
||||
logger.warning(f"Failed to clean up OAuth state: {cleanup_err}")
|
||||
|
||||
# Save credentials
|
||||
return self.save_user_credentials(user_id, credentials)
|
||||
|
||||
|
||||
@@ -343,18 +343,28 @@ class HallucinationDetector:
|
||||
logger.error(f"Error in batch evidence search: {str(e)}")
|
||||
return []
|
||||
|
||||
def _map_source_refs_from_reasoning(self, reasoning: str, sources: List[Dict[str, Any]]) -> List[int]:
|
||||
"""Parse 'Source N' references from reasoning text and return 0-based indices."""
|
||||
import re
|
||||
indices = set()
|
||||
for match in re.finditer(r'Source\s+(\d+)', reasoning):
|
||||
ref = int(match.group(1))
|
||||
if 1 <= ref <= len(sources):
|
||||
indices.add(ref - 1) # convert 1-based → 0-based
|
||||
return sorted(indices)
|
||||
|
||||
async def _assess_claims_batch(self, claims: List[str], sources: List[Dict[str, Any]], user_id: str = None) -> List[Claim]:
|
||||
"""Assess multiple claims against sources in one LLM call."""
|
||||
try:
|
||||
claims_to_assess = claims[:3]
|
||||
|
||||
combined_sources = "\n\n".join([
|
||||
f"Source {i+1}: {src.get('url','')}\nText: {src.get('text','')[:1000]}"
|
||||
f"Source [{i}]: {src.get('url','')}\nText: {src.get('text','')[:1000]}"
|
||||
for i, src in enumerate(sources)
|
||||
])
|
||||
|
||||
claims_text = "\n".join([
|
||||
f"Claim {i+1}: {claim}"
|
||||
f"Claim {i}: {claim}"
|
||||
for i, claim in enumerate(claims_to_assess)
|
||||
])
|
||||
|
||||
@@ -367,12 +377,14 @@ class HallucinationDetector:
|
||||
' "claim_index": 0,\n'
|
||||
' "assessment": "supported" or "refuted" or "insufficient_information",\n'
|
||||
' "confidence": number between 0.0 and 1.0,\n'
|
||||
' "supporting_sources": [array of source indices that support the claim],\n'
|
||||
' "refuting_sources": [array of source indices that refute the claim],\n'
|
||||
' "supporting_sources": [array of 0-based source indices, e.g. [0, 2] for Source [0] and Source [2]],\n'
|
||||
' "refuting_sources": [array of 0-based source indices, e.g. [1] for Source [1]],\n'
|
||||
' "reasoning": "brief explanation of your assessment"\n'
|
||||
' }\n'
|
||||
' ]\n'
|
||||
"}\n\n"
|
||||
"IMPORTANT: Source indices are 0-based. Source [0] is the first source, Source [1] is the second, etc.\n"
|
||||
"For every 'supported' or 'refuted' claim you MUST include the relevant source indices.\n\n"
|
||||
f"Claims to verify:\n{claims_text}\n\n"
|
||||
f"Sources:\n{combined_sources}\n\n"
|
||||
"Return only the JSON object:"
|
||||
@@ -407,6 +419,15 @@ class HallucinationDetector:
|
||||
if isinstance(idx, int) and 0 <= idx < len(sources):
|
||||
refuting_sources.append(sources[idx])
|
||||
|
||||
# Fallback: parse "Source N" from reasoning text when LLM omits indices
|
||||
if not supporting_sources and not refuting_sources and sources and assessment.get('reasoning'):
|
||||
ref_indices = self._map_source_refs_from_reasoning(assessment.get('reasoning', ''), sources)
|
||||
if ref_indices:
|
||||
if assessment.get('assessment') == 'supported':
|
||||
supporting_sources = [sources[i] for i in ref_indices]
|
||||
elif assessment.get('assessment') == 'refuted':
|
||||
refuting_sources = [sources[i] for i in ref_indices]
|
||||
|
||||
verified_claims.append(Claim(
|
||||
text=claim,
|
||||
confidence=float(assessment.get('confidence', 0.5)),
|
||||
@@ -464,7 +485,7 @@ class HallucinationDetector:
|
||||
"""Assess whether sources support or refute the claim using LLM."""
|
||||
try:
|
||||
combined_sources = "\n\n".join([
|
||||
f"Source {i+1}: {src.get('url','')}\nText: {src.get('text','')[:2000]}"
|
||||
f"Source [{i}]: {src.get('url','')}\nText: {src.get('text','')[:2000]}"
|
||||
for i, src in enumerate(sources)
|
||||
])
|
||||
|
||||
@@ -474,10 +495,12 @@ class HallucinationDetector:
|
||||
"{\n"
|
||||
' "assessment": "supported" or "refuted" or "insufficient_information",\n'
|
||||
' "confidence": number between 0.0 and 1.0,\n'
|
||||
' "supporting_sources": [array of source indices that support the claim],\n'
|
||||
' "refuting_sources": [array of source indices that refute the claim],\n'
|
||||
' "supporting_sources": [array of 0-based source indices, e.g. [0, 2] for Source [0] and Source [2]],\n'
|
||||
' "refuting_sources": [array of 0-based source indices, e.g. [1] for Source [1]],\n'
|
||||
' "reasoning": "brief explanation of your assessment"\n'
|
||||
"}\n\n"
|
||||
"IMPORTANT: Source indices are 0-based. Source [0] is the first source, Source [1] is the second, etc.\n"
|
||||
"For 'supported' or 'refuted' you MUST include the relevant source indices.\n\n"
|
||||
f"Claim to verify: {claim}\n\n"
|
||||
f"Sources:\n{combined_sources}\n\n"
|
||||
"Return only the JSON object:"
|
||||
@@ -508,6 +531,15 @@ class HallucinationDetector:
|
||||
if isinstance(idx, int) and 0 <= idx < len(sources):
|
||||
refuting_sources.append(sources[idx])
|
||||
|
||||
# Fallback: parse "Source N" from reasoning text when LLM omits indices
|
||||
if not supporting_sources and not refuting_sources and sources and result.get('reasoning'):
|
||||
ref_indices = self._map_source_refs_from_reasoning(result.get('reasoning', ''), sources)
|
||||
if ref_indices:
|
||||
if result.get('assessment') == 'supported':
|
||||
supporting_sources = [sources[i] for i in ref_indices]
|
||||
elif result.get('assessment') == 'refuted':
|
||||
refuting_sources = [sources[i] for i in ref_indices]
|
||||
|
||||
# Validate assessment value
|
||||
valid_assessments = ['supported', 'refuted', 'insufficient_information']
|
||||
if result['assessment'] not in valid_assessments:
|
||||
|
||||
@@ -46,6 +46,7 @@ def llm_text_gen(
|
||||
preferred_provider: Optional[str] = None,
|
||||
flow_type: Optional[str] = None,
|
||||
max_tokens: Optional[int] = None,
|
||||
temperature: Optional[float] = None,
|
||||
) -> str:
|
||||
"""
|
||||
Generate text using Language Model (LLM) based on the provided prompt.
|
||||
@@ -58,6 +59,8 @@ def llm_text_gen(
|
||||
preferred_hf_models (list, optional): Preferred HuggingFace models.
|
||||
preferred_provider (str, optional): Preferred provider (google, huggingface).
|
||||
flow_type (str, optional): Flow type for logging (e.g., 'sif_agent', 'premium_tool').
|
||||
max_tokens (int, optional): Max tokens for response. If None, provider default is used.
|
||||
temperature (float, optional): Temperature for generation (0.0-1.0). If None, defaults to 0.7.
|
||||
|
||||
Returns:
|
||||
str: Generated text based on the prompt.
|
||||
@@ -75,9 +78,8 @@ def llm_text_gen(
|
||||
# Set default values for LLM parameters
|
||||
gpt_provider = "google" # Default to Google Gemini
|
||||
model = "gemini-2.0-flash-001"
|
||||
temperature = 0.7
|
||||
if max_tokens is None:
|
||||
max_tokens = 4000
|
||||
if temperature is None:
|
||||
temperature = 0.7
|
||||
top_p = 0.9
|
||||
n = 1
|
||||
fp = 16
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import os
|
||||
import re
|
||||
import asyncio
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any, Dict, List, Optional
|
||||
from dataclasses import dataclass
|
||||
from loguru import logger
|
||||
import random
|
||||
@@ -17,42 +18,33 @@ class WritingSuggestion:
|
||||
|
||||
class WritingAssistantService:
|
||||
"""
|
||||
Minimal writing assistant that combines Exa search with Gemini continuation.
|
||||
- Exa provides relevant sources with content snippets
|
||||
- Gemini generates a short, cited continuation based on current text and sources
|
||||
Writing assistant that combines Exa search with LLM continuation.
|
||||
- Searches relevant sources using the content near the cursor position
|
||||
- Generates a short continuation grounded in sources
|
||||
- Confidence derived from source availability and quality
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
# COST CONTROL: Daily usage limits
|
||||
self.daily_api_calls = 0
|
||||
self.daily_limit = 50 # Max 50 API calls per day (~$2.50 max cost)
|
||||
self.daily_limit = 50
|
||||
self.last_reset_date = None
|
||||
|
||||
def _get_cached_suggestion(self, text: str) -> WritingSuggestion | None:
|
||||
"""No cached suggestions - always use real API calls for authentic results."""
|
||||
return None
|
||||
|
||||
def _check_daily_limit(self) -> bool:
|
||||
"""Check if we're within daily API usage limits."""
|
||||
import datetime
|
||||
|
||||
today = datetime.date.today()
|
||||
|
||||
# Reset counter if it's a new day
|
||||
if self.last_reset_date != today:
|
||||
self.daily_api_calls = 0
|
||||
self.last_reset_date = today
|
||||
|
||||
# Check if we've exceeded the limit
|
||||
if self.daily_api_calls >= self.daily_limit:
|
||||
return False
|
||||
|
||||
# Increment counter for this API call
|
||||
self.daily_api_calls += 1
|
||||
logger.info(f"Writing assistant API call #{self.daily_api_calls}/{self.daily_limit} today")
|
||||
return True
|
||||
|
||||
async def suggest(self, text: str, user_id: str | None = None) -> List[WritingSuggestion]:
|
||||
async def suggest(self, text: str, user_id: str | None = None, cursor_position: Optional[int] = None) -> List[WritingSuggestion]:
|
||||
if not text or len(text.strip()) < 6:
|
||||
return []
|
||||
|
||||
@@ -67,26 +59,41 @@ class WritingAssistantService:
|
||||
if len(text.strip()) < 50:
|
||||
return []
|
||||
|
||||
# 1) Find relevant sources via Exa
|
||||
sources = await self._search_sources(text, user_id=user_id)
|
||||
# Use text before cursor for context (where the user is actively writing)
|
||||
if cursor_position is not None and 0 < cursor_position <= len(text):
|
||||
context_text = text[:cursor_position]
|
||||
else:
|
||||
context_text = text
|
||||
|
||||
# 2) Generate continuation suggestion via LLM grounded in sources
|
||||
suggestion_text, confidence = await self._generate_continuation(text, sources, user_id=user_id)
|
||||
# 1) Find relevant sources via Exa (non-fatal)
|
||||
sources = []
|
||||
try:
|
||||
sources = await self._search_sources(context_text, user_id=user_id)
|
||||
except Exception as e:
|
||||
logger.warning(f"WritingAssistant Exa search failed, proceeding without sources: {e}")
|
||||
|
||||
# 2) Generate continuation suggestion via LLM
|
||||
suggestion_text, confidence = await self._generate_continuation(context_text, sources, user_id=user_id)
|
||||
|
||||
if not suggestion_text:
|
||||
return []
|
||||
|
||||
return [WritingSuggestion(text=suggestion_text.strip(), confidence=confidence, sources=sources)]
|
||||
|
||||
async def _search_sources(self, text: str, user_id: str = None) -> List[Dict[str, Any]]:
|
||||
"""Search for relevant sources using ExaResearchProvider with subscription checks."""
|
||||
async def _search_sources(self, context_text: str, user_id: str = None) -> List[Dict[str, Any]]:
|
||||
"""Search Exa using the last sentence before cursor for a focused query."""
|
||||
try:
|
||||
from services.blog_writer.research.exa_provider import ExaResearchProvider
|
||||
|
||||
exa_query = (
|
||||
(text[-1000:] if len(text) > 1000 else text)
|
||||
+ "\n\nIf you found the above interesting, here's another useful resource to read:"
|
||||
)
|
||||
# Extract the last sentence from context to use as a focused search query
|
||||
sentences = re.split(r'(?<=[.!?])\s+', context_text.strip())
|
||||
last_sentence = sentences[-1].strip().strip('"').strip("'") if sentences else context_text
|
||||
|
||||
# If very short, use last two sentences
|
||||
if len(last_sentence) < 20 and len(sentences) >= 2:
|
||||
last_sentence = ' '.join(s[-2:]).strip().strip('"').strip("'")
|
||||
|
||||
exa_query = last_sentence[:500] if len(last_sentence) > 500 else last_sentence
|
||||
|
||||
provider = ExaResearchProvider()
|
||||
sources = await provider.simple_search(
|
||||
@@ -95,7 +102,6 @@ class WritingAssistantService:
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
# Normalize keys to match expected format
|
||||
normalized = []
|
||||
for s in sources:
|
||||
normalized.append({
|
||||
@@ -104,7 +110,7 @@ class WritingAssistantService:
|
||||
"text": s.get("text", ""),
|
||||
"author": s.get("author", ""),
|
||||
"published_date": s.get("publishedDate", ""),
|
||||
"score": float(s.get("score", 0.5)),
|
||||
"score": float(s.get("score") if s.get("score") is not None else 0.5),
|
||||
})
|
||||
|
||||
if not normalized:
|
||||
@@ -151,8 +157,21 @@ class WritingAssistantService:
|
||||
suggestion = (str(ai_resp or "")).strip()
|
||||
if not suggestion:
|
||||
raise Exception("Assistive writer returned empty suggestion")
|
||||
confidence = 0.7
|
||||
return suggestion, confidence
|
||||
|
||||
# Dynamic confidence based on source quality and response signals
|
||||
confidence = 0.5
|
||||
if sources:
|
||||
# More sources and higher scores = more confident
|
||||
avg_score = sum(s.get("score", 0.5) for s in sources) / len(sources)
|
||||
confidence = 0.5 + (len(sources) / 6.0) * 0.3 + avg_score * 0.2
|
||||
if suggestion.endswith(('.', '!', '?')):
|
||||
confidence += 0.05
|
||||
# Check if citation hint was included
|
||||
if '[http' in suggestion or '((' in suggestion:
|
||||
confidence += 0.05
|
||||
confidence = min(confidence, 1.0)
|
||||
|
||||
return suggestion, round(confidence, 2)
|
||||
except Exception as e:
|
||||
logger.error(f"WritingAssistant _generate_continuation error: {e}")
|
||||
raise
|
||||
|
||||
Reference in New Issue
Block a user