fix: add metadata-based Stripe customer lookup in verify-checkout for reliable post-subscription plan detection (#538)

This commit is contained in:
ajaysi
2026-05-26 15:25:05 +05:30
parent 6331671c6a
commit 96fa469fe8
71 changed files with 3366 additions and 17182 deletions

View File

@@ -170,7 +170,7 @@ async def verify_checkout_status(
stripe_customer_id = subscription.stripe_customer_id if subscription else None
# If no stripe_customer_id in DB, try to find it by email
# If no stripe_customer_id in DB, try to find it by email or metadata
if not stripe_customer_id:
try:
import stripe
@@ -188,8 +188,22 @@ async def verify_checkout_status(
db.commit()
else:
logger.info(f"Verify-checkout: No local subscription record for user {user_id}, will query Stripe directly")
except Exception as email_err:
logger.warning(f"Failed to find Stripe customer by email: {email_err}")
# Fallback: search by metadata user_id (handles email mismatches)
if not stripe_customer_id:
customers = stripe.Customer.search(
query=f"metadata['user_id']:'{user_id}'",
limit=1
)
if customers and customers.data:
stripe_customer_id = customers.data[0].id
logger.info(f"Verify-checkout: Found Stripe customer by metadata user_id for user {user_id}")
if subscription:
subscription.stripe_customer_id = stripe_customer_id
db.commit()
except Exception as lookup_err:
logger.warning(f"Failed to find Stripe customer by email or metadata: {lookup_err}")
# If user has a Stripe customer ID, query Stripe directly
if stripe_customer_id:
@@ -250,6 +264,57 @@ async def verify_checkout_status(
except Exception as stripe_err:
logger.warning(f"Failed to query Stripe directly for user {user_id}: {stripe_err}")
# Fallback: search Stripe subscriptions by metadata user_id (handles cases where
# customer was created without metadata or email doesn't match)
if not stripe_customer_id or not subscription:
try:
import stripe
meta_subs = stripe.Subscription.search(
query=f"status:'active' AND metadata['user_id']:'{user_id}'",
limit=1
)
if meta_subs and meta_subs.data:
stripe_sub = meta_subs.data[0]
stripe_customer_id = stripe_sub.customer
price_id = stripe_sub['items']['data'][0]['price']['id']
logger.info(f"Verify-checkout: Found subscription by metadata user_id for user {user_id}")
stripe_service._update_user_subscription(
user_id,
stripe_customer_id=stripe_customer_id,
stripe_subscription_id=stripe_sub.id,
status="active",
price_id=price_id
)
try:
PricingService.clear_user_cache(user_id)
except Exception:
pass
db.expire_all()
subscription = db.query(UserSubscription).filter(
UserSubscription.user_id == user_id,
UserSubscription.is_active == True
).first()
if subscription:
return {
"success": True,
"data": {
"active": True,
"plan": subscription.plan.tier.value,
"tier": subscription.plan.tier.value,
"can_use_api": True,
"limits": format_plan_limits(subscription.plan),
"source": "stripe_direct_metadata"
}
}
except Exception as meta_err:
logger.warning(f"Failed to find subscription by metadata for user {user_id}: {meta_err}")
# Fallback to local DB status
if subscription and subscription.is_active:
from services.subscription.pricing_service import PricingService

View File

@@ -31,6 +31,7 @@ from services.seo_tools.technical_seo_service import TechnicalSEOService
from services.seo_tools.enterprise_seo_service import EnterpriseSEOService
from services.seo_tools.gsc_analyzer_service import GSCAnalyzerService
from services.seo_tools.content_strategy_service import ContentStrategyService
from services.seo_tools.llm_insights_service import LLMInsightsService
from services.database import get_session_for_user
from api.content_planning.services.content_strategy.onboarding import OnboardingDataIntegrationService
from middleware.logging_middleware import log_api_call, save_to_file
@@ -151,6 +152,53 @@ class ContentOpportunitiesRequest(BaseModel):
min_impressions: int = Field(default=100, ge=10, description="Minimum impressions threshold")
date_range_days: int = Field(default=90, ge=7, le=365, description="Number of days to analyze")
# ==================== LLM INSIGHTS REQUEST MODELS ====================
class EnterpriseAuditInsightsRequest(BaseModel):
    """Request model for AI insights from enterprise audit"""
    # audit_results and website_url are declared with Field(...), i.e. no
    # default value is supplied for them.
    audit_results: Dict[str, Any] = Field(..., description="Complete audit results")
    website_url: str = Field(..., description="Website being audited")
    # Optional: defaults to None when the caller omits it.
    target_keywords: Optional[List[str]] = Field(None, description="Target keywords")
class GSCAnalysisInsightsRequest(BaseModel):
    """Request model for AI insights from GSC analysis"""
    # Both fields are declared with Field(...), i.e. without a default value.
    gsc_analysis: Dict[str, Any] = Field(..., description="Complete GSC analysis data")
    website_url: str = Field(..., description="Website being analyzed")
class ContentStrategyRequest(BaseModel):
    """Request model for content strategy generation"""
    # The first three fields are declared with Field(...), i.e. without a
    # default value.
    current_content: Dict[str, Any] = Field(..., description="Current content analysis")
    content_gaps: List[str] = Field(..., description="Identified content gaps")
    target_keywords: List[str] = Field(..., description="Target keywords")
    # Optional: defaults to None when no competitor analysis is supplied.
    competitor_content: Optional[Dict[str, Any]] = Field(None, description="Competitor content analysis")
class TrafficRoadmapRequest(BaseModel):
    """Request model for traffic improvement roadmap"""
    current_metrics: Dict[str, Any] = Field(..., description="Current traffic metrics")
    identified_opportunities: List[Dict[str, Any]] = Field(..., description="Improvement opportunities")
    # Defaults to 12; constrained by ge=4 and le=52 (weeks).
    implementation_timeline_weeks: int = Field(default=12, ge=4, le=52, description="Implementation timeline")
class CompetitiveInsightsRequest(BaseModel):
    """Request model for competitive insights generation"""
    # Both fields are declared with Field(...), i.e. without a default value.
    primary_site_analysis: Dict[str, Any] = Field(..., description="Primary site analysis")
    # One dict per competitor; no length constraint is declared here.
    competitor_analyses: List[Dict[str, Any]] = Field(..., description="Competitor analyses")
class PrioritizedRecommendationsRequest(BaseModel):
    """Request model for prioritized recommendations"""
    # Both fields are declared with Field(...), i.e. without a default value.
    all_recommendations: List[Dict[str, Any]] = Field(..., description="All recommendations to prioritize")
    business_context: Dict[str, Any] = Field(..., description="Business goals and constraints")
class QuickWinsRequest(BaseModel):
    """Request model for quick wins identification"""
    audit_data: Dict[str, Any] = Field(..., description="Complete audit data")
    # Defaults to 7; constrained by ge=1 and le=30 (days).
    max_days_to_implement: int = Field(default=7, ge=1, le=30, description="Maximum days to implement")
class KeywordExpansionRequest(BaseModel):
    """Request model for keyword expansion"""
    current_keywords: List[str] = Field(..., description="Current target keywords")
    content_analysis: Dict[str, Any] = Field(..., description="Content analysis data")
    # Optional free-form string; the description lists low/medium/high but no
    # validation of those values is declared on this field.
    target_difficulty: Optional[str] = Field(None, description="Target difficulty (low/medium/high)")
# Exception Handler
async def handle_seo_tool_exception(func_name: str, error: Exception, request_data: Dict) -> ErrorResponse:
"""Handle exceptions from SEO tools with intelligent logging"""
@@ -1081,3 +1129,466 @@ async def check_enterprise_services_health() -> BaseResponse:
message="Enterprise health check failed",
data={"error": str(e)}
)
# ==================== LLM INSIGHTS ENDPOINTS (Phase 2A.2) ====================
@router.post("/llm/generate-audit-insights", response_model=BaseResponse)
@log_api_call
async def generate_audit_insights(
    request: EnterpriseAuditInsightsRequest,
    background_tasks: BackgroundTasks,
    current_user: dict = Depends(get_current_user)
) -> Union[BaseResponse, ErrorResponse]:
    """
    Generate AI-powered insights from enterprise SEO audit results.
    Analyzes audit findings and produces strategic, actionable insights with:
    - Priority scoring (1-10 scale)
    - Traffic impact projections
    - Implementation difficulty assessments
    - Step-by-step action guides
    - Required tools and resources
    """
    began_at = datetime.utcnow()
    try:
        logger.info(f"Generating audit insights for {request.website_url}")
        insights = await LLMInsightsService().generate_enterprise_audit_insights(
            audit_results=request.audit_results,
            website_url=request.website_url,
            target_keywords=request.target_keywords
        )
        elapsed_seconds = (datetime.utcnow() - began_at).total_seconds()

        # Record the successful operation via a background task.
        operation_record = {
            "operation": "audit_insights_generation",
            "website_url": request.website_url,
            "insights_generated": len(insights.get('insights', [])),
            "execution_time": elapsed_seconds,
            "success": True
        }
        background_tasks.add_task(save_to_file, f"{LOG_DIR}/llm_operations.jsonl", operation_record)

        return BaseResponse(
            success=True,
            message="Audit insights generated successfully",
            execution_time=elapsed_seconds,
            data=insights
        )
    except Exception as exc:
        logger.error(f"Audit insights generation failed: {exc!s}", exc_info=True)
        failure_context = {"website_url": request.website_url}
        return await handle_seo_tool_exception("generate_audit_insights", exc, failure_context)
@router.post("/llm/generate-gsc-insights", response_model=BaseResponse)
@log_api_call
async def generate_gsc_insights(
    request: GSCAnalysisInsightsRequest,
    background_tasks: BackgroundTasks,
    current_user: dict = Depends(get_current_user)
) -> Union[BaseResponse, ErrorResponse]:
    """
    Generate strategic insights from GSC search performance analysis.
    Produces targeted, actionable insights including:
    - Keyword optimization opportunities
    - Content ranking improvement strategies
    - CTR enhancement tactics
    - Competitive positioning analysis
    - Quick-win identification
    """
    began_at = datetime.utcnow()
    try:
        logger.info(f"Generating GSC insights for {request.website_url}")
        insights = await LLMInsightsService().generate_gsc_analysis_insights(
            gsc_analysis=request.gsc_analysis,
            website_url=request.website_url
        )
        elapsed_seconds = (datetime.utcnow() - began_at).total_seconds()

        # Record the successful operation via a background task.
        operation_record = {
            "operation": "gsc_insights_generation",
            "website_url": request.website_url,
            "insights_generated": len(insights.get('insights', [])),
            "execution_time": elapsed_seconds,
            "success": True
        }
        background_tasks.add_task(save_to_file, f"{LOG_DIR}/llm_operations.jsonl", operation_record)

        return BaseResponse(
            success=True,
            message="GSC insights generated successfully",
            execution_time=elapsed_seconds,
            data=insights
        )
    except Exception as exc:
        logger.error(f"GSC insights generation failed: {exc!s}", exc_info=True)
        failure_context = {"website_url": request.website_url}
        return await handle_seo_tool_exception("generate_gsc_insights", exc, failure_context)
@router.post("/llm/generate-content-strategy", response_model=BaseResponse)
@log_api_call
async def generate_content_strategy(
    request: ContentStrategyRequest,
    background_tasks: BackgroundTasks,
    current_user: dict = Depends(get_current_user)
) -> Union[BaseResponse, ErrorResponse]:
    """
    Generate comprehensive content strategy with AI recommendations.
    Creates detailed strategy including:
    - Content gap analysis and solutions
    - Content calendar recommendations
    - Keyword-to-content mapping
    - Competitive content benchmarking
    - Topic cluster suggestions
    - Publishing frequency recommendations
    """
    began_at = datetime.utcnow()
    try:
        logger.info(f"Generating content strategy ({len(request.content_gaps)} gaps)")
        strategy = await LLMInsightsService().generate_content_strategy_insights(
            current_content=request.current_content,
            content_gaps=request.content_gaps,
            target_keywords=request.target_keywords,
            competitor_content=request.competitor_content
        )
        elapsed_seconds = (datetime.utcnow() - began_at).total_seconds()

        # Record the successful operation via a background task.
        operation_record = {
            "operation": "content_strategy_generation",
            "gaps_addressed": len(request.content_gaps),
            "keywords_analyzed": len(request.target_keywords),
            "execution_time": elapsed_seconds,
            "success": True
        }
        background_tasks.add_task(save_to_file, f"{LOG_DIR}/llm_operations.jsonl", operation_record)

        return BaseResponse(
            success=True,
            message="Content strategy generated successfully",
            execution_time=elapsed_seconds,
            data=strategy
        )
    except Exception as exc:
        logger.error(f"Content strategy generation failed: {exc!s}", exc_info=True)
        failure_context = {"gaps_count": len(request.content_gaps)}
        return await handle_seo_tool_exception("generate_content_strategy", exc, failure_context)
@router.post("/llm/generate-traffic-roadmap", response_model=BaseResponse)
@log_api_call
async def generate_traffic_roadmap(
    request: TrafficRoadmapRequest,
    background_tasks: BackgroundTasks,
    current_user: dict = Depends(get_current_user)
) -> Union[BaseResponse, ErrorResponse]:
    """
    Generate phased traffic improvement roadmap with projections.
    Produces detailed roadmap with:
    - Phased implementation plan (Week 1, 2, 3+)
    - Traffic gain projections per phase
    - Priority-ordered action items
    - Resource requirements per phase
    - Key performance indicators (KPIs)
    - Success metrics and validation points
    - Risk mitigation strategies
    """
    began_at = datetime.utcnow()
    try:
        logger.info(f"Generating traffic roadmap ({request.implementation_timeline_weeks} weeks)")
        roadmap = await LLMInsightsService().generate_traffic_improvement_roadmap(
            current_metrics=request.current_metrics,
            identified_opportunities=request.identified_opportunities,
            implementation_timeline_weeks=request.implementation_timeline_weeks
        )
        elapsed_seconds = (datetime.utcnow() - began_at).total_seconds()

        # Record the successful operation via a background task.
        operation_record = {
            "operation": "traffic_roadmap_generation",
            "timeline_weeks": request.implementation_timeline_weeks,
            "opportunities_count": len(request.identified_opportunities),
            "execution_time": elapsed_seconds,
            "success": True
        }
        background_tasks.add_task(save_to_file, f"{LOG_DIR}/llm_operations.jsonl", operation_record)

        return BaseResponse(
            success=True,
            message="Traffic roadmap generated successfully",
            execution_time=elapsed_seconds,
            data=roadmap
        )
    except Exception as exc:
        logger.error(f"Traffic roadmap generation failed: {exc!s}", exc_info=True)
        failure_context = {"opportunities_count": len(request.identified_opportunities)}
        return await handle_seo_tool_exception("generate_traffic_roadmap", exc, failure_context)
@router.post("/llm/generate-competitive-insights", response_model=BaseResponse)
@log_api_call
async def generate_competitive_insights(
    request: CompetitiveInsightsRequest,
    background_tasks: BackgroundTasks,
    current_user: dict = Depends(get_current_user)
) -> Union[BaseResponse, ErrorResponse]:
    """
    Generate competitive positioning and intelligence insights.
    Analyzes competitive landscape and provides:
    - Competitive advantage identification
    - Competitive gap analysis
    - Market opportunity identification
    - Threat assessment
    - Win strategy recommendations
    - Differentiation recommendations
    - Market position recommendations
    """
    began_at = datetime.utcnow()
    try:
        logger.info(f"Generating competitive insights ({len(request.competitor_analyses)} competitors)")
        insights = await LLMInsightsService().generate_competitive_insights(
            primary_site_analysis=request.primary_site_analysis,
            competitor_analyses=request.competitor_analyses
        )
        elapsed_seconds = (datetime.utcnow() - began_at).total_seconds()

        # Record the successful operation via a background task.
        operation_record = {
            "operation": "competitive_insights_generation",
            "competitors_analyzed": len(request.competitor_analyses),
            "execution_time": elapsed_seconds,
            "success": True
        }
        background_tasks.add_task(save_to_file, f"{LOG_DIR}/llm_operations.jsonl", operation_record)

        return BaseResponse(
            success=True,
            message="Competitive insights generated successfully",
            execution_time=elapsed_seconds,
            data=insights
        )
    except Exception as exc:
        logger.error(f"Competitive insights generation failed: {exc!s}", exc_info=True)
        failure_context = {"competitors_count": len(request.competitor_analyses)}
        return await handle_seo_tool_exception("generate_competitive_insights", exc, failure_context)
@router.post("/llm/prioritized-recommendations", response_model=BaseResponse)
@log_api_call
async def get_prioritized_recommendations(
    request: PrioritizedRecommendationsRequest,
    background_tasks: BackgroundTasks,
    current_user: dict = Depends(get_current_user)
) -> Union[BaseResponse, ErrorResponse]:
    """
    Get AI-prioritized recommendations ranked by business impact.
    Scores and prioritizes recommendations by:
    - Traffic impact potential
    - Implementation effort required
    - Resource requirements
    - Timeline to implementation
    - Business alignment
    - Risk level
    - ROI potential
    Returns categorized as: Quick Wins | High Impact | Long-term
    """
    began_at = datetime.utcnow()
    try:
        logger.info(f"Prioritizing {len(request.all_recommendations)} recommendations")
        prioritized = await LLMInsightsService().generate_prioritized_recommendations(
            all_recommendations=request.all_recommendations,
            business_context=request.business_context
        )
        elapsed_seconds = (datetime.utcnow() - began_at).total_seconds()

        # Record the successful operation via a background task.
        operation_record = {
            "operation": "prioritized_recommendations_generation",
            "total_recommendations": len(request.all_recommendations),
            "quick_wins": len(prioritized.get('quick_wins', [])),
            "execution_time": elapsed_seconds,
            "success": True
        }
        background_tasks.add_task(save_to_file, f"{LOG_DIR}/llm_operations.jsonl", operation_record)

        return BaseResponse(
            success=True,
            message="Recommendations prioritized successfully",
            execution_time=elapsed_seconds,
            data=prioritized
        )
    except Exception as exc:
        logger.error(f"Recommendation prioritization failed: {exc!s}", exc_info=True)
        failure_context = {"recommendations_count": len(request.all_recommendations)}
        return await handle_seo_tool_exception("get_prioritized_recommendations", exc, failure_context)
@router.post("/llm/quick-wins", response_model=BaseResponse)
@log_api_call
async def identify_quick_wins(
    request: QuickWinsRequest,
    background_tasks: BackgroundTasks,
    current_user: dict = Depends(get_current_user)
) -> Union[BaseResponse, ErrorResponse]:
    """
    Identify quick wins - high-impact actions implementable in short timeframe.
    Finds high-ROI quick wins including:
    - Meta tag optimization opportunities
    - URL structure improvements
    - On-page optimization quick fixes
    - Internal linking recommendations
    - Content formatting improvements
    - Technical SEO quick fixes
    - Performance optimization opportunities
    Each with: estimated traffic gain, implementation time, tools needed, expected outcomes
    """
    began_at = datetime.utcnow()
    try:
        logger.info(f"Identifying quick wins (max {request.max_days_to_implement} days)")
        quick_wins = await LLMInsightsService().generate_quick_wins(
            audit_data=request.audit_data,
            max_days_to_implement=request.max_days_to_implement
        )
        elapsed_seconds = (datetime.utcnow() - began_at).total_seconds()

        # Record the successful operation via a background task.
        operation_record = {
            "operation": "quick_wins_identification",
            "max_days": request.max_days_to_implement,
            "quick_wins_found": len(quick_wins.get('quick_wins', [])),
            "total_potential_traffic": quick_wins.get('total_potential_traffic', 0),
            "execution_time": elapsed_seconds,
            "success": True
        }
        background_tasks.add_task(save_to_file, f"{LOG_DIR}/llm_operations.jsonl", operation_record)

        return BaseResponse(
            success=True,
            message="Quick wins identified successfully",
            execution_time=elapsed_seconds,
            data=quick_wins
        )
    except Exception as exc:
        logger.error(f"Quick wins identification failed: {exc!s}", exc_info=True)
        failure_context = {"max_days": request.max_days_to_implement}
        return await handle_seo_tool_exception("identify_quick_wins", exc, failure_context)
@router.post("/llm/keyword-expansion", response_model=BaseResponse)
@log_api_call
async def expand_keywords(
    request: KeywordExpansionRequest,
    background_tasks: BackgroundTasks,
    current_user: dict = Depends(get_current_user)
) -> Union[BaseResponse, ErrorResponse]:
    """
    Expand keyword list with AI-generated related and long-tail keywords.
    Generates 15-20 additional keywords including:
    - Long-tail keyword variations
    - Question-based keywords (People Also Ask)
    - Local keyword variations
    - Intent-based keywords (commercial, informational, navigational)
    - Seasonal keyword variants
    Each keyword includes: search volume estimate, difficulty score, relevance, content opportunity
    """
    began_at = datetime.utcnow()
    try:
        logger.info(f"Expanding keywords from {len(request.current_keywords)} base keywords")
        expansion = await LLMInsightsService().generate_keyword_expansion(
            current_keywords=request.current_keywords,
            content_analysis=request.content_analysis,
            target_difficulty=request.target_difficulty
        )
        elapsed_seconds = (datetime.utcnow() - began_at).total_seconds()

        # Record the successful operation via a background task.
        operation_record = {
            "operation": "keyword_expansion",
            "original_keywords": len(request.current_keywords),
            "expanded_keywords": expansion.get('expanded_keywords', 0),
            "execution_time": elapsed_seconds,
            "success": True
        }
        background_tasks.add_task(save_to_file, f"{LOG_DIR}/llm_operations.jsonl", operation_record)

        return BaseResponse(
            success=True,
            message="Keyword expansion completed successfully",
            execution_time=elapsed_seconds,
            data=expansion
        )
    except Exception as exc:
        logger.error(f"Keyword expansion failed: {exc!s}", exc_info=True)
        failure_context = {"keywords_count": len(request.current_keywords)}
        return await handle_seo_tool_exception("expand_keywords", exc, failure_context)
@router.get("/llm/health", response_model=BaseResponse)
@log_api_call
async def check_llm_insights_health() -> BaseResponse:
    """Health check for LLM insights service"""
    # Response key -> key read from the service's health_check() result.
    reported_fields = (
        ("service", "service"),
        ("version", "version"),
        ("llm_integration", "llm_integration"),
        ("timestamp", "last_check"),
    )
    try:
        health = await LLMInsightsService().health_check()
        payload = {out_key: health.get(src_key) for out_key, src_key in reported_fields}
        return BaseResponse(
            success=True,
            message="LLM insights service is healthy",
            data=payload
        )
    except Exception as exc:
        # Any failure is reported in the response body rather than raised.
        logger.error(f"LLM insights health check failed: {exc!s}")
        return BaseResponse(
            success=False,
            message="LLM insights service health check failed",
            data={"error": str(exc)}
        )

View File

@@ -421,14 +421,15 @@ class GSCService:
if not start_date:
start_date = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d')
# Check cache first
# Check cache first (only return cached data with non-empty query rows)
cache_key = f"{user_id}_{site_url}_{start_date}_{end_date}"
cached_data = self._get_cached_data(user_id, site_url, 'analytics', cache_key)
if cached_data and isinstance(cached_data, dict):
has_pages = 'page_data' in cached_data and isinstance(cached_data.get('page_data'), dict)
has_queries = 'query_data' in cached_data and isinstance(cached_data.get('query_data'), dict)
if has_pages and has_queries:
logger.info(f"Returning cached analytics data for user: {user_id} (includes page_data)")
has_query_rows = cached_data.get('query_data', {}).get('rows', [])
if has_pages and has_queries and has_query_rows:
logger.info(f"Returning cached analytics data for user: {user_id} (includes page_data, {len(has_query_rows)} query rows)")
return cached_data
try:
@@ -623,7 +624,11 @@ class GSCService:
'siteUrl': site_url
}
self._cache_data(user_id, site_url, 'analytics', analytics_data, cache_key)
if analytics_data.get('query_data', {}).get('rows'):
self._cache_data(user_id, site_url, 'analytics', analytics_data, cache_key)
logger.info(f"Analytics data cached for user: {user_id}, site: {site_url} ({len(analytics_data.get('query_data', {}).get('rows', []))} query rows)")
else:
logger.info(f"Skipping cache for user: {user_id} — empty query_data rows; next request will retry fresh")
logger.info(f"Retrieved comprehensive analytics data for user: {user_id}, site: {site_url}")
return analytics_data
@@ -655,7 +660,7 @@ class GSCService:
'warning': f'Query-level data unavailable: {str(query_error)}'
}
self._cache_data(user_id, site_url, 'analytics', analytics_data, cache_key)
logger.info(f"Query-level data unavailable for user {user_id}; fallback analytics returned (not cached)")
return analytics_data
except Exception as e:

View File

@@ -0,0 +1,898 @@
"""
LLM-Powered SEO Insights Service for Phase 2A.2
Provides AI-powered insights and recommendations based on enterprise SEO audits
and GSC analysis using Claude/GPT LLM models with advanced prompt engineering.
Features:
- 8 specialized insight generation methods
- Dynamic prompt templates with context awareness
- Priority-scored recommendations
- Traffic improvement strategies
- Implementation guides and phasing
- Competitive intelligence synthesis
- Content gap analysis
- AI-driven traffic projections
"""
from typing import Dict, Any, List, Optional, Tuple
from datetime import datetime
from dataclasses import dataclass, asdict
import asyncio
import json
from loguru import logger
from services.llm_providers.main_text_generation import llm_text_gen
@dataclass
class AIInsight:
    """
    Data class for AI-generated insights.

    Groups one insight's headline text, its priority and effort estimates,
    and the lists of steps, tools and expected outcomes. All fields are
    required; none declares a default.
    """
    title: str
    description: str
    priority_score: int  # 1-10
    estimated_traffic_impact: str
    implementation_difficulty: str  # easy, moderate, hard
    estimated_time_to_implement: str  # days, weeks, months
    steps: List[str]
    tools_required: List[str]
    expected_outcomes: List[str]
    business_impact: str
class LLMInsightsService:
"""
Service for generating AI-powered SEO insights and recommendations
using LLM models with specialized prompts for different analysis types.
"""
def __init__(self):
    """Set the service's identity fields and log that it was initialized."""
    self.service_name, self.version = "llm_insights_generator", "1.0"
    # Claude for superior reasoning
    self.model_preference = "claude"
    logger.info(f"Initialized {self.service_name} v{self.version}")
# ============= AUDIT INSIGHTS =============
async def generate_enterprise_audit_insights(
    self,
    audit_results: Dict[str, Any],
    website_url: str,
    target_keywords: Optional[List[str]] = None
) -> Dict[str, Any]:
    """
    Generate comprehensive AI insights from complete enterprise audit results.

    Args:
        audit_results: Full audit data from enterprise_seo_service
        website_url: The audited website
        target_keywords: Keywords from analysis

    Returns:
        Dict with the keys 'status', 'website_url', 'audit_overall_score',
        'insights_generated', 'insights', 'generated_at' and 'summary'.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    try:
        logger.info(f"Generating enterprise audit insights for {website_url}")

        # Extract key metrics from audit
        overall_score = audit_results.get('overall_score', 0)
        component_scores = audit_results.get('component_scores', {})

        # Build context for LLM
        context = self._build_audit_context(
            website_url, audit_results, target_keywords
        )

        # Generate insights prompt
        prompt = self._build_audit_insights_prompt(context, overall_score, component_scores)

        # Call LLM
        insights_json = await self._call_llm_for_json(
            prompt=prompt,
            context_type="enterprise_audit_insights"
        )

        # Parse and structure insights
        insights = self._parse_insights_response(insights_json)

        # Add metadata
        result = {
            'status': 'completed',
            'website_url': website_url,
            'audit_overall_score': overall_score,
            'insights_generated': len(insights),
            'insights': insights,
            'generated_at': datetime.utcnow().isoformat(),
            'summary': self._generate_summary(insights, overall_score)
        }

        logger.info(f"Generated {len(insights)} insights for {website_url}")
        return result
    except Exception as e:
        # loguru passes keyword arguments to str.format(), so the stdlib-style
        # `exc_info=True` attached no traceback and could itself raise when the
        # error text contained braces. logger.exception records the traceback
        # and substitutes the error as a format value instead.
        logger.exception("Enterprise audit insights generation failed: {}", e)
        raise
# ============= GSC INSIGHTS =============
async def generate_gsc_analysis_insights(
    self,
    gsc_analysis: Dict[str, Any],
    website_url: str
) -> Dict[str, Any]:
    """
    Generate strategic insights from GSC analysis with keyword opportunities.

    Args:
        gsc_analysis: Full GSC analysis data
        website_url: Website being analyzed

    Returns:
        Dict with the keys 'status', 'website_url',
        'total_content_opportunities', 'insights', 'generated_at' and
        'focus_areas'.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    try:
        logger.info(f"Generating GSC analysis insights for {website_url}")

        # Extract key GSC metrics
        performance_overview = gsc_analysis.get('performance_overview', {})
        content_opportunities = gsc_analysis.get('content_opportunities', [])

        # Build GSC context
        context = self._build_gsc_context(gsc_analysis, website_url)

        # Generate insights prompt
        prompt = self._build_gsc_insights_prompt(
            context,
            len(content_opportunities),
            performance_overview
        )

        # Call LLM
        insights_json = await self._call_llm_for_json(
            prompt=prompt,
            context_type="gsc_analysis_insights"
        )

        # Parse insights
        insights = self._parse_insights_response(insights_json)

        result = {
            'status': 'completed',
            'website_url': website_url,
            'total_content_opportunities': len(content_opportunities),
            'insights': insights,
            'generated_at': datetime.utcnow().isoformat(),
            'focus_areas': self._identify_gsc_focus_areas(insights)
        }

        logger.info(f"Generated {len(insights)} GSC insights")
        return result
    except Exception as e:
        # loguru formats the message with any keyword arguments, so
        # `exc_info=True` logged no traceback and could raise on error text
        # containing braces; logger.exception avoids both problems.
        logger.exception("GSC analysis insights generation failed: {}", e)
        raise
# ============= CONTENT STRATEGY =============
async def generate_content_strategy_insights(
    self,
    current_content: Dict[str, Any],
    content_gaps: List[str],
    target_keywords: List[str],
    competitor_content: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Generate strategic content recommendations based on gaps and keywords.

    Args:
        current_content: Existing content analysis
        content_gaps: Identified content gaps
        target_keywords: Target keywords for content
        competitor_content: Optional competitor content analysis

    Returns:
        Dict with the keys 'status', 'gaps_addressed', 'strategy_insights',
        'phased_roadmap' and 'generated_at'.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    try:
        logger.info("Generating content strategy insights")

        # Build content strategy context
        context = self._build_content_strategy_context(
            current_content, content_gaps, target_keywords, competitor_content
        )

        # Generate strategy prompt
        prompt = self._build_content_strategy_prompt(context, len(content_gaps))

        # Call LLM
        strategy_json = await self._call_llm_for_json(
            prompt=prompt,
            context_type="content_strategy_insights"
        )

        # Parse strategy insights
        insights = self._parse_strategy_response(strategy_json)

        result = {
            'status': 'completed',
            'gaps_addressed': len(content_gaps),
            'strategy_insights': insights,
            'phased_roadmap': self._create_content_roadmap(insights),
            'generated_at': datetime.utcnow().isoformat()
        }
        return result
    except Exception as e:
        # loguru formats the message with any keyword arguments, so
        # `exc_info=True` logged no traceback and could raise on error text
        # containing braces; logger.exception avoids both problems.
        logger.exception("Content strategy generation failed: {}", e)
        raise
# ============= TRAFFIC ROADMAP =============
async def generate_traffic_improvement_roadmap(
    self,
    current_metrics: Dict[str, Any],
    identified_opportunities: List[Dict[str, Any]],
    implementation_timeline_weeks: int = 12
) -> Dict[str, Any]:
    """
    Generate phased roadmap for traffic improvement with revenue impact.

    Args:
        current_metrics: Current traffic/conversion metrics
        identified_opportunities: List of improvement opportunities
        implementation_timeline_weeks: Timeline for implementation

    Returns:
        Dict with the keys 'status', 'timeline_weeks', 'current_traffic'
        (read from current_metrics['organic_traffic'], 0 when absent),
        'projected_traffic', 'phases' and 'generated_at'.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    try:
        logger.info(f"Generating traffic roadmap for {implementation_timeline_weeks} weeks")

        # Build roadmap context
        context = self._build_roadmap_context(
            current_metrics, identified_opportunities, implementation_timeline_weeks
        )

        # Generate roadmap prompt
        prompt = self._build_roadmap_prompt(context)

        # Call LLM
        roadmap_json = await self._call_llm_for_json(
            prompt=prompt,
            context_type="traffic_roadmap"
        )

        # Parse and structure roadmap
        phases = self._parse_roadmap_response(roadmap_json)

        result = {
            'status': 'completed',
            'timeline_weeks': implementation_timeline_weeks,
            'current_traffic': current_metrics.get('organic_traffic', 0),
            'projected_traffic': self._calculate_projected_traffic(phases),
            'phases': phases,
            'generated_at': datetime.utcnow().isoformat()
        }
        return result
    except Exception as e:
        # loguru formats the message with any keyword arguments, so
        # `exc_info=True` logged no traceback and could raise on error text
        # containing braces; logger.exception avoids both problems.
        logger.exception("Traffic roadmap generation failed: {}", e)
        raise
# ============= COMPETITIVE INSIGHTS =============
async def generate_competitive_insights(
    self,
    primary_site_analysis: Dict[str, Any],
    competitor_analyses: List[Dict[str, Any]]
) -> Dict[str, Any]:
    """
    Generate competitive positioning insights and gap analysis.

    Args:
        primary_site_analysis: Analysis of primary website
        competitor_analyses: List of competitor analyses

    Returns:
        Dict with the keys 'status', 'competitors_analyzed',
        'competitive_positioning', 'opportunities', 'threats' and
        'generated_at'.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    try:
        logger.info(f"Generating competitive insights vs {len(competitor_analyses)} competitors")

        # Build competitive context
        context = self._build_competitive_context(
            primary_site_analysis, competitor_analyses
        )

        # Generate competitive prompt
        prompt = self._build_competitive_insights_prompt(context)

        # Call LLM
        competitive_json = await self._call_llm_for_json(
            prompt=prompt,
            context_type="competitive_insights"
        )

        # Parse competitive insights
        insights = self._parse_competitive_response(competitive_json)

        result = {
            'status': 'completed',
            'competitors_analyzed': len(competitor_analyses),
            'competitive_positioning': insights,
            'opportunities': self._identify_competitive_opportunities(insights),
            'threats': self._identify_competitive_threats(insights),
            'generated_at': datetime.utcnow().isoformat()
        }
        return result
    except Exception as e:
        # loguru formats the message with any keyword arguments, so
        # `exc_info=True` logged no traceback and could raise on error text
        # containing braces; logger.exception avoids both problems.
        logger.exception("Competitive insights generation failed: {}", e)
        raise
# ============= PRIORITIZED RECOMMENDATIONS =============
async def generate_prioritized_recommendations(
    self,
    all_recommendations: List[Dict[str, Any]],
    business_context: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Ask the LLM to score recommendations, then bucket them by priority score.

    Args:
        all_recommendations: All raw recommendations
        business_context: Business goals and constraints

    Returns:
        Dict with status, total count, the three priority buckets
        (score >= 8, 6 <= score < 8, score < 6), the full list and a timestamp

    Raises:
        Exception: any failure is logged with its traceback and re-raised
    """
    try:
        logger.info(f"Prioritizing {len(all_recommendations)} recommendations")

        # Pipeline: context -> prompt -> LLM -> parsed recommendations
        prioritization_context = self._build_prioritization_context(
            all_recommendations, business_context
        )
        llm_prompt = self._build_prioritization_prompt(prioritization_context)
        raw_response = await self._call_llm_for_json(
            prompt=llm_prompt,
            context_type="prioritized_recommendations"
        )
        scored = self._parse_prioritized_response(raw_response)

        total = len(scored)
        # Single pass over the scored items; a missing score counts as 0
        quick_wins, high_impact, long_term = [], [], []
        for rec in scored:
            score = rec.get('priority_score', 0)
            if score >= 8:
                quick_wins.append(rec)
            elif 6 <= score:
                high_impact.append(rec)
            elif score < 6:
                long_term.append(rec)

        return {
            'status': 'completed',
            'total_recommendations': total,
            'quick_wins': quick_wins,
            'high_impact': high_impact,
            'long_term': long_term,
            'recommendations': scored,
            'generated_at': datetime.utcnow().isoformat()
        }
    except Exception as exc:
        logger.error(f"Recommendation prioritization failed: {exc}", exc_info=True)
        raise
# ============= QUICK WINS =============
async def generate_quick_wins(
    self,
    audit_data: Dict[str, Any],
    max_days_to_implement: int = 7
) -> Dict[str, Any]:
    """
    Ask the LLM for quick wins: high-impact items doable in a short timeframe.

    Args:
        audit_data: Complete audit data
        max_days_to_implement: Maximum days for an item to count as a quick win

    Returns:
        Dict with status, number of wins, summed estimated traffic gain,
        the wins themselves, the wins ordered for implementation and a timestamp

    Raises:
        Exception: any failure is logged with its traceback and re-raised
    """
    try:
        logger.info(f"Generating quick wins (max {max_days_to_implement} days)")

        # Pipeline: context -> prompt -> LLM -> parsed wins
        wins_context = self._build_quick_wins_context(audit_data, max_days_to_implement)
        llm_prompt = self._build_quick_wins_prompt(wins_context)
        raw_response = await self._call_llm_for_json(
            prompt=llm_prompt,
            context_type="quick_wins"
        )
        parsed_wins = self._parse_quick_wins_response(raw_response)

        win_count = len(parsed_wins)
        # Wins without an estimate contribute 0 to the total
        traffic_total = sum(win.get('estimated_traffic_gain', 0) for win in parsed_wins)
        ordered_wins = self._order_quick_wins(parsed_wins)

        return {
            'status': 'completed',
            'quick_wins_identified': win_count,
            'total_potential_traffic': traffic_total,
            'quick_wins': parsed_wins,
            'implementation_order': ordered_wins,
            'generated_at': datetime.utcnow().isoformat()
        }
    except Exception as exc:
        logger.error(f"Quick wins generation failed: {exc}", exc_info=True)
        raise
# ============= KEYWORD EXPANSION =============
async def generate_keyword_expansion(
    self,
    current_keywords: List[str],
    content_analysis: Dict[str, Any],
    target_difficulty: Optional[str] = None
) -> Dict[str, Any]:
    """
    Ask the LLM for related keywords and group them by difficulty.

    Args:
        current_keywords: Current target keywords
        content_analysis: Content analysis data
        target_difficulty: Preferred difficulty level (low, medium, high)

    Returns:
        Dict with status, original/expanded keyword counts, the new
        keywords, their grouping by difficulty and a timestamp

    Raises:
        Exception: any failure is logged with its traceback and re-raised
    """
    try:
        logger.info(f"Generating keyword expansion from {len(current_keywords)} keywords")

        # Pipeline: context -> prompt -> LLM -> parsed keywords
        keyword_context = self._build_keyword_context(
            current_keywords, content_analysis, target_difficulty
        )
        llm_prompt = self._build_keyword_expansion_prompt(keyword_context)
        raw_response = await self._call_llm_for_json(
            prompt=llm_prompt,
            context_type="keyword_expansion"
        )
        new_keywords = self._parse_keyword_response(raw_response)

        original_count = len(current_keywords)
        expanded_count = len(new_keywords)
        by_difficulty = self._categorize_by_difficulty(new_keywords)

        return {
            'status': 'completed',
            'original_keywords': original_count,
            'expanded_keywords': expanded_count,
            'new_keywords': new_keywords,
            'categorized_by_difficulty': by_difficulty,
            'generated_at': datetime.utcnow().isoformat()
        }
    except Exception as exc:
        logger.error(f"Keyword expansion generation failed: {exc}", exc_info=True)
        raise
# ============= HELPER METHODS =============
async def _call_llm_for_json(
    self,
    prompt: str,
    context_type: str,
    max_tokens: int = 2000
) -> str:
    """Send the prompt to the LLM and return the JSON object embedded in its reply.

    String replies are searched for the span from the first '{' to the last
    '}'; if no such span exists, or the reply is not a string, the reply is
    returned unchanged. Failures are logged and re-raised.
    """
    try:
        # Instructions steering the model towards parseable JSON output
        system_prompt = """You are an expert SEO strategist and data analyst.
Generate detailed, actionable JSON responses with specific metrics and recommendations.
Ensure all responses are valid JSON that can be parsed."""

        # NOTE(review): llm_text_gen is called without await; if it blocks,
        # the event loop stalls for the duration of the call - confirm.
        llm_output = llm_text_gen(
            prompt=prompt,
            system_prompt=system_prompt,
            user_id=None,
            preferred_provider="claude",
            flow_type=f"seo_{context_type}",
            max_tokens=max_tokens,
            temperature=0.7
        )

        if not isinstance(llm_output, str):
            return llm_output

        # Strip any prose or code fences wrapped around the JSON object
        import re
        embedded = re.search(r'\{[\s\S]*\}', llm_output)
        return embedded.group(0) if embedded else llm_output
    except Exception as exc:
        logger.error(f"LLM call failed for {context_type}: {exc}")
        raise
def _build_audit_context(
self,
website_url: str,
audit_results: Dict[str, Any],
keywords: Optional[List[str]]
) -> str:
"""Build context string for audit insights"""
score = audit_results.get('overall_score', 0)
status = "strong" if score >= 70 else "moderate" if score >= 50 else "needs improvement"
return f"""
Website: {website_url}
Overall Audit Score: {score}/100 ({status})
Target Keywords: {', '.join(keywords) if keywords else 'Not specified'}
Components Analyzed: {list(audit_results.get('component_scores', {}).keys())}
Priority Actions: {len(audit_results.get('priority_actions', []))}
"""
def _build_audit_insights_prompt(
self,
context: str,
overall_score: float,
component_scores: Dict[str, float]
) -> str:
"""Build prompt for audit insights generation"""
return f"""Based on this SEO audit data:
{context}
Component Scores: {json.dumps(component_scores, indent=2)}
Generate 5-7 specific, actionable AI insights in JSON format:
{{
"insights": [
{{
"title": "Insight Title",
"description": "Detailed description of the insight",
"priority_score": 8,
"estimated_traffic_impact": "15-25%",
"implementation_difficulty": "moderate",
"estimated_time_weeks": 2,
"steps": ["Step 1", "Step 2"],
"tools_required": ["Tool1"],
"expected_outcomes": ["Outcome1"]
}}
],
"summary": "Overall summary of insights"
}}"""
def _build_gsc_insights_prompt(
self,
context: str,
opportunities_count: int,
performance: Dict[str, Any]
) -> str:
"""Build prompt for GSC insights"""
return f"""Based on Google Search Console analysis:
{context}
Total Opportunities Identified: {opportunities_count}
Current Performance: {json.dumps(performance, indent=2)}
Generate strategic GSC insights in JSON format focusing on:
1. Quick fixes for high-volume keywords
2. Keywords ready to rank higher
3. Content expansion opportunities
4. Technical SEO issues
Return as JSON with same structure as audit insights."""
def _parse_insights_response(self, response_json: str) -> List[Dict[str, Any]]:
"""Parse LLM response into insights"""
try:
data = json.loads(response_json)
return data.get('insights', [])
except:
logger.warning("Could not parse insights response as JSON")
return []
def _generate_summary(
self,
insights: List[Dict[str, Any]],
overall_score: float
) -> str:
"""Generate summary of insights"""
if not insights:
return "No insights generated"
high_priority = sum(1 for i in insights if i.get('priority_score', 0) >= 8)
return f"{high_priority} high-priority insights identified for score improvement from {overall_score}/100"
def _build_gsc_context(
self,
gsc_analysis: Dict[str, Any],
website_url: str
) -> str:
"""Build GSC context for insights"""
perf = gsc_analysis.get('performance_overview', {})
return f"""
Website: {website_url}
Total Keywords Tracked: {perf.get('total_keywords_tracked', 0)}
Total Pages Indexed: {perf.get('total_pages_indexed', 0)}
Overall CTR: {perf.get('overall_ctr', 0):.2f}%
Average Position: {perf.get('average_position', 0):.1f}
"""
def _identify_gsc_focus_areas(self, insights: List[Dict[str, Any]]) -> List[str]:
"""Identify focus areas from GSC insights"""
focus_areas = set()
for insight in insights:
if "meta" in insight.get('title', '').lower():
focus_areas.add("Meta Tags Optimization")
if "ranking" in insight.get('title', '').lower():
focus_areas.add("Ranking Improvement")
if "content" in insight.get('title', '').lower():
focus_areas.add("Content Expansion")
return list(focus_areas)
def _build_content_strategy_context(
self,
current_content: Dict[str, Any],
content_gaps: List[str],
target_keywords: List[str],
competitor_content: Optional[Dict[str, Any]]
) -> str:
"""Build content strategy context"""
return f"""
Current Content Assets: {current_content.get('total_content', 0)} pieces
Content Gaps Identified: {len(content_gaps)}
Gaps: {', '.join(content_gaps[:5])}
Target Keywords: {', '.join(target_keywords)}
Competitor Content Items: {competitor_content.get('total_items', 0) if competitor_content else 'N/A'}
"""
def _build_content_strategy_prompt(self, context: str, gap_count: int) -> str:
"""Build content strategy prompt"""
return f"""Based on content analysis:
{context}
Create a 3-phase content strategy plan for addressing {gap_count} content gaps.
Return JSON with phases, specific content pieces, keywords per content, and expected traffic impact."""
def _parse_strategy_response(self, response: str) -> List[Dict[str, Any]]:
"""Parse strategy response"""
try:
return json.loads(response).get('strategy_insights', [])
except:
return []
def _create_content_roadmap(self, insights: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Create phased content roadmap"""
return [
{"phase": 1, "items": insights[:len(insights)//3], "timeline": "Weeks 1-4"},
{"phase": 2, "items": insights[len(insights)//3:2*len(insights)//3], "timeline": "Weeks 5-8"},
{"phase": 3, "items": insights[2*len(insights)//3:], "timeline": "Weeks 9-12"}
]
def _build_roadmap_context(
self,
current_metrics: Dict[str, Any],
opportunities: List[Dict[str, Any]],
timeline: int
) -> str:
"""Build roadmap context"""
return f"""
Current Traffic: {current_metrics.get('organic_traffic', 0)} monthly visits
Conversion Rate: {current_metrics.get('conversion_rate', 0):.2f}%
Opportunities Identified: {len(opportunities)}
Implementation Timeline: {timeline} weeks
"""
def _build_roadmap_prompt(self, context: str) -> str:
"""Build roadmap generation prompt"""
return f"""Create a detailed traffic improvement roadmap:
{context}
Generate phases with:
- Specific actions
- Expected traffic gains
- Dependencies
- Resource requirements
- Success metrics
Return as JSON with phase details and projections."""
def _parse_roadmap_response(self, response: str) -> List[Dict[str, Any]]:
"""Parse roadmap response"""
try:
return json.loads(response).get('phases', [])
except:
return []
def _calculate_projected_traffic(self, phases: List[Dict[str, Any]]) -> int:
"""Calculate total projected traffic from phases"""
total = 0
for phase in phases:
if 'projected_traffic_gain' in phase:
total += phase['projected_traffic_gain']
return total
def _build_competitive_context(
self,
primary: Dict[str, Any],
competitors: List[Dict[str, Any]]
) -> str:
"""Build competitive analysis context"""
return f"""
Primary Site Score: {primary.get('score', 0)}/100
Competitors: {len(competitors)}
Average Competitor Score: {sum(c.get('score', 0) for c in competitors) / len(competitors) if competitors else 0:.1f}/100
"""
def _build_competitive_insights_prompt(self, context: str) -> str:
"""Build competitive insights prompt"""
return f"""Analyze competitive positioning:
{context}
Identify:
1. Competitive advantages
2. Competitive gaps
3. Market opportunities
4. Threat areas
Return as JSON with detailed analysis."""
def _parse_competitive_response(self, response: str) -> Dict[str, Any]:
"""Parse competitive response"""
try:
return json.loads(response)
except:
return {}
def _identify_competitive_opportunities(self, insights: Dict[str, Any]) -> List[str]:
"""Extract competitive opportunities"""
return insights.get('opportunities', [])
def _identify_competitive_threats(self, insights: Dict[str, Any]) -> List[str]:
"""Extract competitive threats"""
return insights.get('threats', [])
def _build_prioritization_context(
self,
recommendations: List[Dict[str, Any]],
business: Dict[str, Any]
) -> str:
"""Build prioritization context"""
return f"""
Total Recommendations: {len(recommendations)}
Business Goals: {business.get('goals', [])}
Budget: {business.get('budget', 'Not specified')}
Timeline: {business.get('timeline', 'Not specified')}
"""
def _build_prioritization_prompt(self, context: str) -> str:
"""Build prioritization prompt"""
return f"""Prioritize recommendations by impact and effort:
{context}
Score each 1-10 on:
- Impact
- Effort required
- Timeline
- Business alignment
Return JSON with prioritized list and scoring."""
def _parse_prioritized_response(self, response: str) -> List[Dict[str, Any]]:
"""Parse prioritized recommendations"""
try:
return json.loads(response).get('recommendations', [])
except:
return []
def _build_quick_wins_context(
self,
audit_data: Dict[str, Any],
max_days: int
) -> str:
"""Build quick wins context"""
return f"""
Maximum Days to Implement: {max_days}
Focus on:
- High traffic potential
- Low effort
- Clear ROI
- Quick implementation
"""
def _build_quick_wins_prompt(self, context: str) -> str:
"""Build quick wins prompt"""
return f"""Identify quick wins from audit:
{context}
Return JSON with wins ranked by (impact × effort) score."""
def _parse_quick_wins_response(self, response: str) -> List[Dict[str, Any]]:
"""Parse quick wins response"""
try:
return json.loads(response).get('quick_wins', [])
except:
return []
def _order_quick_wins(self, wins: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Order quick wins by priority"""
return sorted(wins, key=lambda x: x.get('priority_score', 0), reverse=True)
def _build_keyword_context(
self,
keywords: List[str],
content_analysis: Dict[str, Any],
target_difficulty: Optional[str]
) -> str:
"""Build keyword expansion context"""
return f"""
Current Keywords: {', '.join(keywords)}
Content Quality Score: {content_analysis.get('quality_score', 0)}/100
Target Difficulty: {target_difficulty or 'Mixed'}
"""
def _build_keyword_expansion_prompt(self, context: str) -> str:
"""Build keyword expansion prompt"""
return f"""Expand keyword list based on:
{context}
Suggest 15-20 related keywords with:
- Difficulty estimate
- Volume estimate
- Relevance to current keywords
- Content opportunity
Return as JSON."""
def _parse_keyword_response(self, response: str) -> List[Dict[str, Any]]:
"""Parse keyword response"""
try:
return json.loads(response).get('keywords', [])
except:
return []
def _categorize_by_difficulty(self, keywords: List[Dict[str, Any]]) -> Dict[str, List[str]]:
"""Categorize keywords by difficulty"""
return {
'easy': [k.get('keyword', '') for k in keywords if k.get('difficulty', 'medium') == 'low'],
'medium': [k.get('keyword', '') for k in keywords if k.get('difficulty', 'medium') == 'medium'],
'hard': [k.get('keyword', '') for k in keywords if k.get('difficulty', 'medium') == 'high']
}
async def health_check(self) -> Dict[str, Any]:
    """Report a static 'operational' status with service name, version and a UTC timestamp"""
    report: Dict[str, Any] = {'status': 'operational'}
    report['service'] = self.service_name
    report['version'] = self.version
    report['llm_integration'] = 'available'
    # Naive UTC time, ISO-8601 formatted
    report['last_check'] = datetime.utcnow().isoformat()
    return report