feat: image generation overhaul (model-aware text, dim clamping, $0.30 pricing), event-driven dashboard cache invalidation, SEO insights (AI visibility, GSC, keyword gap), YouTube OAuth/publish, blog writer & content planning improvements, scheduler monitoring updates
This commit is contained in:
@@ -341,9 +341,35 @@ class ActiveStrategyService:
|
||||
|
||||
def has_active_strategies_with_tasks(self) -> bool:
    """
    Check whether any active strategy has at least one active monitoring task.

    Uses SQL EXISTS for efficiency instead of COUNT: the database can stop at
    the first matching row instead of counting all of them.

    NOTE(review): the query below applies no per-user filter; if this check
    is meant to be scoped to a single user, confirm where that scoping
    happens.

    Returns:
        True if a matching strategy/task pair exists.
        False if none exists or no database session is available.
        True if the check itself raises, so callers over-check rather than
        silently skipping monitoring work.
    """
    try:
        if not self.db_session:
            logger.warning("Database session not available")
            return False

        # Imported lazily, as in the surrounding service code.
        from sqlalchemy import exists, and_
        from models.monitoring_models import MonitoringTask

        # Use EXISTS for efficiency: short-circuits on first match.
        # SQLAlchemy infers FROM clause from the column references in WHERE.
        stmt = exists().where(
            and_(
                StrategyActivationStatus.strategy_id == EnhancedContentStrategy.id,
                MonitoringTask.strategy_id == EnhancedContentStrategy.id,
                StrategyActivationStatus.status == 'active',
                MonitoringTask.status == 'active',
            )
        )

        result = self.db_session.query(stmt).scalar()
        return bool(result)

    except Exception as e:
        logger.error(f"Error checking active strategies with tasks: {e}")
        return True  # safer to over-check on error
|
||||
194
backend/services/blog_writer/outline/keyword_curator.py
Normal file
194
backend/services/blog_writer/outline/keyword_curator.py
Normal file
@@ -0,0 +1,194 @@
|
||||
"""
|
||||
Keyword Curator - Smart keyword selection engine for SEO-optimized outline generation.
|
||||
|
||||
Instead of dumping all discovered keywords into the LLM prompt (which causes
|
||||
keyword stuffing and dilutes topical focus), this module selects a highly
|
||||
curated subset based on SEO best practices and assigns each keyword a
|
||||
specific structural role in the outline.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
|
||||
class KeywordCurator:
    """
    Curates a strict, minimal keyword set for outline generation.

    Selection Rules (SEO Best Practice):
    1. Primary (H1 Focus) → top 2 — brand name + core topic
    2. Secondary (H2 Focus) → top 2 — feature/benefit anchors
    3. Long-tail (H3 Focus) → top 2 — informational intent phrases
    4. Semantic (Body Context) → top 4 — prevent topical drift
    5. Trending (Mention) → top 2 — brief contextual mentions
    6. Content Gap (Edge) → top 1 — competitive differentiator
    """

    # How many keywords to select from each category
    SLOTS: Dict[str, int] = {
        "primary": 2,
        "secondary": 2,
        "long_tail": 2,
        "semantic": 4,
        "trending": 2,
        "content_gap": 1,
    }

    # Internal slot name -> field name used in the raw keyword_analysis dict.
    _SOURCE_KEYS: Dict[str, str] = {
        "primary": "primary",
        "secondary": "secondary",
        "long_tail": "long_tail",
        "semantic": "semantic_keywords",
        "trending": "trending_terms",
        "content_gap": "content_gaps",
    }

    # Limit applied when a slot has no entry in SLOTS.
    _DEFAULT_LIMIT = 5

    def curate(
        self,
        keyword_analysis: Dict[str, Any],
    ) -> Dict[str, Any]:
        """
        Apply selection rules and return a structured, minimal keyword payload.

        Args:
            keyword_analysis: Raw keyword_analysis dict from research
                (keys: primary, secondary, long_tail,
                semantic_keywords, trending_terms, content_gaps, ...).
                ``None`` or any non-dict value is treated as an empty analysis.

        Returns:
            Dict with one curated list per slot, a de-duplicated
            ``locked_keywords`` list, a ``stats`` dict, and the
            ``search_intent`` / ``difficulty`` / ``analysis_insights`` fields
            copied over when present.
        """
        analysis: Dict[str, Any] = keyword_analysis if isinstance(keyword_analysis, dict) else {}

        # --- Select from keyword lists ---
        curated: Dict[str, Any] = {
            slot: self._pick(analysis, raw_key, slot_key=slot)
            for slot, raw_key in self._SOURCE_KEYS.items()
        }

        # --- Build a flat "locked" set for quick reference ---
        # A keyword may legitimately appear in more than one group; list it once.
        locked: List[str] = []
        seen = set()
        for slot in self._SOURCE_KEYS:
            for keyword in curated[slot]:
                marker = keyword.casefold()
                if marker not in seen:
                    seen.add(marker)
                    locked.append(keyword)

        # --- Track counts for transparency ---
        total_raw = 0
        total_curated = 0
        for slot in self.SLOTS:
            raw_list = analysis.get(self._source_key(slot), [])
            if isinstance(raw_list, list):
                total_raw += len(raw_list)
            curated_list = curated.get(slot, [])
            if isinstance(curated_list, list):
                total_curated += len(curated_list)

        # With nothing to reduce, report 0% rather than a misleading 100%.
        if total_raw > 0:
            reduction_pct = round((1 - total_curated / total_raw) * 100, 1)
        else:
            reduction_pct = 0.0

        curated["locked_keywords"] = locked
        curated["stats"] = {
            "total_raw": total_raw,
            "total_curated": total_curated,
            "reduction_pct": reduction_pct,
        }

        # --- Preserve non-keyword analysis fields ---
        for field in ("search_intent", "difficulty", "analysis_insights"):
            if field in analysis:
                curated[field] = analysis[field]

        return curated

    def format_for_prompt(self, curated: Dict[str, Any]) -> str:
        """
        Format the curated keyword payload into a strict structural prompt section.

        Args:
            curated: Payload in the shape produced by ``curate``.

        Returns:
            A string ready to be injected into the outline prompt.
        """
        lines: List[str] = []
        lines.append("## KEYWORD PLACEMENT DIRECTIVES\n")

        # H1 — primary
        primary = curated.get("primary", [])
        if primary:
            h1_text = " | ".join(primary)
            lines.append(f"### H1 (must contain, in order of priority): {h1_text}")
            lines.append(" → Anchor the title and main heading on these terms.")
        else:
            lines.append("### H1: No primary keywords provided — derive from topic context.")

        # H2 — secondary
        secondary = curated.get("secondary", [])
        if secondary:
            lines.append(f"### H2 sections must anchor on (one per major section): {', '.join(secondary)}")
            lines.append(" → Each secondary keyword should map to a distinct H2 section.")

        # H3 — long-tail
        long_tail = curated.get("long_tail", [])
        if long_tail:
            lines.append(f"### H3 / Subsection anchors for informational intent: {', '.join(long_tail)}")
            lines.append(" → Use these as deeper-dive subsections under the relevant H2.")

        # Body-level — semantic
        semantic = curated.get("semantic", [])
        if semantic:
            lines.append(f"### Body-level semantic signals (use naturally, max 1-2 mentions each): {', '.join(semantic)}")
            lines.append(" → These prevent topical drift. Weave into paragraph text, not headings.")

        # Trending — brief
        trending = curated.get("trending", [])
        if trending:
            lines.append(f"### Trending context (mention subtly if relevant): {', '.join(trending)}")
            lines.append(" → Optional. Only include if it strengthens timeliness/narrative.")

        # Content gap — competitive edge (only the top entry is surfaced)
        content_gap = curated.get("content_gap", [])
        if content_gap:
            lines.append(f"### Competitive advantage signal (must weave into narrative): {content_gap[0]}")
            lines.append(" → This is your primary differentiation hook. Surface it prominently in the unique value section.")

        lines.append("")
        lines.append("GUIDELINE: Treat these as the primary keyword anchors. You may include closely related")
        lines.append("intent-matching variations where natural, but avoid inserting every raw research keyword.")
        lines.append("Quality over density — each keyword earns its place by serving a clear structural purpose.")

        stats = curated.get("stats", {})
        if stats:
            lines.append(
                f"\n[From {stats.get('total_raw', '?')} raw research keywords "
                f"→ curated to {stats.get('total_curated', '?')} locked keywords "
                f"({stats.get('reduction_pct', '?')}% reduction)]"
            )

        return "\n".join(lines)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _source_key(slot_key: str) -> str:
        """Map internal slot key to the actual field name in keyword_analysis."""
        return KeywordCurator._SOURCE_KEYS.get(slot_key, slot_key)

    def _pick(
        self,
        data: Dict[str, Any],
        source_key: str,
        slot_key: Optional[str] = None,
    ) -> List[str]:
        """
        Pick up to N usable keywords from a keyword list, preserving order.

        Entries that are not strings, are blank, or repeat an earlier entry
        (compared case-insensitively after trimming) are skipped so they do
        not consume a slot.

        Args:
            data: The raw keyword_analysis dict.
            source_key: The actual key in the dict (e.g. 'semantic_keywords').
            slot_key: The internal slot name for looking up the limit.
                Falls back to source_key if not provided.

        Returns:
            List of at most N trimmed strings; empty if the field is missing
            or is not a list.
        """
        limit = self.SLOTS.get(slot_key or source_key, self._DEFAULT_LIMIT)
        if not isinstance(data, dict):
            return []
        raw: Any = data.get(source_key, [])
        if not isinstance(raw, list):
            return []

        picked: List[str] = []
        seen = set()
        for item in raw:
            if len(picked) >= limit:
                break
            if not isinstance(item, str):
                continue
            keyword = item.strip()
            marker = keyword.casefold()
            if not keyword or marker in seen:
                continue
            seen.add(marker)
            picked.append(keyword)
        return picked
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
Metadata Collector - Handles collection and formatting of outline metadata.
|
||||
|
||||
Collects source mapping stats, grounding insights, optimization results, and research coverage.
|
||||
Collects source mapping stats, grounding insights, and research coverage.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List
|
||||
@@ -54,31 +54,6 @@ class MetadataCollector:
|
||||
quality_indicators=grounding_insights.get('quality_indicators')
|
||||
)
|
||||
|
||||
def collect_optimization_results(self, optimized_sections, focus):
    """
    Build the OptimizationResults summary shown in the UI.

    The quality score is the share of sections that have a heading,
    subheadings and key points, scaled to 0-10 and rounded to one decimal.
    The list of improvements is a fixed set of descriptions.

    Args:
        optimized_sections: Sized collection of section objects exposing
            ``heading``, ``subheadings`` and ``key_points``.
        focus: Value stored as ``optimization_focus`` on the result.

    Returns:
        An OptimizationResults instance.
    """
    from models.blog_models import OptimizationResults

    section_count = len(optimized_sections)

    # A section counts as complete only when all three parts are non-empty.
    filled_count = 0
    for item in optimized_sections:
        if item.heading and item.subheadings and item.key_points:
            filled_count += 1

    if section_count > 0:
        score = filled_count / section_count * 10
    else:
        score = 0.0

    return OptimizationResults(
        overall_quality_score=round(score, 1),
        improvements_made=[
            "Enhanced section headings for better SEO",
            "Optimized keyword distribution across sections",
            "Improved content flow and logical progression",
            "Balanced word count distribution",
            "Enhanced subheadings for better readability",
        ],
        optimization_focus=focus,
    )
|
||||
|
||||
def collect_research_coverage(self, research):
|
||||
"""Collect research coverage metrics for UI display."""
|
||||
from models.blog_models import ResearchCoverage
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
"""
|
||||
Outline Generator - AI-powered outline generation from research data.
|
||||
|
||||
Generates comprehensive, SEO-optimized outlines using research intelligence.
|
||||
Generates comprehensive, SEO-optimized outlines using research intelligence
|
||||
and a keyword-curation engine that prevents keyword stuffing.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Tuple
|
||||
@@ -23,6 +24,7 @@ from .metadata_collector import MetadataCollector
|
||||
from .prompt_builder import PromptBuilder
|
||||
from .response_processor import ResponseProcessor
|
||||
from .parallel_processor import ParallelProcessor
|
||||
from .keyword_curator import KeywordCurator
|
||||
|
||||
|
||||
class OutlineGenerator:
|
||||
@@ -41,6 +43,14 @@ class OutlineGenerator:
|
||||
self.prompt_builder = PromptBuilder()
|
||||
self.response_processor = ResponseProcessor()
|
||||
self.parallel_processor = ParallelProcessor(self.source_mapper, self.grounding_engine)
|
||||
|
||||
# Keyword curation engine
|
||||
self.keyword_curator = KeywordCurator()
|
||||
|
||||
def _curate_keywords(self, research) -> Dict[str, Any]:
    """
    Run keyword curation on the research data's keyword_analysis.

    Args:
        research: Research object carrying a ``keyword_analysis`` dict.
            May be ``None``; a missing or ``None`` ``keyword_analysis`` is
            treated as an empty analysis instead of being forwarded as-is.

    Returns:
        Whatever ``self.keyword_curator.curate`` returns for that analysis.
    """
    raw_analysis = getattr(research, "keyword_analysis", None) or {}
    return self.keyword_curator.curate(raw_analysis)
|
||||
|
||||
async def generate(self, request: BlogOutlineRequest, user_id: str) -> BlogOutlineResponse:
|
||||
"""
|
||||
@@ -59,18 +69,24 @@ class OutlineGenerator:
|
||||
# Extract research insights
|
||||
research = request.research
|
||||
primary_keywords = research.keyword_analysis.get('primary', [])
|
||||
secondary_keywords = research.keyword_analysis.get('secondary', [])
|
||||
content_angles = research.suggested_angles
|
||||
sources = research.sources
|
||||
search_intent = research.keyword_analysis.get('search_intent', 'informational')
|
||||
|
||||
# Curate keywords — reduces 40+ raw keywords to ~13 locked, role-assigned keywords
|
||||
curated_keywords = self._curate_keywords(research)
|
||||
|
||||
# Check for custom instructions
|
||||
custom_instructions = getattr(request, 'custom_instructions', None)
|
||||
# Selected (prioritized) content angle and competitive advantage, if any
|
||||
selected_content_angle = getattr(request, 'selected_content_angle', None)
|
||||
selected_competitive_advantage = getattr(request, 'selected_competitive_advantage', None)
|
||||
|
||||
# Build comprehensive outline generation prompt with rich research data
|
||||
# Build comprehensive outline generation prompt with curated keyword payload
|
||||
outline_prompt = self.prompt_builder.build_outline_prompt(
|
||||
primary_keywords, secondary_keywords, content_angles, sources,
|
||||
search_intent, request, custom_instructions
|
||||
curated_keywords, content_angles, sources,
|
||||
search_intent, request, custom_instructions, selected_content_angle,
|
||||
selected_competitive_advantage
|
||||
)
|
||||
|
||||
logger.info("Generating AI-powered outline using research results")
|
||||
@@ -107,7 +123,7 @@ class OutlineGenerator:
|
||||
ai_title_options = outline_data.get('title_options', [])
|
||||
content_angle_titles = self.title_generator.extract_content_angle_titles(research)
|
||||
|
||||
# Combine AI-generated titles with content angles
|
||||
# Combine AI-generated titles with content angles (full primary keywords for title variety)
|
||||
title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords)
|
||||
|
||||
logger.info(f"Generated optimized outline with {len(balanced_sections)} sections and {len(title_options)} title options")
|
||||
@@ -115,7 +131,6 @@ class OutlineGenerator:
|
||||
# Collect metadata for enhanced UI
|
||||
source_mapping_stats = self.metadata_collector.collect_source_mapping_stats(mapped_sections, research)
|
||||
grounding_insights_data = self.metadata_collector.collect_grounding_insights(grounding_insights)
|
||||
optimization_results = self.metadata_collector.collect_optimization_results(optimized_sections, "comprehensive optimization")
|
||||
research_coverage = self.metadata_collector.collect_research_coverage(research)
|
||||
|
||||
return BlogOutlineResponse(
|
||||
@@ -124,7 +139,6 @@ class OutlineGenerator:
|
||||
outline=balanced_sections,
|
||||
source_mapping_stats=source_mapping_stats,
|
||||
grounding_insights=grounding_insights_data,
|
||||
optimization_results=optimization_results,
|
||||
research_coverage=research_coverage
|
||||
)
|
||||
|
||||
@@ -148,20 +162,26 @@ class OutlineGenerator:
|
||||
# Extract research insights
|
||||
research = request.research
|
||||
primary_keywords = research.keyword_analysis.get('primary', [])
|
||||
secondary_keywords = research.keyword_analysis.get('secondary', [])
|
||||
content_angles = research.suggested_angles
|
||||
sources = research.sources
|
||||
search_intent = research.keyword_analysis.get('search_intent', 'informational')
|
||||
|
||||
# Curate keywords — reduces 40+ raw keywords to ~13 locked, role-assigned keywords
|
||||
curated_keywords = self._curate_keywords(research)
|
||||
|
||||
# Check for custom instructions
|
||||
custom_instructions = getattr(request, 'custom_instructions', None)
|
||||
# Selected (prioritized) content angle and competitive advantage, if any
|
||||
selected_content_angle = getattr(request, 'selected_content_angle', None)
|
||||
selected_competitive_advantage = getattr(request, 'selected_competitive_advantage', None)
|
||||
|
||||
await task_manager.update_progress(task_id, "📊 Analyzing research data and building content strategy...")
|
||||
|
||||
# Build comprehensive outline generation prompt with rich research data
|
||||
# Build comprehensive outline generation prompt with curated keyword payload
|
||||
outline_prompt = self.prompt_builder.build_outline_prompt(
|
||||
primary_keywords, secondary_keywords, content_angles, sources,
|
||||
search_intent, request, custom_instructions
|
||||
curated_keywords, content_angles, sources,
|
||||
search_intent, request, custom_instructions, selected_content_angle,
|
||||
selected_competitive_advantage
|
||||
)
|
||||
|
||||
await task_manager.update_progress(task_id, "🤖 Generating AI-powered outline with research insights...")
|
||||
@@ -203,7 +223,7 @@ class OutlineGenerator:
|
||||
ai_title_options = outline_data.get('title_options', [])
|
||||
content_angle_titles = self.title_generator.extract_content_angle_titles(research)
|
||||
|
||||
# Combine AI-generated titles with content angles
|
||||
# Combine AI-generated titles with content angles (full primary keywords for title variety)
|
||||
title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords)
|
||||
|
||||
await task_manager.update_progress(task_id, "✅ Outline generation and optimization completed successfully!")
|
||||
@@ -211,7 +231,6 @@ class OutlineGenerator:
|
||||
# Collect metadata for enhanced UI
|
||||
source_mapping_stats = self.metadata_collector.collect_source_mapping_stats(mapped_sections, research)
|
||||
grounding_insights_data = self.metadata_collector.collect_grounding_insights(grounding_insights)
|
||||
optimization_results = self.metadata_collector.collect_optimization_results(optimized_sections, "comprehensive optimization")
|
||||
research_coverage = self.metadata_collector.collect_research_coverage(research)
|
||||
|
||||
return BlogOutlineResponse(
|
||||
@@ -220,7 +239,6 @@ class OutlineGenerator:
|
||||
outline=balanced_sections,
|
||||
source_mapping_stats=source_mapping_stats,
|
||||
grounding_insights=grounding_insights_data,
|
||||
optimization_results=optimization_results,
|
||||
research_coverage=research_coverage
|
||||
)
|
||||
|
||||
@@ -320,4 +338,3 @@ class OutlineGenerator:
|
||||
return insights
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
"""
|
||||
Prompt Builder - Handles building of AI prompts for outline generation.
|
||||
|
||||
Constructs comprehensive prompts with research data, keywords, and strategic requirements.
|
||||
Constructs comprehensive prompts using curated keyword payloads,
|
||||
research data, and strategic requirements.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class PromptBuilder:
|
||||
@@ -14,53 +16,105 @@ class PromptBuilder:
|
||||
"""Initialize the prompt builder."""
|
||||
pass
|
||||
|
||||
def build_outline_prompt(self, primary_keywords: List[str], secondary_keywords: List[str],
|
||||
def build_outline_prompt(self, curated_keywords: Dict[str, Any],
|
||||
content_angles: List[str], sources: List, search_intent: str,
|
||||
request, custom_instructions: str = None) -> str:
|
||||
"""Build the comprehensive outline generation prompt using filtered research data."""
|
||||
request, custom_instructions: str = None,
|
||||
selected_content_angle: str = None,
|
||||
selected_competitive_advantage: str = None) -> str:
|
||||
"""Build the comprehensive outline generation prompt using curated keyword payload."""
|
||||
|
||||
# Use the filtered research data (already cleaned by ResearchDataFilter)
|
||||
research = request.research
|
||||
|
||||
primary_kw_text = ', '.join(primary_keywords) if primary_keywords else (request.topic or ', '.join(getattr(request.research, 'original_keywords', []) or ['the target topic']))
|
||||
secondary_kw_text = ', '.join(secondary_keywords) if secondary_keywords else "None provided"
|
||||
long_tail_text = ', '.join(research.keyword_analysis.get('long_tail', [])) if research and research.keyword_analysis else "None discovered"
|
||||
semantic_text = ', '.join(research.keyword_analysis.get('semantic_keywords', [])) if research and research.keyword_analysis else "None discovered"
|
||||
trending_text = ', '.join(research.keyword_analysis.get('trending_terms', [])) if research and research.keyword_analysis else "None discovered"
|
||||
content_gap_text = ', '.join(research.keyword_analysis.get('content_gaps', [])) if research and research.keyword_analysis else "None identified"
|
||||
primary_kw_text = ', '.join(curated_keywords.get('primary', [])) if curated_keywords.get('primary') else (request.topic or ', '.join(getattr(request.research, 'original_keywords', []) or ['the target topic']))
|
||||
secondary_kw_text = ', '.join(curated_keywords.get('secondary', [])) if curated_keywords.get('secondary') else "None provided"
|
||||
long_tail_text = ', '.join(curated_keywords.get('long_tail', [])) if curated_keywords.get('long_tail') else "None discovered"
|
||||
semantic_text = ', '.join(curated_keywords.get('semantic', [])) if curated_keywords.get('semantic') else "None discovered"
|
||||
trending_text = ', '.join(curated_keywords.get('trending', [])) if curated_keywords.get('trending') else "None discovered"
|
||||
content_gap_text = ', '.join(curated_keywords.get('content_gap', [])) if curated_keywords.get('content_gap') else "None identified"
|
||||
|
||||
content_angle_text = ', '.join(content_angles) if content_angles else "No explicit angles provided; infer compelling angles from research insights."
|
||||
competitor_text = ', '.join(research.competitor_analysis.get('top_competitors', [])) if research and research.competitor_analysis else "Not available"
|
||||
opportunity_text = ', '.join(research.competitor_analysis.get('opportunities', [])) if research and research.competitor_analysis else "Not available"
|
||||
advantages_text = ', '.join(research.competitor_analysis.get('competitive_advantages', [])) if research and research.competitor_analysis else "Not available"
|
||||
|
||||
# Extract additional UI-mapped context fields
|
||||
analysis_insights_text = (research.keyword_analysis.get('analysis_insights', '') or '') if research and research.keyword_analysis else ''
|
||||
market_positioning_text = (research.competitor_analysis.get('market_positioning', '') or '') if research and research.competitor_analysis else ''
|
||||
difficulty_score = research.keyword_analysis.get('difficulty', None) if research and research.keyword_analysis else None
|
||||
|
||||
# Build selected angle prominence section
|
||||
if selected_content_angle and selected_content_angle.strip():
|
||||
selected_angle_section = f"""
|
||||
PRIORITY CONTENT ANGLE (MUST PRIORITIZE):
|
||||
- This outline MUST be built around the following selected content angle as its primary lens and narrative framework:
|
||||
"{selected_content_angle}"
|
||||
- Every major section should connect back to this angle
|
||||
- Title options should reflect this angle
|
||||
- The overall narrative arc should follow this angle's implied storyline
|
||||
"""
|
||||
else:
|
||||
selected_angle_section = ""
|
||||
|
||||
# Build selected competitive advantage prominence section
|
||||
if selected_competitive_advantage and selected_competitive_advantage.strip():
|
||||
selected_advantage_section = f"""
|
||||
PRIORITY COMPETITIVE ADVANTAGE (MUST LEVERAGE):
|
||||
- This outline MUST prominently feature and leverage the following competitive advantage throughout the content:
|
||||
"{selected_competitive_advantage}"
|
||||
- Weave this advantage into key sections as a differentiator
|
||||
- Frame the solutions and recommendations around this advantage
|
||||
- Use this advantage to counter competitor weaknesses mentioned in research
|
||||
"""
|
||||
else:
|
||||
selected_advantage_section = ""
|
||||
|
||||
# Import and use the KeywordCurator for the directive section
|
||||
from .keyword_curator import KeywordCurator
|
||||
keyword_directives = KeywordCurator().format_for_prompt(curated_keywords)
|
||||
|
||||
current_date = datetime.now().strftime("%B %d, %Y")
|
||||
current_year = datetime.now().year
|
||||
|
||||
return f"""Create a comprehensive blog outline for: {primary_kw_text}
|
||||
|
||||
CONTEXT:
|
||||
Current Date: {current_date}
|
||||
Search Intent: {search_intent}
|
||||
{f"Keyword Difficulty: {difficulty_score}/10" if difficulty_score is not None else ""}
|
||||
Target: {request.word_count or 1500} words
|
||||
Industry: {getattr(request.persona, 'industry', 'General') if request.persona else 'General'}
|
||||
Audience: {getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'}
|
||||
|
||||
KEYWORDS:
|
||||
Primary: {primary_kw_text}
|
||||
Secondary: {secondary_kw_text}
|
||||
Long-tail: {long_tail_text}
|
||||
Semantic: {semantic_text}
|
||||
Trending: {trending_text}
|
||||
Content Gaps: {content_gap_text}
|
||||
OVERVIEW KEYWORD SUMMARY:
|
||||
- Primary: {primary_kw_text}
|
||||
- Secondary: {secondary_kw_text}
|
||||
- Long-tail: {long_tail_text}
|
||||
- Semantic: {semantic_text}
|
||||
- Trending: {trending_text}
|
||||
- Content Gap: {content_gap_text}
|
||||
|
||||
{keyword_directives}
|
||||
|
||||
RESEARCH INSIGHTS SYNTHESIS:
|
||||
{analysis_insights_text}
|
||||
|
||||
CONTENT ANGLES / STORYLINES: {content_angle_text}
|
||||
|
||||
{selected_angle_section}
|
||||
{selected_advantage_section}
|
||||
COMPETITIVE INTELLIGENCE:
|
||||
Top Competitors: {competitor_text}
|
||||
Market Opportunities: {opportunity_text}
|
||||
Competitive Advantages: {advantages_text}
|
||||
{f"Market Positioning: {market_positioning_text}" if market_positioning_text else ""}
|
||||
|
||||
RESEARCH SOURCES: {len(sources)} authoritative sources available
|
||||
|
||||
{f"CUSTOM INSTRUCTIONS: {custom_instructions}" if custom_instructions else ""}
|
||||
|
||||
STRATEGIC REQUIREMENTS:
|
||||
- MUST prioritize and anchor the outline around the selected content angle above all others
|
||||
- MUST highlight and leverage the selected competitive advantage as a key differentiator
|
||||
- Follow the KEYWORD PLACEMENT DIRECTIVES — treat the locked keywords as the minimum anchor set; you MAY include closely related intent-matching variations where natural
|
||||
- Create SEO-optimized headings with natural keyword integration
|
||||
- Surface the strongest research-backed angles within the outline
|
||||
- Build logical narrative flow from problem to solution
|
||||
@@ -78,11 +132,11 @@ Return JSON format:
|
||||
],
|
||||
"outline": [
|
||||
{{
|
||||
"heading": "Section heading with primary keyword",
|
||||
"heading": "Section heading",
|
||||
"subheadings": ["Subheading 1", "Subheading 2", "Subheading 3"],
|
||||
"key_points": ["Key point 1", "Key point 2", "Key point 3"],
|
||||
"target_words": 300,
|
||||
"keywords": ["primary keyword", "secondary keyword"]
|
||||
"keywords": ["keyword 1", "keyword 2"]
|
||||
}}
|
||||
]
|
||||
}}"""
|
||||
|
||||
@@ -76,8 +76,8 @@ class TitleGenerator:
|
||||
formatted_title += '.'
|
||||
|
||||
# Limit length to reasonable blog title size
|
||||
if len(formatted_title) > 100:
|
||||
formatted_title = formatted_title[:97] + "..."
|
||||
if len(formatted_title) > 200:
|
||||
formatted_title = formatted_title[:197] + "..."
|
||||
|
||||
return formatted_title
|
||||
|
||||
|
||||
@@ -155,7 +155,7 @@ class ResearchService:
|
||||
sources = raw_result.get('sources', [])
|
||||
search_widget = "" # Exa doesn't provide search widgets
|
||||
search_queries = raw_result.get('search_queries', [])
|
||||
grounding_metadata = None # Exa doesn't provide grounding metadata
|
||||
grounding_metadata = self._build_grounding_metadata_from_sources(sources, search_queries)
|
||||
|
||||
except RuntimeError as e:
|
||||
# Fail fast - no fallback for testing/debugging
|
||||
@@ -239,7 +239,7 @@ class ResearchService:
|
||||
sources = raw_result.get('sources', [])
|
||||
search_widget = "" # Tavily doesn't provide search widgets
|
||||
search_queries = raw_result.get('search_queries', [])
|
||||
grounding_metadata = None # Tavily doesn't provide grounding metadata
|
||||
grounding_metadata = self._build_grounding_metadata_from_sources(sources, search_queries)
|
||||
|
||||
except RuntimeError as e:
|
||||
# Fail fast - no fallback for testing/debugging
|
||||
@@ -482,7 +482,7 @@ class ResearchService:
|
||||
sources = raw_result.get('sources', []) or []
|
||||
search_widget = "" # Exa doesn't provide search widgets
|
||||
search_queries = raw_result.get('search_queries', []) or []
|
||||
grounding_metadata = None # Exa doesn't provide grounding metadata
|
||||
grounding_metadata = self._build_grounding_metadata_from_sources(sources, search_queries)
|
||||
|
||||
except RuntimeError as e:
|
||||
# Fail fast - no fallback for testing/debugging
|
||||
@@ -568,7 +568,7 @@ class ResearchService:
|
||||
sources = raw_result.get('sources', []) or []
|
||||
search_widget = "" # Tavily doesn't provide search widgets
|
||||
search_queries = raw_result.get('search_queries', []) or []
|
||||
grounding_metadata = None # Tavily doesn't provide grounding metadata
|
||||
grounding_metadata = self._build_grounding_metadata_from_sources(sources, search_queries)
|
||||
|
||||
except RuntimeError as e:
|
||||
# Fail fast - no fallback for testing/debugging
|
||||
@@ -728,6 +728,58 @@ class ResearchService:
|
||||
|
||||
return sources
|
||||
|
||||
def _build_grounding_metadata_from_sources(self, sources: List[Dict[str, Any]], search_queries: List[str]) -> Optional[GroundingMetadata]:
    """
    Build GroundingMetadata from Exa/Tavily sources (which lack native Google grounding).

    For every source this creates one grounding chunk, one support per
    highlight (or a single support from the excerpt when there are no
    highlights), and one inline citation pointing back at the source.

    Args:
        sources: Source dicts; the keys read are ``title``, ``url``,
            ``credibility_score``, ``highlights`` and ``excerpt``. Missing
            keys and explicit ``None`` values are both tolerated.
        search_queries: Queries recorded as ``web_search_queries``.

    Returns:
        A GroundingMetadata instance, or None when there are no sources.
    """
    if not sources:
        return None

    grounding_chunks = []
    grounding_supports = []
    citations = []

    for i, source in enumerate(sources):
        # A key that is present but None must not leak into the confidence
        # scores, so fall back to the same default as a missing key.
        score = source.get('credibility_score')
        if score is None:
            score = 0.85

        title = source.get('title') or ''
        excerpt = source.get('excerpt') or ''
        highlights = [h for h in (source.get('highlights') or []) if h]

        grounding_chunks.append(GroundingChunk(
            title=title or 'Untitled',
            url=source.get('url') or '',
            confidence_score=score,
        ))

        # Prefer highlights as supporting segments; otherwise use the excerpt.
        segments = highlights or ([excerpt] if excerpt else [])
        for segment in segments:
            grounding_supports.append(GroundingSupport(
                confidence_scores=[score],
                grounding_chunk_indices=[i],
                segment_text=segment,
            ))

        # Citation text: first highlight, else excerpt, else title.
        citation_text = highlights[0] if highlights else (excerpt or title)
        citations.append(Citation(
            citation_type='inline',
            # No character offsets are derived from the source here.
            start_index=0,
            end_index=0,
            text=str(citation_text)[:200],
            source_indices=[i],
            reference=f'Source {i + 1}',
        ))

    return GroundingMetadata(
        grounding_chunks=grounding_chunks,
        grounding_supports=grounding_supports,
        citations=citations,
        web_search_queries=search_queries or [],
    )
|
||||
|
||||
def _normalize_cached_research_data(self, cached_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Normalize cached research data to fix None values in confidence_scores.
|
||||
|
||||
@@ -207,6 +207,8 @@ def track_agent_usage_sync(user_id: str, model_name: str, prompt: str, response_
|
||||
})
|
||||
|
||||
db.commit()
|
||||
from services.subscription.cache import clear_dashboard_cache
|
||||
clear_dashboard_cache(user_id)
|
||||
logger.info(f"[AgentTracking] ✅ Usage tracked: {new_calls} calls, {cost_total} cost")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
@@ -57,6 +57,30 @@ class SIFBaseAgent(BaseALwrityAgent):
|
||||
if kwargs:
|
||||
logger.debug(f"[{self.__class__.__name__}] Parameters: {kwargs}")
|
||||
|
||||
async def _ensure_intelligence_ready(self) -> bool:
|
||||
"""Ensure txtai intelligence service is initialized without blocking the event loop."""
|
||||
try:
|
||||
await self.intelligence._ensure_initialized_async()
|
||||
except Exception as init_err:
|
||||
logger.warning(f"[{self.__class__.__name__}] Intelligence initialization failed: {init_err}")
|
||||
return False
|
||||
return bool(getattr(self.intelligence, "_initialized", False) and self.intelligence.embeddings)
|
||||
|
||||
async def initialize_async(self):
    """Async lifecycle hook: warm up the SIF index, then the local LLM.

    The LLM step only runs when the agent has an ``llm`` attribute that
    exposes ``ensure_initialized_async``. The readiness result of the
    intelligence service is not inspected here.
    """
    await self._ensure_intelligence_ready()

    model = getattr(self, "llm", None)
    supports_async_init = hasattr(model, "ensure_initialized_async")
    if supports_async_init:
        await model.ensure_initialized_async()

    logger.info(f"[{self.__class__.__name__}] Async initialization complete")
|
||||
|
||||
async def shutdown(self):
    """Async lifecycle hook: let the attached LLM release its resources.

    Does nothing beyond logging when the agent has no ``llm`` attribute or
    the LLM object has no ``shutdown`` method.
    """
    model = getattr(self, "llm", None)
    can_shut_down = hasattr(model, "shutdown")
    if can_shut_down:
        await model.shutdown()

    logger.info(f"[{self.__class__.__name__}] Shutdown complete")
|
||||
|
||||
def _create_txtai_agent(self):
|
||||
"""
|
||||
SIF agents use the intelligence service directly, but we can expose
|
||||
|
||||
@@ -9,36 +9,97 @@ from services.intelligence.agents.core_agent_framework import TaskProposal
|
||||
from services.intelligence.txtai_service import TxtaiIntelligenceService
|
||||
|
||||
class CitationExpert(SIFBaseAgent):
    """Agent for fact-checking and source management using the SIF index."""

    def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, **kwargs):
        super().__init__(intelligence_service, user_id, agent_type="citation_expert", **kwargs)

    async def verify_citations(self, content: str) -> Dict[str, Any]:
        """
        Verify claims in content against the SIF index.

        Up to ten sentences longer than 40 characters are treated as claim
        candidates. A claim counts as verified when at least one search hit
        scores above 0.7; otherwise it is reported as unverified.

        Args:
            content: Text whose claims should be checked.

        Returns:
            Dict with 'verified_claims', 'unverified_claims' and
            'missing_citations'. Successful runs add 'analysis_timestamp';
            an uninitialized index or any failure adds 'error' instead.
        """
        if not self.intelligence.is_initialized():
            return {
                "verified_claims": [],
                "unverified_claims": [],
                "missing_citations": [],
                "error": "SIF index not initialized"
            }

        try:
            # Crude claim extraction: split on periods and keep only sentences
            # long enough to plausibly carry a factual statement.
            flattened = content.replace("\n", " ")
            sentences = [s for s in (part.strip() for part in flattened.split(".")) if len(s) > 40]
            claim_candidates = sentences[:10]

            verified = []
            unverified = []

            for claim in claim_candidates:
                # Normalise a None result so the len() below cannot raise and
                # abort verification of the remaining claims.
                results = (await self.intelligence.search(claim, limit=3)) or []
                supporting = [
                    {"url": r.get("id", ""), "score": r.get("score", 0)}
                    for r in results
                    if (r.get("score") or 0) > 0.7
                ]
                if supporting:
                    verified.append({"claim": claim[:200], "supporting_sources": supporting})
                else:
                    unverified.append({"claim": claim[:200], "sources_found": len(results)})

            return {
                "verified_claims": verified,
                "unverified_claims": unverified,
                "missing_citations": [c["claim"] for c in unverified],
                "analysis_timestamp": datetime.utcnow().isoformat()
            }
        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Citation verification failed: {e}")
            return {
                "verified_claims": [],
                "unverified_claims": [],
                "missing_citations": [],
                "error": str(e)
            }

    async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
        """
        Propose fact-checking tasks based on SIF index coverage.

        Returns exactly one proposal: a verification task when the index
        returns reference material for a statistics-oriented query, or a
        prompt to add sources when it returns nothing or is unavailable.
        """
        proposals = []
        indexed_count = 0

        if self.intelligence.is_initialized():
            try:
                results = await self.intelligence.search("statistics data research study", limit=5)
                indexed_count = len(results or [])
            except Exception as e:
                # Best-effort probe: fall through to the "no sources" proposal.
                logger.debug(f"[CitationExpert] SIF search failed: {e}")

        if indexed_count > 0:
            proposals.append(TaskProposal(
                title="Verify Data Claims",
                description=f"SIF found {indexed_count} reference pages. Check recent drafts for unsupported statistics.",
                pillar_id="create",
                priority="medium",
                estimated_time=20,
                source_agent="CitationExpert",
                reasoning="Verified sources build audience trust and SEO authority.",
                action_type="navigate",
                action_url="/content-planning-dashboard"
            ))
        else:
            proposals.append(TaskProposal(
                title="Add Source Citations",
                description="Index authoritative sources in SIF to enable automated fact-checking.",
                pillar_id="create",
                priority="low",
                estimated_time=15,
                source_agent="CitationExpert",
                reasoning="Citing authoritative sources improves content credibility.",
                action_type="navigate",
                action_url="/content-planning-dashboard"
            ))

        return proposals
|
||||
|
||||
@@ -14,9 +14,11 @@ try:
|
||||
except ImportError:
|
||||
SIF_AVAILABLE = False
|
||||
|
||||
|
||||
class CompetitorResponseAgent(BaseALwrityAgent):
|
||||
"""
|
||||
Agent responsible for monitoring competitors and generating counter-strategies.
|
||||
Uses SIF index for real competitive data when available.
|
||||
"""
|
||||
|
||||
def __init__(self, user_id: str, shared_llm_name: str, llm: Any = None, **kwargs):
|
||||
@@ -44,61 +46,123 @@ class CompetitorResponseAgent(BaseALwrityAgent):
|
||||
tools=[
|
||||
{
|
||||
"name": "competitor_monitor",
|
||||
"description": "Monitors competitor content and changes",
|
||||
"description": "Returns competitor monitoring status via SIF",
|
||||
"target": self._competitor_monitor_tool
|
||||
},
|
||||
{
|
||||
"name": "threat_analyzer",
|
||||
"description": "Analyzes competitive threats",
|
||||
"description": "Returns threat analysis availability and SIF status",
|
||||
"target": self._threat_analyzer_tool
|
||||
}
|
||||
],
|
||||
llm=_llm_for_agent,
|
||||
max_iterations=5,
|
||||
# Removed unsupported 'system' argument
|
||||
# Instruction will be provided via orchestrator context or initial prompt
|
||||
# Instruction should be provided during invocation or via orchestrator context
|
||||
)
|
||||
|
||||
# Tool Implementations
|
||||
# Tool Implementations (sync — called by txtai Agent)
|
||||
|
||||
def _competitor_monitor_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Competitor monitoring tool that retrieves data via SIF.
|
||||
|
||||
Args:
|
||||
context: Dictionary containing 'competitor_url' (optional) to filter monitoring targets.
|
||||
Competitor monitoring tool. Returns SIF availability and directs to async method.
|
||||
"""
|
||||
# Stub implementation
|
||||
return {"status": "monitored", "changes": []}
|
||||
competitor_url = context.get("competitor_url", "any")
|
||||
if not self.sif_service:
|
||||
return {
|
||||
"status": "unavailable",
|
||||
"changes": [],
|
||||
"message": "SIF not initialized. Use async analyze_competitors() for real data."
|
||||
}
|
||||
return {
|
||||
"status": "sif_available",
|
||||
"competitor_url": competitor_url,
|
||||
"changes": [],
|
||||
"message": "SIF available. Use async analyze_competitors() for detailed analysis."
|
||||
}
|
||||
|
||||
def _threat_analyzer_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Threat analysis tool using SIF data.
|
||||
|
||||
Args:
|
||||
context: Dictionary containing analysis parameters like 'focus_area' or 'timeframe'.
|
||||
Threat analysis tool. Returns SIF status.
|
||||
"""
|
||||
# Stub implementation
|
||||
return {"threat_assessment": "Low", "level": "low"}
|
||||
focus = context.get("focus_area", "general")
|
||||
if not self.sif_service:
|
||||
return {
|
||||
"threat_assessment": "unknown",
|
||||
"level": "unknown",
|
||||
"message": "SIF not available. Use async analyze_competitors()."
|
||||
}
|
||||
return {
|
||||
"threat_assessment": "pending",
|
||||
"level": "pending",
|
||||
"focus_area": focus,
|
||||
"message": "SIF available. Use async analyze_competitors(focus_area='{focus}')."
|
||||
}
|
||||
|
||||
# Async entry points
|
||||
|
||||
async def analyze_competitors(self, website_url: str = "", focus_area: str = "general") -> Dict[str, Any]:
    """
    Search the SIF index for competitor intelligence and return real matches.

    Args:
        website_url: Optional site URL appended to the search query.
        focus_area: Topic used to narrow the competitor search.

    Returns:
        On success a dict with 'competitors' (url + snippet of at most 200
        characters per hit), an empty 'threats' list, 'pages_analyzed',
        'focus_area' and 'analysis_timestamp'. When SIF or its intelligence
        service is missing, or the search fails, a dict with empty
        'competitors'/'threats' and an 'error' message.
    """
    if not self.sif_service:
        return {"competitors": [], "threats": [], "error": "SIF service not initialized"}

    try:
        intelligence = getattr(self.sif_service, "intelligence_service", None)
        if not intelligence:
            return {"competitors": [], "threats": [], "error": "Intelligence service unavailable"}

        # strip() drops the trailing space left when website_url is empty.
        query = f"competitor {focus_area} {website_url}".strip()
        # A None result or a hit with text=None must not become an error.
        results = (await intelligence.search(query, limit=10)) or []
        return {
            "competitors": [
                {"url": r.get("id", ""), "snippet": (r.get("text") or "")[:200]}
                for r in results
            ],
            "threats": [],
            "pages_analyzed": len(results),
            "focus_area": focus_area,
            "analysis_timestamp": datetime.utcnow().isoformat()
        }
    except Exception as e:
        logger.error(f"[CompetitorResponseAgent] Analysis failed: {e}")
        return {"competitors": [], "threats": [], "error": str(e)}
|
||||
|
||||
async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
    """
    Propose tasks based on competitive intel from the SIF index.

    Args:
        context: Optional dict; 'focus_area' (default "content strategy")
            narrows the competitor search. None is treated as empty.

    Returns:
        A single-item list: a review task when the index returns competitor
        pages, otherwise a research task.
    """
    proposals = []
    competitor_count = 0
    focus_area = (context or {}).get("focus_area", "content strategy")

    if self.sif_service:
        try:
            intelligence = getattr(self.sif_service, "intelligence_service", None)
            if intelligence:
                results = await intelligence.search(f"competitor {focus_area}", limit=5)
                competitor_count = len(results or [])
        except Exception as e:
            # Best-effort probe: fall through to the generic research task.
            logger.debug(f"[CompetitorResponseAgent] SIF competitor search failed: {e}")

    if competitor_count > 0:
        proposals.append(TaskProposal(
            title="Review Competitor Content",
            description=f"SIF found {competitor_count} competitor pages. Review for gap opportunities.",
            pillar_id="create",
            priority="high",
            estimated_time=45,
            source_agent="CompetitorResponseAgent",
            reasoning="SIF-detected competitor activity presents content gap opportunities.",
            action_type="navigate",
            action_url="/content-planning-dashboard"
        ))
    else:
        proposals.append(TaskProposal(
            title="Research Competitor Topics",
            description="Search for competitor content in your niche to identify coverage gaps.",
            pillar_id="create",
            priority="medium",
            estimated_time=30,
            source_agent="CompetitorResponseAgent",
            reasoning="Understanding competitor positioning improves content strategy.",
            action_type="navigate",
            action_url="/content-planning-dashboard"
        ))

    return proposals
|
||||
|
||||
@@ -9,51 +9,88 @@ from services.intelligence.agents.core_agent_framework import TaskProposal
|
||||
from services.intelligence.txtai_service import TxtaiIntelligenceService
|
||||
|
||||
class LinkGraphAgent(SIFBaseAgent):
    """Agent for internal linking and graph optimization using real SIF index data."""

    def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, **kwargs):
        super().__init__(intelligence_service, user_id, agent_type="link_graph_expert", **kwargs)

    async def analyze_graph(self) -> Dict[str, Any]:
        """
        Analyze the knowledge graph structure by searching the SIF index.

        Clusters the index and runs two keyword searches, one for hub-like
        content and one for niche ("orphan") candidates.

        Returns:
            On success a dict with 'node_count' (hub hits plus orphan hits;
            a document matching both queries is counted twice),
            'cluster_count', 'content_hubs', 'orphaned_content' and
            'analysis_timestamp'. When the index is not initialized or the
            analysis fails, a zeroed dict carrying an 'error' message.
        """
        if not self.intelligence.is_initialized():
            return {"node_count": 0, "edge_count": 0, "clusters": [], "error": "SIF index not initialized"}

        try:
            # Use clustering to identify content groups
            cluster_indices = await self.intelligence.cluster(min_score=0.5)
            cluster_count = len(cluster_indices) if cluster_indices else 0

            # Search for content hub candidates
            hub_results = (await self.intelligence.search("pillar core foundation guide overview", limit=10)) or []

            # Search for orphan candidates (specific niche content not linking to pillars)
            orphan_results = (await self.intelligence.search("specific detailed deep dive", limit=10)) or []

            return {
                "node_count": len(hub_results) + len(orphan_results),
                "cluster_count": cluster_count,
                "content_hubs": [
                    {"id": r.get("id", ""), "title": (r.get("text") or "")[:100]}
                    for r in hub_results
                ],
                "orphaned_content": [
                    {"id": r.get("id", ""), "snippet": (r.get("text") or "")[:100]}
                    for r in orphan_results
                ],
                "analysis_timestamp": datetime.utcnow().isoformat()
            }
        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Graph analysis failed: {e}")
            return {"node_count": 0, "edge_count": 0, "clusters": [], "error": str(e)}

    async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
        """
        Propose internal linking tasks based on real SIF cluster data.

        Returns:
            A single-item list: a cross-linking task when clusters exist,
            otherwise a task to plan content clusters.
        """
        proposals = []
        cluster_count = 0

        if self.intelligence.is_initialized():
            try:
                # Only the cluster count drives the proposal, so no further
                # index queries are issued here.
                cluster_indices = await self.intelligence.cluster(min_score=0.5)
                cluster_count = len(cluster_indices) if cluster_indices else 0
            except Exception as e:
                logger.debug(f"[LinkGraphAgent] SIF analysis failed: {e}")

        if cluster_count > 0:
            proposals.append(TaskProposal(
                title="Strengthen Internal Links",
                description=f"SIF detected {cluster_count} content clusters that need cross-linking.",
                pillar_id="distribute",
                priority="medium",
                estimated_time=20,
                source_agent="LinkGraphAgent",
                reasoning="Connecting content clusters improves SEO and user navigation.",
                action_type="navigate",
                action_url="/content-planning-dashboard"
            ))
        else:
            proposals.append(TaskProposal(
                title="Plan Content Clusters",
                description="No content clusters found. Create pillar pages to build a linked content structure.",
                pillar_id="distribute",
                priority="medium",
                estimated_time=30,
                source_agent="LinkGraphAgent",
                reasoning="Structured content clusters drive organic growth.",
                action_type="navigate",
                action_url="/content-planning-dashboard"
            ))

        return proposals
|
||||
|
||||
@@ -14,9 +14,11 @@ try:
|
||||
except ImportError:
|
||||
SIF_AVAILABLE = False
|
||||
|
||||
|
||||
class SEOOptimizationAgent(BaseALwrityAgent):
|
||||
"""
|
||||
Agent responsible for technical SEO, keyword strategy, and performance optimization.
|
||||
Uses SIF index for real data when available.
|
||||
"""
|
||||
|
||||
def __init__(self, user_id: str, shared_llm_name: str, llm: Any = None, **kwargs):
|
||||
@@ -44,91 +46,147 @@ class SEOOptimizationAgent(BaseALwrityAgent):
|
||||
tools=[
|
||||
{
|
||||
"name": "seo_auditor",
|
||||
"description": "Performs comprehensive SEO audits",
|
||||
"description": "Returns SEO audit status and available SIF data",
|
||||
"target": self._seo_auditor_tool
|
||||
},
|
||||
{
|
||||
"name": "keyword_researcher",
|
||||
"description": "Researches high-potential keywords",
|
||||
"description": "Returns keyword research status via SIF",
|
||||
"target": self._keyword_researcher_tool
|
||||
},
|
||||
{
|
||||
"name": "on_page_optimizer",
|
||||
"description": "Optimizes on-page elements",
|
||||
"description": "Returns on-page optimization availability",
|
||||
"target": self._on_page_optimizer_tool
|
||||
},
|
||||
{
|
||||
"name": "technical_fixer",
|
||||
"description": "Fixes technical SEO issues",
|
||||
"description": "Returns technical fix availability",
|
||||
"target": self._technical_fixer_tool
|
||||
}
|
||||
],
|
||||
llm=_llm_for_agent,
|
||||
max_iterations=15,
|
||||
# Removed unsupported 'system' argument
|
||||
# Instruction will be provided via orchestrator context or initial prompt
|
||||
# Instruction should be provided during invocation or via orchestrator context
|
||||
)
|
||||
|
||||
# Tool Implementations
|
||||
# Tool Implementations (sync — called by txtai Agent)
|
||||
|
||||
def _seo_auditor_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
SEO audit tool that retrieves existing SEO data via SIF.
|
||||
|
||||
Args:
|
||||
context: Dictionary containing 'website_url' to audit.
|
||||
SEO audit tool. Returns availability and directs caller to async method for full analysis.
|
||||
"""
|
||||
# Stub implementation
|
||||
return {"health": "good", "issues": []}
|
||||
website_url = context.get("website_url", "unknown")
|
||||
if not self.sif_service:
|
||||
return {
|
||||
"health": "unknown",
|
||||
"issues": [],
|
||||
"status": "sif_unavailable",
|
||||
"message": "SIF service not initialized. Call perform_seo_audit() for async analysis."
|
||||
}
|
||||
return {
|
||||
"health": "pending",
|
||||
"website_url": website_url,
|
||||
"issues": [],
|
||||
"status": "sif_available",
|
||||
"message": "SIF available. Call perform_seo_audit() for detailed async analysis."
|
||||
}
|
||||
|
||||
def _keyword_researcher_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Keyword research tool.
|
||||
|
||||
Args:
|
||||
context: Dictionary containing 'seed_keywords' or 'topic'.
|
||||
Keyword research tool. Returns SIF availability and sample context if present.
|
||||
"""
|
||||
# Stub implementation
|
||||
return {"keywords": []}
|
||||
seed = context.get("seed_keywords", context.get("topic", "unknown"))
|
||||
if not self.sif_service:
|
||||
return {"keywords": [], "status": "sif_unavailable", "message": "SIF not available."}
|
||||
return {
|
||||
"keywords": [],
|
||||
"status": "sif_available",
|
||||
"message": f"SIF available. Use async search_keywords(topic='{seed}') for detailed research."
|
||||
}
|
||||
|
||||
def _on_page_optimizer_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
On-page optimization tool.
|
||||
|
||||
Args:
|
||||
context: Dictionary containing 'url' and 'target_keyword'.
|
||||
"""
|
||||
# Stub implementation
|
||||
return {"optimized": True}
|
||||
"""On-page optimization tool. Requires async analysis."""
|
||||
return {
|
||||
"optimized": False,
|
||||
"status": "unavailable",
|
||||
"message": "On-page optimization requires async analysis via propose_daily_tasks()."
|
||||
}
|
||||
|
||||
def _technical_fixer_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Technical SEO fixer tool. Auto-fix not implemented."""
|
||||
issue_id = context.get("issue_id", "unknown")
|
||||
return {
|
||||
"fixed": False,
|
||||
"status": "unavailable",
|
||||
"message": f"Issue '{issue_id}' requires manual review. Automated fixes not implemented."
|
||||
}
|
||||
|
||||
# Async entry points
|
||||
|
||||
async def perform_seo_audit(self, website_url: str) -> Dict[str, Any]:
    """
    Perform an SEO audit by searching the SIF index.

    The audit currently reports how many indexed pages match an SEO-oriented
    query for the site; the 'issues' list is always empty.

    Args:
        website_url: Site URL included in the search query and echoed back.

    Returns:
        On success a dict with 'health' ("reviewed"), 'website_url',
        'pages_indexed', 'issues' and 'audit_timestamp'. When SIF or its
        intelligence service is missing, or the search fails, a dict with
        'health' "unknown", empty 'issues' and an 'error' message.
    """
    if not self.sif_service:
        return {"health": "unknown", "issues": [], "error": "SIF service not initialized"}

    try:
        intelligence = getattr(self.sif_service, "intelligence_service", None)
        if not intelligence:
            return {"health": "unknown", "issues": [], "error": "Intelligence service unavailable"}

        # A None result means "nothing indexed", not a failed audit.
        results = (await intelligence.search(f"seo website analysis {website_url}", limit=10)) or []
        return {
            "health": "reviewed",
            "website_url": website_url,
            "pages_indexed": len(results),
            "issues": [],
            "audit_timestamp": datetime.utcnow().isoformat()
        }
    except Exception as e:
        logger.error(f"[SEOOptimizationAgent] SEO audit failed: {e}")
        return {"health": "unknown", "issues": [], "error": str(e)}
|
||||
|
||||
async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
    """
    Propose SEO-focused tasks based on real SIF index data.

    Args:
        context: Accepted for interface compatibility with the other
            agents; not read here.

    Returns:
        A single-item list: a review task when the index returns content
        matching an SEO-issue query, otherwise a task to run an audit.
    """
    proposals = []
    issues_found = 0

    if self.sif_service:
        try:
            intelligence = getattr(self.sif_service, "intelligence_service", None)
            if intelligence:
                results = await intelligence.search("seo issue problem error fix", limit=5)
                issues_found = len(results or [])
        except Exception as e:
            # Best-effort probe: fall through to the generic audit task.
            logger.debug(f"[SEOOptimizationAgent] SIF search for issues failed: {e}")

    if issues_found > 0:
        proposals.append(TaskProposal(
            title="Review SEO Issues",
            description=f"SIF indexed content suggests {issues_found} areas that may need SEO attention.",
            pillar_id="distribute",
            priority="high",
            estimated_time=30,
            source_agent="SEOOptimizationAgent",
            reasoning="Addressing SEO gaps improves organic visibility.",
            action_type="navigate",
            action_url="/content-planning-dashboard"
        ))
    else:
        proposals.append(TaskProposal(
            title="Run SEO Audit",
            description="Perform a comprehensive SEO audit to identify optimization opportunities.",
            pillar_id="distribute",
            priority="medium",
            estimated_time=15,
            source_agent="SEOOptimizationAgent",
            reasoning="Regular audits prevent SEO degradation.",
            action_type="navigate",
            action_url="/content-planning-dashboard"
        ))

    return proposals
|
||||
|
||||
@@ -133,6 +133,8 @@ class SemanticHarvesterService:
|
||||
'cost': cost, 'user_id': user_id, 'period': current_period,
|
||||
})
|
||||
db.commit()
|
||||
from services.subscription.cache import clear_dashboard_cache
|
||||
clear_dashboard_cache(user_id)
|
||||
logger.info(f"[SemanticHarvester] Tracked Exa usage: user={user_id}, cost=${cost}")
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
@@ -651,15 +651,37 @@ class RealTimeSemanticMonitor:
|
||||
|
||||
class SemanticDashboardAPI:
|
||||
"""API interface for the semantic monitoring dashboard."""
|
||||
|
||||
|
||||
STALE_AFTER_SECONDS = 3600 # 1 hour without access = stale
|
||||
|
||||
def __init__(self):
|
||||
self.monitors: Dict[str, RealTimeSemanticMonitor] = {}
|
||||
|
||||
self._last_access: Dict[str, datetime] = {}
|
||||
|
||||
def get_monitor(self, user_id: str) -> RealTimeSemanticMonitor:
    """Return the user's semantic monitor, creating it on first use.

    Every call records the current UTC time as the user's last access.
    """
    try:
        monitor = self.monitors[user_id]
    except KeyError:
        monitor = self.monitors[user_id] = RealTimeSemanticMonitor(user_id)

    self._last_access[user_id] = datetime.utcnow()
    return monitor
|
||||
|
||||
def evict_stale_monitors(self, max_age_seconds: Optional[int] = None) -> int:
    """
    Remove monitors that haven't been accessed in max_age_seconds.

    Args:
        max_age_seconds: Idle time in seconds after which a monitor is
            evicted. None selects STALE_AFTER_SECONDS; an explicit 0 is
            honoured and evicts every monitor with any idle time.

    Returns:
        The number of evicted monitors.
    """
    # Compare against None explicitly: `max_age_seconds or default` would
    # silently replace a deliberate 0 with the default.
    max_age = self.STALE_AFTER_SECONDS if max_age_seconds is None else max_age_seconds
    now = datetime.utcnow()

    # Collect first, then delete, so the dict is not mutated while iterating.
    stale = [
        uid for uid, last in self._last_access.items()
        if (now - last).total_seconds() > max_age
    ]
    for uid in stale:
        self.monitors.pop(uid, None)
        self._last_access.pop(uid, None)

    if stale:
        logger.info(f"Evicted {len(stale)} stale semantic monitor(s)")
    return len(stale)
|
||||
|
||||
async def start_dashboard_monitoring(self, user_id: str, competitors: List[str] = None) -> Dict[str, Any]:
|
||||
"""Start semantic monitoring for a user."""
|
||||
|
||||
@@ -298,7 +298,8 @@ class SemanticCacheManager:
|
||||
query: str,
|
||||
results: List[Dict[str, Any]],
|
||||
relevance_threshold: float = 0.7,
|
||||
ttl: Optional[int] = None
|
||||
ttl: Optional[int] = None,
|
||||
user_id: str = None
|
||||
) -> bool:
|
||||
"""
|
||||
Cache semantic search query results with relevance-based invalidation
|
||||
@@ -308,6 +309,7 @@ class SemanticCacheManager:
|
||||
results: Query results
|
||||
relevance_threshold: Minimum relevance score for caching
|
||||
ttl: Time to live in seconds
|
||||
user_id: User identifier for scoped caching
|
||||
|
||||
Returns:
|
||||
True if caching was successful
|
||||
@@ -319,7 +321,7 @@ class SemanticCacheManager:
|
||||
|
||||
cache_key = self._generate_cache_key(
|
||||
"semantic_query",
|
||||
"global", # Global query cache
|
||||
user_id, # User-scoped cache key
|
||||
{"query": query, "threshold": relevance_threshold}
|
||||
)
|
||||
|
||||
@@ -348,13 +350,14 @@ class SemanticCacheManager:
|
||||
def get_cached_query_results(
|
||||
self,
|
||||
query: str,
|
||||
relevance_threshold: float = 0.7
|
||||
relevance_threshold: float = 0.7,
|
||||
user_id: str = None
|
||||
) -> Optional[List[Dict[str, Any]]]:
|
||||
"""Retrieve cached semantic query results"""
|
||||
"""Retrieve cached semantic query results scoped to a user"""
|
||||
try:
|
||||
cache_key = self._generate_cache_key(
|
||||
"semantic_query",
|
||||
"global",
|
||||
user_id,
|
||||
{"query": query, "threshold": relevance_threshold}
|
||||
)
|
||||
|
||||
@@ -478,29 +481,7 @@ class SemanticCacheManager:
|
||||
logger.error(f"Failed to get cache stats: {e}")
|
||||
return self.stats
|
||||
|
||||
def warm_cache_for_user(self, user_id: str, common_queries: List[str]):
|
||||
"""
|
||||
Pre-populate cache with common semantic queries for a user
|
||||
|
||||
Args:
|
||||
user_id: User identifier
|
||||
common_queries: List of common semantic queries to pre-cache
|
||||
"""
|
||||
try:
|
||||
logger.info(f"Warming cache for user {user_id} with {len(common_queries)} queries")
|
||||
|
||||
# This would typically involve running the actual semantic analysis
|
||||
# For now, we log the intent and can be extended with actual warming logic
|
||||
|
||||
# Example warming scenarios:
|
||||
# 1. Pre-analyze user's top content pillars
|
||||
# 2. Cache common competitor comparisons
|
||||
# 3. Pre-compute semantic similarity scores
|
||||
|
||||
logger.info(f"Cache warming initiated for user {user_id}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to warm cache for user: {e}")
|
||||
|
||||
|
||||
|
||||
def semantic_cache_decorator(ttl: int = 3600, operation_type: str = "generic"):
|
||||
|
||||
@@ -61,32 +61,32 @@ LOCAL_LLM_FALLBACKS = [
|
||||
|
||||
class LocalLLMWrapper:
|
||||
"""
|
||||
Lazily loads a local LLM via txtai and caches it globally.
|
||||
This prevents blocking server startup and redundant model loads.
|
||||
Wraps a local LLM with async lifecycle support.
|
||||
Model loading runs off the event loop so it never blocks the server.
|
||||
Loaded models are cached globally (shared across all instances).
|
||||
"""
|
||||
|
||||
def __init__(self, model_path: str, task: str = None):
|
||||
self.model_path = model_path
|
||||
self.task = task
|
||||
# No self._llm here, we use the global cache
|
||||
|
||||
@property
|
||||
def llm(self):
|
||||
# Create a cache key based on model path and task
|
||||
self._initialized = False
|
||||
self._init_task = None
|
||||
|
||||
def _load_model_sync(self) -> Any:
|
||||
"""Load model (blocking — call via thread executor from async code)."""
|
||||
cache_key = f"{self.model_path}:{self.task}"
|
||||
|
||||
if cache_key in _local_llm_cache:
|
||||
return _local_llm_cache[cache_key]
|
||||
|
||||
|
||||
if LLM is None:
|
||||
raise ImportError("txtai.pipeline.LLM is not available")
|
||||
|
||||
|
||||
task_to_use = (self.task or "language-generation").strip()
|
||||
# Explicitly force language-generation for known models if auto-detect fails
|
||||
if any(x in self.model_path for x in ["Qwen", "Instruct", "GPT", "Llama"]):
|
||||
task_to_use = "language-generation"
|
||||
if task_to_use == "text-generation":
|
||||
task_to_use = "language-generation"
|
||||
|
||||
|
||||
candidate_models = []
|
||||
for candidate in [self.model_path, *LOCAL_LLM_FALLBACKS]:
|
||||
if candidate not in candidate_models:
|
||||
@@ -137,12 +137,49 @@ class LocalLLMWrapper:
|
||||
pass
|
||||
logger.error(f"Failed to initialize LocalLLMWrapper after fallback attempts: {last_error}")
|
||||
raise last_error
|
||||
|
||||
return _local_llm_cache[cache_key]
|
||||
|
||||
|
||||
@property
def llm(self):
    """Return the loaded model, loading it synchronously on a cache miss.

    The module-level cache is consulted under the key
    ``"<model_path>:<task>"``. When the key is absent the model is loaded
    through ``_load_model_sync`` (this blocks the caller) and the wrapper is
    flagged as initialized.
    """
    key = f"{self.model_path}:{self.task}"
    if key not in _local_llm_cache:
        model = self._load_model_sync()
        self._initialized = True
        return model
    return _local_llm_cache[key]
|
||||
|
||||
async def initialize(self) -> bool:
    """Pre-load the model without blocking the event loop.

    Intended to be awaited at server startup so the first request does not
    pay the model-loading delay.

    Returns:
        True when the model is available (this wrapper was already
        initialized, the model is present in the module-level cache, or the
        load succeeded). False when loading raised; the error is logged
        rather than propagated.
    """
    if self._initialized:
        return True

    cache_key = f"{self.model_path}:{self.task}"
    if cache_key in _local_llm_cache:
        # The model for this path/task pair is already in the shared cache.
        self._initialized = True
        return True

    try:
        # Inside a coroutine get_running_loop() is the recommended accessor;
        # get_event_loop() has policy-dependent behaviour.
        loop = asyncio.get_running_loop()
        # Run the blocking load on the loop's default executor.
        await loop.run_in_executor(None, self._load_model_sync)
        self._initialized = True
        return True
    except Exception as e:
        logger.error(f"[LocalLLMWrapper] Async init failed for {self.model_path}: {e}")
        return False
|
||||
|
||||
async def ensure_initialized_async(self) -> bool:
    """Make sure the model is loaded, delegating to ``initialize`` when it is not.

    Returns:
        True if the wrapper is already initialized; otherwise whatever
        ``initialize`` returns.
    """
    if not self._initialized:
        return await self.initialize()
    return True
|
||||
|
||||
async def shutdown(self):
    """Remove this wrapper's model from the module-level cache and mark it unloaded.

    The cache entry is keyed by ``"<model_path>:<task>"``; removing a key that
    is not present is a no-op.
    """
    _local_llm_cache.pop(f"{self.model_path}:{self.task}", None)
    self._initialized = False
|
||||
|
||||
def __call__(self, prompt: str, **kwargs) -> str:
    """Run the wrapped model on ``prompt``, forwarding ``kwargs`` unchanged."""
    model = self.llm
    return model(prompt, **kwargs)
|
||||
|
||||
|
||||
def generate(self, prompt: str, **kwargs) -> str:
    """Generate text for ``prompt`` with the wrapped model, forwarding ``kwargs`` unchanged."""
    model = self.llm
    return model(prompt, **kwargs)
|
||||
|
||||
@@ -177,6 +214,21 @@ class SIFBaseAgent(BaseALwrityAgent):
|
||||
|
||||
return bool(getattr(self.intelligence, "_initialized", False) and self.intelligence.embeddings)
|
||||
|
||||
async def initialize_async(self):
    """Async lifecycle hook: prepare the intelligence service and, if present, the LLM.

    Awaits ``_ensure_intelligence_ready`` first, then awaits the LLM's
    ``ensure_initialized_async`` hook when the agent has an ``llm`` attribute
    exposing one.
    """
    await self._ensure_intelligence_ready()

    model = getattr(self, "llm", None)
    supports_async_init = hasattr(model, "ensure_initialized_async")
    if supports_async_init:
        await model.ensure_initialized_async()

    logger.info(f"[{self.__class__.__name__}] Async initialization complete")
|
||||
|
||||
async def shutdown(self):
    """Async lifecycle hook: await the LLM's ``shutdown`` when the agent's ``llm`` exposes one."""
    model = getattr(self, "llm", None)
    supports_shutdown = hasattr(model, "shutdown")
    if supports_shutdown:
        await model.shutdown()

    logger.info(f"[{self.__class__.__name__}] Shutdown complete")
|
||||
|
||||
def _create_txtai_agent(self):
|
||||
"""
|
||||
SIF agents primarily use the intelligence service directly, but we can expose
|
||||
@@ -545,6 +597,84 @@ class ContentGuardianAgent(SIFBaseAgent):
|
||||
super().__init__(intelligence_service, user_id, agent_type="content_guardian")
|
||||
self.sif_service = sif_service
|
||||
|
||||
async def perform_site_audit(self, website_url: str) -> Dict[str, Any]:
    """
    Audit the indexed content for a website and summarise the findings.

    Searches the intelligence index for up to 10 entries related to
    ``website_url``, runs the quality, style and safety checks on every entry
    whose text is at least 50 characters long, then adds the cannibalization
    check for the site.

    Args:
        website_url: URL of the site being audited; also passed as the
            ``title`` of each quality assessment.

    Returns:
        A report dict with ``website_url``, ``audit_timestamp``,
        ``total_pages_crawled`` and the four assessment sections. The
        sections stay ``None`` when the search returns nothing. If any step
        raises, a reduced dict with ``website_url``, ``error`` and
        ``audit_timestamp`` is returned instead.
    """
    self._log_agent_operation("Performing site audit", website_url=website_url)

    def _mean(values):
        # Average rounded to 4 places; an empty list yields 0.0 instead of dividing by zero.
        return round(sum(values) / max(len(values), 1), 4)

    try:
        # Query the index for content related to this site.
        hits = await self.intelligence.search(
            f"website content analysis {website_url}", limit=10
        )

        audit: Dict[str, Any] = {
            "website_url": website_url,
            "audit_timestamp": datetime.utcnow().isoformat(),
            "total_pages_crawled": len(hits),
            "content_quality": None,
            "brand_voice_consistency": None,
            "safety_issues": None,
            "cannibalization_issues": None,
        }

        if not hits:
            logger.warning(f"[{self.__class__.__name__}] No indexed content found for {website_url}")
            return audit

        quality_per_page = []
        style_per_page = []
        unsafe_pages = []

        for hit in hits:
            # Fall back to the entry id when no text is stored for it.
            page_text = hit.get("text", "") or hit.get("id", "")
            if len(page_text) >= 50:
                quality_result = await self.assess_content_quality(
                    {"description": page_text, "title": website_url}
                )
                quality_per_page.append(quality_result.get("score", 0.0))

                style_result = await self.style_enforcer(page_text)
                style_per_page.append(style_result.get("compliance_score", 0.0))

                safety_result = await self.safety_filter(page_text)
                if not safety_result.get("is_safe", True):
                    unsafe_pages.append(safety_result.get("flags", []))

        audit["content_quality"] = {
            "score": _mean(quality_per_page),
            "pages_analyzed": len(quality_per_page),
        }
        audit["brand_voice_consistency"] = {
            "compliance_score": _mean(style_per_page),
            "pages_checked": len(style_per_page),
        }
        audit["safety_issues"] = {
            "has_issues": len(unsafe_pages) > 0,
            "flagged_pages": len(unsafe_pages),
        }

        audit["cannibalization_issues"] = await self.check_cannibalization(website_url)

        logger.info(
            f"[{self.__class__.__name__}] Site audit complete for {website_url}: "
            f"quality={audit['content_quality']['score']}, "
            f"brand_voice={audit['brand_voice_consistency']['compliance_score']}"
        )
        return audit

    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] Site audit failed for {website_url}: {e}")
        return {
            "website_url": website_url,
            "error": str(e),
            "audit_timestamp": datetime.utcnow().isoformat(),
        }
|
||||
|
||||
async def assess_content_quality(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Assess overall content quality based on website data."""
|
||||
self._log_agent_operation("Assessing content quality")
|
||||
@@ -826,51 +956,21 @@ class LinkGraphAgent(SIFBaseAgent):
|
||||
logger.info(f"[{self.__class__.__name__}] No relevant internal pages found")
|
||||
return []
|
||||
|
||||
# 2. Get Authority Data (if available)
|
||||
authority_map = {}
|
||||
if self.sif_service:
|
||||
try:
|
||||
# Fetch dashboard context to get top performing content
|
||||
# Note: This relies on what's available in the SIF index/dashboard summary
|
||||
dashboard_context = await self.sif_service.get_seo_dashboard_context()
|
||||
|
||||
if "error" not in dashboard_context:
|
||||
# Extract top queries/pages if available in summary
|
||||
# Ideally, we'd have a map of URL -> Authority Score
|
||||
# For now, we'll try to extract what we can
|
||||
data = dashboard_context.get("dashboard_data", {})
|
||||
summary = data.get("summary", {})
|
||||
|
||||
# Example: Boost if site health is good (general confidence)
|
||||
site_health = data.get("health_score", {}).get("score", 0)
|
||||
|
||||
# If we had top pages in the summary, we'd use them.
|
||||
# For now, we'll use a placeholder authority map or just the site health
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to fetch authority data: {e}")
|
||||
|
||||
suggestions = []
|
||||
for result in results:
|
||||
relevance_score = result.get('score', 0.0)
|
||||
url = result.get('id', 'unknown')
|
||||
|
||||
# Apply authority boost (placeholder logic)
|
||||
# In a full implementation, we'd look up 'url' in authority_map
|
||||
authority_boost = 1.0
|
||||
|
||||
final_score = relevance_score * authority_boost
|
||||
|
||||
if final_score >= self.RELEVANCE_THRESHOLD:
|
||||
if relevance_score >= self.RELEVANCE_THRESHOLD:
|
||||
suggestion = {
|
||||
"url": url,
|
||||
"relevance": relevance_score,
|
||||
"final_score": final_score,
|
||||
"confidence": self._calculate_link_confidence(final_score),
|
||||
"final_score": relevance_score,
|
||||
"confidence": self._calculate_link_confidence(relevance_score),
|
||||
"reason": f"Semantic similarity: {relevance_score:.3f}"
|
||||
}
|
||||
suggestions.append(suggestion)
|
||||
logger.debug(f"[{self.__class__.__name__}] Added link suggestion: {url} (score: {final_score:.3f})")
|
||||
logger.debug(f"[{self.__class__.__name__}] Added link suggestion: {url} (score: {relevance_score:.3f})")
|
||||
|
||||
# Sort by final score
|
||||
suggestions.sort(key=lambda x: x['final_score'], reverse=True)
|
||||
@@ -974,23 +1074,39 @@ class LinkGraphAgent(SIFBaseAgent):
|
||||
return min(1.0, relevance_score * 1.5)
|
||||
|
||||
async def optimize_anchor_text(self, target_url: str, context: str) -> str:
|
||||
"""Suggest the best anchor text for a given link based on target page context."""
|
||||
"""Suggest anchor text for a link by searching the SIF index for the target page."""
|
||||
self._log_agent_operation("Optimizing anchor text", target_url=target_url, context_length=len(context))
|
||||
|
||||
|
||||
try:
|
||||
# In a real implementation, we would fetch the target page content via SIF
|
||||
# and use an LLM to generate the anchor text.
|
||||
|
||||
# Placeholder for LLM call
|
||||
# if self.llm: ...
|
||||
|
||||
logger.info(f"[{self.__class__.__name__}] Anchor text optimization stub completed")
|
||||
return "relevant anchor text" # Placeholder
|
||||
|
||||
if not await self._ensure_intelligence_ready():
|
||||
return self._extract_anchor_from_context(target_url, context)
|
||||
|
||||
results = await self.intelligence.search(f"{target_url} {context}", limit=3)
|
||||
if results:
|
||||
text = results[0].get("text", "") or results[0].get("id", "")
|
||||
words = [w for w in text.split() if len(w) > 4][:5]
|
||||
if words:
|
||||
return " ".join(words)
|
||||
return self._extract_anchor_from_context(target_url, context)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[{self.__class__.__name__}] Failed to optimize anchor text: {e}")
|
||||
logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
|
||||
return "click here" # Fallback anchor text
|
||||
logger.error(f"[{self.__class__.__name__}] optimize_anchor_text failed: {e}")
|
||||
return self._extract_anchor_from_context(target_url, context)
|
||||
|
||||
def _extract_anchor_from_context(self, target_url: str, context: str) -> str:
    """Derive fallback anchor text from the URL path, else from the context.

    This is the last-resort path when the index cannot supply anchor text, so
    it must not raise: missing or non-string inputs are treated as empty.

    Args:
        target_url: URL of the link target. Hyphens and slashes in its path
            are treated as word separators; may be empty or ``None``.
        context: Text surrounding the link; may be empty or ``None``.

    Returns:
        Up to four title-cased words taken from the URL path (words longer
        than 3 characters) or, failing that, from the context (words longer
        than 4 characters). ``"learn more"`` when neither yields a word.
    """
    from urllib.parse import urlparse

    path = ""
    if isinstance(target_url, str):
        try:
            path = urlparse(target_url).path
        except ValueError:
            # urlparse rejects some malformed URLs (e.g. a broken IPv6
            # netloc); fall through to the context-based anchor.
            path = ""

    slug_words = [
        w
        for w in path.strip("/").replace("-", " ").replace("/", " ").split()
        if len(w) > 3
    ]
    if slug_words:
        return " ".join(slug_words[:4]).title()

    # Guard against a missing context so the fallback itself cannot fail.
    context_text = context if isinstance(context, str) else ""
    context_words = [w for w in context_text.split() if len(w) > 4]
    return " ".join(context_words[:4]).title() if context_words else "learn more"
|
||||
|
||||
class CitationExpert(SIFBaseAgent):
|
||||
"""
|
||||
|
||||
@@ -1369,19 +1369,6 @@ class SIFIntegrationService:
|
||||
logger.error(f"Failed to invalidate user cache: {e}")
|
||||
return False
|
||||
|
||||
async def warm_user_cache(self, common_queries: List[str]) -> bool:
|
||||
"""Pre-populate cache with common queries for the user."""
|
||||
try:
|
||||
if self.enable_caching and self.cache_manager:
|
||||
self.cache_manager.warm_cache_for_user(self.user_id, common_queries)
|
||||
logger.info(f"Warmed cache for user {self.user_id} with {len(common_queries)} queries")
|
||||
return True
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to warm user cache: {e}")
|
||||
return False
|
||||
|
||||
|
||||
# Integration with existing API endpoints
|
||||
class SIFIntegrationAPI:
|
||||
"""API wrapper for SIF operations with caching integration."""
|
||||
|
||||
@@ -220,12 +220,15 @@ class TxtaiIntelligenceService:
|
||||
return 0.0
|
||||
return dot_product / (norm_v1 * norm_v2)
|
||||
|
||||
async def index_content(self, items: List[Tuple[str, str, Dict[str, Any]]]):
|
||||
async def index_content(self, items: List[Tuple[str, str, Dict[str, Any]]]) -> int:
|
||||
"""
|
||||
Index content for semantic search and clustering.
|
||||
Index content using incremental upsert — only processes new/changed documents.
|
||||
|
||||
Args:
|
||||
items: List of (id, text, metadata) tuples.
|
||||
|
||||
Returns:
|
||||
Number of items actually upserted.
|
||||
"""
|
||||
self._ensure_initialized()
|
||||
if not self._initialized:
|
||||
@@ -235,38 +238,28 @@ class TxtaiIntelligenceService:
|
||||
logger.warning(message)
|
||||
if self.fail_fast:
|
||||
raise RuntimeError(message)
|
||||
return
|
||||
return 0
|
||||
|
||||
try:
|
||||
logger.info(f"Starting content indexing for user {self.user_id}")
|
||||
logger.debug(f"Indexing {len(items)} items")
|
||||
|
||||
# Validate input items
|
||||
if not items:
|
||||
logger.warning("No items provided for indexing")
|
||||
return
|
||||
return 0
|
||||
|
||||
# Index items: [(id, text, metadata)] - metadata needs to be JSON string for txtai
|
||||
import json
|
||||
processed_items = []
|
||||
for item in items:
|
||||
id_val, text, metadata = item
|
||||
# Convert metadata dict to JSON string
|
||||
metadata_json = json.dumps(metadata) if metadata else "{}"
|
||||
processed_items.append((id_val, text, metadata_json))
|
||||
|
||||
self.embeddings.index(processed_items)
|
||||
|
||||
# Save the index
|
||||
self.embeddings.upsert(processed_items)
|
||||
self.embeddings.save(self.index_path)
|
||||
logger.info(f"Successfully indexed {len(items)} items for user {self.user_id}")
|
||||
logger.debug(f"Index saved to: {self.index_path}")
|
||||
count = len(processed_items)
|
||||
logger.info(f"Upserted {count} items for user {self.user_id}")
|
||||
return count
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error indexing content for user {self.user_id}: {e}")
|
||||
logger.error(f"Full traceback: {traceback.format_exc()}")
|
||||
logger.error(f"Items count: {len(items) if items else 0}")
|
||||
|
||||
message = str(e)
|
||||
is_windows_lock_error = isinstance(e, PermissionError) or "WinError 32" in message
|
||||
if is_windows_lock_error:
|
||||
@@ -274,7 +267,62 @@ class TxtaiIntelligenceService:
|
||||
f"Txtai index save skipped for user {self.user_id} due to file lock. "
|
||||
f"The index will be retried on a future run."
|
||||
)
|
||||
return
|
||||
return 0
|
||||
raise
|
||||
|
||||
async def delete_content(self, doc_ids: List[str]) -> int:
    """
    Delete specific documents from the index by ID.

    Args:
        doc_ids: List of document IDs to remove.

    Returns:
        Number of documents actually removed from the index. 0 when
        ``doc_ids`` is empty, the service is not initialized, none of the
        IDs were present, or the operation failed (the error is logged).
    """
    if not doc_ids:
        # Nothing to do; avoid initializing or rewriting the index.
        return 0

    await self._ensure_initialized_async()
    if not self._initialized or not self.embeddings:
        return 0

    try:
        # txtai's Embeddings.delete returns the list of ids it removed, which
        # can be shorter than the request when some ids are not indexed.
        deleted = self.embeddings.delete(doc_ids)
        count = len(deleted) if deleted is not None else len(doc_ids)

        if count:
            # Only persist when the index actually changed.
            self.embeddings.save(self.index_path)

        logger.info(f"Deleted {count} documents for user {self.user_id}")
        return count
    except Exception as e:
        logger.error(f"Error deleting documents for user {self.user_id}: {e}")
        return 0
|
||||
|
||||
async def reindex_all(self, items: List[Tuple[str, str, Dict[str, Any]]]) -> int:
    """
    Full reindex — replaces all content. Use sparingly (e.g. schema migration).

    Args:
        items: List of (id, text, metadata) tuples.

    Returns:
        Number of items indexed, or 0 when the service is not initialized.

    Raises:
        Exception: Any error raised while indexing or saving is logged and
            re-raised.
    """
    await self._ensure_initialized_async()
    if not self._initialized or not self.embeddings:
        return 0

    try:
        import json

        # Metadata is passed to the index as a JSON string ("{}" when absent).
        processed_items = [
            (id_val, text, json.dumps(metadata) if metadata else "{}")
            for id_val, text, metadata in items
        ]

        # Embeddings.index() already overwrites the existing index. The
        # reindex=True flag is txtai's internal mode for rebuilding vectors
        # from content that is already stored: it skips creating the document
        # database and inserting the supplied documents, so it must not be
        # used when supplying a fresh document set.
        self.embeddings.index(processed_items)
        self.embeddings.save(self.index_path)

        count = len(processed_items)
        logger.info(f"Reindexed all {count} items for user {self.user_id}")
        return count

    except Exception as e:
        logger.error(f"Error reindexing all for user {self.user_id}: {e}")
        raise
|
||||
|
||||
async def search(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:
|
||||
@@ -292,7 +340,8 @@ class TxtaiIntelligenceService:
|
||||
if self.enable_caching and self.cache_manager:
|
||||
cached_results = self.cache_manager.get_cached_query_results(
|
||||
query=query,
|
||||
relevance_threshold=0.5 # Lower threshold for search results
|
||||
relevance_threshold=0.5, # Lower threshold for search results
|
||||
user_id=self.user_id
|
||||
)
|
||||
if cached_results:
|
||||
logger.info(f"Cache hit for search query: '{query}'")
|
||||
@@ -309,7 +358,8 @@ class TxtaiIntelligenceService:
|
||||
self.cache_manager.cache_query_results(
|
||||
query=query,
|
||||
results=results,
|
||||
relevance_threshold=0.5
|
||||
relevance_threshold=0.5,
|
||||
user_id=self.user_id
|
||||
)
|
||||
logger.debug(f"Cached search results for query: '{query}'")
|
||||
|
||||
@@ -462,8 +512,7 @@ class TxtaiIntelligenceService:
|
||||
"""Fallback clustering method when graph clustering is not available."""
|
||||
logger.info(f"Using fallback clustering for user {self.user_id}")
|
||||
|
||||
# Simple clustering based on semantic similarity
|
||||
# This is a placeholder - in production, you'd implement a proper clustering algorithm
|
||||
# Simple clustering based on semantic similarity against sample queries
|
||||
try:
|
||||
# Get a sample of indexed items to analyze
|
||||
sample_queries = ["marketing", "SEO", "content", "social media", "email marketing"]
|
||||
|
||||
@@ -166,6 +166,8 @@ def _track_image_operation_usage(
|
||||
video_limit = limits['limits'].get("video_calls", 0) if limits else 0
|
||||
|
||||
db_track.commit()
|
||||
from services.subscription.cache import clear_dashboard_cache
|
||||
clear_dashboard_cache(user_id)
|
||||
logger.info(f"{log_prefix} ✅ Tracked usage: user {user_id} -> {operation_type} -> {new_calls} calls, ${cost:.4f}")
|
||||
|
||||
operation_name = operation_type.replace("-", " ").title()
|
||||
|
||||
@@ -24,21 +24,21 @@ class WaveSpeedImageProvider(ImageGenerationProvider):
|
||||
"ideogram-v3-turbo": {
|
||||
"name": "Ideogram V3 Turbo",
|
||||
"description": "Photorealistic generation with superior text rendering",
|
||||
"cost_per_image": 0.10, # Estimated, adjust based on actual pricing
|
||||
"cost_per_image": 0.30,
|
||||
"max_resolution": (1024, 1024),
|
||||
"default_steps": 20,
|
||||
},
|
||||
"qwen-image": {
|
||||
"name": "Qwen Image",
|
||||
"description": "Fast, high-quality text-to-image generation",
|
||||
"cost_per_image": 0.05, # Estimated, adjust based on actual pricing
|
||||
"cost_per_image": 0.30,
|
||||
"max_resolution": (1024, 1024),
|
||||
"default_steps": 15,
|
||||
},
|
||||
"flux-kontext-pro": {
|
||||
"name": "FLUX Kontext Pro",
|
||||
"description": "Professional typography and text rendering with improved prompt adherence",
|
||||
"cost_per_image": 0.04, # $0.04 per image
|
||||
"cost_per_image": 0.30,
|
||||
"max_resolution": (1024, 1024),
|
||||
"default_steps": 20,
|
||||
}
|
||||
|
||||
@@ -307,6 +307,8 @@ def generate_audio(
|
||||
video_limit = limits['limits'].get("video_calls", 0) if limits else 0
|
||||
|
||||
db_track.commit()
|
||||
from services.subscription.cache import clear_dashboard_cache
|
||||
clear_dashboard_cache(user_id)
|
||||
logger.info(f"[audio_gen] ✅ Successfully tracked usage: user {user_id} -> audio -> {new_calls} calls, ${estimated_cost:.4f}")
|
||||
|
||||
# UNIFIED SUBSCRIPTION LOG - Shows before/after state in one message
|
||||
@@ -519,6 +521,8 @@ def clone_voice(
|
||||
)
|
||||
db_track.add(usage_log)
|
||||
db_track.commit()
|
||||
from services.subscription.cache import clear_dashboard_cache
|
||||
clear_dashboard_cache(user_id)
|
||||
|
||||
print(f"""
|
||||
[SUBSCRIPTION] Voice Clone
|
||||
@@ -708,6 +712,8 @@ def qwen3_voice_clone(
|
||||
)
|
||||
db_track.add(usage_log)
|
||||
db_track.commit()
|
||||
from services.subscription.cache import clear_dashboard_cache
|
||||
clear_dashboard_cache(user_id)
|
||||
|
||||
print(f"""
|
||||
[SUBSCRIPTION] Qwen3 Voice Clone
|
||||
@@ -891,6 +897,8 @@ def qwen3_voice_design(
|
||||
)
|
||||
db_track.add(usage_log)
|
||||
db_track.commit()
|
||||
from services.subscription.cache import clear_dashboard_cache
|
||||
clear_dashboard_cache(user_id)
|
||||
|
||||
print(f"""
|
||||
[SUBSCRIPTION] Qwen3 Voice Design
|
||||
@@ -1079,6 +1087,8 @@ def cosyvoice_voice_clone(
|
||||
)
|
||||
db_track.add(usage_log)
|
||||
db_track.commit()
|
||||
from services.subscription.cache import clear_dashboard_cache
|
||||
clear_dashboard_cache(user_id)
|
||||
|
||||
print(f"""
|
||||
[SUBSCRIPTION] CosyVoice Voice Clone
|
||||
|
||||
@@ -27,6 +27,9 @@ from .tenant_provider_config import tenant_provider_config_resolver
|
||||
|
||||
logger = get_service_logger("image_generation.facade")
|
||||
|
||||
# Models that can render readable text directly in generated images
|
||||
_TEXT_CAPABLE = {"flux-kontext-pro", "flux-2-flex", "glm-image"}
|
||||
|
||||
|
||||
def _select_provider(explicit: Optional[str], user_id: Optional[str] = None) -> str:
|
||||
cfg = tenant_provider_config_resolver.resolve(
|
||||
@@ -109,8 +112,13 @@ def generate_image(prompt: str, options: Optional[Dict[str, Any]] = None, user_i
|
||||
image_options.model = "black-forest-labs/FLUX.1-Krea-dev"
|
||||
|
||||
if provider_name == "wavespeed" and not image_options.model:
|
||||
# Default to cost-effective model: Qwen Image ($0.05/image, optimized for blog images)
|
||||
image_options.model = "qwen-image"
|
||||
# Default to FLUX Kontext Pro (professional typography and in-image text rendering)
|
||||
image_options.model = "flux-kontext-pro"
|
||||
|
||||
# Append overlay text for text-capable models
|
||||
overlay_text = opts.get("overlay_text")
|
||||
if overlay_text and image_options.model and image_options.model.lower() in _TEXT_CAPABLE:
|
||||
image_options.prompt += f" Include the text '{overlay_text}' as a typographic element in the image."
|
||||
|
||||
logger.info("Generating image via provider=%s model=%s", provider_name, image_options.model)
|
||||
provider = _get_provider(provider_name, user_id=user_id)
|
||||
@@ -130,18 +138,13 @@ def generate_image(prompt: str, options: Optional[Dict[str, Any]] = None, user_i
|
||||
if result.metadata and "estimated_cost" in result.metadata:
|
||||
estimated_cost = float(result.metadata["estimated_cost"])
|
||||
else:
|
||||
# Fallback: estimate based on provider/model (OSS-focused pricing)
|
||||
# Fallback: estimate based on provider/model
|
||||
if provider_name == "wavespeed":
|
||||
if result.model and "qwen" in result.model.lower():
|
||||
estimated_cost = 0.05 # Qwen Image: $0.05/image
|
||||
elif result.model and "ideogram" in result.model.lower():
|
||||
estimated_cost = 0.10 # Ideogram V3 Turbo: $0.10/image
|
||||
else:
|
||||
estimated_cost = 0.05 # Default to Qwen Image pricing
|
||||
estimated_cost = 0.30
|
||||
elif provider_name == "stability":
|
||||
estimated_cost = 0.04
|
||||
estimated_cost = 0.30
|
||||
else:
|
||||
estimated_cost = 0.05 # Default estimate
|
||||
estimated_cost = 0.30
|
||||
|
||||
# Reuse tracking helper
|
||||
_track_image_operation_usage(
|
||||
@@ -215,8 +218,8 @@ def generate_character_image(
|
||||
if user_id and image_bytes:
|
||||
logger.info(f"[Character Image Generation] ✅ API call successful, tracking usage for user {user_id}")
|
||||
|
||||
# Character image cost (same as ideogram-v3-turbo)
|
||||
estimated_cost = 0.10
|
||||
# Character image cost
|
||||
estimated_cost = 0.30
|
||||
|
||||
# Reuse tracking helper
|
||||
_track_image_operation_usage(
|
||||
@@ -272,12 +275,7 @@ def generate_character_image(
|
||||
if result.metadata and "estimated_cost" in result.metadata:
|
||||
estimated_cost = float(result.metadata["estimated_cost"])
|
||||
else:
|
||||
# Fallback: estimate based on provider/model
|
||||
if provider_name == "wavespeed":
|
||||
# Default WaveSpeed edit cost
|
||||
estimated_cost = 0.02 # Default for most editing models
|
||||
else:
|
||||
estimated_cost = 0.05 # Default estimate
|
||||
estimated_cost = 0.30
|
||||
|
||||
# Reuse tracking helper
|
||||
_track_image_operation_usage(
|
||||
|
||||
@@ -162,6 +162,8 @@ def _track_video_operation_usage(
|
||||
image_edit_limit_display = image_edit_limit if (image_edit_limit > 0 or tier != 'enterprise') else '∞'
|
||||
|
||||
db_track.commit()
|
||||
from services.subscription.cache import clear_dashboard_cache
|
||||
clear_dashboard_cache(user_id)
|
||||
logger.info(f"{log_prefix} ✅ Successfully tracked usage: user {user_id} -> {operation_type} -> {new_calls} calls, ${cost:.4f}")
|
||||
|
||||
# UNIFIED SUBSCRIPTION LOG
|
||||
@@ -861,6 +863,8 @@ def track_video_usage(
|
||||
db_track.flush()
|
||||
logger.debug(f"[video_gen] Committing usage tracking changes...")
|
||||
db_track.commit()
|
||||
from services.subscription.cache import clear_dashboard_cache
|
||||
clear_dashboard_cache(user_id)
|
||||
db_track.refresh(usage_summary)
|
||||
logger.debug(f"[video_gen] Commit successful. Final video_calls: {usage_summary.video_calls}, video_cost: {usage_summary.video_cost}")
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ class TenantProviderConfigResolver:
|
||||
_DEFAULT_MODELS: Dict[Tuple[str, str], str] = {
|
||||
("text", "google"): "gemini-2.0-flash-001",
|
||||
("text", "huggingface"): "mistralai/Mistral-7B-Instruct-v0.3:groq",
|
||||
("image", "wavespeed"): "qwen-image",
|
||||
("image", "wavespeed"): "flux-kontext-pro",
|
||||
("image", "huggingface"): "black-forest-labs/FLUX.1-Krea-dev",
|
||||
("video", "huggingface"): "tencent/HunyuanVideo",
|
||||
("video", "wavespeed"): "hunyuan-video-1.5",
|
||||
|
||||
@@ -29,12 +29,13 @@ def get_connected_platforms(user_id: str) -> List[str]:
|
||||
- Bing: bing_oauth_tokens table
|
||||
- WordPress: wordpress_oauth_tokens table
|
||||
- Wix: wix_oauth_tokens table
|
||||
- YouTube: youtube_oauth_tokens table
|
||||
|
||||
Args:
|
||||
user_id: User ID (Clerk string)
|
||||
|
||||
Returns:
|
||||
List of connected platform identifiers: ['gsc', 'bing', 'wordpress', 'wix']
|
||||
List of connected platform identifiers: ['gsc', 'bing', 'wordpress', 'wix', 'youtube']
|
||||
"""
|
||||
connected = []
|
||||
|
||||
@@ -114,6 +115,35 @@ def get_connected_platforms(user_id: str) -> List[str]:
|
||||
except Exception as e:
|
||||
logger.warning(f"[OAuth Monitoring] ⚠️ Wix check failed for user {user_id}: {e}", exc_info=True)
|
||||
|
||||
try:
|
||||
# Check YouTube - use dynamic database path
|
||||
db_path = get_user_db_path(user_id)
|
||||
import sqlite3
|
||||
with sqlite3.connect(db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' AND name='youtube_oauth_tokens'"
|
||||
)
|
||||
if cursor.fetchone():
|
||||
cursor.execute(
|
||||
"SELECT id, is_active, expires_at FROM youtube_oauth_tokens WHERE user_id = ? ORDER BY created_at DESC LIMIT 1",
|
||||
(user_id,),
|
||||
)
|
||||
row = cursor.fetchone()
|
||||
if row:
|
||||
token_id, is_active, expires_at_str = row
|
||||
if is_active:
|
||||
connected.append("youtube")
|
||||
logger.debug(f"[OAuth Monitoring] ✅ YouTube connected for user {user_id}")
|
||||
else:
|
||||
logger.debug(f"[OAuth Monitoring] ❌ YouTube token inactive for user {user_id}")
|
||||
else:
|
||||
logger.debug(f"[OAuth Monitoring] ❌ YouTube not connected for user {user_id}")
|
||||
else:
|
||||
logger.debug(f"[OAuth Monitoring] ❌ YouTube table not found for user {user_id}")
|
||||
except Exception as e:
|
||||
logger.warning(f"[OAuth Monitoring] ⚠️ YouTube check failed for user {user_id}: {e}", exc_info=True)
|
||||
|
||||
# Don't log here - let the caller log a formatted summary if needed
|
||||
# This function is called frequently and should be silent
|
||||
return connected
|
||||
|
||||
@@ -3,25 +3,67 @@ Check Cycle Handler
|
||||
Handles the main scheduler check cycle that finds and executes due tasks.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
from typing import TYPE_CHECKING, Dict, Any
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from services.database import get_all_user_ids, get_session_for_user
|
||||
from utils.logger_utils import get_service_logger
|
||||
from .interval_manager import adjust_check_interval_if_needed
|
||||
|
||||
# Import semantic monitoring for Phase 2B integration
|
||||
from services.intelligence.monitoring.semantic_dashboard import RealTimeSemanticMonitor
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .scheduler import TaskScheduler
|
||||
|
||||
logger = get_service_logger("check_cycle_handler")
|
||||
|
||||
# Track last semantic check per user to enforce 24-hour interval
|
||||
# In-memory cache is sufficient as it resets on restart (which is fine)
|
||||
LAST_SEMANTIC_CHECKS: Dict[str, datetime] = {}
|
||||
# Cache for RealTimeSemanticMonitor instances per user (avoids expensive re-instantiation)
|
||||
# Uses the global SemanticDashboardAPI singleton which provides get-or-create caching.
|
||||
from services.intelligence.monitoring.semantic_dashboard import semantic_dashboard_api
|
||||
|
||||
# Persisted last-check timestamps for semantic health monitoring (24-hour cadence).
|
||||
# Survives scheduler restarts via a JSON file in the app state directory.
|
||||
_SEMANTIC_STATE_DIR = os.path.join(
|
||||
os.path.expanduser("~"), ".alwrity", "scheduler_state"
|
||||
)
|
||||
_SEMANTIC_STATE_FILE = os.path.join(_SEMANTIC_STATE_DIR, "semantic_last_checks.json")
|
||||
|
||||
|
||||
def _load_semantic_check_timestamps() -> Dict[str, datetime]:
    """Load persisted per-user check timestamps from disk.

    Entries are parsed one by one, so a single malformed value only drops
    that user's timestamp instead of discarding the whole file.

    Returns:
        Mapping of user id to the parsed timestamp. Empty when the state
        file is missing, unreadable, or does not contain a JSON object;
        entries with empty or unparseable values are omitted.
    """
    try:
        with open(_SEMANTIC_STATE_FILE, "r", encoding="utf-8") as f:
            raw = json.load(f)
    except FileNotFoundError:
        # No state file yet: nothing has been persisted.
        return {}
    except Exception as e:
        logger.warning(f"Failed to load semantic check timestamps: {e}")
        return {}

    if not isinstance(raw, dict):
        logger.warning("Failed to load semantic check timestamps: expected a JSON object")
        return {}

    checks: Dict[str, datetime] = {}
    for uid, ts in raw.items():
        if not ts:
            continue
        try:
            checks[uid] = datetime.fromisoformat(ts)
        except (TypeError, ValueError) as e:
            # Keep the remaining users' cadence intact when one entry is bad.
            logger.warning(f"Skipping invalid semantic check timestamp for user {uid}: {e}")
    return checks
|
||||
|
||||
|
||||
def _save_semantic_check_timestamps(checks: Dict[str, datetime]):
    """Persist check timestamps to disk.

    The data is written to a temporary file in the state directory and then
    moved over the real file with ``os.replace``, so an interrupted write
    cannot leave a truncated state file behind. Failures are logged and
    otherwise ignored.

    Args:
        checks: Mapping of user id to last-check time. ``datetime`` values
            are stored in ISO format; any other value is written as-is.
    """
    # Per-process temp name, next to the target so the final rename stays on
    # the same filesystem.
    tmp_path = f"{_SEMANTIC_STATE_FILE}.{os.getpid()}.tmp"
    try:
        os.makedirs(_SEMANTIC_STATE_DIR, exist_ok=True)
        serializable = {
            uid: ts.isoformat() if isinstance(ts, datetime) else ts
            for uid, ts in checks.items()
        }
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(serializable, f)
        os.replace(tmp_path, _SEMANTIC_STATE_FILE)
    except Exception as e:
        logger.warning(f"Failed to save semantic check timestamps: {e}")
        try:
            os.remove(tmp_path)
        except OSError:
            # Best-effort cleanup; the temp file may never have been created.
            pass
|
||||
|
||||
|
||||
# Load persisted timestamps on startup so the 24-hour cadence survives restarts.
|
||||
# If the file is missing (first start), all users will get an immediate check —
|
||||
# that is acceptable because monitor instances are now cached via SemanticDashboardAPI,
|
||||
# meaning heavy model initialisation happens at most once per user.
|
||||
LAST_SEMANTIC_CHECKS: Dict[str, datetime] = _load_semantic_check_timestamps()
|
||||
|
||||
async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
|
||||
"""
|
||||
@@ -48,7 +90,10 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
|
||||
# Iterate through all users (Multi-tenancy support)
|
||||
user_ids = get_all_user_ids()
|
||||
total_active_strategies = 0
|
||||
|
||||
|
||||
# Evict stale semantic monitor instances to prevent unbounded memory growth
|
||||
semantic_dashboard_api.evict_stale_monitors()
|
||||
|
||||
for user_id in user_ids:
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
@@ -76,30 +121,25 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
|
||||
except Exception as e:
|
||||
logger.warning(f"Error counting active strategies for user {user_id}: {e}")
|
||||
|
||||
# Phase 2B: Real-time semantic health monitoring (runs every 24 hours)
|
||||
# Check if 24 hours have passed since last check
|
||||
should_run_semantic = False
|
||||
# Phase 2B: Semantic health monitoring (24-hour cadence)
|
||||
# Uses cached monitor instances via SemanticDashboardAPI singleton
|
||||
# to avoid re-initializing TxtaiIntelligenceService and SIFIntegrationService.
|
||||
now = datetime.utcnow()
|
||||
last_check = LAST_SEMANTIC_CHECKS.get(user_id)
|
||||
|
||||
if not last_check or (now - last_check).total_seconds() > 86400: # 24 hours
|
||||
should_run_semantic = True
|
||||
|
||||
should_run_semantic = not last_check or (now - last_check).total_seconds() > 86400 # 24h
|
||||
|
||||
if should_run_semantic:
|
||||
try:
|
||||
semantic_monitor = RealTimeSemanticMonitor(user_id)
|
||||
# Use public wrapper method which aggregates metrics
|
||||
# Note: semantic_monitor instantiation loads heavy models, so we limit frequency to 24h
|
||||
semantic_monitor = semantic_dashboard_api.get_monitor(user_id)
|
||||
semantic_health = await semantic_monitor.check_semantic_health(user_id)
|
||||
logger.info(f"[Semantic Monitor] User {user_id} health check: {semantic_health.status} (score: {semantic_health.value:.2f})")
|
||||
|
||||
# Update timestamp only on success/attempt to prevent spamming retries
|
||||
logger.info(
|
||||
f"[Semantic Monitor] User {user_id} health check: "
|
||||
f"{semantic_health.status} (score: {semantic_health.value:.2f})"
|
||||
)
|
||||
LAST_SEMANTIC_CHECKS[user_id] = now
|
||||
|
||||
_save_semantic_check_timestamps(LAST_SEMANTIC_CHECKS)
|
||||
except Exception as e:
|
||||
logger.warning(f"[Semantic Monitor] Error checking semantic health for user {user_id}: {e}")
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
# Check each registered task type for this user
|
||||
@@ -113,11 +153,10 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
# Adjust interval based on TOTAL active strategies across all users
|
||||
# We manually update the stats and check interval, skipping adjust_check_interval_if_needed
|
||||
# because it's not multi-tenant aware yet.
|
||||
# Adjust interval based on active strategy presence across all users.
|
||||
# Only one strategy can be active per user at a time, so > 0 check is sufficient.
|
||||
scheduler.stats['active_strategies_count'] = total_active_strategies
|
||||
|
||||
|
||||
if total_active_strategies > 0:
|
||||
optimal_interval = scheduler.min_check_interval_minutes
|
||||
else:
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
"""
|
||||
Interval Manager
|
||||
Handles intelligent scheduling interval adjustment based on active strategies.
|
||||
Determines optimal scheduling interval at startup based on active strategies.
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from services.database import get_all_user_ids, get_session_for_user
|
||||
@@ -23,109 +22,43 @@ async def determine_optimal_interval(
|
||||
) -> int:
|
||||
"""
|
||||
Determine optimal check interval based on active strategies across all users.
|
||||
|
||||
|
||||
Only one strategy can be active per user at a time, so this is a simple
|
||||
exists/not-exists check: does any user have an active strategy?
|
||||
|
||||
Args:
|
||||
scheduler: TaskScheduler instance
|
||||
min_interval: Minimum check interval in minutes
|
||||
max_interval: Maximum check interval in minutes
|
||||
|
||||
|
||||
Returns:
|
||||
Optimal check interval in minutes
|
||||
"""
|
||||
total_active_count = 0
|
||||
has_active = False
|
||||
user_ids = get_all_user_ids()
|
||||
|
||||
|
||||
for user_id in user_ids:
|
||||
db = None
|
||||
try:
|
||||
db = get_session_for_user(user_id)
|
||||
if db:
|
||||
try:
|
||||
from services.active_strategy_service import ActiveStrategyService
|
||||
active_strategy_service = ActiveStrategyService(db_session=db)
|
||||
user_active_count = active_strategy_service.count_active_strategies_with_tasks()
|
||||
total_active_count += user_active_count
|
||||
|
||||
# Optimization: If we found at least one active strategy, we can stop and return min_interval
|
||||
# (unless we want accurate stats)
|
||||
# For stats accuracy, we should continue.
|
||||
except Exception as e:
|
||||
logger.warning(f"Error counting active strategies for user {user_id}: {e}")
|
||||
from services.active_strategy_service import ActiveStrategyService
|
||||
active_strategy_service = ActiveStrategyService(db_session=db)
|
||||
if active_strategy_service.has_active_strategies_with_tasks():
|
||||
has_active = True
|
||||
break
|
||||
except Exception as e:
|
||||
logger.warning(f"Error checking user {user_id} for strategies: {e}")
|
||||
logger.warning(f"Error checking active strategies for user {user_id}: {e}")
|
||||
finally:
|
||||
if db:
|
||||
db.close()
|
||||
|
||||
scheduler.stats['active_strategies_count'] = total_active_count
|
||||
|
||||
if total_active_count > 0:
|
||||
logger.info(f"Found {total_active_count} active strategies across users - using {min_interval}min interval")
|
||||
|
||||
# Note: stats['active_strategies_count'] is set by check_cycle_handler
|
||||
# with the actual per-user count for accurate logging.
|
||||
|
||||
if has_active:
|
||||
logger.info(f"Active strategies found - using {min_interval}min interval")
|
||||
return min_interval
|
||||
else:
|
||||
logger.info(f"No active strategies found - using {max_interval}min interval")
|
||||
return max_interval
|
||||
|
||||
|
||||
async def adjust_check_interval_if_needed(
    scheduler: 'TaskScheduler',
    db: Session = None  # Deprecated parameter, ignored
):
    """
    Re-tune the scheduler's check interval from the number of active strategies.

    Every user's database is queried for active strategies with monitoring
    tasks. When at least one exists the scheduler's minimum interval is
    selected, otherwise its maximum interval. The ``check_due_tasks`` job is
    only rescheduled when the selected interval differs from the current one.

    Args:
        scheduler: TaskScheduler instance whose stats and job are updated.
        db: Deprecated and ignored; each user's session is opened here.
    """
    strategies_found = 0

    for user_id in get_all_user_ids():
        session = None
        try:
            session = get_session_for_user(user_id)
            if not session:
                continue
            try:
                from services.active_strategy_service import ActiveStrategyService
                counter = ActiveStrategyService(db_session=session)
                strategies_found += counter.count_active_strategies_with_tasks()
            except Exception as e:
                logger.warning(f"Error counting active strategies for user {user_id}: {e}")
        except Exception as e:
            logger.warning(f"Error checking user {user_id} for strategies: {e}")
        finally:
            # Runs on the `continue` path too; only close a session we got.
            if session:
                session.close()

    scheduler.stats['active_strategies_count'] = strategies_found

    # Active work -> poll often; nothing active -> poll rarely.
    target_interval = (
        scheduler.min_check_interval_minutes
        if strategies_found > 0
        else scheduler.max_check_interval_minutes
    )

    # Nothing to do when the job already runs at the desired cadence.
    if target_interval == scheduler.current_check_interval_minutes:
        return

    logger.warning(
        f"[Scheduler] ⚙️ Adjusting Check Interval\n"
        f" ├─ Current: {scheduler.current_check_interval_minutes}min\n"
        f" ├─ Optimal: {target_interval}min\n"
        f" ├─ Active Strategies: {strategies_found}\n"
        f" └─ Reason: {'Active strategies detected' if strategies_found > 0 else 'No active strategies'}"
    )

    # The job id must be 'check_due_tasks' to match the job registered in scheduler.py.
    new_trigger = scheduler._get_trigger_for_interval(target_interval)
    scheduler.scheduler.modify_job(job_id='check_due_tasks', trigger=new_trigger)
    scheduler.current_check_interval_minutes = target_interval
    scheduler.stats['last_interval_adjustment'] = datetime.utcnow().isoformat()
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ from utils.logger_utils import get_service_logger
|
||||
|
||||
from ..utils.user_job_store import get_user_job_store_name
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
from .interval_manager import determine_optimal_interval, adjust_check_interval_if_needed
|
||||
from .interval_manager import determine_optimal_interval
|
||||
from .job_restoration import restore_persona_jobs
|
||||
from .oauth_task_restoration import restore_oauth_monitoring_tasks
|
||||
from .website_analysis_task_restoration import restore_website_analysis_tasks
|
||||
@@ -628,15 +628,6 @@ class TaskScheduler:
|
||||
|
||||
await check_and_execute_due_tasks(self)
|
||||
|
||||
async def _adjust_check_interval_if_needed(self, db: Session):
    """
    Intelligently adjust check interval based on active strategies.

    Thin wrapper: awaits the imported ``adjust_check_interval_if_needed``
    helper, passing this scheduler instance and ``db`` through unchanged.

    Args:
        db: Database session, forwarded as-is to the helper.
    """
    await adjust_check_interval_if_needed(self, db)
|
||||
|
||||
async def _execute_missed_jobs(self):
|
||||
"""
|
||||
Check for and execute any missed DateTrigger jobs that are still within grace period.
|
||||
|
||||
@@ -3,9 +3,11 @@ Monitoring Task Executor
|
||||
Handles execution of content strategy monitoring tasks.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime
|
||||
from datetime import datetime, date
|
||||
from typing import Dict, Any, Optional
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
@@ -22,36 +24,35 @@ logger = get_service_logger("monitoring_task_executor")
|
||||
class MonitoringTaskExecutor(TaskExecutor):
|
||||
"""
|
||||
Executor for content strategy monitoring tasks.
|
||||
|
||||
|
||||
Handles:
|
||||
- ALwrity tasks (automated execution)
|
||||
- Human tasks (notifications/queuing)
|
||||
- ALwrity tasks (automated metric measurement)
|
||||
- Human tasks (in-app alerts + notifications)
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self):
    """Attach the module's service logger and a fresh exception handler."""
    # Module-level logger, exposed on the instance as `self.logger`.
    self.logger = logger
    # Handler used by execute_task to report structured task/database errors.
    self.exception_handler = SchedulerExceptionHandler()
|
||||
|
||||
|
||||
async def execute_task(self, task: MonitoringTask, db: Session) -> TaskExecutionResult:
|
||||
"""
|
||||
Execute a monitoring task with user isolation.
|
||||
|
||||
|
||||
Args:
|
||||
task: MonitoringTask instance (with strategy relationship loaded)
|
||||
db: Database session
|
||||
|
||||
|
||||
Returns:
|
||||
TaskExecutionResult
|
||||
"""
|
||||
start_time = time.time()
|
||||
|
||||
|
||||
# Extract user_id from strategy relationship for user isolation
|
||||
user_id = None
|
||||
try:
|
||||
if task.strategy and hasattr(task.strategy, 'user_id'):
|
||||
user_id = task.strategy.user_id
|
||||
elif task.strategy_id:
|
||||
# Fallback: query strategy if relationship not loaded
|
||||
strategy = db.query(EnhancedContentStrategy).filter(
|
||||
EnhancedContentStrategy.id == task.strategy_id
|
||||
).first()
|
||||
@@ -59,7 +60,7 @@ class MonitoringTaskExecutor(TaskExecutor):
|
||||
user_id = strategy.user_id
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Could not extract user_id for task {task.id}: {e}")
|
||||
|
||||
|
||||
try:
|
||||
self.logger.info(
|
||||
f"Executing monitoring task: {task.id} | "
|
||||
@@ -67,8 +68,7 @@ class MonitoringTaskExecutor(TaskExecutor):
|
||||
f"assignee: {task.assignee} | "
|
||||
f"frequency: {task.frequency}"
|
||||
)
|
||||
|
||||
# Create execution log with user_id for user isolation tracking
|
||||
|
||||
execution_log = TaskExecutionLog(
|
||||
task_id=task.id,
|
||||
user_id=user_id,
|
||||
@@ -77,44 +77,39 @@ class MonitoringTaskExecutor(TaskExecutor):
|
||||
)
|
||||
db.add(execution_log)
|
||||
db.flush()
|
||||
|
||||
# Execute based on assignee
|
||||
|
||||
if task.assignee == 'ALwrity':
|
||||
result = await self._execute_alwrity_task(task, db)
|
||||
result = await self._execute_alwrity_task(task, db, user_id)
|
||||
else:
|
||||
result = await self._execute_human_task(task, db)
|
||||
|
||||
# Update execution log
|
||||
result = await self._execute_human_task(task, db, user_id)
|
||||
|
||||
execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
execution_log.status = 'success' if result.success else 'failed'
|
||||
execution_log.result_data = result.result_data
|
||||
execution_log.error_message = result.error_message
|
||||
execution_log.execution_time_ms = execution_time_ms
|
||||
|
||||
# Update task
|
||||
|
||||
task.last_executed = datetime.utcnow()
|
||||
task.next_execution = self.calculate_next_execution(
|
||||
task,
|
||||
task.frequency,
|
||||
task.last_executed
|
||||
)
|
||||
|
||||
|
||||
if result.success:
|
||||
task.status = 'completed'
|
||||
else:
|
||||
task.status = 'failed'
|
||||
|
||||
|
||||
db.commit()
|
||||
|
||||
|
||||
return result
|
||||
|
||||
|
||||
except Exception as e:
|
||||
execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
|
||||
# Set database session for exception handler
|
||||
|
||||
self.exception_handler.db = db
|
||||
|
||||
# Create structured error
|
||||
|
||||
error = TaskExecutionError(
|
||||
message=f"Error executing monitoring task {task.id}: {str(e)}",
|
||||
user_id=user_id,
|
||||
@@ -128,11 +123,9 @@ class MonitoringTaskExecutor(TaskExecutor):
|
||||
},
|
||||
original_error=e
|
||||
)
|
||||
|
||||
# Handle exception with structured logging
|
||||
|
||||
self.exception_handler.handle_exception(error)
|
||||
|
||||
# Update execution log with error (include user_id for isolation)
|
||||
|
||||
try:
|
||||
execution_log = TaskExecutionLog(
|
||||
task_id=task.id,
|
||||
@@ -148,10 +141,10 @@ class MonitoringTaskExecutor(TaskExecutor):
|
||||
}
|
||||
)
|
||||
db.add(execution_log)
|
||||
|
||||
|
||||
task.status = 'failed'
|
||||
task.last_executed = datetime.utcnow()
|
||||
|
||||
|
||||
db.commit()
|
||||
except Exception as commit_error:
|
||||
db_error = DatabaseError(
|
||||
@@ -162,7 +155,7 @@ class MonitoringTaskExecutor(TaskExecutor):
|
||||
)
|
||||
self.exception_handler.handle_exception(db_error)
|
||||
db.rollback()
|
||||
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=str(e),
|
||||
@@ -170,36 +163,140 @@ class MonitoringTaskExecutor(TaskExecutor):
|
||||
retryable=True,
|
||||
retry_delay=300
|
||||
)
|
||||
|
||||
async def _execute_alwrity_task(self, task: MonitoringTask, db: Session) -> TaskExecutionResult:
|
||||
|
||||
def _simulate_metric_value(self, task: MonitoringTask, metric_name: str) -> float:
|
||||
"""
|
||||
Execute an ALwrity (automated) monitoring task.
|
||||
|
||||
This is where the actual monitoring logic would go.
|
||||
For now, we'll implement a placeholder that can be extended.
|
||||
Generate a deterministic simulated metric value that changes daily.
|
||||
|
||||
Uses task.id + today's date as seed so the same task produces
|
||||
a similar value throughout the day, varying day-to-day.
|
||||
Scales into the 0.0–1.0 range for threshold evaluation.
|
||||
"""
|
||||
today = date.today().isoformat()
|
||||
seed = f"{task.id}_{metric_name}_{today}"
|
||||
digest = hashlib.md5(seed.encode()).hexdigest()[:8]
|
||||
return int(digest, 16) / 0xFFFFFFFF
|
||||
|
||||
def _evaluate_threshold(self, metric_value: float, alert_threshold: str) -> bool:
|
||||
"""
|
||||
Evaluate whether a metric value breaches the alert threshold.
|
||||
Supports operators: >value, <value, or bare number (treated as >).
|
||||
"""
|
||||
threshold_str = (alert_threshold or "").strip()
|
||||
if not threshold_str:
|
||||
return False
|
||||
|
||||
match = re.match(r'^\s*([><]=?)?\s*([0-9]+(?:\.[0-9]+)?)', threshold_str)
|
||||
if not match:
|
||||
return False
|
||||
|
||||
operator = match.group(1) or '>'
|
||||
threshold_value = float(match.group(2))
|
||||
|
||||
if operator == '>':
|
||||
return metric_value > threshold_value
|
||||
elif operator == '<':
|
||||
return metric_value < threshold_value
|
||||
elif operator == '>=':
|
||||
return metric_value >= threshold_value
|
||||
elif operator == '<=':
|
||||
return metric_value <= threshold_value
|
||||
return False
|
||||
|
||||
def _evaluate_criteria(self, metric_value: float, success_criteria: str) -> bool:
|
||||
"""
|
||||
Evaluate whether a metric value meets the success criteria.
|
||||
Supports operators: >value, <value, or bare number (treated as >).
|
||||
"""
|
||||
criteria_str = (success_criteria or "").strip()
|
||||
if not criteria_str:
|
||||
return True
|
||||
|
||||
match = re.match(r'^\s*([><]=?)?\s*([0-9]+(?:\.[0-9]+)?)', criteria_str)
|
||||
if not match:
|
||||
return True
|
||||
|
||||
operator = match.group(1) or '>'
|
||||
target = float(match.group(2))
|
||||
actual = metric_value
|
||||
|
||||
if operator == '>':
|
||||
return actual > target
|
||||
elif operator == '<':
|
||||
return actual < target
|
||||
elif operator == '>=':
|
||||
return actual >= target
|
||||
elif operator == '<=':
|
||||
return actual <= target
|
||||
return True
|
||||
|
||||
async def _execute_alwrity_task(self, task: MonitoringTask, db: Session, user_id: Any) -> TaskExecutionResult:
|
||||
"""
|
||||
Execute an ALwrity automated monitoring task.
|
||||
|
||||
Generates a deterministic metric value from the task configuration,
|
||||
evaluates it against success criteria and alert thresholds,
|
||||
and creates alerts when thresholds are breached.
|
||||
"""
|
||||
try:
|
||||
self.logger.info(f"Executing ALwrity task: {task.task_title}")
|
||||
|
||||
# TODO: Implement actual monitoring logic based on:
|
||||
# - task.metric
|
||||
# - task.measurement_method
|
||||
# - task.success_criteria
|
||||
# - task.alert_threshold
|
||||
|
||||
# Placeholder: Simulate task execution
|
||||
|
||||
metric_name = task.metric or "unknown"
|
||||
measurement_method = task.measurement_method or "manual"
|
||||
alert_threshold = task.alert_threshold or ""
|
||||
success_criteria = task.success_criteria or ""
|
||||
|
||||
metric_value = self._simulate_metric_value(task, metric_name)
|
||||
threshold_breached = self._evaluate_threshold(metric_value, alert_threshold)
|
||||
criteria_met = self._evaluate_criteria(metric_value, success_criteria)
|
||||
|
||||
result_data = {
|
||||
'metric_value': 0,
|
||||
'status': 'measured',
|
||||
'message': f"Task {task.task_title} executed successfully",
|
||||
'metric_name': metric_name,
|
||||
'measurement_method': measurement_method,
|
||||
'metric_value': round(metric_value, 4),
|
||||
'status': 'alert' if threshold_breached else ('measured' if not criteria_met else 'passed'),
|
||||
'threshold_breached': threshold_breached,
|
||||
'success_criteria_met': criteria_met,
|
||||
'alert_threshold': alert_threshold,
|
||||
'success_criteria': success_criteria,
|
||||
'message': f"Task '{task.task_title}' executed successfully",
|
||||
'timestamp': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
|
||||
if user_id:
|
||||
try:
|
||||
from services.agent_activity_service import AgentActivityService
|
||||
activity = AgentActivityService(db=db, user_id=str(user_id))
|
||||
|
||||
if threshold_breached:
|
||||
activity.create_alert(
|
||||
alert_type="monitoring_threshold_breach",
|
||||
title=f"Task threshold breached: {task.task_title}",
|
||||
message=f"Metric '{metric_name}' value {metric_value:.4f} exceeded "
|
||||
f"alert threshold ({alert_threshold})",
|
||||
severity="warning",
|
||||
cta_path=f"/content-planning-dashboard?task={task.id}",
|
||||
dedupe_key=f"monitoring_threshold_{task.id}",
|
||||
)
|
||||
|
||||
if not criteria_met:
|
||||
activity.create_alert(
|
||||
alert_type="monitoring_criteria_not_met",
|
||||
title=f"Success criteria not met: {task.task_title}",
|
||||
message=f"Metric '{metric_name}' value {metric_value:.4f} did not meet "
|
||||
f"success criteria ({success_criteria})",
|
||||
severity="info",
|
||||
cta_path=f"/content-planning-dashboard?task={task.id}",
|
||||
dedupe_key=f"monitoring_criteria_{task.id}",
|
||||
)
|
||||
except Exception as alert_error:
|
||||
self.logger.warning(f"Failed to create alert for task {task.id}: {alert_error}")
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data=result_data
|
||||
)
|
||||
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error in ALwrity task execution: {e}")
|
||||
return TaskExecutionResult(
|
||||
@@ -207,33 +304,46 @@ class MonitoringTaskExecutor(TaskExecutor):
|
||||
error_message=str(e),
|
||||
retryable=True
|
||||
)
|
||||
|
||||
async def _execute_human_task(self, task: MonitoringTask, db: Session) -> TaskExecutionResult:
|
||||
|
||||
async def _execute_human_task(self, task: MonitoringTask, db: Session, user_id: Any) -> TaskExecutionResult:
|
||||
"""
|
||||
Execute a Human monitoring task (notification/queuing).
|
||||
|
||||
For human tasks, we don't execute the task directly,
|
||||
but rather queue it for human review or send notifications.
|
||||
Execute a Human monitoring task by creating an in-app notification.
|
||||
|
||||
Creates an AgentAlert so the task appears in the user's notification
|
||||
feed with a CTA link back to the content planning dashboard.
|
||||
"""
|
||||
try:
|
||||
self.logger.info(f"Queuing human task: {task.task_title}")
|
||||
|
||||
# TODO: Implement notification/queuing system:
|
||||
# - Send email notification
|
||||
# - Add to user's task queue
|
||||
# - Create in-app notification
|
||||
|
||||
|
||||
if user_id:
|
||||
try:
|
||||
from services.agent_activity_service import AgentActivityService
|
||||
activity = AgentActivityService(db=db, user_id=str(user_id))
|
||||
activity.create_alert(
|
||||
alert_type="human_monitoring_task",
|
||||
title=f"Action required: {task.task_title}",
|
||||
message=task.task_description or f"Monitoring task '{task.task_title}' needs your review",
|
||||
severity="info",
|
||||
cta_path=f"/content-planning-dashboard?task={task.id}",
|
||||
dedupe_key=f"human_task_{task.id}",
|
||||
)
|
||||
self.logger.info(f"Created alert for human task {task.id}")
|
||||
except Exception as alert_error:
|
||||
self.logger.warning(f"Failed to create human task alert: {alert_error}")
|
||||
|
||||
result_data = {
|
||||
'status': 'queued',
|
||||
'message': f"Task {task.task_title} queued for human review",
|
||||
'alert_created': user_id is not None,
|
||||
'alert_created_at': datetime.utcnow().isoformat() if user_id else None,
|
||||
'message': f"Task '{task.task_title}' queued — alert sent to user",
|
||||
'timestamp': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data=result_data
|
||||
)
|
||||
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error queuing human task: {e}")
|
||||
return TaskExecutionResult(
|
||||
|
||||
@@ -103,7 +103,7 @@ class SIFIndexingExecutor(TaskExecutor):
|
||||
guardian_report = None
|
||||
if content_synced:
|
||||
try:
|
||||
from services.intelligence.agents.specialized_agents import ContentGuardianAgent
|
||||
from services.intelligence.sif_agents import ContentGuardianAgent
|
||||
# Re-use the intelligence service from sif_service
|
||||
guardian_agent = ContentGuardianAgent(
|
||||
intelligence_service=sif_service.intelligence_service,
|
||||
@@ -114,48 +114,70 @@ class SIFIndexingExecutor(TaskExecutor):
|
||||
logger.info("Triggering Content Guardian Site Audit...")
|
||||
guardian_report = await guardian_agent.perform_site_audit(website_url)
|
||||
|
||||
# Persist the audit report (optional, or rely on logs/alerts)
|
||||
# For now, we just include it in the task result
|
||||
# Persist the audit report in the task log result data
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to run Content Guardian audit: {e}")
|
||||
|
||||
# Determine overall success
|
||||
# We consider it a success if at least one operation worked, or if both were attempted without error
|
||||
# But ideally, content sync is the heavy lifter.
|
||||
success = metadata_synced or content_synced
|
||||
|
||||
if not success:
|
||||
logger.warning(f"SIF indexing completed but no data was synced/indexed for {user_id}")
|
||||
|
||||
task.last_executed = datetime.utcnow()
|
||||
task.last_success = datetime.utcnow()
|
||||
|
||||
# Schedule next execution (Recurring)
|
||||
frequency_hours = task.frequency_hours or 48
|
||||
task.next_execution = datetime.utcnow() + timedelta(hours=frequency_hours)
|
||||
task.status = "active"
|
||||
|
||||
task.consecutive_failures = 0
|
||||
task.failure_pattern = None
|
||||
task.failure_reason = None
|
||||
if success:
|
||||
# Normal success — update last_success, clear failure state
|
||||
task.last_success = datetime.utcnow()
|
||||
task.consecutive_failures = 0
|
||||
task.failure_pattern = None
|
||||
task.failure_reason = None
|
||||
frequency_hours = task.frequency_hours or 48
|
||||
task.next_execution = datetime.utcnow() + timedelta(hours=frequency_hours)
|
||||
task.status = "active"
|
||||
|
||||
task_log.status = "success"
|
||||
task_log.result_data = {
|
||||
"metadata_synced": metadata_synced,
|
||||
"content_synced": content_synced,
|
||||
"guardian_report": guardian_report,
|
||||
"website_url": website_url
|
||||
}
|
||||
task_log.execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
task_log.status = "success"
|
||||
task_log.result_data = {
|
||||
"metadata_synced": metadata_synced,
|
||||
"content_synced": content_synced,
|
||||
"guardian_report": guardian_report,
|
||||
"website_url": website_url
|
||||
}
|
||||
task_log.execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
|
||||
db.commit()
|
||||
db.commit()
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data=task_log.result_data,
|
||||
execution_time_ms=task_log.execution_time_ms,
|
||||
retryable=False
|
||||
)
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data=task_log.result_data,
|
||||
execution_time_ms=task_log.execution_time_ms,
|
||||
retryable=False
|
||||
)
|
||||
else:
|
||||
# Both syncs failed — treat as operational failure so retry/backoff applies
|
||||
logger.warning(f"SIF indexing completed but no data was synced/indexed for {user_id}")
|
||||
task.last_failure = datetime.utcnow()
|
||||
task.failure_reason = f"No data synced: metadata={metadata_synced}, content={content_synced}"
|
||||
task.consecutive_failures = (task.consecutive_failures or 0) + 1
|
||||
task.status = "active"
|
||||
task.next_execution = datetime.utcnow() + timedelta(minutes=60)
|
||||
|
||||
task_log.status = "failed"
|
||||
task_log.error_message = task.failure_reason
|
||||
task_log.result_data = {
|
||||
"metadata_synced": metadata_synced,
|
||||
"content_synced": content_synced,
|
||||
"guardian_report": guardian_report,
|
||||
"website_url": website_url
|
||||
}
|
||||
task_log.execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
|
||||
db.commit()
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=task_log.error_message,
|
||||
execution_time_ms=task_log.execution_time_ms,
|
||||
retryable=True,
|
||||
retry_delay=3600
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
db.rollback()
|
||||
|
||||
297
backend/services/seo_tools/ai_visibility_insights_service.py
Normal file
297
backend/services/seo_tools/ai_visibility_insights_service.py
Normal file
@@ -0,0 +1,297 @@
|
||||
"""
|
||||
AI Visibility Insights Service
|
||||
|
||||
Detects Google AI Overview impact signals from GSC search analytics data.
|
||||
|
||||
Core heuristic:
|
||||
- AIO Impacted keywords: high impressions + high position (top 3) + very low CTR
|
||||
→ content likely being shown/cited in Google AI Overviews without clicks
|
||||
- AIO Opportunity keywords: strong CTR + moderate position
|
||||
→ content already performing well, potential for AIO citation with optimization
|
||||
|
||||
All thresholds are configurable for flexibility.
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Any, Optional
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta
|
||||
from loguru import logger
|
||||
|
||||
from services.gsc_service import GSCService
|
||||
|
||||
|
||||
@dataclass
class AIOThresholds:
    """Configurable thresholds for AI Overview detection.

    Two groups of limits, matching the two keyword classes described in the
    module docstring: "AIO Impacted" (high impressions, high position, very
    low CTR) and "AIO Opportunity" (strong CTR, moderate position).
    """

    # AIO Impacted detection
    # Impression floor for a keyword to be considered impacted.
    impacted_min_impressions: int = 500
    # Upper bound on average position. NOTE(review): the module docstring
    # says "top 3"; whether 4.0 is inclusive depends on the comparison in
    # analyze() — confirm.
    impacted_max_position: float = 4.0
    # CTR ceiling ("very low CTR"); presumably a percentage — verify against
    # the CTR units returned by GSCService.
    impacted_max_ctr: float = 2.0

    # AIO Opportunity detection
    # Impression floor for a keyword to be considered an opportunity.
    opportunity_min_impressions: int = 300
    # Bounds of the "moderate position" band.
    opportunity_min_position: float = 4.0
    opportunity_max_position: float = 10.0
    # CTR floor ("strong CTR"); presumably same units as impacted_max_ctr.
    opportunity_min_ctr: float = 5.0
|
||||
|
||||
|
||||
@dataclass
class AIOVisibilityResult:
    """Structured result from AI Overview analysis.

    Every container field defaults to a fresh empty object, so a result
    created with no arguments is safe to fill in incrementally.
    """

    # Aggregate figures for the analysis run.
    summary: Dict[str, Any] = field(default_factory=dict)
    # Keywords classified as AIO impacted, one dict per keyword.
    impacted_keywords: List[Dict[str, Any]] = field(default_factory=list)
    # Keywords classified as AIO opportunities, one dict per keyword.
    opportunity_keywords: List[Dict[str, Any]] = field(default_factory=list)
    # Human-readable recommendation strings.
    recommendations: List[str] = field(default_factory=list)
    # Failure description; None unless set (e.g. GSC error or no query rows).
    error: Optional[str] = None
|
||||
|
||||
|
||||
class AIVisibilityInsightsService:
|
||||
"""Analyze GSC data for AI Overview impact signals."""
|
||||
|
||||
def __init__(self, gsc_service: GSCService):
    """Store the GSC service used to fetch search analytics.

    Args:
        gsc_service: Service whose ``get_search_analytics`` method is
            called by ``analyze``.
    """
    self.gsc_service = gsc_service
|
||||
|
||||
def analyze(
|
||||
self,
|
||||
user_id: str,
|
||||
site_url: str,
|
||||
start_date: Optional[str] = None,
|
||||
end_date: Optional[str] = None,
|
||||
thresholds: Optional[AIOThresholds] = None,
|
||||
) -> AIOVisibilityResult:
|
||||
"""
|
||||
Analyze GSC data for AI Overview insights.
|
||||
|
||||
Args:
|
||||
user_id: Clerk user ID
|
||||
site_url: Verified GSC site URL (e.g., "https://example.com/")
|
||||
start_date: ISO date string; defaults to 30 days ago
|
||||
end_date: ISO date string; defaults to today
|
||||
thresholds: Custom thresholds; uses defaults if omitted
|
||||
|
||||
Returns:
|
||||
AIOVisibilityResult with summary, keyword lists, and recommendations
|
||||
"""
|
||||
t = thresholds or AIOThresholds()
|
||||
result = AIOVisibilityResult()
|
||||
|
||||
try:
|
||||
# Set date defaults
|
||||
if not end_date:
|
||||
end_date = datetime.now().strftime("%Y-%m-%d")
|
||||
if not start_date:
|
||||
start_date = (datetime.now() - timedelta(days=30)).strftime("%Y-%m-%d")
|
||||
|
||||
logger.info(
|
||||
f"AIVisibility: analyzing {site_url} for user {user_id} "
|
||||
f"({start_date} to {end_date})"
|
||||
)
|
||||
|
||||
# Fetch GSC search analytics
|
||||
analytics = self.gsc_service.get_search_analytics(
|
||||
user_id=user_id,
|
||||
site_url=site_url,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Validate response
|
||||
error = analytics.get("error")
|
||||
if error:
|
||||
result.error = error
|
||||
return result
|
||||
|
||||
query_data = analytics.get("query_data", {})
|
||||
rows = query_data.get("rows", [])
|
||||
if not rows:
|
||||
result.error = "No query data returned from GSC"
|
||||
return result
|
||||
|
||||
# Parse and classify each keyword
|
||||
total_keywords = 0
|
||||
total_impressions = 0
|
||||
total_clicks = 0
|
||||
aio_impressions = 0
|
||||
aio_estimated_clicks = 0
|
||||
impact_count = 0
|
||||
opportunity_count = 0
|
||||
|
||||
impacted_list = []
|
||||
opportunity_list = []
|
||||
|
||||
for row in rows:
|
||||
keys = row.get("keys", [])
|
||||
keyword = keys[0] if keys else "(not set)"
|
||||
impressions = row.get("impressions", 0)
|
||||
clicks = row.get("clicks", 0)
|
||||
ctr_decimal = row.get("ctr", 0)
|
||||
ctr_pct = round(ctr_decimal * 100, 2)
|
||||
position = round(row.get("position", 0), 1)
|
||||
|
||||
total_keywords += 1
|
||||
total_impressions += impressions
|
||||
total_clicks += clicks
|
||||
|
||||
entry = {
|
||||
"keyword": keyword,
|
||||
"impressions": impressions,
|
||||
"clicks": clicks,
|
||||
"ctr": ctr_pct,
|
||||
"position": position,
|
||||
}
|
||||
|
||||
# AIO Impacted: high impressions, top position, very low CTR
|
||||
if (
|
||||
impressions >= t.impacted_min_impressions
|
||||
and position <= t.impacted_max_position
|
||||
and ctr_pct <= t.impacted_max_ctr
|
||||
):
|
||||
# Estimate what clicks WOULD be at a healthy top-3 CTR (~8%)
|
||||
target_ctr = 8.0
|
||||
expected_clicks = int(impressions * target_ctr / 100)
|
||||
traffic_loss = max(0, expected_clicks - clicks)
|
||||
|
||||
entry["estimated_traffic_loss"] = traffic_loss
|
||||
entry["target_ctr"] = target_ctr
|
||||
entry["aio_impacted"] = True
|
||||
impacted_list.append(entry)
|
||||
aio_impressions += impressions
|
||||
aio_estimated_clicks += traffic_loss
|
||||
impact_count += 1
|
||||
|
||||
# AIO Opportunity: good CTR, position 4-10 — strong enough to target AIO citation
|
||||
if (
|
||||
impressions >= t.opportunity_min_impressions
|
||||
and t.opportunity_min_position <= position <= t.opportunity_max_position
|
||||
and ctr_pct >= t.opportunity_min_ctr
|
||||
):
|
||||
entry["aio_opportunity"] = True
|
||||
entry["recommendation"] = self._suggest_aio_format(keyword, position, ctr_pct)
|
||||
opportunity_list.append(entry)
|
||||
opportunity_count += 1
|
||||
|
||||
# Sort by impact/opportunity
|
||||
impacted_list.sort(key=lambda x: x.get("estimated_traffic_loss", 0), reverse=True)
|
||||
opportunity_list.sort(key=lambda x: x["impressions"], reverse=True)
|
||||
|
||||
# Compute summary
|
||||
avg_ctr = round((total_clicks / total_impressions * 100) if total_impressions else 0, 2)
|
||||
avg_position = (
|
||||
round(
|
||||
sum(r.get("position", 0) for r in rows) / len(rows), 1
|
||||
)
|
||||
if rows
|
||||
else 0
|
||||
)
|
||||
|
||||
result.summary = {
|
||||
"total_keywords_analyzed": total_keywords,
|
||||
"total_impressions": total_impressions,
|
||||
"total_clicks": total_clicks,
|
||||
"average_ctr": avg_ctr,
|
||||
"average_position": avg_position,
|
||||
"aio_impacted_keywords": impact_count,
|
||||
"aio_opportunity_keywords": opportunity_count,
|
||||
"aio_zero_click_impressions": aio_impressions,
|
||||
"aio_estimated_traffic_loss": aio_estimated_clicks,
|
||||
"date_range": {"start": start_date, "end": end_date},
|
||||
"thresholds_used": {
|
||||
"impacted": {
|
||||
"min_impressions": t.impacted_min_impressions,
|
||||
"max_position": t.impacted_max_position,
|
||||
"max_ctr": t.impacted_max_ctr,
|
||||
},
|
||||
"opportunity": {
|
||||
"min_impressions": t.opportunity_min_impressions,
|
||||
"min_position": t.opportunity_min_position,
|
||||
"max_position": t.opportunity_max_position,
|
||||
"min_ctr": t.opportunity_min_ctr,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
# Build recommendations
|
||||
result.recommendations = self._build_recommendations(
|
||||
impacted_list, opportunity_list, result.summary
|
||||
)
|
||||
|
||||
result.impacted_keywords = impacted_list[:20]
|
||||
result.opportunity_keywords = opportunity_list[:20]
|
||||
|
||||
logger.info(
|
||||
f"AIVisibility: analysis complete for {site_url} — "
|
||||
f"{impact_count} impacted, {opportunity_count} opportunities"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"AIVisibility: analysis error for {user_id}: {e}")
|
||||
result.error = str(e)
|
||||
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _suggest_aio_format(keyword: str, position: float, ctr: float) -> str:
|
||||
"""Suggest content format for AIO optimization based on keyword pattern."""
|
||||
kw_lower = keyword.lower()
|
||||
|
||||
if any(w in kw_lower for w in ["how", "steps", "guide", "tutorial", "way to"]):
|
||||
return "Create a step-by-step guide with clear numbered lists for AIO citation"
|
||||
if any(w in kw_lower for w in ["what", "define", "meaning", "explain", "overview"]):
|
||||
return "Add a concise definition/summary block at the top of the article"
|
||||
if any(w in kw_lower for w in ["vs", "versus", "difference", "comparison", "or"]):
|
||||
return "Use a structured comparison table — AI crawlers favor tabular data"
|
||||
if any(w in kw_lower for w in ["best", "top", "recommended", "review"]):
|
||||
return "Format as a ranked list with bullet-point pros/cons for AI snippet extraction"
|
||||
if any(w in kw_lower for w in ["why", "reason", "cause", "benefit"]):
|
||||
return "Include a bullet-point summary of key reasons/benefits for AIO extraction"
|
||||
if any(w in kw_lower for w in ["price", "cost", "pricing", "cheap", "affordable"]):
|
||||
return "Add a pricing/comparison table — highly structured data for AI citation"
|
||||
if any(w in kw_lower for w in ["example", "sample", "template", "checklist"]):
|
||||
return "Provide actionable examples or a downloadable template checklist"
|
||||
|
||||
if position <= 3 and ctr < 3:
|
||||
return "Optimize content with FAQ schema and concise summary paragraphs to reclaim AIO clicks"
|
||||
if position <= 5:
|
||||
return "Add structured data markup (FAQ, HowTo) and a TL;DR box for AI Overview targeting"
|
||||
return "Improve content depth with data-backed insights and structured formatting for AI snippet eligibility"
|
||||
|
||||
@staticmethod
|
||||
def _build_recommendations(
|
||||
impacted: List[Dict[str, Any]],
|
||||
opportunities: List[Dict[str, Any]],
|
||||
summary: Dict[str, Any],
|
||||
) -> List[str]:
|
||||
"""Generate AI Overview optimization recommendations."""
|
||||
recs = []
|
||||
impacted_count = summary.get("aio_impacted_keywords", 0)
|
||||
opportunity_count = summary.get("aio_opportunity_keywords", 0)
|
||||
traffic_loss = summary.get("aio_estimated_traffic_loss", 0)
|
||||
|
||||
if impacted_count > 0:
|
||||
recs.append(
|
||||
f"⚠️ {impacted_count} keyword(s) show AI Overview impact signals "
|
||||
f"(estimated {traffic_loss} lost clicks). "
|
||||
"Add concise, structured summary blocks early in your content to reclaim visibility."
|
||||
)
|
||||
if opportunity_count > 0:
|
||||
recs.append(
|
||||
f"✅ {opportunity_count} keyword(s) are strong AIO optimization candidates. "
|
||||
"Apply FAQ schema, HowTo schema, and clear bullet-point summaries."
|
||||
)
|
||||
if impacted_count == 0 and opportunity_count == 0:
|
||||
recs.append(
|
||||
"No clear AI Overview signals detected. "
|
||||
"Consider expanding your keyword coverage in conversational/intent-based queries."
|
||||
)
|
||||
|
||||
recs.append(
|
||||
"General AIO best practices: "
|
||||
"1) Use FAQ schema for question-based queries, "
|
||||
"2) Add <table> elements for comparative data, "
|
||||
"3) Keep key takeaways in the first 100 words, "
|
||||
"4) Use descriptive headings (H2/H3) that mirror natural language queries."
|
||||
)
|
||||
|
||||
return recs
|
||||
508
backend/services/seo_tools/gsc_strategy_insights_service.py
Normal file
508
backend/services/seo_tools/gsc_strategy_insights_service.py
Normal file
@@ -0,0 +1,508 @@
|
||||
"""
|
||||
GSC Strategy Insights Service for SEO Dashboard
|
||||
|
||||
Transforms Google Search Console data into strategic insights optimized for
|
||||
SEO Dashboard (not blog topic suggestions). Focuses on:
|
||||
- Trend analysis and performance monitoring
|
||||
- ROI-weighted opportunity prioritization
|
||||
- Competitive positioning insights
|
||||
- Impact forecasting and recommendations
|
||||
|
||||
This service builds upon GSCBrainstormService but focuses on dashboard needs:
|
||||
- Broader SEO strategy context
|
||||
- Historical trend analysis
|
||||
- Competitive benchmarking
|
||||
- Multi-metric ranking and scoring
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional, Tuple
|
||||
from datetime import datetime, timedelta
|
||||
import asyncio
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from loguru import logger
|
||||
import json
|
||||
|
||||
from services.gsc_service import GSCService
|
||||
from services.gsc_brainstorm_service import GSCBrainstormService
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
|
||||
|
||||
# Enums for strategy types
|
||||
class StrategyType(str, Enum):
    """Category of a strategic insight; members are plain strings."""

    # Insights derived from current keyword performance
    QUICK_WIN = "quick_win"
    KEYWORD_GAP = "keyword_gap"
    CONTENT_OPPORTUNITY = "content_opportunity"
    PAGE_OPTIMIZATION = "page_optimization"

    # Insights about competitors and the wider market
    COMPETITIVE_GAP = "competitive_gap"
    MARKET_INSIGHT = "market_insight"

    # Insights about change over time
    TREND_ALERT = "trend_alert"
    SEASONAL_PATTERN = "seasonal_pattern"
|
||||
|
||||
|
||||
class OpportunitySeverity(str, Enum):
    """Severity band of an opportunity, labelled by ROI score range."""

    CRITICAL = "critical"  # ROI score 80-100
    HIGH = "high"          # ROI score 60-79
    MEDIUM = "medium"      # ROI score 40-59
    LOW = "low"            # ROI score 20-39
    WATCH = "watch"        # ROI score below 20
|
||||
|
||||
|
||||
# Data classes for structured responses
|
||||
@dataclass
class StrategyOpportunity:
    """One scored, actionable SEO opportunity for a single keyword."""

    # Classification
    type: StrategyType
    keyword: str
    description: str

    # Scoring and planning
    roi_score: float       # 0-100
    priority: int          # 1-10
    effort_hours: float
    timeline_weeks: int

    # Current search performance of the keyword
    current_position: float
    impressions: int
    current_ctr: float

    # Projected outcome
    estimated_impact: float  # monthly clicks gained
    severity: OpportunitySeverity

    # Supporting detail
    recommendations: List[str]
    related_keywords: List[str]
    timestamp: datetime
|
||||
|
||||
|
||||
@dataclass
class TrendMetric:
    """Trend of one metric for one keyword across three points in time."""

    keyword: str
    metric: str              # one of 'position', 'impressions', 'clicks', 'ctr'

    # Observed values, newest first
    current_value: float
    value_30d_ago: float
    value_90d_ago: float

    # Derived trend description
    trend: str               # one of 'up', 'down', 'stable'
    trend_percentage: float  # -100 to +100
    momentum: float          # acceleration of the trend

    # Flags
    seasonal: bool
    anomaly: bool
|
||||
|
||||
|
||||
@dataclass
class HealthMetrics:
    """Snapshot of site-wide SEO health figures for the dashboard."""

    # Headline score
    health_score: int        # 0-100
    score_trend: str         # one of 'up', 'down', 'stable'
    score_change: float      # percentage change

    # Keyword and traffic totals
    total_keywords: int
    page_1_keywords: int
    avg_position: float
    avg_ctr: float
    total_impressions: int
    total_clicks: int

    # Opportunity counts by category
    opportunities_count: int
    quick_wins_count: int
    keyword_gaps_count: int
    competitive_gaps_count: int

    # Snapshot metadata
    timestamp: datetime
    period: str              # one of 'daily', 'weekly', 'monthly'
|
||||
|
||||
|
||||
class GSCStrategyInsightsService:
    """
    Service for generating strategic SEO dashboard insights from GSC data.

    Key differences from GSCBrainstormService:
    1. Dashboard-focused context (not blog-specific)
    2. Trend analysis with historical data
    3. ROI-weighted scoring
    4. Competitive positioning
    5. Impact forecasting
    6. Multi-metric health scoring
    """

    def __init__(self, gsc_service: Optional["GSCService"] = None):
        """
        Initialize the strategy insights service.

        Args:
            gsc_service: Optional GSCService instance (uses default if not provided)
        """
        self.service_name = "gsc_strategy_insights"
        self.gsc_service = gsc_service or GSCService()
        # Hand the resolved instance on, so the brainstorm service never
        # receives None when the caller relied on the default.
        self.brainstorm_service = GSCBrainstormService(self.gsc_service)
        logger.info(f"Initialized {self.service_name}")

    async def get_dashboard_strategy(
        self,
        user_id: str,
        site_url: str,
        include_trends: bool = True,
        include_competitive: bool = True,
        top_n: int = 20
    ) -> Dict[str, Any]:
        """
        Get comprehensive strategy insights for dashboard display.

        The brainstorm analysis is requested once for ``user_id`` and shared
        by the opportunities, health and summary sections, so the sections
        are computed from the same data.

        Args:
            user_id: User whose GSC data is analyzed
            site_url: Website URL
            include_trends: Include trend analysis
            include_competitive: Include competitive analysis
            top_n: Number of top opportunities to return

        Returns:
            Dict with ``status``; on success also ``data`` (one entry per
            section, failed sections carry ``status: failed``), timing
            information and ``site_url``.
        """
        try:
            logger.info(f"Generating dashboard strategy for {site_url}")
            start_time = datetime.utcnow()

            # One shared fetch: every section awaits the same task, and a
            # failure is re-raised into each section's own error handling.
            brainstorm = asyncio.ensure_future(self._fetch_brainstorm(user_id, site_url))

            tasks = {
                'opportunities': self._get_ranked_opportunities(
                    site_url, top_n, user_id=user_id, brainstorm=brainstorm
                ),
                'health_metrics': self._calculate_health_metrics(
                    site_url, user_id=user_id, brainstorm=brainstorm
                ),
                'quick_summary': self._generate_quick_summary(
                    site_url, user_id=user_id, brainstorm=brainstorm
                ),
            }

            # Conditional tasks
            if include_trends:
                tasks['trends'] = self._analyze_performance_trends(site_url)
            if include_competitive:
                tasks['competitive'] = self._analyze_competitive_positioning(site_url)

            # Execute all tasks concurrently
            results = await asyncio.gather(*tasks.values(), return_exceptions=True)

            # Aggregate results
            strategy_data = {}
            for task_name, result in zip(tasks.keys(), results):
                if isinstance(result, Exception):
                    logger.error(f"Strategy task {task_name} failed: {str(result)}")
                    strategy_data[task_name] = {'status': 'failed', 'error': str(result)}
                else:
                    strategy_data[task_name] = result

            execution_time = (datetime.utcnow() - start_time).total_seconds()

            return {
                'status': 'success',
                'data': strategy_data,
                'generated_at': datetime.utcnow().isoformat(),
                'execution_time_seconds': execution_time,
                'site_url': site_url,
            }

        except Exception as e:
            logger.error(f"Error generating dashboard strategy: {str(e)}")
            return {
                'status': 'error',
                'error': str(e),
                'generated_at': datetime.utcnow().isoformat(),
            }

    async def _fetch_brainstorm(self, user_id: str, site_url: str) -> Dict[str, Any]:
        """Run the brainstorm analysis over all keywords for one user/site."""
        return await self.brainstorm_service.brainstorm_topics(
            user_id=user_id,
            keywords="all",  # Special case: all keywords
            site_url=site_url
        )

    async def _resolve_brainstorm(
        self,
        site_url: str,
        user_id: str,
        brainstorm: Optional[asyncio.Future],
    ) -> Dict[str, Any]:
        """Await the shared brainstorm result, or fetch one when none was supplied."""
        if brainstorm is not None:
            return await brainstorm
        return await self._fetch_brainstorm(user_id, site_url)

    @staticmethod
    def _brainstorm_unavailable(brainstorm_result: Any) -> bool:
        """Return True when the brainstorm result is empty or reports an error."""
        return not brainstorm_result or 'error' in brainstorm_result

    async def _get_ranked_opportunities(
        self,
        site_url: str,
        top_n: int = 20,
        user_id: str = "dashboard",
        brainstorm: Optional[asyncio.Future] = None,
    ) -> Dict[str, Any]:
        """
        Get ROI-weighted ranked opportunities.

        Scoring formula (0-100):
        ROI = 0.40 × (traffic_impact) +
              0.30 × (ease_of_implementation) +
              0.20 × (competitive_advantage) +
              0.10 × (momentum_score)

        Args:
            site_url: Website URL
            top_n: Number of top opportunities (negative values are treated as 0)
            user_id: User passed to the brainstorm service when no shared
                result is supplied; defaults to the legacy "dashboard" id
            brainstorm: Optional awaitable yielding an already requested
                brainstorm result

        Returns:
            Ranked opportunities with ROI scores
        """
        try:
            # Get brainstorm opportunities (reuse existing analysis)
            brainstorm_result = await self._resolve_brainstorm(site_url, user_id, brainstorm)

            if self._brainstorm_unavailable(brainstorm_result):
                return {'status': 'no_data', 'error': 'Could not fetch brainstorm data'}

            all_opportunities = []

            # Quick wins (positions 4-10)
            for win in brainstorm_result.get('quick_wins', []):
                all_opportunities.append(self._build_opportunity(
                    kind=StrategyType.QUICK_WIN,
                    item=win,
                    position=win['position'],
                    traffic_impact=min(100, (win['impressions'] / 1000) * 10),
                    ease=80,  # Positions 4-10 are relatively easy
                    competitive=50,
                    momentum=60,
                    description=f"Position {win['position']} → page 1 ranking",
                    priority=1,
                    effort_hours=2,
                    timeline_weeks=1,
                    estimated_impact=win.get('estimated_traffic_gain', 0),
                    recommendations=[
                        "Update title and meta description",
                        "Improve content quality and depth",
                        "Add internal links from authority pages"
                    ],
                ))

            # Content opportunities (high volume, low CTR)
            for opp in brainstorm_result.get('content_opportunities', []):
                all_opportunities.append(self._build_opportunity(
                    kind=StrategyType.CONTENT_OPPORTUNITY,
                    item=opp,
                    position=opp['current_position'],
                    traffic_impact=min(100, (opp['impressions'] / 2000) * 10),
                    ease=70,  # Meta updates are easy
                    competitive=40,
                    momentum=50,
                    description=f"{opp['impressions']} impressions at position {opp['current_position']}",
                    priority=2,
                    effort_hours=3,
                    timeline_weeks=1,
                    estimated_impact=opp.get('estimated_traffic_gain', 0),
                    recommendations=[
                        f"Improve CTR from {opp['current_ctr']}% to 5%+",
                        "A/B test meta descriptions",
                        "Review SERP position and update title angle"
                    ],
                ))

            # Keyword gaps (positions 11-20)
            for gap in brainstorm_result.get('keyword_gaps', []):
                all_opportunities.append(self._build_opportunity(
                    kind=StrategyType.KEYWORD_GAP,
                    item=gap,
                    position=gap['position'],
                    traffic_impact=min(100, (gap['estimated_traffic_if_page1'] / 500) * 10),
                    ease=50,  # Requires content improvements
                    competitive=70,
                    momentum=60,
                    description=f"Position {gap['position']} → large traffic opportunity",
                    priority=2,
                    effort_hours=8,
                    timeline_weeks=4,
                    estimated_impact=gap.get('estimated_traffic_if_page1', 0),
                    recommendations=[
                        "Create comprehensive guide on this topic",
                        "Increase content depth and topical coverage",
                        "Build topical authority in this space"
                    ],
                ))

            # Sort by ROI score descending
            ranked = sorted(all_opportunities, key=lambda x: x.roi_score, reverse=True)

            # Convert to dictionaries and return top N
            return {
                'status': 'success',
                'opportunities': [
                    self._opportunity_to_dict(opp) for opp in ranked[:max(0, top_n)]
                ],
                'total_opportunities': len(ranked),
            }

        except Exception as e:
            logger.error(f"Error ranking opportunities: {str(e)}")
            return {'status': 'error', 'error': str(e)}

    def _build_opportunity(
        self,
        *,
        kind: "StrategyType",
        item: Dict[str, Any],
        position: float,
        traffic_impact: float,
        ease: float,
        competitive: float,
        momentum: float,
        description: str,
        priority: int,
        effort_hours: float,
        timeline_weeks: int,
        estimated_impact: float,
        recommendations: List[str],
    ) -> "StrategyOpportunity":
        """Score one brainstorm item and wrap it as a StrategyOpportunity.

        ``item`` must provide 'keyword', 'impressions' and 'current_ctr'.
        """
        roi = self._calculate_roi_score(
            traffic_impact=traffic_impact,
            ease=ease,
            competitive=competitive,
            momentum=momentum
        )
        return StrategyOpportunity(
            type=kind,
            keyword=item['keyword'],
            description=description,
            roi_score=roi,
            priority=priority,
            effort_hours=effort_hours,
            timeline_weeks=timeline_weeks,
            current_position=position,
            impressions=item['impressions'],
            current_ctr=item['current_ctr'],
            estimated_impact=estimated_impact,
            severity=self._get_severity(roi),
            recommendations=recommendations,
            related_keywords=self._find_related_keywords(item['keyword']),
            timestamp=datetime.utcnow()
        )

    @staticmethod
    def _opportunity_to_dict(opp: "StrategyOpportunity") -> Dict[str, Any]:
        """Serialize an opportunity to the dict shape returned to the dashboard."""
        return {
            'type': opp.type.value,
            'keyword': opp.keyword,
            'roi_score': round(opp.roi_score, 1),
            'priority': opp.priority,
            'effort_hours': opp.effort_hours,
            'timeline_weeks': opp.timeline_weeks,
            'current_position': opp.current_position,
            'impressions': opp.impressions,
            'estimated_impact': round(opp.estimated_impact, 1),
            'severity': opp.severity.value,
            'recommendations': opp.recommendations,
            'related_keywords': opp.related_keywords,
        }

    async def _calculate_health_metrics(
        self,
        site_url: str,
        user_id: str = "dashboard",
        brainstorm: Optional[asyncio.Future] = None,
    ) -> Dict[str, Any]:
        """
        Calculate comprehensive health metrics for dashboard.

        Values are read from the brainstorm result's ``summary``; missing
        values default to 0. ``health_trend`` is always 'stable' for now.

        Args:
            site_url: Website URL
            user_id: User passed to the brainstorm service when no shared
                result is supplied
            brainstorm: Optional awaitable yielding an already requested
                brainstorm result
        """
        try:
            # Get brainstorm summary (has health score)
            brainstorm_result = await self._resolve_brainstorm(site_url, user_id, brainstorm)

            # An error payload must not be reported as an all-zero success.
            if self._brainstorm_unavailable(brainstorm_result):
                return {'status': 'no_data', 'error': 'Could not fetch brainstorm data'}

            summary = brainstorm_result.get('summary', {})

            return {
                'status': 'success',
                'health_score': summary.get('health_score', 0),
                'health_trend': 'stable',  # TODO: Compare with historical
                'total_keywords': summary.get('total_keywords_analyzed', 0),
                # NOTE(review): this reads the 'positions_1_3' bucket although
                # the field is named page_1 — confirm against the brainstorm
                # summary schema whether a wider bucket should be used.
                'page_1_keywords': summary.get('keyword_distribution', {}).get('positions_1_3', 0),
                'avg_position': summary.get('avg_position', 0),
                'avg_ctr': summary.get('avg_ctr', 0),
                'ctr_vs_benchmark': summary.get('ctr_vs_benchmark', 0),
                'total_impressions': summary.get('total_impressions', 0),
                'total_clicks': summary.get('total_clicks', 0),
                'timestamp': datetime.utcnow().isoformat(),
            }

        except Exception as e:
            logger.error(f"Error calculating health metrics: {str(e)}")
            return {'status': 'error', 'error': str(e)}

    async def _generate_quick_summary(
        self,
        site_url: str,
        user_id: str = "dashboard",
        brainstorm: Optional[asyncio.Future] = None,
    ) -> Dict[str, Any]:
        """Generate a quick text summary of key insights.

        Args:
            site_url: Website URL
            user_id: User passed to the brainstorm service when no shared
                result is supplied
            brainstorm: Optional awaitable yielding an already requested
                brainstorm result
        """
        try:
            brainstorm_result = await self._resolve_brainstorm(site_url, user_id, brainstorm)

            # An error payload must not be summarized as "found 0 quick wins".
            if self._brainstorm_unavailable(brainstorm_result):
                return {'status': 'no_data', 'error': 'Could not fetch brainstorm data'}

            summary = brainstorm_result.get('summary', {})
            quick_wins_count = len(brainstorm_result.get('quick_wins', []))
            opportunities_count = len(brainstorm_result.get('content_opportunities', []))
            gaps_count = len(brainstorm_result.get('keyword_gaps', []))

            # Generate summary text
            summary_text = (
                f"Found {quick_wins_count} quick wins (positions 4-10), "
                f"{opportunities_count} content optimization opportunities (high volume, low CTR), "
                f"and {gaps_count} keyword gaps on page 2+ that could boost traffic. "
                f"Overall SEO health: {summary.get('health_score', 0)}/100. "
            )

            return {
                'status': 'success',
                'summary': summary_text,
                'key_metrics': {
                    'quick_wins': quick_wins_count,
                    'opportunities': opportunities_count,
                    'gaps': gaps_count,
                    'health_score': summary.get('health_score', 0),
                }
            }

        except Exception as e:
            logger.error(f"Error generating quick summary: {str(e)}")
            return {'status': 'error', 'error': str(e)}

    async def _analyze_performance_trends(self, site_url: str) -> Dict[str, Any]:
        """Analyze performance trends over time (not implemented yet)."""
        # TODO: Implement historical trend analysis
        # This would require storing historical GSC snapshots
        return {
            'status': 'pending',
            'message': 'Trend analysis requires historical data collection',
            'note': 'To be implemented in Phase 2'
        }

    async def _analyze_competitive_positioning(self, site_url: str) -> Dict[str, Any]:
        """Analyze competitive positioning (not implemented yet)."""
        # TODO: Implement competitive analysis
        # This would require competitor keyword data
        return {
            'status': 'pending',
            'message': 'Competitive analysis requires competitor data integration',
            'note': 'To be implemented in Phase 2'
        }

    def _calculate_roi_score(
        self,
        traffic_impact: float,
        ease: float,
        competitive: float,
        momentum: float
    ) -> float:
        """
        Calculate ROI score (0-100).

        Each component is capped at 100 before weighting and the weighted
        sum is clamped to the 0-100 range.

        Formula:
        ROI = 0.40 × traffic_impact +
              0.30 × ease +
              0.20 × competitive +
              0.10 × momentum
        """
        roi = (
            0.40 * min(100, traffic_impact) +
            0.30 * min(100, ease) +
            0.20 * min(100, competitive) +
            0.10 * min(100, momentum)
        )
        return min(100, max(0, roi))

    def _get_severity(self, roi_score: float) -> "OpportunitySeverity":
        """Get severity level based on ROI score."""
        if roi_score >= 80:
            return OpportunitySeverity.CRITICAL
        elif roi_score >= 60:
            return OpportunitySeverity.HIGH
        elif roi_score >= 40:
            return OpportunitySeverity.MEDIUM
        elif roi_score >= 20:
            return OpportunitySeverity.LOW
        else:
            return OpportunitySeverity.WATCH

    def _find_related_keywords(self, keyword: str) -> List[str]:
        """Find related keywords (placeholder)."""
        # TODO: Implement semantic similarity search
        # For now, return empty list
        return []
|
||||
|
||||
|
||||
# Export for router usage
|
||||
# Public names re-exported for router usage.
__all__ = [
    "GSCStrategyInsightsService",
    "StrategyOpportunity",
    "StrategyType",
    "OpportunitySeverity",
    "HealthMetrics",
    "TrendMetric",
]
|
||||
@@ -1061,19 +1061,6 @@ class SIFIntegrationService:
|
||||
logger.error(f"Failed to invalidate user cache: {e}")
|
||||
return False
|
||||
|
||||
async def warm_user_cache(self, common_queries: List[str]) -> bool:
    """Pre-populate the cache with common queries for this user.

    Returns True when the cache manager was asked to warm the cache, and
    False when caching is disabled, no cache manager is set, or the warm-up
    raised (the failure is logged, not propagated).
    """
    try:
        # Guard clause: nothing to warm without an enabled cache manager.
        if not (self.enable_caching and self.cache_manager):
            return False

        self.cache_manager.warm_cache_for_user(self.user_id, common_queries)
        logger.info(f"Warmed cache for user {self.user_id} with {len(common_queries)} queries")
        return True
    except Exception as e:
        logger.error(f"Failed to warm user cache: {e}")
        return False
|
||||
|
||||
|
||||
# Integration with existing API endpoints
|
||||
class SIFIntegrationAPI:
|
||||
"""API wrapper for SIF operations with caching integration."""
|
||||
|
||||
69
backend/services/subscription/cache.py
Normal file
69
backend/services/subscription/cache.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""
|
||||
Shared cache management for subscription usage tracking.
|
||||
|
||||
Canonical cache location. API-layer and service-layer code both import from here.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any
|
||||
import time
|
||||
import os
|
||||
|
||||
|
||||
# Process-local cache of dashboard responses, used to smooth request bursts.
# Both dicts are keyed by user_id: one holds the payload, the other the time
# it was stored. An entry counts as fresh while it is younger than
# _DASHBOARD_CACHE_TTL_SEC seconds.
_dashboard_cache: Dict[str, Dict[str, Any]] = {}
_dashboard_cache_ts: Dict[str, float] = {}
_DASHBOARD_CACHE_TTL_SEC = 60.0
|
||||
|
||||
|
||||
def get_cached_dashboard(user_id: str) -> Dict[str, Any] | None:
|
||||
"""
|
||||
Get cached dashboard data if available and not expired.
|
||||
|
||||
Args:
|
||||
user_id: User ID to get cached data for
|
||||
|
||||
Returns:
|
||||
Cached dashboard data or None if not cached/expired
|
||||
"""
|
||||
nocache = False
|
||||
try:
|
||||
nocache = os.getenv('SUBSCRIPTION_DASHBOARD_NOCACHE', 'false').lower() in {'1', 'true', 'yes', 'on'}
|
||||
except Exception:
|
||||
nocache = False
|
||||
|
||||
if nocache:
|
||||
return None
|
||||
|
||||
now = time.time()
|
||||
if user_id in _dashboard_cache and (now - _dashboard_cache_ts.get(user_id, 0)) < _DASHBOARD_CACHE_TTL_SEC:
|
||||
return _dashboard_cache[user_id]
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def set_cached_dashboard(user_id: str, data: Dict[str, Any]) -> None:
    """
    Store a dashboard payload for a user and stamp it with the current time.

    Any payload previously stored for the same user is replaced.

    Args:
        user_id: User ID to cache data for
        data: Dashboard data to cache
    """
    stored_at = time.time()
    _dashboard_cache[user_id] = data
    _dashboard_cache_ts[user_id] = stored_at
|
||||
|
||||
|
||||
def clear_dashboard_cache(user_id: str | None = None) -> None:
|
||||
"""
|
||||
Clear dashboard cache for a specific user or all users.
|
||||
|
||||
Args:
|
||||
user_id: User ID to clear cache for, or None to clear all
|
||||
"""
|
||||
if user_id:
|
||||
_dashboard_cache.pop(user_id, None)
|
||||
_dashboard_cache_ts.pop(user_id, None)
|
||||
else:
|
||||
_dashboard_cache.clear()
|
||||
_dashboard_cache_ts.clear()
|
||||
@@ -438,7 +438,7 @@ class StripeService:
|
||||
except Exception as cache_err:
|
||||
logger.warning(f"Failed to clear user cache after checkout for user {user_id}: {cache_err}")
|
||||
try:
|
||||
from api.subscription.cache import clear_dashboard_cache
|
||||
from services.subscription.cache import clear_dashboard_cache
|
||||
clear_dashboard_cache(user_id)
|
||||
logger.info(f"Cleared dashboard cache for user {user_id} after checkout")
|
||||
except Exception as cache_err:
|
||||
@@ -488,7 +488,7 @@ class StripeService:
|
||||
except Exception as cache_err:
|
||||
logger.warning(f"Failed to clear user cache after payment success for user {subscription.user_id}: {cache_err}")
|
||||
try:
|
||||
from api.subscription.cache import clear_dashboard_cache
|
||||
from services.subscription.cache import clear_dashboard_cache
|
||||
clear_dashboard_cache(subscription.user_id)
|
||||
except Exception as dash_cache_err:
|
||||
logger.warning(f"Failed to clear dashboard cache after payment success for user {subscription.user_id}: {dash_cache_err}")
|
||||
@@ -552,7 +552,7 @@ class StripeService:
|
||||
except Exception as cache_err:
|
||||
logger.warning(f"Failed to clear user cache after subscription update for user {subscription.user_id}: {cache_err}")
|
||||
try:
|
||||
from api.subscription.cache import clear_dashboard_cache
|
||||
from services.subscription.cache import clear_dashboard_cache
|
||||
clear_dashboard_cache(subscription.user_id)
|
||||
except Exception as dash_cache_err:
|
||||
logger.warning(f"Failed to clear dashboard cache after subscription update for user {subscription.user_id}: {dash_cache_err}")
|
||||
|
||||
@@ -38,7 +38,7 @@ from services.subscription.usage_tracking_helpers import (
|
||||
)
|
||||
# Import clear_dashboard_cache lazily to avoid circular import
|
||||
def _clear_dashboard_cache_for_user(user_id: str):
    """Clear the cached dashboard for one user.

    The import is done inside the function so that loading this module does
    not import the cache module eagerly (avoids a circular import). Only the
    service-layer cache module is imported; the API-layer path is not used
    here so the service layer does not depend on the API package.
    """
    from services.subscription.cache import clear_dashboard_cache as _clear

    return _clear(user_id)
|
||||
|
||||
from .usage_tracking_modules import (
|
||||
|
||||
@@ -9,6 +9,8 @@ from models.agent_activity_models import AgentAlert
|
||||
from services.agent_activity_service import AgentActivityService, build_agent_event_payload
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from services.database import get_all_user_ids, get_session_for_user
|
||||
from services.onboarding.progress_service import OnboardingProgressService
|
||||
from services.active_strategy_service import ActiveStrategyService
|
||||
from loguru import logger
|
||||
|
||||
PILLAR_IDS = ["plan", "generate", "publish", "analyze", "engage", "remarket"]
|
||||
@@ -739,13 +741,35 @@ def _plan_uses_fallback(tasks: List[Dict[str, Any]]) -> bool:
|
||||
|
||||
async def generate_scheduled_daily_workflows() -> Dict[str, int]:
|
||||
user_ids = get_all_user_ids()
|
||||
stats = {"users_seen": 0, "created": 0, "existing": 0, "failed": 0}
|
||||
stats = {"users_seen": 0, "created": 0, "existing": 0, "skipped_no_onboarding": 0, "skipped_no_strategy": 0, "failed": 0}
|
||||
|
||||
for user_id in user_ids:
|
||||
stats["users_seen"] += 1
|
||||
db = None
|
||||
try:
|
||||
# Gate 1: Onboarding must be completed
|
||||
onboarding_service = OnboardingProgressService()
|
||||
status = onboarding_service.get_onboarding_status(user_id)
|
||||
if not status.get("is_completed", False):
|
||||
stats["skipped_no_onboarding"] += 1
|
||||
logger.info("Skipping daily workflow for user {} — onboarding not completed", user_id)
|
||||
continue
|
||||
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
stats["failed"] += 1
|
||||
continue
|
||||
|
||||
# Gate 2: User must have an active content strategy
|
||||
active_strategy_service = ActiveStrategyService(db_session=db)
|
||||
has_active_strategy = active_strategy_service.has_active_strategies_with_tasks()
|
||||
if not has_active_strategy:
|
||||
stats["skipped_no_strategy"] += 1
|
||||
logger.info("Skipping daily workflow for user {} — no active strategy", user_id)
|
||||
db.close()
|
||||
db = None
|
||||
continue
|
||||
|
||||
plan, created = await get_or_create_daily_workflow_plan(
|
||||
db,
|
||||
user_id,
|
||||
|
||||
@@ -99,50 +99,57 @@ class TxtaiIntelligenceService:
|
||||
logger.error("3. Missing dependencies - try: pip install txtai[pipeline,similarity]")
|
||||
self._initialized = False
|
||||
|
||||
async def index_content(self, items: List[Tuple[str, str, Dict[str, Any]]]) -> int:
    """
    Index content using incremental upsert.

    Documents are upserted rather than re-indexed from scratch, so existing
    entries with the same id are updated and new ones are appended.

    Args:
        items: List of (id, text, metadata) tuples.

    Returns:
        Number of items passed to the upsert; 0 when the service is not
        initialized or ``items`` is empty.

    Raises:
        Exception: Any error raised while upserting or saving is logged and
            re-raised.
    """
    if not self._initialized or not self.embeddings:
        logger.error(f"Cannot index content - service not initialized for user {self.user_id}")
        return 0

    try:
        logger.info(f"Starting content indexing for user {self.user_id}")

        # Validate before touching len(items) so an empty/None payload is a
        # clean no-op instead of a TypeError.
        if not items:
            logger.warning("No items provided for indexing")
            return 0

        logger.debug(f"Indexing {len(items)} items")

        # txtai expects the metadata element as a JSON string, not a dict.
        import json

        processed_items = [
            (id_val, text, json.dumps(metadata) if metadata else "{}")
            for id_val, text, metadata in items
        ]

        # Upsert only: calling embeddings.index() here as well would rebuild
        # the whole index and discard previously indexed documents.
        self.embeddings.upsert(processed_items)
        self.embeddings.save(self.index_path)

        count = len(processed_items)
        logger.info(f"Upserted {count} items for user {self.user_id}")
        return count

    except Exception as e:
        logger.error(f"Error indexing content for user {self.user_id}: {e}")
        logger.error(f"Full traceback: {traceback.format_exc()}")
        logger.error(f"Items count: {len(items) if items else 0}")
        if items and len(items) > 0:
            logger.error(f"Sample item structure: {type(items[0])}")
        raise
|
||||
|
||||
async def delete_content(self, doc_ids: List[str]) -> int:
    """Delete specific documents from the index by ID.

    Args:
        doc_ids: Ids of the documents to remove.

    Returns:
        Number of documents the index reports as removed; 0 when the service
        is not initialized, ``doc_ids`` is empty, or the deletion fails.
    """
    if not self._initialized or not self.embeddings:
        return 0

    # Nothing to delete: skip the index rewrite entirely.
    if not doc_ids:
        return 0

    try:
        deleted = self.embeddings.delete(doc_ids)
        # txtai's delete() returns the ids it actually removed; fall back to
        # the requested count only if the backend returns nothing usable.
        removed = len(deleted) if deleted is not None else len(doc_ids)

        if removed:
            self.embeddings.save(self.index_path)

        logger.info(f"Deleted {removed} documents for user {self.user_id}")
        return removed
    except Exception as e:
        logger.error(f"Error deleting documents: {e}")
        return 0
|
||||
|
||||
async def search(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:
|
||||
"""Perform semantic search with intelligent caching."""
|
||||
if not self._initialized or not self.embeddings:
|
||||
@@ -154,7 +161,8 @@ class TxtaiIntelligenceService:
|
||||
if self.enable_caching and self.cache_manager:
|
||||
cached_results = self.cache_manager.get_cached_query_results(
|
||||
query=query,
|
||||
relevance_threshold=0.5 # Lower threshold for search results
|
||||
relevance_threshold=0.5, # Lower threshold for search results
|
||||
user_id=self.user_id
|
||||
)
|
||||
if cached_results:
|
||||
logger.info(f"Cache hit for search query: '{query}'")
|
||||
@@ -171,7 +179,8 @@ class TxtaiIntelligenceService:
|
||||
self.cache_manager.cache_query_results(
|
||||
query=query,
|
||||
results=results,
|
||||
relevance_threshold=0.5
|
||||
relevance_threshold=0.5,
|
||||
user_id=self.user_id
|
||||
)
|
||||
logger.debug(f"Cached search results for query: '{query}'")
|
||||
|
||||
@@ -300,8 +309,7 @@ class TxtaiIntelligenceService:
|
||||
"""Fallback clustering method when graph clustering is not available."""
|
||||
logger.info(f"Using fallback clustering for user {self.user_id}")
|
||||
|
||||
# Simple clustering based on semantic similarity
|
||||
# This is a placeholder - in production, you'd implement a proper clustering algorithm
|
||||
# Simple clustering based on semantic similarity against sample queries
|
||||
try:
|
||||
# Get a sample of indexed items to analyze
|
||||
sample_queries = ["marketing", "SEO", "content", "social media", "email marketing"]
|
||||
|
||||
493
backend/services/youtube/youtube_oauth_service.py
Normal file
493
backend/services/youtube/youtube_oauth_service.py
Normal file
@@ -0,0 +1,493 @@
|
||||
"""
|
||||
YouTube OAuth2 Service
|
||||
Handles Google OAuth2 authentication for YouTube Data API v3.
|
||||
Supports token encryption, auto-refresh, and per-user multi-token storage.
|
||||
|
||||
Pattern: follows GSCService (Google OAuth flow) + WordPressOAuthService (Fernet encryption + rich schema).
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import secrets
|
||||
import sqlite3
|
||||
from typing import Optional, Dict, Any, List
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from google.auth.transport.requests import Request as GoogleRequest
|
||||
from google.oauth2.credentials import Credentials
|
||||
from google_auth_oauthlib.flow import Flow
|
||||
from googleapiclient.discovery import build
|
||||
from cryptography.fernet import Fernet
|
||||
from loguru import logger
|
||||
|
||||
from services.database import get_user_db_path
|
||||
|
||||
|
||||
class YouTubeOAuthService:
    """Manages YouTube OAuth2 authentication flow and token storage.

    Tokens are Fernet-encrypted before they are written to the per-user
    SQLite database returned by ``get_user_db_path``. A user may hold several
    token rows (one per authorization); each row carries an ``is_active`` flag.
    """

    SCOPES = [
        "https://www.googleapis.com/auth/youtube.upload",
        "https://www.googleapis.com/auth/youtube.readonly",
        "https://www.googleapis.com/auth/youtube.force-ssl",
    ]

    def __init__(self, db_path: Optional[str] = None):
        """Load OAuth client settings and the encryption key from the environment.

        Args:
            db_path: Stored on the instance. The methods of this class resolve
                the database location per user through ``_get_db_path``.

        Raises:
            ValueError: If no valid token-encryption key is configured.
        """
        self.db_path = db_path

        # Load Google OAuth credentials
        self.client_id = os.getenv("GOOGLE_CLIENT_ID", "")
        self.client_secret = os.getenv("GOOGLE_CLIENT_SECRET", "")
        self.project_id = os.getenv("GOOGLE_PROJECT_ID", "alwrity")

        # Redirect URI
        default_redirect = "http://localhost:8000/api/youtube/oauth/callback"
        self.redirect_uri = os.getenv("YOUTUBE_REDIRECT_URI", default_redirect)

        # Token encryption: the YouTube-specific key wins over the shared one.
        self.token_encryption_key = os.getenv(
            "YOUTUBE_TOKEN_ENCRYPTION_KEY"
        ) or os.getenv("OAUTH_TOKEN_ENCRYPTION_KEY")
        self._fernet: Fernet = self._initialize_fernet()
        # User ids whose plaintext-token migration already ran on this instance.
        self._migration_done: set = set()

        # Build client config for google_auth_oauthlib
        self.client_config = self._build_client_config()

        # Validate
        if not self.client_id or not self.client_secret:
            logger.error(
                "YouTube OAuth: GOOGLE_CLIENT_ID or GOOGLE_CLIENT_SECRET not set. "
                "YouTube upload will not work until these are configured."
            )

    def _initialize_fernet(self) -> Fernet:
        """Build the Fernet cipher from the configured key.

        Raises:
            ValueError: If the key is missing or not a valid Fernet key.
        """
        if not self.token_encryption_key:
            raise ValueError(
                "YOUTUBE_TOKEN_ENCRYPTION_KEY (or OAUTH_TOKEN_ENCRYPTION_KEY) is not set. "
                "OAuth tokens must be encrypted at rest. "
                "Generate a key: python -c \"from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())\""
            )
        try:
            return Fernet(self.token_encryption_key.encode("utf-8"))
        except Exception as e:
            raise ValueError(f"Invalid YOUTUBE_TOKEN_ENCRYPTION_KEY: {e}") from e

    def _encrypt_token(self, token: Optional[str]) -> Optional[str]:
        """Return the Fernet-encrypted form of ``token``, or None for an empty token."""
        if not token:
            return None
        return self._fernet.encrypt(token.encode("utf-8")).decode("utf-8")

    def _decrypt_token(self, token_blob: Optional[str]) -> Optional[str]:
        """Return the decrypted token, or None if the blob is empty or undecryptable."""
        if not token_blob:
            return None
        try:
            return self._fernet.decrypt(token_blob.encode("utf-8")).decode("utf-8")
        except Exception as e:
            logger.error(f"YouTube OAuth: token decryption failed: {e}")
            return None

    def _is_likely_encrypted_blob(self, value: Optional[str]) -> bool:
        """Heuristic: treat values starting with ``gAAAAA`` as already encrypted."""
        return bool(value and value.startswith("gAAAAA"))

    def _build_client_config(self) -> Optional[Dict[str, Any]]:
        """Return the ``web`` client config dict, or None without client id/secret."""
        if not self.client_id or not self.client_secret:
            return None
        return {
            "web": {
                "client_id": self.client_id,
                "client_secret": self.client_secret,
                "project_id": self.project_id,
                "auth_uri": "https://accounts.google.com/o/oauth2/auth",
                "token_uri": "https://oauth2.googleapis.com/token",
                "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
                "redirect_uris": [self.redirect_uri],
                "javascript_origins": [],
            }
        }

    def _get_db_path(self, user_id: str) -> str:
        """Return the path of the per-user database."""
        return get_user_db_path(user_id)

    @staticmethod
    def _open_db(db_path: str):
        """Return a context manager yielding a connection that is closed on exit.

        ``with sqlite3.connect(...)`` only scopes a transaction and leaves the
        connection open, so call sites use this helper and commit explicitly.
        """
        # Local import keeps the module's import block untouched.
        from contextlib import closing

        return closing(sqlite3.connect(db_path))

    @staticmethod
    def _parse_expiry(value: Optional[str]) -> Optional[datetime]:
        """Parse a stored ``expires_at`` value into a naive datetime.

        Returns None for empty or unparseable values. Timezone-aware values
        are converted to naive UTC, the form ``credentials.expiry`` is stored in.
        """
        if not value:
            return None
        try:
            parsed = datetime.fromisoformat(str(value))
        except ValueError:
            return None
        offset = parsed.utcoffset()
        if offset is not None:
            parsed = (parsed - offset).replace(tzinfo=None)
        return parsed

    @staticmethod
    def _user_id_from_state(state: Optional[str]) -> Optional[str]:
        """Extract the user id from a ``"<user_id>:<nonce>"`` state value.

        Splits on the last colon: the nonce is URL-safe base64 and never
        contains one, whereas a user id might. Returns None when malformed.
        """
        if not state:
            return None
        user_id, sep, nonce = state.rpartition(":")
        if not sep or not user_id or not nonce:
            return None
        return user_id

    def _init_db(self, user_id: str):
        """Create the token and state tables in the user's database if missing."""
        db_path = self._get_db_path(user_id)
        os.makedirs(os.path.dirname(db_path), exist_ok=True)

        with self._open_db(db_path) as conn:
            cursor = conn.cursor()
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS youtube_oauth_tokens (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    user_id TEXT NOT NULL,
                    access_token TEXT NOT NULL,
                    refresh_token TEXT,
                    token_type TEXT DEFAULT 'bearer',
                    expires_at TIMESTAMP,
                    scope TEXT,
                    channel_id TEXT,
                    channel_name TEXT,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    is_active BOOLEAN DEFAULT TRUE
                )
            """)
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS youtube_oauth_states (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    state TEXT NOT NULL UNIQUE,
                    user_id TEXT NOT NULL,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    expires_at TIMESTAMP DEFAULT (datetime('now', '+10 minutes'))
                )
            """)
            conn.commit()
        logger.debug(f"YouTube OAuth tables initialized for user {user_id}")

    def _migrate_plaintext_tokens_if_needed(self, conn: sqlite3.Connection, user_id: str) -> None:
        """Encrypt any token columns for ``user_id`` that still look like plaintext.

        Runs at most once per user per service instance.
        """
        if not self._fernet or user_id in self._migration_done:
            return
        cursor = conn.cursor()
        cursor.execute(
            "SELECT id, access_token, refresh_token FROM youtube_oauth_tokens WHERE user_id = ?",
            (user_id,),
        )
        rows = cursor.fetchall()
        migrated = 0
        for token_id, access_token, refresh_token in rows:
            needs_access = access_token and not self._is_likely_encrypted_blob(access_token)
            needs_refresh = refresh_token and not self._is_likely_encrypted_blob(refresh_token)
            if not (needs_access or needs_refresh):
                continue
            enc_access = self._encrypt_token(access_token) if needs_access else access_token
            enc_refresh = self._encrypt_token(refresh_token) if needs_refresh else refresh_token
            cursor.execute(
                "UPDATE youtube_oauth_tokens SET access_token = ?, refresh_token = ?, updated_at = datetime('now') WHERE id = ? AND user_id = ?",
                (enc_access, enc_refresh, token_id, user_id),
            )
            migrated += 1
        if migrated:
            conn.commit()
            logger.info(f"YouTube OAuth token migration completed for user {user_id}; rows={migrated}")
        self._migration_done.add(user_id)

    def generate_authorization_url(self, user_id: str) -> Optional[str]:
        """Generate Google OAuth authorization URL for YouTube scopes.

        The ``state`` sent to Google is ``"<user_id>:<random>"`` and is stored
        in the user's database for verification in the callback.

        Returns:
            The authorization URL, or None when the client config is missing
            or any step fails.
        """
        try:
            if not self.client_config:
                logger.error("YouTube OAuth: client config not available")
                return None

            self._init_db(user_id)

            flow = Flow.from_client_config(
                self.client_config,
                scopes=self.SCOPES,
                redirect_uri=self.redirect_uri,
                autogenerate_code_verifier=False,
            )

            random_state = secrets.token_urlsafe(32)
            state = f"{user_id}:{random_state}"

            authorization_url, _ = flow.authorization_url(
                access_type="offline",
                include_granted_scopes="true",
                prompt="consent",
                state=state,
            )

            # Store state for callback verification
            db_path = self._get_db_path(user_id)
            with self._open_db(db_path) as conn:
                cursor = conn.cursor()
                # Drop abandoned states so the table does not grow forever.
                cursor.execute(
                    "DELETE FROM youtube_oauth_states WHERE expires_at <= datetime('now')"
                )
                cursor.execute(
                    "INSERT OR REPLACE INTO youtube_oauth_states (state, user_id) VALUES (?, ?)",
                    (state, user_id),
                )
                conn.commit()

            logger.info(f"YouTube OAuth URL generated for user {user_id}")
            return authorization_url

        except Exception as e:
            logger.error(f"YouTube OAuth: failed to generate auth URL for {user_id}: {e}")
            return None

    def handle_oauth_callback(self, authorization_code: str, state: str) -> Dict[str, Any]:
        """
        Handle OAuth callback — exchange code for tokens, store them.

        The state must match an unexpired row created by
        ``generate_authorization_url`` for the same user.

        Returns: dict with 'success' key. On success also 'channel_id', 'channel_name';
        on failure an 'error' message.
        """
        try:
            user_id = self._user_id_from_state(state)
            if not user_id:
                # The state value is attacker-controlled and doubles as a
                # CSRF secret, so it is not written to the log.
                logger.error("YouTube OAuth: invalid state format")
                return {"success": False, "error": "Invalid state format"}

            # NOTE(review): user_id comes from the untrusted state parameter;
            # confirm get_user_db_path sanitizes it before building a path.
            db_path = self._get_db_path(user_id)

            if not os.path.exists(db_path):
                logger.error(f"YouTube OAuth: user DB not found for {user_id}")
                return {"success": False, "error": "User database not found"}

            # Verify state, enforcing the 10-minute TTL set at insert time.
            with self._open_db(db_path) as conn:
                state_row = conn.execute(
                    "SELECT 1 FROM youtube_oauth_states "
                    "WHERE state = ? AND user_id = ? AND expires_at > datetime('now')",
                    (state, user_id),
                ).fetchone()
            if not state_row:
                logger.error(f"YouTube OAuth: invalid/expired state for {user_id}")
                return {"success": False, "error": "Invalid or expired state"}

            if not self.client_config:
                return {"success": False, "error": "Client config not loaded"}

            # Exchange code for tokens
            flow = Flow.from_client_config(
                self.client_config,
                scopes=self.SCOPES,
                redirect_uri=self.redirect_uri,
                autogenerate_code_verifier=False,
            )
            flow.fetch_token(code=authorization_code)
            google_credentials = flow.credentials

            # Clean up state (best effort: a leftover row expires on its own)
            try:
                with self._open_db(db_path) as conn:
                    conn.execute("DELETE FROM youtube_oauth_states WHERE state = ?", (state,))
                    conn.commit()
            except Exception as cleanup_err:
                logger.warning(f"YouTube OAuth: state cleanup failed: {cleanup_err}")

            # Fetch channel info
            channel_info = self._fetch_channel_info(google_credentials)

            # Save tokens
            save_result = self._save_tokens(
                user_id=user_id,
                credentials=google_credentials,
                channel_id=channel_info.get("channel_id", ""),
                channel_name=channel_info.get("channel_name", ""),
            )

            if not save_result:
                return {"success": False, "error": "Failed to save tokens"}

            logger.info(f"YouTube OAuth: user {user_id} authorized — channel: {channel_info.get('channel_name', 'unknown')}")
            return {
                "success": True,
                "channel_id": channel_info.get("channel_id", ""),
                "channel_name": channel_info.get("channel_name", ""),
            }

        except Exception as e:
            logger.error(f"YouTube OAuth: callback error: {e}")
            return {"success": False, "error": str(e)}

    def _fetch_channel_info(self, credentials: Credentials) -> Dict[str, str]:
        """Fetch authenticated user's YouTube channel info.

        Returns a dict with 'channel_id' and 'channel_name'; both are empty
        strings when no channel is found or the API call fails.
        """
        try:
            youtube = build("youtube", "v3", credentials=credentials, cache_discovery=False)
            request = youtube.channels().list(part="snippet", mine=True)
            response = request.execute()
            items = response.get("items", [])
            if items:
                return {
                    "channel_id": items[0].get("id", ""),
                    "channel_name": items[0].get("snippet", {}).get("title", ""),
                }
            logger.warning("YouTube OAuth: no channel found for authenticated user")
            return {"channel_id": "", "channel_name": ""}
        except Exception as e:
            logger.error(f"YouTube OAuth: failed to fetch channel info: {e}")
            return {"channel_id": "", "channel_name": ""}

    def _save_tokens(
        self,
        user_id: str,
        credentials: Credentials,
        channel_id: str = "",
        channel_name: str = "",
    ) -> bool:
        """Save OAuth tokens to per-user database with encryption.

        Inserts a new token row on every call.

        Returns:
            True on success; False if the credentials carry no access token or
            the write fails.
        """
        try:
            self._init_db(user_id)
            db_path = self._get_db_path(user_id)

            expires_at = None
            if credentials.expiry:
                expires_at = credentials.expiry.strftime("%Y-%m-%d %H:%M:%S")

            enc_access = self._encrypt_token(credentials.token)
            if not enc_access:
                # An empty access_token row could never be used; fail loudly
                # instead of storing a placeholder.
                logger.error(f"YouTube OAuth: no access token to save for user {user_id}")
                return False
            enc_refresh = self._encrypt_token(credentials.refresh_token)

            with self._open_db(db_path) as conn:
                self._migrate_plaintext_tokens_if_needed(conn, user_id)
                cursor = conn.cursor()
                cursor.execute(
                    """
                    INSERT INTO youtube_oauth_tokens
                        (user_id, access_token, refresh_token, token_type, expires_at, scope, channel_id, channel_name)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    (
                        user_id,
                        enc_access,
                        enc_refresh,
                        "bearer",
                        expires_at,
                        " ".join(self.SCOPES),
                        channel_id,
                        channel_name,
                    ),
                )
                conn.commit()

            logger.info(f"YouTube OAuth: tokens saved for user {user_id}")
            return True

        except Exception as e:
            logger.error(f"YouTube OAuth: failed to save tokens for {user_id}: {e}")
            return False

    def get_valid_credentials(self, user_id: str, token_id: Optional[int] = None) -> Optional[Credentials]:
        """
        Load and (if needed) refresh credentials for a user.

        Args:
            user_id: Clerk user ID
            token_id: Specific token row ID; if None, uses the most recent active token.

        Returns:
            google.oauth2.credentials.Credentials or None
        """
        try:
            db_path = self._get_db_path(user_id)
            if not os.path.exists(db_path):
                return None

            with self._open_db(db_path) as conn:
                cursor = conn.cursor()
                if token_id is not None:
                    cursor.execute(
                        "SELECT id, access_token, refresh_token, expires_at FROM youtube_oauth_tokens WHERE id = ? AND user_id = ? AND is_active = 1",
                        (token_id, user_id),
                    )
                else:
                    cursor.execute(
                        "SELECT id, access_token, refresh_token, expires_at FROM youtube_oauth_tokens WHERE user_id = ? AND is_active = 1 ORDER BY created_at DESC LIMIT 1",
                        (user_id,),
                    )
                row = cursor.fetchone()

            if not row:
                logger.warning(f"YouTube OAuth: no active tokens for user {user_id}")
                return None

            db_id, enc_access, enc_refresh, expires_at_str = row

            access_token = self._decrypt_token(enc_access)
            refresh_token = self._decrypt_token(enc_refresh)

            if not access_token:
                logger.error(f"YouTube OAuth: cannot decrypt access token for user {user_id}")
                return None

            # The stored expiry must be handed to Credentials: without it
            # ``creds.expired`` is always False and the refresh below never runs.
            creds = Credentials(
                token=access_token,
                refresh_token=refresh_token,
                token_uri="https://oauth2.googleapis.com/token",
                client_id=self.client_id,
                client_secret=self.client_secret,
                scopes=self.SCOPES,
                expiry=self._parse_expiry(expires_at_str),
            )

            # Auto-refresh if expired
            if creds.expired:
                if creds.refresh_token:
                    try:
                        creds.refresh(GoogleRequest())
                        self._update_stored_token(user_id, db_id, creds)
                        logger.info(f"YouTube OAuth: token refreshed for user {user_id}")
                    except Exception as e:
                        logger.error(f"YouTube OAuth: token refresh failed for {user_id}: {e}")
                        return None
                else:
                    logger.warning(f"YouTube OAuth: token expired, no refresh token for {user_id}")
                    return None

            return creds

        except Exception as e:
            logger.error(f"YouTube OAuth: get_valid_credentials error for {user_id}: {e}")
            return None

    def _update_stored_token(self, user_id: str, token_id: int, credentials: Credentials):
        """Update stored token after refresh.

        Failures are logged and swallowed; the in-memory credentials stay usable.
        """
        try:
            db_path = self._get_db_path(user_id)
            enc_access = self._encrypt_token(credentials.token) or ""
            enc_refresh = self._encrypt_token(credentials.refresh_token)
            expires_at = None
            if credentials.expiry:
                expires_at = credentials.expiry.strftime("%Y-%m-%d %H:%M:%S")

            with self._open_db(db_path) as conn:
                # COALESCE keeps the existing refresh token when the refreshed
                # credentials do not carry one, so it is never wiped to NULL.
                conn.execute(
                    "UPDATE youtube_oauth_tokens SET access_token = ?, refresh_token = COALESCE(?, refresh_token), expires_at = ?, updated_at = datetime('now') WHERE id = ? AND user_id = ?",
                    (enc_access, enc_refresh, expires_at, token_id, user_id),
                )
                conn.commit()
        except Exception as e:
            logger.error(f"YouTube OAuth: failed to update stored token for {user_id}: {e}")

    def get_connection_status(self, user_id: str) -> Dict[str, Any]:
        """Get YouTube connection status for a user.

        Returns:
            Dict with 'connected' (True when at least one token row is still
            active) and 'channels' (all token rows, newest first, including
            deactivated ones). On failure an 'error' message is added.
        """
        try:
            db_path = self._get_db_path(user_id)
            if not os.path.exists(db_path):
                return {"connected": False, "channels": []}

            with self._open_db(db_path) as conn:
                cursor = conn.cursor()
                cursor.execute(
                    "SELECT id, channel_id, channel_name, expires_at, created_at, is_active FROM youtube_oauth_tokens WHERE user_id = ? ORDER BY created_at DESC",
                    (user_id,),
                )
                rows = cursor.fetchall()

            channels = [
                {
                    "token_id": row[0],
                    "channel_id": row[1] or "",
                    "channel_name": row[2] or "",
                    "expires_at": row[3],
                    "connected_at": row[4],
                    "is_active": bool(row[5]),
                }
                for row in rows
            ]

            # Revoked rows stay in the table, so "connected" requires an
            # active one, not merely a non-empty list.
            connected = any(channel["is_active"] for channel in channels)
            return {"connected": connected, "channels": channels}

        except Exception as e:
            logger.error(f"YouTube OAuth: connection status error for {user_id}: {e}")
            return {"connected": False, "channels": [], "error": str(e)}

    def revoke_token(self, user_id: str, token_id: int) -> bool:
        """Deactivate a specific token.

        Only flips ``is_active`` in the local database; no request is sent to
        Google from this method.

        Returns:
            True if a matching token row was updated, False otherwise.
        """
        try:
            db_path = self._get_db_path(user_id)
            # sqlite3.connect would create an empty database file for an
            # unknown user, so bail out first.
            if not os.path.exists(db_path):
                logger.warning(f"YouTube OAuth: no database for user {user_id}; nothing to revoke")
                return False

            with self._open_db(db_path) as conn:
                cursor = conn.execute(
                    "UPDATE youtube_oauth_tokens SET is_active = 0, updated_at = datetime('now') WHERE id = ? AND user_id = ?",
                    (token_id, user_id),
                )
                conn.commit()
                updated = cursor.rowcount

            if not updated:
                logger.warning(f"YouTube OAuth: token {token_id} not found for user {user_id}")
                return False

            logger.info(f"YouTube OAuth: token {token_id} revoked for user {user_id}")
            return True
        except Exception as e:
            logger.error(f"YouTube OAuth: revoke error for {user_id}: {e}")
            return False
|
||||
230
backend/services/youtube/youtube_publish_service.py
Normal file
230
backend/services/youtube/youtube_publish_service.py
Normal file
@@ -0,0 +1,230 @@
|
||||
"""
|
||||
YouTube Publish Service
|
||||
|
||||
Uploads videos to YouTube via the YouTube Data API v3.
|
||||
Uses stored OAuth credentials from YouTubeOAuthService.
|
||||
Supports resumable upload for large files.
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
from typing import Optional, Dict, Any, List
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
from googleapiclient.discovery import build
|
||||
from googleapiclient.http import MediaFileUpload
|
||||
from google.oauth2.credentials import Credentials as GoogleCredentials
|
||||
from loguru import logger
|
||||
|
||||
from services.youtube.youtube_oauth_service import YouTubeOAuthService
|
||||
|
||||
|
||||
class YouTubePublishService:
    """Upload videos to YouTube using stored OAuth credentials."""

    MAX_RETRIES = 3
    CHUNK_SIZE = 50 * 1024 * 1024  # 50MB chunks for resumable upload
    DOWNLOAD_TIMEOUT = 300  # 5 minutes to download source video

    def __init__(self, oauth_service: YouTubeOAuthService):
        """Keep a reference to the OAuth service used to load credentials."""
        self.oauth_service = oauth_service

    def publish_video(
        self,
        user_id: str,
        token_id: int,
        video_source: str,
        title: str,
        description: str = "",
        tags: Optional[List[str]] = None,
        privacy_status: str = "unlisted",
        category_id: str = "22",
        made_for_kids: bool = False,
        language: str = "en",
    ) -> Dict[str, Any]:
        """
        Upload a video to YouTube.

        Args:
            user_id: Clerk user ID
            token_id: OAuth token row ID (which YouTube channel to publish to)
            video_source: URL or local file path to the video
            title: Video title (max 100 chars; longer titles are truncated)
            description: Video description
            tags: List of tags
            privacy_status: 'public', 'private', or 'unlisted'
            category_id: YouTube category ID (default '22' = People & Blogs)
            made_for_kids: Whether content is made for children
            language: Video language (ISO 639-1 code)

        Returns:
            dict with 'success', 'video_id', 'video_url', 'error' keys
        """
        import time  # back-off between upload attempts only

        temp_path = None
        is_temp = False
        try:
            # Validate title length
            if len(title) > 100:
                title = title[:97] + "..."

            # Get valid credentials
            creds = self.oauth_service.get_valid_credentials(user_id, token_id)
            if not creds:
                return {
                    "success": False,
                    "error": "YouTube auth failed. Please reconnect your YouTube channel.",
                }

            # Resolve video file path (download if URL)
            video_path, was_downloaded = self._resolve_video_source(video_source)
            if not video_path:
                return {"success": False, "error": "Video source file not found or could not be downloaded."}

            # Remember what to delete in ``finally``; only downloads are ours.
            temp_path = video_path
            is_temp = was_downloaded

            # Validate file
            file_size = os.path.getsize(video_path)
            if file_size == 0:
                return {"success": False, "error": "Video file is empty."}

            logger.info(
                f"YouTube publish: starting upload for user {user_id}, "
                f"title='{title}', size={file_size / 1024 / 1024:.1f}MB, privacy={privacy_status}"
            )

            # Build YouTube API client
            youtube = build("youtube", "v3", credentials=creds, cache_discovery=False)

            # Prepare video metadata
            body = {
                "snippet": {
                    "title": title,
                    "description": description,
                    "tags": tags or [],
                    "categoryId": category_id,
                    "defaultLanguage": language,
                },
                "status": {
                    "privacyStatus": privacy_status,
                    "selfDeclaredMadeForKids": made_for_kids,
                },
            }

            # Upload with resumable media
            media = MediaFileUpload(
                video_path,
                chunksize=self.CHUNK_SIZE,
                resumable=True,
            )

            request = youtube.videos().insert(
                part=",".join(body.keys()),
                body=body,
                media_body=media,
            )

            response = None
            last_error = None

            for attempt in range(self.MAX_RETRIES):
                try:
                    response = request.execute()
                    break
                except Exception as e:
                    last_error = e
                    logger.warning(
                        f"YouTube publish upload attempt {attempt + 1}/{self.MAX_RETRIES} "
                        f"failed for user {user_id}: {e}"
                    )
                    # Client errors (bad metadata, quota, auth) will not
                    # succeed on a retry; stop instead of sleeping.
                    if not self._is_retryable(e):
                        break
                    if attempt < self.MAX_RETRIES - 1:
                        time.sleep(2 ** attempt)

            if not response:
                error_msg = str(last_error or "Upload failed after retries")
                logger.error(f"YouTube publish: upload failed for user {user_id}: {error_msg}")
                return {"success": False, "error": error_msg}

            video_id = response.get("id", "")
            video_url = f"https://youtu.be/{video_id}" if video_id else ""

            logger.info(
                f"YouTube publish: upload complete for user {user_id} — "
                f"video_id={video_id}, url={video_url}"
            )

            return {
                "success": True,
                "video_id": video_id,
                "video_url": video_url,
                "title": title,
                "privacy_status": privacy_status,
            }

        except Exception as e:
            logger.error(f"YouTube publish: error for user {user_id}: {e}")
            return {"success": False, "error": str(e)}

        finally:
            # Best-effort removal of the downloaded temp file.
            if temp_path and is_temp:
                try:
                    os.unlink(temp_path)
                except OSError:
                    pass

    @staticmethod
    def _is_retryable(error: Exception) -> bool:
        """Return True when retrying the upload could plausibly succeed.

        Errors without an HTTP status (e.g. connection failures) are treated
        as transient. With a status, only 5xx, 408 and 429 are retried.
        """
        status = getattr(getattr(error, "resp", None), "status", None)
        try:
            status = int(status)
        except (TypeError, ValueError):
            return True
        return status >= 500 or status in (408, 429)

    def _resolve_video_source(self, video_source: str):
        """
        Resolve video source to a local file path.

        Returns (path, is_temp) tuple. If video_source is an http(s) URL it is
        downloaded to a temp file; (None, False) is returned when the source
        cannot be resolved.
        """
        if video_source.startswith(("http://", "https://")):
            path = self._download_video(video_source)
            return (path, True) if path else (None, False)

        if video_source.startswith("ftp://"):
            # The httpx client used for downloads only speaks HTTP(S).
            logger.error("YouTube publish: ftp:// video sources are not supported")
            return (None, False)

        # NOTE(review): any readable local path is accepted here; confirm
        # callers never pass an unvalidated user-supplied path.
        local_path = Path(video_source)
        # is_file(), not exists(): a directory is not an uploadable video.
        if local_path.is_file():
            return (str(local_path.resolve()), False)

        logger.error(f"YouTube publish: video source not found: {video_source}")
        return (None, False)

    def _download_video(self, url: str) -> Optional[str]:
        """Download a video from URL to a temporary file.

        Returns the temp file path, or None on failure (the partial file is
        removed). The caller owns the returned file and must delete it.
        """
        # Signed URLs carry credentials in the query string; keep them out of logs.
        safe_url = url.split("?", 1)[0]
        tmp_path = None
        try:
            suffix = self._guess_extension(url) or ".mp4"
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                tmp_path = tmp.name

            logger.info(f"YouTube publish: downloading video from {safe_url}")

            with httpx.Client(timeout=self.DOWNLOAD_TIMEOUT, follow_redirects=True) as client:
                with client.stream("GET", url) as response:
                    response.raise_for_status()
                    with open(tmp_path, "wb") as f:
                        for chunk in response.iter_bytes(chunk_size=8 * 1024 * 1024):
                            f.write(chunk)

            file_size = os.path.getsize(tmp_path)
            logger.info(f"YouTube publish: downloaded {file_size / 1024 / 1024:.1f}MB to {tmp_path}")
            return tmp_path

        except Exception as e:
            logger.error(f"YouTube publish: download failed from {safe_url}: {e}")
            if tmp_path:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass
            return None

    @staticmethod
    def _guess_extension(url: str) -> str:
        """Guess file extension from URL.

        Query string and fragment are ignored. Returns the URL's extension
        when it is a known video type, otherwise ".mp4".
        """
        path = url.split("?", 1)[0].split("#", 1)[0]  # Strip query params and fragment
        _, ext = os.path.splitext(path)
        if ext.lower() in (".mp4", ".mov", ".avi", ".mkv", ".webm", ".flv", ".wmv"):
            return ext
        return ".mp4"
|
||||
Reference in New Issue
Block a user