Added onboarding progress tracking & landing page

This commit is contained in:
ajaysi
2025-10-02 13:20:15 +05:30
parent e57d2577f8
commit 510b79bbf8
135 changed files with 25917 additions and 5768 deletions

View File

@@ -6,6 +6,7 @@ replacing mock research with real-time industry information.
Available Services:
- GoogleSearchService: Real-time industry research using Google Custom Search API
- ExaService: Competitor discovery and analysis using Exa API
- Source ranking and credibility assessment
- Content extraction and insight generation
@@ -14,8 +15,10 @@ Version: 1.0
Last Updated: January 2025
"""
from services.research.google_search_service import GoogleSearchService
from .google_search_service import GoogleSearchService
from .exa_service import ExaService
__all__ = [
"GoogleSearchService"
"GoogleSearchService",
"ExaService"
]

View File

@@ -0,0 +1,270 @@
"""
AI Prompts for Competitor Analysis
This module contains prompts for analyzing competitor data from Exa API
to generate actionable insights for content strategy and competitive positioning.
"""
# Prompt template for a general competitor analysis request.
# Rendered with str.format(); placeholders: competitor_context, user_url,
# industry_context. The braces of the JSON output schema are doubled
# ({{ and }}) so that str.format() emits them as literal single braces.
COMPETITOR_ANALYSIS_PROMPT = """
You are a competitive intelligence analyst specializing in content strategy and market positioning.
**TASK**: Analyze competitor data to provide actionable insights for content strategy and competitive positioning.
**COMPETITOR DATA**:
{competitor_context}
**USER'S WEBSITE**: {user_url}
**INDUSTRY CONTEXT**: {industry_context}
**ANALYSIS REQUIREMENTS**:
1. **Market Position Analysis**
- Identify the competitive landscape structure
- Determine market leaders vs. challengers
- Assess market saturation and opportunities
2. **Content Strategy Insights**
- Analyze competitor content themes and topics
- Identify content gaps and opportunities
- Suggest unique content angles for differentiation
3. **Competitive Advantages**
- Highlight what makes each competitor unique
- Identify areas where the user can differentiate
- Suggest positioning strategies
4. **SEO and Marketing Insights**
- Analyze competitor positioning and messaging
- Identify keyword and content opportunities
- Suggest marketing strategies
**OUTPUT FORMAT** (JSON):
{{
"market_analysis": {{
"competitive_landscape": "Description of market structure",
"market_leaders": ["List of top 3 competitors"],
"market_opportunities": ["List of 3-5 opportunities"],
"saturation_level": "high/medium/low"
}},
"content_strategy": {{
"common_themes": ["List of common content themes"],
"content_gaps": ["List of 5 content opportunities"],
"unique_angles": ["List of 3 unique content angles"],
"content_frequency_insights": "Analysis of publishing patterns"
}},
"competitive_positioning": {{
"differentiation_opportunities": ["List of 5 ways to differentiate"],
"unique_value_propositions": ["List of 3 unique positioning ideas"],
"target_audience_insights": "Analysis of competitor audience targeting"
}},
"seo_opportunities": {{
"keyword_gaps": ["List of 5 keyword opportunities"],
"content_topics": ["List of 5 high-value content topics"],
"marketing_channels": ["List of competitor marketing strategies"]
}},
"actionable_recommendations": [
"List of 5 specific, actionable recommendations"
],
"risk_assessment": {{
"competitive_threats": ["List of 3 main threats"],
"market_barriers": ["List of 2-3 barriers to entry"],
"success_factors": ["List of 3 key success factors"]
}}
}}
**INSTRUCTIONS**:
- Be specific and actionable in your recommendations
- Focus on opportunities for differentiation
- Consider the user's industry context
- Prioritize recommendations by impact and feasibility
- Use data from the competitor analysis to support insights
- Keep recommendations practical and implementable
**QUALITY STANDARDS**:
- Each recommendation should be specific and actionable
- Insights should be based on actual competitor data
- Focus on differentiation and competitive advantage
- Consider both short-term and long-term strategies
- Ensure recommendations are relevant to the user's industry
"""
# Prompt template for content gap analysis across competitors.
# Rendered with str.format(); placeholders: competitor_context,
# industry_context, target_audience. The braces of the JSON output schema are
# doubled ({{ and }}) so that str.format() emits them as literal single braces.
CONTENT_GAP_ANALYSIS_PROMPT = """
You are a content strategist analyzing competitor content to identify gaps and opportunities.
**TASK**: Analyze competitor content patterns to identify content gaps and opportunities.
**COMPETITOR CONTENT DATA**:
{competitor_context}
**USER'S INDUSTRY**: {industry_context}
**TARGET AUDIENCE**: {target_audience}
**ANALYSIS FOCUS**:
1. **Content Topic Analysis**
- Identify most common content topics across competitors
- Find underserved or missing topics
- Analyze content depth and quality patterns
2. **Content Format Opportunities**
- Identify popular content formats among competitors
- Find format gaps and opportunities
- Suggest innovative content approaches
3. **Audience Targeting Gaps**
- Analyze competitor audience targeting
- Identify underserved audience segments
- Suggest audience expansion opportunities
4. **SEO Content Opportunities**
- Identify high-value keywords competitors are missing
- Find long-tail keyword opportunities
- Suggest content clusters for SEO
**OUTPUT FORMAT** (JSON):
{{
"content_gaps": [
{{
"topic": "Specific content topic",
"opportunity_level": "high/medium/low",
"reasoning": "Why this is an opportunity",
"content_angle": "Unique angle for this topic",
"estimated_difficulty": "easy/medium/hard"
}}
],
"format_opportunities": [
{{
"format": "Content format type",
"gap_reason": "Why competitors aren't using this",
"potential_impact": "Expected impact level",
"implementation_tips": "How to implement"
}}
],
"audience_gaps": [
{{
"audience_segment": "Underserved audience",
"opportunity_size": "large/medium/small",
"content_needs": "What content this audience needs",
"engagement_strategy": "How to engage this audience"
}}
],
"seo_opportunities": [
{{
"keyword_theme": "Keyword cluster theme",
"search_volume": "estimated_high/medium/low",
"competition_level": "low/medium/high",
"content_ideas": ["3-5 content ideas for this theme"]
}}
],
"priority_recommendations": [
"Top 5 prioritized content opportunities with implementation order"
]
}}
"""
# Prompt template for strategic competitive intelligence.
# Rendered with str.format(); placeholders: competitor_context, user_url,
# industry_context, business_model, target_market. The braces of the JSON
# output schema are doubled ({{ and }}) so that str.format() emits them as
# literal single braces.
COMPETITIVE_INTELLIGENCE_PROMPT = """
You are a competitive intelligence expert providing strategic insights for market positioning.
**TASK**: Generate comprehensive competitive intelligence insights for strategic decision-making.
**COMPETITOR INTELLIGENCE DATA**:
{competitor_context}
**BUSINESS CONTEXT**:
- User Website: {user_url}
- Industry: {industry_context}
- Business Model: {business_model}
- Target Market: {target_market}
**INTELLIGENCE AREAS**:
1. **Competitive Landscape Mapping**
- Market positioning analysis
- Competitive strength assessment
- Market share estimation
2. **Strategic Positioning Opportunities**
- Blue ocean opportunities
- Differentiation strategies
- Competitive moats
3. **Threat Assessment**
- Competitive threats
- Market disruption risks
- Barrier to entry analysis
4. **Growth Strategy Insights**
- Market expansion opportunities
- Partnership possibilities
- Acquisition targets
**OUTPUT FORMAT** (JSON):
{{
"competitive_landscape": {{
"market_structure": "Description of market structure",
"key_players": [
{{
"name": "Competitor name",
"position": "market_leader/challenger/niche",
"strengths": ["List of key strengths"],
"weaknesses": ["List of key weaknesses"],
"market_share": "estimated_percentage"
}}
],
"market_dynamics": "Analysis of market trends and forces"
}},
"positioning_opportunities": {{
"blue_ocean_opportunities": ["List of uncontested market spaces"],
"differentiation_strategies": ["List of positioning strategies"],
"competitive_advantages": ["List of potential advantages to build"]
}},
"threat_analysis": {{
"immediate_threats": ["List of current competitive threats"],
"future_risks": ["List of potential future risks"],
"market_barriers": ["List of barriers to success"]
}},
"strategic_recommendations": {{
"short_term_actions": ["List of 3-5 immediate actions"],
"medium_term_strategy": ["List of 3-5 strategic initiatives"],
"long_term_vision": ["List of 2-3 long-term strategic goals"]
}},
"success_metrics": {{
"kpis_to_track": ["List of key performance indicators"],
"competitive_benchmarks": ["List of metrics to benchmark against"],
"success_thresholds": ["List of success criteria"]
}}
}}
"""
# Utility function to format prompts with data
def format_competitor_analysis_prompt(competitor_context: str, user_url: str, industry_context: str = None) -> str:
    """
    Render COMPETITOR_ANALYSIS_PROMPT for one analysis request.

    Args:
        competitor_context: Competitor data text inserted into the prompt
        user_url: The user's website URL
        industry_context: Optional industry description; a falsy value is
            rendered as "Not specified"

    Returns:
        The prompt text with every placeholder substituted
    """
    placeholders = {
        "competitor_context": competitor_context,
        "user_url": user_url,
        "industry_context": industry_context if industry_context else "Not specified",
    }
    return COMPETITOR_ANALYSIS_PROMPT.format(**placeholders)
def format_content_gap_prompt(competitor_context: str, industry_context: str = None, target_audience: str = None) -> str:
    """
    Render CONTENT_GAP_ANALYSIS_PROMPT for one analysis request.

    Args:
        competitor_context: Competitor content data inserted into the prompt
        industry_context: Optional industry description; a falsy value is
            rendered as "Not specified"
        target_audience: Optional audience description; a falsy value is
            rendered as "Not specified"

    Returns:
        The prompt text with every placeholder substituted
    """
    fallback = "Not specified"
    placeholders = {
        "competitor_context": competitor_context,
        "industry_context": industry_context if industry_context else fallback,
        "target_audience": target_audience if target_audience else fallback,
    }
    return CONTENT_GAP_ANALYSIS_PROMPT.format(**placeholders)
def format_competitive_intelligence_prompt(
    competitor_context: str,
    user_url: str,
    industry_context: str = None,
    business_model: str = None,
    target_market: str = None
) -> str:
    """
    Render COMPETITIVE_INTELLIGENCE_PROMPT for one analysis request.

    Args:
        competitor_context: Competitor intelligence data inserted into the prompt
        user_url: The user's website URL
        industry_context: Optional industry description
        business_model: Optional business model description
        target_market: Optional target market description

    Each optional argument is rendered as "Not specified" when it is falsy.

    Returns:
        The prompt text with every placeholder substituted
    """
    fallback = "Not specified"
    placeholders = {
        "competitor_context": competitor_context,
        "user_url": user_url,
        "industry_context": industry_context if industry_context else fallback,
        "business_model": business_model if business_model else fallback,
        "target_market": target_market if target_market else fallback,
    }
    return COMPETITIVE_INTELLIGENCE_PROMPT.format(**placeholders)

View File

@@ -0,0 +1,769 @@
"""
Exa API Service for ALwrity
This service provides competitor discovery and analysis using the Exa API,
which uses neural search to find semantically similar websites and content.
Key Features:
- Competitor discovery using neural search
- Content analysis and summarization
- Competitive intelligence gathering
- Cost-effective API usage with caching
- Integration with onboarding Step 3
Dependencies:
- exa_py (Exa Python SDK client)
- loguru (for logging)
- os (for environment variables)
Author: ALwrity Team
Version: 1.0
Last Updated: January 2025
"""
import os
import json
import asyncio
from typing import Dict, List, Optional, Any, Union
from datetime import datetime, timedelta
from loguru import logger
from urllib.parse import urlparse
from exa_py import Exa
class ExaService:
"""
Service for competitor discovery and analysis using the Exa API.
This service provides neural search capabilities to find semantically similar
websites and analyze their content for competitive intelligence.
"""
def __init__(self):
"""Initialize the Exa Service with API credentials."""
self.api_key = os.getenv("EXA_API_KEY")
if not self.api_key:
raise ValueError("Exa API key not configured. Please set EXA_API_KEY environment variable.")
else:
self.exa = Exa(api_key=self.api_key)
self.enabled = True
logger.info("Exa Service initialized successfully")
async def discover_competitors(
    self,
    user_url: str,
    num_results: int = 10,
    include_domains: Optional[List[str]] = None,
    exclude_domains: Optional[List[str]] = None,
    industry_context: Optional[str] = None,
    website_analysis_data: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Discover competitors for a given website using Exa's neural search.

    Args:
        user_url: The website URL to find competitors for
        num_results: Number of competitor results to request; values above
            10 are clamped to 10 before the request is sent
        include_domains: List of domains to include in search
        exclude_domains: List of domains to exclude from search. The list is
            copied, so the caller's object is never modified
        industry_context: Industry context appended to the summary query
        website_analysis_data: Optional earlier website analysis; its
            'analysis' entry is read for 'target_audience', 'industry' and
            'content_type' to refine the search

    Returns:
        On success, a dictionary with "success": True plus "user_url",
        "competitors", "total_competitors", "analysis_timestamp",
        "industry_context", "api_cost" and "request_id". On failure, a
        dictionary with "success": False, "error" and "details"; exceptions
        are logged and not propagated.
    """
    try:
        if not self.enabled:
            raise ValueError("Exa Service is not enabled - API key missing")
        logger.info(f"Starting competitor discovery for: {user_url}")

        # Work on a copy: appending to the caller's list would leak the user's
        # domain into any later call that reuses the same list object.
        user_domain = urlparse(user_url).netloc
        exclude_domains_list = list(exclude_domains or [])
        # netloc is empty when user_url has no scheme; an empty string is not
        # a meaningful domain filter, so it is left out.
        if user_domain and user_domain not in exclude_domains_list:
            exclude_domains_list.append(user_domain)
        logger.info(f"Excluding domains: {exclude_domains_list}")

        # Extract insights from website analysis for better targeting
        include_text_queries = []
        summary_query = "Business model, target audience, content strategy"
        if industry_context:
            summary_query += f" in {industry_context}"

        if website_analysis_data:
            # `or {}` tolerates an explicit None stored under 'analysis'.
            analysis = website_analysis_data.get('analysis') or {}

            # Only short phrases (at most 5 words) are used as text filters;
            # the original code attributes this bound to an Exa limit.
            if 'target_audience' in analysis:
                audience = analysis['target_audience']
                if isinstance(audience, dict) and 'primary_audience' in audience:
                    primary_audience = audience['primary_audience']
                    if (
                        isinstance(primary_audience, str)
                        and primary_audience.strip()
                        and len(primary_audience.split()) <= 5
                    ):
                        include_text_queries.append(primary_audience)

            if 'industry' in analysis and analysis['industry']:
                industry = analysis['industry']
                if isinstance(industry, str) and len(industry.split()) <= 5:
                    include_text_queries.append(industry)

            # Enhance summary query with analysis insights
            if 'content_type' in analysis:
                content_type = analysis['content_type']
                summary_query += f", {content_type} content strategy"
            logger.info(f"Enhanced targeting with analysis data: {include_text_queries}")

        # NOTE(review): this SDK call is synchronous, so it blocks the event
        # loop while it runs, and nothing here can raise asyncio.TimeoutError.
        search_result = self.exa.find_similar_and_contents(
            url=user_url,
            num_results=min(num_results, 10),  # Exa API limit
            include_domains=include_domains,
            exclude_domains=exclude_domains_list or None,
            include_text=include_text_queries if include_text_queries else None,
            text=True,
            highlights={
                "numSentences": 2,
                "highlightsPerUrl": 3,
                "query": "Unique value proposition, competitive advantages, market position"
            },
            summary={
                "query": summary_query
            }
        )

        # Log only a summary; the raw response carries verbose page content.
        results = getattr(search_result, 'results', [])
        cost_info = getattr(search_result, 'cost_dollars', None)
        logger.info(f"📊 Exa API response for {user_url}:")
        logger.info(f" ├─ Request ID: {getattr(search_result, 'request_id', 'N/A')}")
        logger.info(f" ├─ Results count: {len(results)}")
        logger.info(f" └─ Cost: ${getattr(cost_info, 'total', 0)}")
        logger.info(f" - Found {len(results)} competitors")

        # Process and structure the results
        competitors = self._process_competitor_results(search_result, user_url)
        logger.info(f"Successfully discovered {len(competitors)} competitors for {user_url}")

        return {
            "success": True,
            "user_url": user_url,
            "competitors": competitors,
            "total_competitors": len(competitors),
            "analysis_timestamp": datetime.utcnow().isoformat(),
            "industry_context": industry_context,
            # A missing or falsy cost object is reported as 0.
            "api_cost": getattr(cost_info, 'total', 0) if cost_info else 0,
            "request_id": getattr(search_result, 'request_id', None)
        }
    except asyncio.TimeoutError:
        logger.error("Exa API request timed out")
        return {
            "success": False,
            "error": "Request timed out",
            "details": "The competitor discovery request took too long to complete"
        }
    except Exception as e:
        logger.error(f"Error in competitor discovery: {str(e)}")
        return {
            "success": False,
            "error": str(e),
            "details": "An unexpected error occurred during competitor discovery"
        }
def _process_competitor_results(self, search_result, user_url: str) -> List[Dict[str, Any]]:
"""
Process and structure the Exa SDK response into competitor data.
Args:
search_result: Response from Exa SDK
user_url: Original user URL for reference
Returns:
List of processed competitor data
"""
competitors = []
user_domain = urlparse(user_url).netloc
# Extract results from the SDK response
results = getattr(search_result, 'results', [])
for result in results:
try:
# Extract basic information from the result object
competitor_url = getattr(result, 'url', '')
competitor_domain = urlparse(competitor_url).netloc
# Skip if it's the same domain as the user
if competitor_domain == user_domain:
continue
# Extract content insights
summary = getattr(result, 'summary', '')
highlights = getattr(result, 'highlights', [])
highlight_scores = getattr(result, 'highlight_scores', [])
# Calculate competitive relevance score
relevance_score = self._calculate_relevance_score(result, user_url)
competitor_data = {
"url": competitor_url,
"domain": competitor_domain,
"title": getattr(result, 'title', ''),
"published_date": getattr(result, 'published_date', None),
"author": getattr(result, 'author', None),
"favicon": getattr(result, 'favicon', None),
"image": getattr(result, 'image', None),
"summary": summary,
"highlights": highlights,
"highlight_scores": highlight_scores,
"relevance_score": relevance_score,
"competitive_insights": self._extract_competitive_insights(summary, highlights),
"content_analysis": self._analyze_content_quality(result)
}
competitors.append(competitor_data)
except Exception as e:
logger.warning(f"Error processing competitor result: {str(e)}")
continue
# Sort by relevance score (highest first)
competitors.sort(key=lambda x: x["relevance_score"], reverse=True)
return competitors
def _calculate_relevance_score(self, result, user_url: str) -> float:
"""
Calculate a relevance score for competitor ranking.
Args:
result: Competitor result from Exa SDK
user_url: Original user URL
Returns:
Relevance score between 0 and 1
"""
score = 0.0
# Base score from highlight scores
highlight_scores = getattr(result, 'highlight_scores', [])
if highlight_scores:
score += sum(highlight_scores) / len(highlight_scores) * 0.4
# Score from summary quality
summary = getattr(result, 'summary', '')
if summary and len(summary) > 100:
score += 0.3
# Score from title relevance
title = getattr(result, 'title', '').lower()
if any(keyword in title for keyword in ["business", "company", "service", "solution", "platform"]):
score += 0.2
# Score from URL structure similarity
competitor_url = getattr(result, 'url', '')
if self._url_structure_similarity(user_url, competitor_url) > 0.5:
score += 0.1
return min(score, 1.0)
def _url_structure_similarity(self, url1: str, url2: str) -> float:
"""
Calculate URL structure similarity.
Args:
url1: First URL
url2: Second URL
Returns:
Similarity score between 0 and 1
"""
try:
parsed1 = urlparse(url1)
parsed2 = urlparse(url2)
# Compare path structure
path1_parts = [part for part in parsed1.path.split('/') if part]
path2_parts = [part for part in parsed2.path.split('/') if part]
if not path1_parts or not path2_parts:
return 0.0
# Calculate similarity based on path length and structure
max_parts = max(len(path1_parts), len(path2_parts))
common_parts = sum(1 for p1, p2 in zip(path1_parts, path2_parts) if p1 == p2)
return common_parts / max_parts
except Exception:
return 0.0
def _extract_competitive_insights(self, summary: str, highlights: List[str]) -> Dict[str, Any]:
"""
Extract competitive insights from summary and highlights.
Args:
summary: Content summary
highlights: Content highlights
Returns:
Dictionary of competitive insights
"""
insights = {
"business_model": "",
"target_audience": "",
"value_proposition": "",
"competitive_advantages": [],
"content_strategy": ""
}
# Combine summary and highlights for analysis
content = f"{summary} {' '.join(highlights)}".lower()
# Extract business model indicators
business_models = ["saas", "platform", "service", "product", "consulting", "agency", "marketplace"]
for model in business_models:
if model in content:
insights["business_model"] = model.title()
break
# Extract target audience indicators
audiences = ["enterprise", "small business", "startups", "developers", "marketers", "consumers"]
for audience in audiences:
if audience in content:
insights["target_audience"] = audience.title()
break
# Extract value proposition from highlights
if highlights:
insights["value_proposition"] = highlights[0][:100] + "..." if len(highlights[0]) > 100 else highlights[0]
return insights
def _analyze_content_quality(self, result) -> Dict[str, Any]:
"""
Analyze the content quality of a competitor.
Args:
result: Competitor result from Exa SDK
Returns:
Dictionary of content quality metrics
"""
quality_metrics = {
"content_depth": "medium",
"technical_sophistication": "medium",
"content_freshness": "unknown",
"engagement_potential": "medium"
}
# Analyze content depth from summary length
summary = getattr(result, 'summary', '')
if len(summary) > 300:
quality_metrics["content_depth"] = "high"
elif len(summary) < 100:
quality_metrics["content_depth"] = "low"
# Analyze technical sophistication
technical_keywords = ["api", "integration", "automation", "analytics", "data", "platform"]
highlights = getattr(result, 'highlights', [])
content_text = f"{summary} {' '.join(highlights)}".lower()
technical_count = sum(1 for keyword in technical_keywords if keyword in content_text)
if technical_count >= 3:
quality_metrics["technical_sophistication"] = "high"
elif technical_count == 0:
quality_metrics["technical_sophistication"] = "low"
return quality_metrics
async def discover_social_media_accounts(self, user_url: str) -> Dict[str, Any]:
    """
    Ask Exa's answer API for the social media accounts of a website.

    Args:
        user_url: The website URL to find social media accounts for

    Returns:
        On success, a dictionary with "success": True plus "user_url",
        "social_media_accounts", "citations", "analysis_timestamp",
        "api_cost" and "request_id". On failure, a dictionary with
        "success": False, "error" and "details"; exceptions are logged and
        not propagated.
    """
    # (result key, label searched for in a markdown-style answer)
    platform_labels = (
        ("facebook", "Facebook"),
        ("twitter", "Twitter"),
        ("instagram", "Instagram"),
        ("linkedin", "LinkedIn"),
        ("youtube", "YouTube"),
        ("tiktok", "TikTok"),
    )
    platform_keys = [key for key, _ in platform_labels]
    # Citation attributes copied into plain dicts, with their fallback values.
    text_fields = ('id', 'title', 'url', 'text', 'snippet')
    optional_fields = ('published_date', 'author', 'image', 'favicon')

    try:
        if not self.enabled:
            raise ValueError("Exa Service is not enabled - API key missing")
        logger.info(f"Starting social media discovery for: {user_url}")

        # Query by bare domain (scheme dropped, 'www.' removed).
        domain = urlparse(user_url).netloc.replace('www.', '')
        result = self.exa.answer(
            f"Find all social media accounts of the url: {domain}. Return a JSON object with facebook, twitter, instagram, linkedin, youtube, and tiktok fields containing the URLs or empty strings if not found.",
            model="exa-pro",
            text=True
        )

        # Log a summary of the response
        logger.info(f"Raw Exa social media response for {user_url}:")
        logger.info(f" - Request ID: {getattr(result, 'request_id', 'N/A')}")
        logger.info(f" └─ Cost: ${getattr(getattr(result, 'cost_dollars', None), 'total', 0)}")

        answer_text = getattr(result, 'answer', '')

        # Objects are flattened to dicts so the payload is JSON serializable;
        # anything without a __dict__ is passed through unchanged.
        citations_dicts = []
        for citation in getattr(result, 'citations', []):
            if not hasattr(citation, '__dict__'):
                citations_dicts.append(citation)
                continue
            flattened = {name: getattr(citation, name, '') for name in text_fields}
            flattened.update({name: getattr(citation, name, None) for name in optional_fields})
            citations_dicts.append(flattened)

        logger.info(f" - Raw answer text: {answer_text}")
        logger.info(f" - Citations count: {len(citations_dicts)}")

        # The answer is either a JSON object or markdown-style text.
        try:
            import json
            import re
            if answer_text.strip().startswith('{'):
                answer_data = json.loads(answer_text.strip())
            else:
                answer_data = dict.fromkeys(platform_keys, "")
                for key, label in platform_labels:
                    # First bracketed span that follows the platform label.
                    found = re.search(label + r'.*?\[([^\]]+)\]', answer_text)
                    if found:
                        answer_data[key] = found.group(1)
        except (json.JSONDecodeError, AttributeError, KeyError):
            # Unparseable answer: report every platform as not found.
            answer_data = dict.fromkeys(platform_keys, "")

        logger.info(f" - Parsed social media accounts:")
        for platform, url in answer_data.items():
            if url:
                logger.info(f" {platform}: {url}")

        cost_info = getattr(result, 'cost_dollars', None)
        return {
            "success": True,
            "user_url": user_url,
            "social_media_accounts": answer_data,
            "citations": citations_dicts,
            "analysis_timestamp": datetime.utcnow().isoformat(),
            "api_cost": getattr(cost_info, 'total', 0) if cost_info else 0,
            "request_id": getattr(result, 'request_id', None)
        }
    except Exception as e:
        logger.error(f"Error in social media discovery: {str(e)}")
        return {
            "success": False,
            "error": str(e),
            "details": "An unexpected error occurred during social media discovery"
        }
def _generate_basic_context(self, results: List[Any], user_url: str) -> str:
"""
Generate a basic context string from competitor results for LLM consumption.
Args:
results: List of competitor results from Exa API
user_url: Original user URL for reference
Returns:
Formatted context string
"""
context_parts = [
f"Competitive Analysis for: {user_url}",
f"Found {len(results)} similar websites/competitors:",
""
]
for i, result in enumerate(results[:5], 1): # Limit to top 5 for context
url = getattr(result, 'url', 'Unknown URL')
title = getattr(result, 'title', 'Unknown Title')
summary = getattr(result, 'summary', 'No summary available')
context_parts.extend([
f"{i}. {title}",
f" URL: {url}",
f" Summary: {summary[:200]}{'...' if len(summary) > 200 else ''}",
""
])
context_parts.append("Key insights:")
context_parts.append("- These competitors offer similar services or content")
context_parts.append("- Analyze their content strategy and positioning")
context_parts.append("- Identify opportunities for differentiation")
return "\n".join(context_parts)
async def analyze_competitor_content(
    self,
    competitor_url: str,
    analysis_depth: str = "standard"
) -> Dict[str, Any]:
    """
    Run a follow-up analysis focused on a single competitor.

    Args:
        competitor_url: URL of the competitor to analyze
        analysis_depth: Depth label ("quick", "standard", "deep"); it is
            echoed in the result and does not change what is computed here

    Returns:
        On success, a dictionary with "success": True plus "competitor_url",
        "content_patterns", "competitive_insights", "analysis_timestamp" and
        "analysis_depth". If the underlying discovery fails, its failure
        dictionary is returned unchanged. Other exceptions are logged and
        reported as "success": False.
    """
    try:
        logger.info(f"Starting detailed analysis for competitor: {competitor_url}")

        # NOTE(review): discover_competitors also adds this URL's domain to
        # its exclude list and drops results on that domain, while this call
        # restricts the search to the same domain. The two filters look
        # contradictory - confirm this returns any results in practice.
        competitor_domain = urlparse(competitor_url).netloc
        discovery = await self.discover_competitors(
            competitor_url,
            num_results=10,
            include_domains=[competitor_domain]
        )
        if not discovery["success"]:
            return discovery

        related_pages = discovery["competitors"]
        patterns = self._analyze_content_patterns(related_pages)
        insights = self._generate_competitive_insights(
            competitor_url,
            related_pages,
            patterns
        )

        return {
            "success": True,
            "competitor_url": competitor_url,
            "content_patterns": patterns,
            "competitive_insights": insights,
            "analysis_timestamp": datetime.utcnow().isoformat(),
            "analysis_depth": analysis_depth
        }
    except Exception as e:
        logger.error(f"Error in competitor content analysis: {str(e)}")
        return {
            "success": False,
            "error": str(e),
            "details": "An unexpected error occurred during competitor analysis"
        }
def _analyze_content_patterns(self, competitors: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
Analyze content patterns across competitors.
Args:
competitors: List of competitor data
Returns:
Dictionary of content patterns
"""
patterns = {
"common_themes": [],
"content_types": [],
"publishing_patterns": {},
"target_keywords": [],
"content_strategies": []
}
# Analyze common themes
all_summaries = [comp.get("summary", "") for comp in competitors]
# This would be enhanced with NLP analysis in a full implementation
# Analyze content types from URLs
content_types = set()
for comp in competitors:
url = comp.get("url", "")
if "/blog/" in url:
content_types.add("blog")
elif "/product/" in url or "/service/" in url:
content_types.add("product")
elif "/about/" in url:
content_types.add("about")
elif "/contact/" in url:
content_types.add("contact")
patterns["content_types"] = list(content_types)
return patterns
def _generate_competitive_insights(
self,
competitor_url: str,
competitors: List[Dict[str, Any]],
content_patterns: Dict[str, Any]
) -> Dict[str, Any]:
"""
Generate competitive insights from analysis data.
Args:
competitor_url: URL of the competitor
competitors: List of competitor data
content_patterns: Content pattern analysis
Returns:
Dictionary of competitive insights
"""
insights = {
"competitive_strengths": [],
"content_opportunities": [],
"market_positioning": "unknown",
"strategic_recommendations": []
}
# Analyze competitive strengths
for comp in competitors:
if comp.get("relevance_score", 0) > 0.7:
insights["competitive_strengths"].append({
"strength": comp.get("summary", "")[:100],
"relevance": comp.get("relevance_score", 0)
})
# Generate content opportunities
if content_patterns.get("content_types"):
insights["content_opportunities"] = [
f"Develop {content_type} content"
for content_type in content_patterns["content_types"]
]
return insights
def health_check(self) -> Dict[str, Any]:
"""
Check the health of the Exa service.
Returns:
Dictionary containing service health status
"""
try:
if not self.enabled:
return {
"status": "disabled",
"message": "Exa API key not configured",
"timestamp": datetime.utcnow().isoformat()
}
# Test with a simple request using the SDK directly
test_result = self.exa.find_similar(
url="https://example.com",
num_results=1
)
# If we get here without an exception, the API is working
return {
"status": "healthy",
"message": "Exa API is operational",
"timestamp": datetime.utcnow().isoformat(),
"test_successful": True
}
except Exception as e:
return {
"status": "error",
"message": f"Health check failed: {str(e)}",
"timestamp": datetime.utcnow().isoformat()
}
def get_cost_estimate(self, num_results: int, include_content: bool = True) -> Dict[str, Any]:
    """
    Estimate the cost of one Exa request.

    Args:
        num_results: Number of results requested
        include_content: Whether to add the per-result content estimate

    Returns:
        Dictionary with "search_cost", "content_cost",
        "total_estimated_cost", and the two inputs echoed back as
        "num_results" and "include_content"
    """
    # (inclusive upper bound on num_results, flat search price); anything
    # above the last tier falls back to 1.0.
    price_tiers = ((25, 0.005), (100, 0.025))

    search_cost = 1.0
    for ceiling, price in price_tiers:
        if num_results <= ceiling:
            search_cost = price
            break

    # Rough per-result estimate for content analysis.
    content_cost = num_results * 0.001 if include_content else 0.0

    return {
        "search_cost": search_cost,
        "content_cost": content_cost,
        "total_estimated_cost": search_cost + content_cost,
        "num_results": num_results,
        "include_content": include_content
    }