chore: push all remaining changes

- Blog writer enhancements and bug fixes
- Wix integration improvements
- Frontend UI updates
- GSC dashboard docs cleanup
- Image studio assets
- LinkedIn requirements file
- Various dependency updates
This commit is contained in:
ajaysi
2026-06-12 20:32:03 +05:30
parent 63a0df2536
commit d90d441019
78 changed files with 3963 additions and 2899 deletions

View File

@@ -18,7 +18,7 @@ class CompetitorAnalyzer:
Analyze the following research content and extract competitor insights:
Research Content:
{content[:3000]}
{content[:8000]}
Extract and analyze:
1. Top competitors mentioned (companies, brands, platforms)

View File

@@ -17,7 +17,7 @@ class ContentAngleGenerator:
Analyze the following research content and create strategic content angles for: {topic} in {industry}
Research Content:
{content[:3000]}
{content[:8000]}
Create 7 compelling content angles that:
1. Leverage current trends and data from the research

View File

@@ -7,6 +7,8 @@ Neural search implementation using Exa API for high-quality, citation-rich resea
from exa_py import Exa
import os
import asyncio
from datetime import datetime
from urllib.parse import urlparse
from typing import List, Dict, Any
from loguru import logger
from models.subscription_models import APIProvider
@@ -355,6 +357,125 @@ class ExaResearchProvider(BaseProvider):
return None
def _calculate_credibility_score(self, result) -> float:
"""Dynamic credibility score based on domain authority, recency, and content substance."""
scores = []
weights = []
# Domain authority (weight: 3) — most important signal
url = result.url if hasattr(result, 'url') else ''
domain_score = self._score_domain_authority(url)
scores.append(domain_score)
weights.append(3)
# Recency (weight: 2) — fresher content is more valuable
recency_score = self._score_recency(result)
scores.append(recency_score)
weights.append(2)
# Content substance (weight: 2) — richer content = more substantive source
substance_score = self._score_substance(result)
scores.append(substance_score)
weights.append(2)
# Exa relevance score (weight: 2) — Exa's own relevance ranking
exa_score = 0.5
if hasattr(result, 'score') and result.score is not None:
exa_score = float(result.score)
scores.append(exa_score)
weights.append(2)
total = sum(s * w for s, w in zip(scores, weights))
total_weight = sum(weights)
return round(total / total_weight, 3)
@staticmethod
def _score_domain_authority(url: str) -> float:
if not url:
return 0.5
try:
domain = urlparse(url).netloc.lower()
except Exception:
return 0.5
if domain.startswith('www.'):
domain = domain[4:]
# Tier 1: Government, educational, major research
if domain.endswith('.gov') or domain.endswith('.edu'):
return 0.95
if domain in ('arxiv.org', 'pubmed.ncbi.nlm.nih.gov', 'ncbi.nlm.nih.gov',
'scholar.google.com', 'researchgate.net', 'sciencedaily.com',
'nature.com', 'science.org', 'pnas.org'):
return 0.92
# Tier 2: Major established news and professional publications
tier2 = {
'reuters.com', 'apnews.com', 'bbc.com', 'bbc.co.uk', 'npr.org',
'wsj.com', 'nytimes.com', 'economist.com', 'bloomberg.com',
'theguardian.com', 'ft.com', 'washingtonpost.com',
'forbes.com', 'hbr.org', 'techcrunch.com', 'wired.com',
'cnn.com', 'nbcnews.com', 'cbsnews.com', 'abcnews.go.com',
}
# Extract base domain
parts = domain.split('.')
base = '.'.join(parts[-2:]) if len(parts) >= 2 else domain
if base in tier2:
return 0.88
# Tier 3: Industry research and established .org
tier3 = {
'statista.com', 'pewresearch.org', 'gartner.com', 'mckinsey.com',
'deloitte.com', 'pwc.com', 'ey.com', 'kpmg.com',
'hubspot.com', 'moz.com', 'searchengineland.com',
'neilpatel.com', 'backlinko.com', 'copyblogger.com',
}
if base in tier3:
return 0.80
if domain.endswith('.org'):
return 0.75
return 0.60
def _score_recency(self, result) -> float:
if not hasattr(result, 'publishedDate') or not result.publishedDate:
return 0.70
try:
published = datetime.strptime(result.publishedDate[:10], '%Y-%m-%d')
days_old = (datetime.now() - published).days
if days_old < 30:
return 1.0
elif days_old < 180:
return 0.90
elif days_old < 365:
return 0.80
elif days_old < 730:
return 0.65
elif days_old < 1825:
return 0.45
else:
return 0.25
except Exception:
return 0.70
def _score_substance(self, result) -> float:
total_chars = 0
if hasattr(result, 'highlights') and result.highlights:
total_chars += sum(len(h or '') for h in result.highlights)
if hasattr(result, 'summary') and result.summary:
total_chars += len(result.summary)
if hasattr(result, 'text') and result.text:
total_chars += len(result.text)
if total_chars > 2000:
return 0.95
elif total_chars > 1000:
return 0.85
elif total_chars > 500:
return 0.75
elif total_chars > 100:
return 0.60
return 0.40
def _transform_sources(self, results):
"""Transform Exa results to ResearchSource format."""
sources = []
@@ -368,7 +489,7 @@ class ExaResearchProvider(BaseProvider):
'title': result.title if hasattr(result, 'title') else '',
'url': result.url if hasattr(result, 'url') else '',
'excerpt': self._get_excerpt(result),
'credibility_score': 0.85, # Exa results are high quality
'credibility_score': self._calculate_credibility_score(result),
'published_at': result.publishedDate if hasattr(result, 'publishedDate') else None,
'index': idx,
'source_type': source_type,
@@ -388,7 +509,7 @@ class ExaResearchProvider(BaseProvider):
if hasattr(result, 'summary') and result.summary:
return result.summary
if hasattr(result, 'text') and result.text:
return result.text[:500]
return result.text[:1000]
return ''
def _determine_source_type(self, url):

View File

@@ -19,7 +19,7 @@ class KeywordAnalyzer:
Analyze the following research content and extract comprehensive keyword insights for: {', '.join(original_keywords)}
Research Content:
{content[:3000]} # Limit to avoid token limits
{content[:8000]}
Extract and analyze:
1. Primary keywords (main topic terms)

View File

@@ -250,10 +250,32 @@ class ResearchService:
if 'content' not in locals() or 'sources' not in locals():
raise RuntimeError(f"{config.provider.value} research did not return content or sources. Research failed.")
# Build compact all-source summary for richer analysis
analysis_content = self._build_analysis_content(sources)
# Run dedicated competitor search for richer competitor intelligence
competitor_content = analysis_content
try:
comp_query = f"top {industry} companies or competitors {topic}"
comp_results = await exa_provider.simple_search(
query=comp_query, num_results=5, user_id=user_id,
)
if comp_results:
comp_lines = ["COMPETITOR SEARCH RESULTS:"]
for r in comp_results:
title = r.get('title', '')
text = (r.get('text', '') or '')[:400]
comp_lines.append(f"- {title}")
if text:
comp_lines.append(f" {text[:200]}")
competitor_content = "\n".join(comp_lines) + "\n\n" + analysis_content
except Exception as e:
logger.warning(f"Competitor search failed (non-critical): {e}")
# Continue with common analysis (same for both providers)
keyword_analysis = self.keyword_analyzer.analyze(content, request.keywords, user_id=user_id)
competitor_analysis = self.competitor_analyzer.analyze(content, user_id=user_id)
suggested_angles = self.content_angle_generator.generate(content, topic, industry, user_id=user_id)
keyword_analysis = self.keyword_analyzer.analyze(analysis_content, request.keywords, user_id=user_id)
competitor_analysis = self.competitor_analyzer.analyze(competitor_content, user_id=user_id)
suggested_angles = self.content_angle_generator.generate(analysis_content, topic, industry, user_id=user_id)
logger.info(f"Research completed successfully with {len(sources)} sources and {len(search_queries)} search queries")
@@ -586,9 +608,30 @@ class ResearchService:
# Continue with common analysis (same for both providers)
await task_manager.update_progress(task_id, "🔍 Analyzing keywords and content angles...")
keyword_analysis = self.keyword_analyzer.analyze(content, request.keywords, user_id=user_id)
competitor_analysis = self.competitor_analyzer.analyze(content, user_id=user_id)
suggested_angles = self.content_angle_generator.generate(content, topic, industry, user_id=user_id)
analysis_content = self._build_analysis_content(sources)
# Run dedicated competitor search for richer competitor intelligence
competitor_content = analysis_content
try:
comp_query = f"top {industry} companies or competitors {topic}"
comp_results = await exa_provider.simple_search(
query=comp_query, num_results=5, user_id=user_id,
)
if comp_results:
comp_lines = ["COMPETITOR SEARCH RESULTS:"]
for r in comp_results:
title = r.get('title', '')
text = (r.get('text', '') or '')[:400]
comp_lines.append(f"- {title}")
if text:
comp_lines.append(f" {text[:200]}")
competitor_content = "\n".join(comp_lines) + "\n\n" + analysis_content
except Exception as e:
logger.warning(f"Competitor search failed (non-critical): {e}")
keyword_analysis = self.keyword_analyzer.analyze(analysis_content, request.keywords, user_id=user_id)
competitor_analysis = self.competitor_analyzer.analyze(competitor_content, user_id=user_id)
suggested_angles = self.content_angle_generator.generate(analysis_content, topic, industry, user_id=user_id)
await task_manager.update_progress(task_id, "💾 Caching results for future use...")
logger.info(f"Research completed successfully with {len(sources)} sources and {len(search_queries)} search queries")
@@ -780,6 +823,33 @@ class ResearchService:
web_search_queries=search_queries or [],
)
def _build_analysis_content(self, sources: List[Dict[str, Any]]) -> str:
"""Build compact all-source summary for LLM analysis.
Each source is distilled to one line with title, key content, and highlights.
This ensures ALL sources are visible to keyword, competitor, and angle
analyzers instead of only the first few (raw content[:3000]).
"""
if not sources:
return ""
lines = []
for src in sources:
title = src.get('title', '') or ''
summary = src.get('summary', '') or ''
highlights = src.get('highlights', []) or []
excerpt = src.get('excerpt', '') or ''
part = f"{title}"
if summary:
part += f"{summary[:250]}"
elif excerpt:
part += f"{excerpt[:250]}"
if highlights:
findings = [h[:120] for h in highlights[:2] if h]
if findings:
part += f" | {'; '.join(findings)}"
lines.append(part)
return "\n".join(lines)
def _normalize_cached_research_data(self, cached_data: Dict[str, Any]) -> Dict[str, Any]:
"""
Normalize cached research data to fix None values in confidence_scores.