chore: push all remaining changes

- Blog writer enhancements and bug fixes
- Wix integration improvements
- Frontend UI updates
- GSC dashboard docs cleanup
- Image studio assets
- LinkedIn requirements file
- Various dependency updates
2026-06-12 20:32:03 +05:30
parent 63a0df2536
commit d90d441019
78 changed files with 3963 additions and 2899 deletions
--- a/backend/services/blog_writer/research/competitor_analyzer.py
+++ b/backend/services/blog_writer/research/competitor_analyzer.py
@@ -18,7 +18,7 @@ class CompetitorAnalyzer:
        Analyze the following research content and extract competitor insights:
        
        Research Content:
-        {content[:3000]}
+        {content[:8000]}
        
        Extract and analyze:
        1. Top competitors mentioned (companies, brands, platforms)
--- a/backend/services/blog_writer/research/content_angle_generator.py
+++ b/backend/services/blog_writer/research/content_angle_generator.py
@@ -17,7 +17,7 @@ class ContentAngleGenerator:
        Analyze the following research content and create strategic content angles for: {topic} in {industry}
        
        Research Content:
-        {content[:3000]}
+        {content[:8000]}
        
        Create 7 compelling content angles that:
        1. Leverage current trends and data from the research
--- a/backend/services/blog_writer/research/exa_provider.py
+++ b/backend/services/blog_writer/research/exa_provider.py
@@ -7,6 +7,8 @@ Neural search implementation using Exa API for high-quality, citation-rich resea
 from exa_py import Exa
 import os
 import asyncio
+from datetime import datetime
+from urllib.parse import urlparse
 from typing import List, Dict, Any
 from loguru import logger
 from models.subscription_models import APIProvider
@@ -355,6 +357,125 @@ class ExaResearchProvider(BaseProvider):
        
        return None
    
+    def _calculate_credibility_score(self, result) -> float:
+        """Blend four per-source signals into one weighted credibility score.
+
+        Signals and weights: domain authority (3), recency (2), content
+        substance (2) and Exa's own relevance score (2). The weighted sum is
+        divided by the total weight and rounded to three decimals.
+
+        Args:
+            result: An Exa search result; ``url`` and ``score`` are read here
+                and the object is passed on to the recency/substance scorers.
+
+        Returns:
+            The weighted mean of the four signals, rounded to 3 places.
+        """
+        # A missing or None Exa score falls back to a neutral 0.5.
+        raw_relevance = getattr(result, 'score', None)
+        relevance = 0.5 if raw_relevance is None else float(raw_relevance)
+
+        # (signal, weight) pairs; domain authority is weighted highest and an
+        # absent ``url`` attribute is scored as an empty URL.
+        weighted_signals = [
+            (self._score_domain_authority(getattr(result, 'url', '')), 3),
+            (self._score_recency(result), 2),
+            (self._score_substance(result), 2),
+            (relevance, 2),
+        ]
+
+        weighted_sum = sum(value * weight for value, weight in weighted_signals)
+        weight_total = sum(weight for _, weight in weighted_signals)
+        # Normalise by the total weight so the result is a weighted mean.
+        return round(weighted_sum / weight_total, 3)
+
+    @staticmethod
+    def _score_domain_authority(url: str) -> float:
+        """Score a source URL's host by tier: 0.95 .gov/.edu, 0.92 research,
+        0.88 major news, 0.80 industry research, 0.75 other .org, else 0.60.
+
+        Listed domains also match their subdomains via a dotted-suffix test,
+        so multi-label entries such as ``bbc.co.uk`` and ``abcnews.go.com``
+        are honoured (a "last two labels" lookup reduced them to ``co.uk``
+        and ``go.com``). Empty or unparseable URLs score a neutral 0.5.
+        """
+        if not url:
+            return 0.5
+        try:
+            # hostname drops any port/userinfo and is already lower-cased.
+            domain = (urlparse(url).hostname or '').rstrip('.')
+        except (ValueError, TypeError, AttributeError):
+            return 0.5
+        if domain.startswith('www.'):
+            domain = domain[4:]
+
+        # Tier 1: government and educational hosts
+        if domain.endswith(('.gov', '.edu')):
+            return 0.95
+        tiers = (
+            # Tier 1: major research sites
+            (0.92, ('arxiv.org', 'pubmed.ncbi.nlm.nih.gov', 'ncbi.nlm.nih.gov',
+                    'scholar.google.com', 'researchgate.net', 'sciencedaily.com',
+                    'nature.com', 'science.org', 'pnas.org')),
+            # Tier 2: major established news and professional publications
+            (0.88, ('reuters.com', 'apnews.com', 'bbc.com', 'bbc.co.uk', 'npr.org',
+                    'wsj.com', 'nytimes.com', 'economist.com', 'bloomberg.com',
+                    'theguardian.com', 'ft.com', 'washingtonpost.com',
+                    'forbes.com', 'hbr.org', 'techcrunch.com', 'wired.com',
+                    'cnn.com', 'nbcnews.com', 'cbsnews.com', 'abcnews.go.com')),
+            # Tier 3: industry research and established marketing publications
+            (0.80, ('statista.com', 'pewresearch.org', 'gartner.com', 'mckinsey.com',
+                    'deloitte.com', 'pwc.com', 'ey.com', 'kpmg.com',
+                    'hubspot.com', 'moz.com', 'searchengineland.com',
+                    'neilpatel.com', 'backlinko.com', 'copyblogger.com')),
+        )
+        for score, listed in tiers:
+            if any(domain == d or domain.endswith('.' + d) for d in listed):
+                return score
+        if domain.endswith('.org'):
+            return 0.75
+        return 0.60
+
+    def _score_recency(self, result) -> float:
+        """Map the age of ``result.publishedDate`` to a freshness score.
+
+        Only the first ten characters (``YYYY-MM-DD``) are parsed; a missing
+        or unparseable date yields a neutral 0.70.
+        """
+        stamp = getattr(result, 'publishedDate', None)
+        if not stamp:
+            return 0.70
+        try:
+            published = datetime.strptime(stamp[:10], '%Y-%m-%d')
+            age_days = (datetime.now() - published).days
+        except Exception:
+            return 0.70
+        # (exclusive upper bound in days, score), newest bracket first.
+        brackets = ((30, 1.0), (180, 0.90), (365, 0.80), (730, 0.65), (1825, 0.45))
+        for limit, score in brackets:
+            if age_days < limit:
+                return score
+        return 0.25
+
+    def _score_substance(self, result) -> float:
+        """Score how much text a result carries, by combined character count.
+
+        Adds the lengths of every highlight, the summary and the text (each
+        only when present and non-empty) and maps the total to a bracket
+        score: >2000 0.95, >1000 0.85, >500 0.75, >100 0.60, else 0.40.
+        """
+        highlights = getattr(result, 'highlights', None) or ()
+        char_count = sum(len(item or '') for item in highlights)
+        char_count += len(getattr(result, 'summary', None) or '')
+        char_count += len(getattr(result, 'text', None) or '')
+
+        # (exclusive lower bound in characters, score), richest bracket first.
+        brackets = ((2000, 0.95), (1000, 0.85), (500, 0.75), (100, 0.60))
+        for floor, score in brackets:
+            if char_count > floor:
+                return score
+        return 0.40
+
    def _transform_sources(self, results):
        """Transform Exa results to ResearchSource format."""
        sources = []
@@ -368,7 +489,7 @@ class ExaResearchProvider(BaseProvider):
                'title': result.title if hasattr(result, 'title') else '',
                'url': result.url if hasattr(result, 'url') else '',
                'excerpt': self._get_excerpt(result),
-                'credibility_score': 0.85,  # Exa results are high quality
+                'credibility_score': self._calculate_credibility_score(result),
                'published_at': result.publishedDate if hasattr(result, 'publishedDate') else None,
                'index': idx,
                'source_type': source_type,
@@ -388,7 +509,7 @@ class ExaResearchProvider(BaseProvider):
        if hasattr(result, 'summary') and result.summary:
            return result.summary
        if hasattr(result, 'text') and result.text:
-            return result.text[:500]
+            return result.text[:1000]
        return ''
    
    def _determine_source_type(self, url):
--- a/backend/services/blog_writer/research/keyword_analyzer.py
+++ b/backend/services/blog_writer/research/keyword_analyzer.py
@@ -19,7 +19,7 @@ class KeywordAnalyzer:
        Analyze the following research content and extract comprehensive keyword insights for: {', '.join(original_keywords)}
        
        Research Content:
-        {content[:3000]}  # Limit to avoid token limits
+        {content[:8000]}
        
        Extract and analyze:
        1. Primary keywords (main topic terms)
--- a/backend/services/blog_writer/research/research_service.py
+++ b/backend/services/blog_writer/research/research_service.py
@@ -250,10 +250,32 @@ class ResearchService:
            if 'content' not in locals() or 'sources' not in locals():
                raise RuntimeError(f"{config.provider.value} research did not return content or sources. Research failed.")
            
+            # Build compact all-source summary for richer analysis
+            analysis_content = self._build_analysis_content(sources)
+            
+            # Run dedicated competitor search for richer competitor intelligence
+            competitor_content = analysis_content
+            try:
+                comp_query = f"top {industry} companies or competitors {topic}"
+                comp_results = await exa_provider.simple_search(
+                    query=comp_query, num_results=5, user_id=user_id,
+                )
+                if comp_results:
+                    comp_lines = ["COMPETITOR SEARCH RESULTS:"]
+                    for r in comp_results:
+                        title = r.get('title', '')
+                        text = (r.get('text', '') or '')[:400]
+                        comp_lines.append(f"- {title}")
+                        if text:
+                            comp_lines.append(f"  {text[:200]}")
+                    competitor_content = "\n".join(comp_lines) + "\n\n" + analysis_content
+            except Exception as e:
+                logger.warning(f"Competitor search failed (non-critical): {e}")
+            
            # Continue with common analysis (same for both providers)
-            keyword_analysis = self.keyword_analyzer.analyze(content, request.keywords, user_id=user_id)
-            competitor_analysis = self.competitor_analyzer.analyze(content, user_id=user_id)
-            suggested_angles = self.content_angle_generator.generate(content, topic, industry, user_id=user_id)
+            keyword_analysis = self.keyword_analyzer.analyze(analysis_content, request.keywords, user_id=user_id)
+            competitor_analysis = self.competitor_analyzer.analyze(competitor_content, user_id=user_id)
+            suggested_angles = self.content_angle_generator.generate(analysis_content, topic, industry, user_id=user_id)
            
            logger.info(f"Research completed successfully with {len(sources)} sources and {len(search_queries)} search queries")
            
@@ -586,9 +608,30 @@ class ResearchService:
            
            # Continue with common analysis (same for both providers)
            await task_manager.update_progress(task_id, "🔍 Analyzing keywords and content angles...")
-            keyword_analysis = self.keyword_analyzer.analyze(content, request.keywords, user_id=user_id)
-            competitor_analysis = self.competitor_analyzer.analyze(content, user_id=user_id)
-            suggested_angles = self.content_angle_generator.generate(content, topic, industry, user_id=user_id)
+            analysis_content = self._build_analysis_content(sources)
+            
+            # Run dedicated competitor search for richer competitor intelligence
+            competitor_content = analysis_content
+            try:
+                comp_query = f"top {industry} companies or competitors {topic}"
+                comp_results = await exa_provider.simple_search(
+                    query=comp_query, num_results=5, user_id=user_id,
+                )
+                if comp_results:
+                    comp_lines = ["COMPETITOR SEARCH RESULTS:"]
+                    for r in comp_results:
+                        title = r.get('title', '')
+                        text = (r.get('text', '') or '')[:400]
+                        comp_lines.append(f"- {title}")
+                        if text:
+                            comp_lines.append(f"  {text[:200]}")
+                    competitor_content = "\n".join(comp_lines) + "\n\n" + analysis_content
+            except Exception as e:
+                logger.warning(f"Competitor search failed (non-critical): {e}")
+            
+            keyword_analysis = self.keyword_analyzer.analyze(analysis_content, request.keywords, user_id=user_id)
+            competitor_analysis = self.competitor_analyzer.analyze(competitor_content, user_id=user_id)
+            suggested_angles = self.content_angle_generator.generate(analysis_content, topic, industry, user_id=user_id)
            
            await task_manager.update_progress(task_id, "💾 Caching results for future use...")
            logger.info(f"Research completed successfully with {len(sources)} sources and {len(search_queries)} search queries")
@@ -780,6 +823,33 @@ class ResearchService:
            web_search_queries=search_queries or [],
        )

+    def _build_analysis_content(self, sources: List[Dict[str, Any]]) -> str:
+        """Condense every source into one bullet entry for the LLM analyzers.
+
+        Each entry is ``• title``, then `` — `` plus the first 250 characters
+        of the summary (or of the excerpt when there is no summary), then
+        `` | `` plus up to two highlights cut to 120 characters and joined
+        with ``; ``. Entries are joined with newlines.
+
+        Args:
+            sources: Source dicts; the ``title``, ``summary``, ``excerpt``
+                and ``highlights`` keys are read and may be missing or None.
+
+        Returns:
+            The joined entries, or an empty string when ``sources`` is empty.
+        """
+        entries = []
+        for source in sources or []:
+            entry = f"• {source.get('title') or ''}"
+            blurb = source.get('summary') or source.get('excerpt') or ''
+            if blurb:
+                entry += f" — {blurb[:250]}"
+            snippets = [h[:120] for h in (source.get('highlights') or [])[:2] if h]
+            if snippets:
+                entry += f" | {'; '.join(snippets)}"
+            entries.append(entry)
+        return "\n".join(entries)
+
    def _normalize_cached_research_data(self, cached_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Normalize cached research data to fix None values in confidence_scores.