"""
Content Quality Analyzer Service for ALwrity

This service provides comprehensive quality assessment for generated content,
evaluating factual accuracy, source verification, professional tone, and industry relevance.

Key Features:
- Factual accuracy scoring against source verification
- Professional tone analysis for enterprise content
- Industry relevance metrics and assessment
- Overall quality scoring and recommendations
- Content quality tracking over time

Dependencies:
- re (for pattern matching)
- typing (for type hints)
- logging (for debugging)

Author: ALwrity Team
Version: 1.0
Last Updated: January 2025
"""

import re
from typing import Dict, List, Optional, Any, Tuple
from loguru import logger

class ContentQualityAnalyzer:
    """
    Service for analyzing and scoring content quality.
    
    This service evaluates content across multiple dimensions including
    factual accuracy, professional tone, industry relevance, and overall quality.
    """
    
    def __init__(self):
        """Initialize the Content Quality Analyzer."""
        # Professional tone indicators
        self.professional_indicators = [
            "research", "analysis", "insights", "trends", "strategies",
            "implementation", "optimization", "innovation", "development",
            "leadership", "expertise", "professional", "industry", "enterprise"
        ]
        
        # Unprofessional tone indicators
        self.unprofessional_indicators = [
            "awesome", "amazing", "incredible", "mind-blowing", "crazy",
            "totally", "absolutely", "literally", "basically", "actually",
            "you know", "like", "um", "uh", "lol", "omg"
        ]
        
        # Industry-specific terminology patterns
        self.industry_terminology = {
            "Technology": ["ai", "machine learning", "automation", "digital transformation", "cloud computing"],
            "Healthcare": ["patient care", "medical", "treatment", "diagnosis", "healthcare"],
            "Finance": ["investment", "market", "financial", "portfolio", "risk management"],
            "Marketing": ["brand", "campaign", "audience", "conversion", "engagement"],
            "Education": ["learning", "curriculum", "pedagogy", "student", "academic"]
        }
        
        logger.info("Content Quality Analyzer initialized successfully")
    
    def analyze_content_quality(
        self, 
        content: str, 
        sources: List[Dict[str, Any]], 
        industry: str = "general"
    ) -> Dict[str, Any]:
        """
        Analyze content quality across multiple dimensions.
        
        Args:
            content: The content to analyze
            sources: List of research sources used
            industry: The target industry for relevance assessment
            
        Returns:
            Comprehensive quality analysis results
        """
        try:
            # Analyze different quality aspects
            logger.info("🔍 [Quality Analysis] Starting content quality analysis")
            logger.info(f"🔍 [Quality Analysis] Content length: {len(content)} characters")
            logger.info(f"🔍 [Quality Analysis] Sources count: {len(sources)}")
            
            factual_accuracy = self._assess_factual_accuracy(content, sources)
            logger.info(f"🔍 [Quality Analysis] Factual accuracy score: {factual_accuracy}")
            
            source_verification = self._assess_source_verification(content, sources)
            logger.info(f"🔍 [Quality Analysis] Source verification score: {source_verification}")
            
            professional_tone = self._assess_professional_tone(content)
            logger.info(f"🔍 [Quality Analysis] Professional tone score: {professional_tone}")
            
            industry_relevance = self._assess_industry_relevance(content, industry)
            logger.info(f"🔍 [Quality Analysis] Industry relevance score: {industry_relevance}")
            
            citation_coverage = self._assess_citation_coverage(content, sources)
            logger.info(f"🔍 [Quality Analysis] Citation coverage score: {citation_coverage}")
            
            # Calculate overall quality score
            overall_score = self._calculate_overall_score({
                "factual_accuracy": factual_accuracy,
                "source_verification": source_verification,
                "professional_tone": professional_tone,
                "industry_relevance": industry_relevance,
                "citation_coverage": citation_coverage
            })
            logger.info(f"🔍 [Quality Analysis] Overall score calculated: {overall_score}")
            
            # Generate recommendations
            recommendations = self._generate_recommendations({
                "factual_accuracy": factual_accuracy,
                "source_verification": source_verification,
                "professional_tone": professional_tone,
                "industry_relevance": industry_relevance,
                "citation_coverage": citation_coverage
            })
            logger.info(f"🔍 [Quality Analysis] Generated {len(recommendations)} recommendations")
            
            result = {
                "overall_score": overall_score,
                "metrics": {
                    "factual_accuracy": factual_accuracy,
                    "source_verification": source_verification,
                    "professional_tone": professional_tone,
                    "industry_relevance": industry_relevance,
                    "citation_coverage": citation_coverage
                },
                "recommendations": recommendations,
                "content_length": len(content),
                "word_count": len(content.split()),
                "analysis_timestamp": self._get_timestamp()
            }
            
            logger.info(f"🔍 [Quality Analysis] Final result: {result}")
            return result
            
        except Exception as e:
            logger.error(f"Content quality analysis failed: {str(e)}")
            return {
                "overall_score": 0.0,
                "error": str(e),
                "metrics": {},
                "recommendations": ["Content quality analysis failed. Please try again."]
            }
    
    def _assess_factual_accuracy(self, content: str, sources: List[Dict[str, Any]]) -> float:
        """
        Assess factual accuracy based on source verification.
        
        Args:
            content: The content to analyze
            sources: Research sources used
            
        Returns:
            Factual accuracy score between 0.0 and 1.0
        """
        logger.info(f"🔍 [Factual Accuracy] Starting analysis with {len(sources)} sources")
        logger.info(f"🔍 [Factual Accuracy] Content length: {len(content)} characters")
        
        if not sources:
            logger.warning("🔍 [Factual Accuracy] No sources provided, returning 0.0")
            return 0.0
        
        # Look for factual indicators in the content
        factual_indicators = [
            r'\d+%', r'\d+ percent',  # Percentages
            r'\$\d+', r'\d+ dollars',  # Dollar amounts
            r'\d+ million', r'\d+ billion',  # Billions
            r'research shows', r'studies indicate', r'data reveals',
            r'experts say', r'according to', r'statistics show',
            r'\d{4}',  # Years
            r'\d+ organizations', r'\d+ companies', r'\d+ enterprises',
            r'AI', r'artificial intelligence', r'machine learning',  # Technology terms
            r'content creation', r'digital marketing', r'technology industry',  # Industry terms
            r'efficiency', r'innovation', r'development', r'growth',  # Business terms
            r'businesses', r'companies', r'organizations',  # Entity terms
            r'tools', r'platforms', r'systems', r'solutions'  # Product terms
        ]
        
        factual_claims = 0
        supported_claims = 0
        
        for pattern in factual_indicators:
            matches = re.findall(pattern, content, re.IGNORECASE)
            if matches:
                logger.info(f"🔍 [Factual Accuracy] Pattern {pattern} found {len(matches)} matches: {matches}")
            factual_claims += len(matches)
            
            # Check if claims are near citations
            for match in matches:
                if self._is_claim_supported(match, content, sources):
                    supported_claims += 1
        
        logger.info(f"🔍 [Factual Accuracy] Total factual claims: {factual_claims}")
        logger.info(f"🔍 [Factual Accuracy] Supported claims: {supported_claims}")
        
        # Calculate accuracy score - be more lenient
        if factual_claims == 0:
            logger.info("🔍 [Factual Accuracy] No factual claims to verify, returning 0.8")
            return 0.8  # No factual claims to verify
        
        # Base accuracy score
        accuracy_score = supported_claims / factual_claims
        logger.info(f"🔍 [Factual Accuracy] Base accuracy score: {accuracy_score}")
        
        # Boost score if we have good source quality
        if sources:
            avg_credibility = sum(
                (s.credibility_score or 0) if hasattr(s, 'credibility_score') else (s.get("credibility_score", 0) or 0)
                for s in sources
            ) / len(sources)
            
            logger.info(f"🔍 [Factual Accuracy] Average credibility: {avg_credibility}")
            
            # Boost accuracy if sources are credible
            if avg_credibility > 0.7:
                accuracy_score = min(accuracy_score * 1.3, 1.0)
                logger.info(f"🔍 [Factual Accuracy] Applied high credibility boost: {accuracy_score}")
            elif avg_credibility > 0.5:
                accuracy_score = min(accuracy_score * 1.1, 1.0)
                logger.info(f"🔍 [Factual Accuracy] Applied medium credibility boost: {accuracy_score}")
        
        # Boost score if we have multiple sources (diversity)
        if len(sources) >= 3:
            accuracy_score = min(accuracy_score * 1.2, 1.0)
            logger.info(f"🔍 [Factual Accuracy] Applied diversity boost: {accuracy_score}")
        
        final_score = round(min(accuracy_score, 1.0), 3)
        logger.info(f"🔍 [Factual Accuracy] Final accuracy score: {final_score}")
        return final_score
    
    def _assess_source_verification(self, content: str, sources: List[Dict[str, Any]]) -> float:
        """
        Assess source verification quality.
        
        Args:
            content: The content to analyze
            sources: Research sources used
            
        Returns:
            Source verification score between 0.0 and 1.0
        """
        if not sources:
            return 0.0
        
        # Calculate source quality metrics
        total_sources = len(sources)
        
        # Source credibility scores - handle both Dict and ResearchSource objects
        credibility_scores = []
        relevance_scores = []
        domain_scores = []
        source_types = set()
        
        for s in sources:
            if hasattr(s, 'credibility_score'):
                # ResearchSource Pydantic model
                credibility_scores.append(s.credibility_score or 0)
                relevance_scores.append(s.relevance_score or 0)
                domain_scores.append(s.domain_authority or 0)
                source_types.add(s.source_type or "general")
            else:
                # Dictionary object
                credibility_scores.append(s.get("credibility_score", 0))
                relevance_scores.append(s.get("relevance_score", 0))
                domain_scores.append(s.get("domain_authority", 0))
                source_types.add(s.get("source_type", "general"))
        
        avg_credibility = sum(credibility_scores) / len(credibility_scores) if credibility_scores else 0
        avg_relevance = sum(relevance_scores) / len(relevance_scores) if relevance_scores else 0
        avg_domain_authority = sum(domain_scores) / len(domain_scores) if domain_scores else 0
        diversity_score = min(len(source_types) / 3, 1.0)  # Normalize to 3+ types
        
        # Calculate verification score
        verification_score = (
            avg_credibility * 0.3 +
            avg_relevance * 0.3 +
            avg_domain_authority * 0.2 +
            diversity_score * 0.2
        )
        
        return round(verification_score, 3)
    
    def _assess_professional_tone(self, content: str) -> float:
        """
        Assess professional tone appropriateness.
        
        Args:
            content: The content to analyze
            
        Returns:
            Professional tone score between 0.0 and 1.0
        """
        content_lower = content.lower()
        
        # Count professional indicators
        professional_count = sum(1 for indicator in self.professional_indicators if indicator in content_lower)
        
        # Count unprofessional indicators
        unprofessional_count = sum(1 for indicator in self.unprofessional_indicators if indicator in content_lower)
        
        # Calculate tone score
        total_indicators = len(self.professional_indicators) + len(self.unprofessional_indicators)
        
        if total_indicators == 0:
            return 0.7  # Neutral score
        
        professional_score = professional_count / len(self.professional_indicators)
        unprofessional_penalty = unprofessional_count / len(self.unprofessional_indicators)
        
        tone_score = professional_score - unprofessional_penalty
        tone_score = max(0.0, min(1.0, tone_score))  # Clamp between 0 and 1
        
        return round(tone_score, 3)
    
    def _assess_industry_relevance(self, content: str, industry: str) -> float:
        """
        Assess industry relevance of the content.
        
        Args:
            content: The content to analyze
            industry: The target industry
            
        Returns:
            Industry relevance score between 0.0 and 1.0
        """
        if industry.lower() == "general":
            return 0.7  # Neutral score for general industry
        
        content_lower = content.lower()
        industry_lower = industry.lower()
        
        # Get industry-specific terminology
        industry_terms = self.industry_terminology.get(industry, [])
        
        # Count industry-specific terms
        industry_term_count = sum(1 for term in industry_terms if term in content_lower)
        
        # Count industry mentions
        industry_mentions = content_lower.count(industry_lower)
        
        # Calculate relevance score
        if not industry_terms:
            return 0.6  # Fallback score
        
        term_relevance = min(industry_term_count / len(industry_terms), 1.0)
        mention_relevance = min(industry_mentions / 3, 1.0)  # Normalize to 3+ mentions
        
        relevance_score = (term_relevance * 0.7) + (mention_relevance * 0.3)
        
        return round(relevance_score, 3)
    
    def _assess_citation_coverage(self, content: str, sources: List[Dict[str, Any]]) -> float:
        """
        Assess citation coverage in the content.
        
        Args:
            content: The content to analyze
            sources: Research sources used
            
        Returns:
            Citation coverage score between 0.0 and 1.0
        """
        logger.info(f"🔍 [Citation Coverage] Starting analysis with {len(sources)} sources")
        logger.info(f"🔍 [Citation Coverage] Content length: {len(content)} characters")
        
        # Debug: Show sample of content to see what we're analyzing
        content_sample = content[:500] + "..." if len(content) > 500 else content
        logger.info(f"🔍 [Citation Coverage] Content sample: {content_sample}")
        
        if not sources:
            logger.warning("🔍 [Citation Coverage] No sources provided, returning 0.0")
            return 0.0
        
        # Look for citation patterns - updated to match our actual citation format
        citation_patterns = [
            r'<sup class="liw-cite"[^>]*>\[(\d+)\]</sup>',  # HTML format - PRIORITY 1
            r'\[(\d+)\]',  # Our primary format: [1], [2], etc.
            r'\[Source (\d+)\]', r'\(Source (\d+)\)',
            r'\((\d+)\)', r'Source (\d+)', r'Ref\. (\d+)', r'Reference (\d+)'
        ]
        
        total_citations = 0
        for pattern in citation_patterns:
            matches = re.findall(pattern, content, re.IGNORECASE)
            if matches:
                logger.info(f"🔍 [Citation Coverage] Pattern {pattern} found {len(matches)} matches: {matches}")
            total_citations += len(matches)
        
        logger.info(f"🔍 [Citation Coverage] Total citations found: {total_citations}")
        
        # Calculate coverage score - be more lenient since we strategically place citations
        expected_citations = min(len(sources), len(sources) * 0.8)  # Allow 80% coverage
        if expected_citations == 0:
            logger.warning("🔍 [Citation Coverage] Expected citations is 0, returning 0.0")
            return 0.0
            
        coverage_score = min(total_citations / expected_citations, 1.0)
        logger.info(f"🔍 [Citation Coverage] Coverage score before boost: {coverage_score}")
        
        # Boost score if we have good source diversity
        if len(sources) >= 3:
            coverage_score = min(coverage_score * 1.2, 1.0)
            logger.info(f"🔍 [Citation Coverage] Applied diversity boost, final score: {coverage_score}")
        
        final_score = round(coverage_score, 3)
        logger.info(f"🔍 [Citation Coverage] Final coverage score: {final_score}")
        return final_score
    
    def _is_claim_supported(self, claim: str, content: str, sources: List[Dict[str, Any]]) -> bool:
        """
        Check if a factual claim is supported by nearby citations.
        
        Args:
            claim: The factual claim to check
            content: The content containing the claim
            sources: Research sources used
            
        Returns:
            True if the claim appears to be supported
        """
        # Find the position of the claim
        claim_pos = content.lower().find(claim.lower())
        if claim_pos == -1:
            return False
        
        # Look for citations within 300 characters of the claim (increased range)
        start_pos = max(0, claim_pos - 150)
        end_pos = min(len(content), claim_pos + len(claim) + 150)
        
        nearby_text = content[start_pos:end_pos]
        
        # Check for citation patterns - updated to match our actual format
        citation_patterns = [
            r'<sup class="liw-cite"[^>]*>\[(\d+)\]</sup>',  # HTML format - PRIORITY 1
            r'\[(\d+)\]',  # Our primary format: [1], [2], etc.
            r'\[Source (\d+)\]', r'\[(\d+)\]', r'\(Source (\d+)\)',
            r'\((\d+)\)', r'Source (\d+)', r'Ref\. (\d+)', r'Reference (\d+)'
        ]
        
        for pattern in citation_patterns:
            if re.search(pattern, nearby_text, re.IGNORECASE):
                return True
        
        return False
    
    def _calculate_overall_score(self, metrics: Dict[str, float]) -> float:
        """
        Calculate overall quality score from individual metrics.
        
        Args:
            metrics: Dictionary of quality metrics
            
        Returns:
            Overall quality score between 0.0 and 1.0
        """
        # Weighted scoring system
        weights = {
            "factual_accuracy": 0.25,
            "source_verification": 0.25,
            "professional_tone": 0.20,
            "industry_relevance": 0.15,
            "citation_coverage": 0.15
        }
        
        overall_score = 0.0
        total_weight = 0.0
        
        for metric_name, weight in weights.items():
            if metric_name in metrics:
                overall_score += metrics[metric_name] * weight
                total_weight += weight
        
        if total_weight == 0:
            return 0.0
        
        final_score = overall_score / total_weight
        return round(final_score, 3)
    
    def _generate_recommendations(self, metrics: Dict[str, float]) -> List[str]:
        """
        Generate improvement recommendations based on quality metrics.
        
        Args:
            metrics: Dictionary of quality metrics
            
        Returns:
            List of improvement recommendations
        """
        recommendations = []
        
        # Factual accuracy recommendations
        if metrics.get("factual_accuracy", 0) < 0.7:
            recommendations.append("Improve factual accuracy by ensuring all claims are properly supported by sources.")
        
        if metrics.get("factual_accuracy", 0) < 0.5:
            recommendations.append("Significant factual accuracy issues detected. Review and verify all claims against sources.")
        
        # Source verification recommendations
        if metrics.get("source_verification", 0) < 0.6:
            recommendations.append("Enhance source quality by using more credible and relevant sources.")
        
        if metrics.get("source_verification", 0) < 0.4:
            recommendations.append("Low source verification quality. Consider using more authoritative and recent sources.")
        
        # Professional tone recommendations
        if metrics.get("professional_tone", 0) < 0.7:
            recommendations.append("Improve professional tone by using more industry-appropriate language.")
        
        if metrics.get("professional_tone", 0) < 0.5:
            recommendations.append("Content tone needs significant improvement for professional audiences.")
        
        # Industry relevance recommendations
        if metrics.get("industry_relevance", 0) < 0.6:
            recommendations.append("Increase industry relevance by using more industry-specific terminology and examples.")
        
        if metrics.get("industry_relevance", 0) < 0.4:
            recommendations.append("Content lacks industry focus. Add more industry-specific content and context.")
        
        # Citation coverage recommendations
        if metrics.get("citation_coverage", 0) < 0.8:
            recommendations.append("Improve citation coverage by adding more inline citations throughout the content.")
        
        if metrics.get("citation_coverage", 0) < 0.5:
            recommendations.append("Low citation coverage. Add citations for all factual claims and data points.")
        
        # General recommendations
        if not recommendations:
            recommendations.append("Content quality is good. Consider adding more specific examples or expanding on key points.")
        
        return recommendations
    
    def _get_timestamp(self) -> str:
        """Get current timestamp for analysis tracking."""
        from datetime import datetime
        return datetime.utcnow().isoformat()
    
    def track_quality_over_time(
        self, 
        content_id: str, 
        quality_metrics: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Track content quality metrics over time for analysis.
        
        Args:
            content_id: Unique identifier for the content
            quality_metrics: Quality analysis results
            
        Returns:
            Tracking information and trends
        """
        # This would typically integrate with a database or analytics system
        # For now, we'll return the tracking structure
        
        tracking_data = {
            "content_id": content_id,
            "timestamp": quality_metrics.get("analysis_timestamp"),
            "overall_score": quality_metrics.get("overall_score", 0.0),
            "metrics": quality_metrics.get("metrics", {}),
            "content_length": quality_metrics.get("content_length", 0),
            "word_count": quality_metrics.get("word_count", 0)
        }
        
        logger.info(f"Quality metrics tracked for content {content_id}: {tracking_data['overall_score']}")
        
        return {
            "tracked": True,
            "tracking_data": tracking_data,
            "message": f"Quality metrics tracked for content {content_id}"
        }
    
    def compare_content_quality(
        self, 
        content_a: Dict[str, Any], 
        content_b: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Compare quality between two pieces of content.
        
        Args:
            content_a: Quality metrics for first content piece
            content_b: Quality metrics for second content piece
            
        Returns:
            Comparison analysis and recommendations
        """
        comparison = {
            "content_a_score": content_a.get("overall_score", 0.0),
            "content_b_score": content_b.get("overall_score", 0.0),
            "score_difference": 0.0,
            "better_content": "content_a",
            "improvement_areas": [],
            "strength_areas": []
        }
        
        # Calculate score difference
        score_a = content_a.get("overall_score", 0.0)
        score_b = content_b.get("overall_score", 0.0)
        comparison["score_difference"] = round(abs(score_a - score_b), 3)
        
        # Determine better content
        if score_a > score_b:
            comparison["better_content"] = "content_a"
            better_metrics = content_a.get("metrics", {})
            worse_metrics = content_b.get("metrics", {})
        else:
            comparison["better_content"] = "content_b"
            better_metrics = content_b.get("metrics", {})
            worse_metrics = content_a.get("metrics", {})
        
        # Identify improvement areas
        for metric_name in better_metrics:
            if metric_name in worse_metrics:
                if worse_metrics[metric_name] < better_metrics[metric_name] - 0.2:
                    comparison["improvement_areas"].append(f"Improve {metric_name.replace('_', ' ')}")
        
        # Identify strength areas
        for metric_name in better_metrics:
            if better_metrics[metric_name] > 0.8:
                comparison["strength_areas"].append(f"Strong {metric_name.replace('_', ' ')}")
        
        return comparison
    
    def generate_quality_report(
        self, 
        content: str, 
        sources: List[Any], 
        industry: str = "general"
    ) -> Dict[str, Any]:
        """
        Generate a comprehensive quality report for content.
        
        Args:
            content: The content to analyze
            sources: Research sources used (can be Dict or ResearchSource objects)
            industry: Target industry
            
        Returns:
            Comprehensive quality report
        """
        # Perform full quality analysis
        quality_analysis = self.analyze_content_quality(content, sources, industry)
        
        # Generate detailed report
        report = {
            "summary": {
                "overall_score": quality_analysis["overall_score"],
                "quality_level": self._get_quality_level(quality_analysis["overall_score"]),
                "content_length": quality_analysis["content_length"],
                "word_count": quality_analysis["word_count"]
            },
            "detailed_metrics": quality_analysis["metrics"],
            "recommendations": quality_analysis["recommendations"],
            "source_analysis": {
                "total_sources": len(sources),
                "source_types": self._extract_source_types(sources),
                "avg_credibility": self._calculate_avg_score(sources, "credibility_score"),
                "avg_relevance": self._calculate_avg_score(sources, "relevance_score")
            },
            "improvement_plan": self._generate_improvement_plan(quality_analysis["metrics"]),
            "analysis_timestamp": quality_analysis["analysis_timestamp"]
        }
        
        return report
    
    def _get_quality_level(self, score: float) -> str:
        """Convert numerical score to quality level description."""
        if score >= 0.9:
            return "Excellent"
        elif score >= 0.8:
            return "Very Good"
        elif score >= 0.7:
            return "Good"
        elif score >= 0.6:
            return "Fair"
        elif score >= 0.5:
            return "Below Average"
        else:
            return "Poor"
    
    def _generate_improvement_plan(self, metrics: Dict[str, float]) -> Dict[str, Any]:
        """
        Generate a structured improvement plan based on quality metrics.
        
        Args:
            metrics: Quality metrics dictionary
            
        Returns:
            Structured improvement plan
        """
        improvement_plan = {
            "priority_high": [],
            "priority_medium": [],
            "priority_low": [],
            "estimated_effort": "medium"
        }
        
        # Categorize improvements by priority
        for metric_name, score in metrics.items():
            if score < 0.4:
                improvement_plan["priority_high"].append(f"Significantly improve {metric_name.replace('_', ' ')}")
            elif score < 0.6:
                improvement_plan["priority_medium"].append(f"Improve {metric_name.replace('_', ' ')}")
            elif score < 0.8:
                improvement_plan["priority_low"].append(f"Enhance {metric_name.replace('_', ' ')}")
        
        # Estimate effort based on number of high-priority items
        high_priority_count = len(improvement_plan["priority_high"])
        if high_priority_count >= 3:
            improvement_plan["estimated_effort"] = "high"
        elif high_priority_count >= 1:
            improvement_plan["estimated_effort"] = "medium"
        else:
            improvement_plan["estimated_effort"] = "low"
        
        return improvement_plan
    
    def _extract_source_types(self, sources: List[Any]) -> List[str]:
        """Extract source types from sources, handling both Dict and ResearchSource objects."""
        source_types = set()
        for s in sources:
            if hasattr(s, 'source_type'):
                # ResearchSource Pydantic model
                source_types.add(s.source_type or "general")
            else:
                # Dictionary object
                source_types.add(s.get("source_type", "general"))
        return list(source_types)
    
    def _calculate_avg_score(self, sources: List[Any], score_field: str) -> float:
        """Calculate average score from sources, handling both Dict and ResearchSource objects."""
        if not sources:
            return 0.0
        
        total_score = 0.0
        valid_sources = 0
        
        for s in sources:
            if hasattr(s, score_field):
                # ResearchSource Pydantic model
                score = getattr(s, score_field)
                if score is not None:
                    total_score += score
                    valid_sources += 1
            else:
                # Dictionary object
                score = s.get(score_field, 0)
                if score:
                    total_score += score
                    valid_sources += 1
        
        return total_score / valid_sources if valid_sources > 0 else 0.0