Base code

Kunthawat Greethong
2026-01-08 22:39:53 +07:00
parent 697115c61a
commit c35fa52117
2169 changed files with 626670 additions and 0 deletions


@@ -0,0 +1,755 @@
"""
Content Quality Analyzer Service for ALwrity
This service provides comprehensive quality assessment for generated content,
evaluating factual accuracy, source verification, professional tone, and industry relevance.
Key Features:
- Factual accuracy scoring against source verification
- Professional tone analysis for enterprise content
- Industry relevance metrics and assessment
- Overall quality scoring and recommendations
- Content quality tracking over time
Dependencies:
- re (for pattern matching)
- typing (for type hints)
- loguru (for logging)
Author: ALwrity Team
Version: 1.0
Last Updated: January 2025
"""
import re
from typing import Dict, List, Optional, Any, Tuple
from loguru import logger
class ContentQualityAnalyzer:
"""
Service for analyzing and scoring content quality.
This service evaluates content across multiple dimensions including
factual accuracy, professional tone, industry relevance, and overall quality.
"""
def __init__(self):
"""Initialize the Content Quality Analyzer."""
# Professional tone indicators
self.professional_indicators = [
"research", "analysis", "insights", "trends", "strategies",
"implementation", "optimization", "innovation", "development",
"leadership", "expertise", "professional", "industry", "enterprise"
]
# Unprofessional tone indicators
self.unprofessional_indicators = [
"awesome", "amazing", "incredible", "mind-blowing", "crazy",
"totally", "absolutely", "literally", "basically", "actually",
"you know", "like", "um", "uh", "lol", "omg"
]
# Industry-specific terminology patterns
self.industry_terminology = {
"Technology": ["ai", "machine learning", "automation", "digital transformation", "cloud computing"],
"Healthcare": ["patient care", "medical", "treatment", "diagnosis", "healthcare"],
"Finance": ["investment", "market", "financial", "portfolio", "risk management"],
"Marketing": ["brand", "campaign", "audience", "conversion", "engagement"],
"Education": ["learning", "curriculum", "pedagogy", "student", "academic"]
}
logger.info("Content Quality Analyzer initialized successfully")
def analyze_content_quality(
self,
content: str,
sources: List[Dict[str, Any]],
industry: str = "general"
) -> Dict[str, Any]:
"""
Analyze content quality across multiple dimensions.
Args:
content: The content to analyze
sources: List of research sources used
industry: The target industry for relevance assessment
Returns:
Comprehensive quality analysis results
"""
try:
# Analyze different quality aspects
logger.info("🔍 [Quality Analysis] Starting content quality analysis")
logger.info(f"🔍 [Quality Analysis] Content length: {len(content)} characters")
logger.info(f"🔍 [Quality Analysis] Sources count: {len(sources)}")
factual_accuracy = self._assess_factual_accuracy(content, sources)
logger.info(f"🔍 [Quality Analysis] Factual accuracy score: {factual_accuracy}")
source_verification = self._assess_source_verification(content, sources)
logger.info(f"🔍 [Quality Analysis] Source verification score: {source_verification}")
professional_tone = self._assess_professional_tone(content)
logger.info(f"🔍 [Quality Analysis] Professional tone score: {professional_tone}")
industry_relevance = self._assess_industry_relevance(content, industry)
logger.info(f"🔍 [Quality Analysis] Industry relevance score: {industry_relevance}")
citation_coverage = self._assess_citation_coverage(content, sources)
logger.info(f"🔍 [Quality Analysis] Citation coverage score: {citation_coverage}")
# Calculate overall quality score
overall_score = self._calculate_overall_score({
"factual_accuracy": factual_accuracy,
"source_verification": source_verification,
"professional_tone": professional_tone,
"industry_relevance": industry_relevance,
"citation_coverage": citation_coverage
})
logger.info(f"🔍 [Quality Analysis] Overall score calculated: {overall_score}")
# Generate recommendations
recommendations = self._generate_recommendations({
"factual_accuracy": factual_accuracy,
"source_verification": source_verification,
"professional_tone": professional_tone,
"industry_relevance": industry_relevance,
"citation_coverage": citation_coverage
})
logger.info(f"🔍 [Quality Analysis] Generated {len(recommendations)} recommendations")
result = {
"overall_score": overall_score,
"metrics": {
"factual_accuracy": factual_accuracy,
"source_verification": source_verification,
"professional_tone": professional_tone,
"industry_relevance": industry_relevance,
"citation_coverage": citation_coverage
},
"recommendations": recommendations,
"content_length": len(content),
"word_count": len(content.split()),
"analysis_timestamp": self._get_timestamp()
}
logger.info(f"🔍 [Quality Analysis] Final result: {result}")
return result
except Exception as e:
logger.error(f"Content quality analysis failed: {str(e)}")
return {
"overall_score": 0.0,
"error": str(e),
"metrics": {},
"recommendations": ["Content quality analysis failed. Please try again."]
}
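    # Illustrative (hypothetical) shape of the result returned above -- the values
    # below are not from a real run, they only show the structure:
    #   {
    #       "overall_score": 0.78,
    #       "metrics": {
    #           "factual_accuracy": 0.85,
    #           "source_verification": 0.72,
    #           "professional_tone": 0.80,
    #           "industry_relevance": 0.65,
    #           "citation_coverage": 0.90
    #       },
    #       "recommendations": ["Increase industry relevance by ..."],
    #       "content_length": 4820,
    #       "word_count": 760,
    #       "analysis_timestamp": "2025-01-15T10:30:00+00:00"
    #   }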
def _assess_factual_accuracy(self, content: str, sources: List[Dict[str, Any]]) -> float:
"""
Assess factual accuracy based on source verification.
Args:
content: The content to analyze
sources: Research sources used
Returns:
Factual accuracy score between 0.0 and 1.0
"""
logger.info(f"🔍 [Factual Accuracy] Starting analysis with {len(sources)} sources")
logger.info(f"🔍 [Factual Accuracy] Content length: {len(content)} characters")
if not sources:
logger.warning("🔍 [Factual Accuracy] No sources provided, returning 0.0")
return 0.0
# Look for factual indicators in the content
factual_indicators = [
r'\d+%', r'\d+ percent', # Percentages
r'\$\d+', r'\d+ dollars', # Dollar amounts
            r'\d+ million', r'\d+ billion',  # Large quantities
r'research shows', r'studies indicate', r'data reveals',
r'experts say', r'according to', r'statistics show',
r'\d{4}', # Years
r'\d+ organizations', r'\d+ companies', r'\d+ enterprises',
            r'\bAI\b', r'artificial intelligence', r'machine learning',  # Technology terms
r'content creation', r'digital marketing', r'technology industry', # Industry terms
r'efficiency', r'innovation', r'development', r'growth', # Business terms
r'businesses', r'companies', r'organizations', # Entity terms
r'tools', r'platforms', r'systems', r'solutions' # Product terms
]
factual_claims = 0
supported_claims = 0
for pattern in factual_indicators:
matches = re.findall(pattern, content, re.IGNORECASE)
if matches:
logger.info(f"🔍 [Factual Accuracy] Pattern {pattern} found {len(matches)} matches: {matches}")
factual_claims += len(matches)
# Check if claims are near citations
for match in matches:
if self._is_claim_supported(match, content, sources):
supported_claims += 1
logger.info(f"🔍 [Factual Accuracy] Total factual claims: {factual_claims}")
logger.info(f"🔍 [Factual Accuracy] Supported claims: {supported_claims}")
# Calculate accuracy score - be more lenient
if factual_claims == 0:
logger.info("🔍 [Factual Accuracy] No factual claims to verify, returning 0.8")
return 0.8 # No factual claims to verify
# Base accuracy score
accuracy_score = supported_claims / factual_claims
logger.info(f"🔍 [Factual Accuracy] Base accuracy score: {accuracy_score}")
# Boost score if we have good source quality
if sources:
avg_credibility = sum(
(s.credibility_score or 0) if hasattr(s, 'credibility_score') else (s.get("credibility_score", 0) or 0)
for s in sources
) / len(sources)
logger.info(f"🔍 [Factual Accuracy] Average credibility: {avg_credibility}")
# Boost accuracy if sources are credible
if avg_credibility > 0.7:
accuracy_score = min(accuracy_score * 1.3, 1.0)
logger.info(f"🔍 [Factual Accuracy] Applied high credibility boost: {accuracy_score}")
elif avg_credibility > 0.5:
accuracy_score = min(accuracy_score * 1.1, 1.0)
logger.info(f"🔍 [Factual Accuracy] Applied medium credibility boost: {accuracy_score}")
# Boost score if we have multiple sources (diversity)
if len(sources) >= 3:
accuracy_score = min(accuracy_score * 1.2, 1.0)
logger.info(f"🔍 [Factual Accuracy] Applied diversity boost: {accuracy_score}")
final_score = round(min(accuracy_score, 1.0), 3)
logger.info(f"🔍 [Factual Accuracy] Final accuracy score: {final_score}")
return final_score
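    # Worked example (assumed inputs, for illustration only): with 10 detected
    # factual claims of which 6 sit near a citation, the base score is 0.6.
    # If the sources average a credibility above 0.7 the score becomes
    # min(0.6 * 1.3, 1.0) = 0.78, and with 3+ sources the diversity boost gives
    # min(0.78 * 1.2, 1.0) = 0.936.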
def _assess_source_verification(self, content: str, sources: List[Dict[str, Any]]) -> float:
"""
Assess source verification quality.
Args:
content: The content to analyze
sources: Research sources used
Returns:
Source verification score between 0.0 and 1.0
"""
if not sources:
return 0.0
# Calculate source quality metrics
total_sources = len(sources)
# Source credibility scores - handle both Dict and ResearchSource objects
credibility_scores = []
relevance_scores = []
domain_scores = []
source_types = set()
for s in sources:
if hasattr(s, 'credibility_score'):
# ResearchSource Pydantic model
credibility_scores.append(s.credibility_score or 0)
relevance_scores.append(s.relevance_score or 0)
domain_scores.append(s.domain_authority or 0)
source_types.add(s.source_type or "general")
else:
# Dictionary object
credibility_scores.append(s.get("credibility_score", 0))
relevance_scores.append(s.get("relevance_score", 0))
domain_scores.append(s.get("domain_authority", 0))
source_types.add(s.get("source_type", "general"))
avg_credibility = sum(credibility_scores) / len(credibility_scores) if credibility_scores else 0
avg_relevance = sum(relevance_scores) / len(relevance_scores) if relevance_scores else 0
avg_domain_authority = sum(domain_scores) / len(domain_scores) if domain_scores else 0
diversity_score = min(len(source_types) / 3, 1.0) # Normalize to 3+ types
# Calculate verification score
verification_score = (
avg_credibility * 0.3 +
avg_relevance * 0.3 +
avg_domain_authority * 0.2 +
diversity_score * 0.2
)
return round(verification_score, 3)
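    # Worked example (assumed inputs): sources with average credibility 0.8,
    # average relevance 0.7, average domain authority 0.6, and two distinct
    # source types (diversity = min(2 / 3, 1.0) ≈ 0.667) give
    # 0.8*0.3 + 0.7*0.3 + 0.6*0.2 + 0.667*0.2 ≈ 0.703.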
def _assess_professional_tone(self, content: str) -> float:
"""
Assess professional tone appropriateness.
Args:
content: The content to analyze
Returns:
Professional tone score between 0.0 and 1.0
"""
content_lower = content.lower()
        # Count indicators as whole words so "like" does not also match "likely", etc.
        professional_count = sum(1 for ind in self.professional_indicators if re.search(rf'\b{re.escape(ind)}\b', content_lower))
        unprofessional_count = sum(1 for ind in self.unprofessional_indicators if re.search(rf'\b{re.escape(ind)}\b', content_lower))
# Calculate tone score
total_indicators = len(self.professional_indicators) + len(self.unprofessional_indicators)
if total_indicators == 0:
return 0.7 # Neutral score
professional_score = professional_count / len(self.professional_indicators)
unprofessional_penalty = unprofessional_count / len(self.unprofessional_indicators)
tone_score = professional_score - unprofessional_penalty
tone_score = max(0.0, min(1.0, tone_score)) # Clamp between 0 and 1
return round(tone_score, 3)
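    # Worked example (assumed inputs): if 5 of the 14 professional indicators and
    # 1 of the 16 unprofessional indicators appear in the text, the score is
    # 5/14 - 1/16 ≈ 0.295 (clamped to [0, 1] and rounded to three decimals).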
def _assess_industry_relevance(self, content: str, industry: str) -> float:
"""
Assess industry relevance of the content.
Args:
content: The content to analyze
industry: The target industry
Returns:
Industry relevance score between 0.0 and 1.0
"""
if industry.lower() == "general":
return 0.7 # Neutral score for general industry
content_lower = content.lower()
industry_lower = industry.lower()
        # Get industry-specific terminology (case-insensitive lookup)
        industry_terms = next(
            (terms for key, terms in self.industry_terminology.items() if key.lower() == industry_lower),
            []
        )
# Count industry-specific terms
industry_term_count = sum(1 for term in industry_terms if term in content_lower)
# Count industry mentions
industry_mentions = content_lower.count(industry_lower)
# Calculate relevance score
if not industry_terms:
return 0.6 # Fallback score
term_relevance = min(industry_term_count / len(industry_terms), 1.0)
mention_relevance = min(industry_mentions / 3, 1.0) # Normalize to 3+ mentions
relevance_score = (term_relevance * 0.7) + (mention_relevance * 0.3)
return round(relevance_score, 3)
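    # Worked example (assumed inputs): for industry "Technology", if 3 of the 5
    # listed terms appear (term_relevance = 0.6) and the industry name is
    # mentioned twice (mention_relevance = 2/3), the score is
    # 0.6 * 0.7 + (2/3) * 0.3 = 0.42 + 0.2 = 0.62.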
def _assess_citation_coverage(self, content: str, sources: List[Dict[str, Any]]) -> float:
"""
Assess citation coverage in the content.
Args:
content: The content to analyze
sources: Research sources used
Returns:
Citation coverage score between 0.0 and 1.0
"""
logger.info(f"🔍 [Citation Coverage] Starting analysis with {len(sources)} sources")
logger.info(f"🔍 [Citation Coverage] Content length: {len(content)} characters")
# Debug: Show sample of content to see what we're analyzing
content_sample = content[:500] + "..." if len(content) > 500 else content
logger.info(f"🔍 [Citation Coverage] Content sample: {content_sample}")
if not sources:
logger.warning("🔍 [Citation Coverage] No sources provided, returning 0.0")
return 0.0
# Look for citation patterns - updated to match our actual citation format
citation_patterns = [
r'<sup class="liw-cite"[^>]*>\[(\d+)\]</sup>', # HTML format - PRIORITY 1
r'\[(\d+)\]', # Our primary format: [1], [2], etc.
r'\[Source (\d+)\]', r'\(Source (\d+)\)',
r'\((\d+)\)', r'Source (\d+)', r'Ref\. (\d+)', r'Reference (\d+)'
]
total_citations = 0
for pattern in citation_patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
if matches:
logger.info(f"🔍 [Citation Coverage] Pattern {pattern} found {len(matches)} matches: {matches}")
total_citations += len(matches)
logger.info(f"🔍 [Citation Coverage] Total citations found: {total_citations}")
        # Calculate coverage score - be lenient since citations are placed strategically
        expected_citations = len(sources) * 0.8  # Expect citations for ~80% of sources
if expected_citations == 0:
logger.warning("🔍 [Citation Coverage] Expected citations is 0, returning 0.0")
return 0.0
coverage_score = min(total_citations / expected_citations, 1.0)
logger.info(f"🔍 [Citation Coverage] Coverage score before boost: {coverage_score}")
# Boost score if we have good source diversity
if len(sources) >= 3:
coverage_score = min(coverage_score * 1.2, 1.0)
logger.info(f"🔍 [Citation Coverage] Applied diversity boost, final score: {coverage_score}")
final_score = round(coverage_score, 3)
logger.info(f"🔍 [Citation Coverage] Final coverage score: {final_score}")
return final_score
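    # Worked example (assumed inputs): with 5 sources, expected_citations is
    # 5 * 0.8 = 4.0. If 3 citations are found, the raw coverage is 3/4 = 0.75,
    # and the 3+ source diversity boost gives min(0.75 * 1.2, 1.0) = 0.9.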
def _is_claim_supported(self, claim: str, content: str, sources: List[Dict[str, Any]]) -> bool:
"""
Check if a factual claim is supported by nearby citations.
Args:
claim: The factual claim to check
content: The content containing the claim
sources: Research sources used
Returns:
True if the claim appears to be supported
"""
# Find the position of the claim
claim_pos = content.lower().find(claim.lower())
if claim_pos == -1:
return False
        # Look for citations within 150 characters on either side of the claim
start_pos = max(0, claim_pos - 150)
end_pos = min(len(content), claim_pos + len(claim) + 150)
nearby_text = content[start_pos:end_pos]
# Check for citation patterns - updated to match our actual format
citation_patterns = [
r'<sup class="liw-cite"[^>]*>\[(\d+)\]</sup>', # HTML format - PRIORITY 1
r'\[(\d+)\]', # Our primary format: [1], [2], etc.
            r'\[Source (\d+)\]', r'\(Source (\d+)\)',
r'\((\d+)\)', r'Source (\d+)', r'Ref\. (\d+)', r'Reference (\d+)'
]
for pattern in citation_patterns:
if re.search(pattern, nearby_text, re.IGNORECASE):
return True
return False
def _calculate_overall_score(self, metrics: Dict[str, float]) -> float:
"""
Calculate overall quality score from individual metrics.
Args:
metrics: Dictionary of quality metrics
Returns:
Overall quality score between 0.0 and 1.0
"""
# Weighted scoring system
weights = {
"factual_accuracy": 0.25,
"source_verification": 0.25,
"professional_tone": 0.20,
"industry_relevance": 0.15,
"citation_coverage": 0.15
}
overall_score = 0.0
total_weight = 0.0
for metric_name, weight in weights.items():
if metric_name in metrics:
overall_score += metrics[metric_name] * weight
total_weight += weight
if total_weight == 0:
return 0.0
final_score = overall_score / total_weight
return round(final_score, 3)
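    # Worked example (assumed inputs): metrics of 0.8 (factual accuracy),
    # 0.7 (source verification), 0.9 (professional tone), 0.6 (industry
    # relevance) and 0.5 (citation coverage) give
    # 0.8*0.25 + 0.7*0.25 + 0.9*0.20 + 0.6*0.15 + 0.5*0.15 = 0.72.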
def _generate_recommendations(self, metrics: Dict[str, float]) -> List[str]:
"""
Generate improvement recommendations based on quality metrics.
Args:
metrics: Dictionary of quality metrics
Returns:
List of improvement recommendations
"""
recommendations = []
# Factual accuracy recommendations
if metrics.get("factual_accuracy", 0) < 0.7:
recommendations.append("Improve factual accuracy by ensuring all claims are properly supported by sources.")
if metrics.get("factual_accuracy", 0) < 0.5:
recommendations.append("Significant factual accuracy issues detected. Review and verify all claims against sources.")
# Source verification recommendations
if metrics.get("source_verification", 0) < 0.6:
recommendations.append("Enhance source quality by using more credible and relevant sources.")
if metrics.get("source_verification", 0) < 0.4:
recommendations.append("Low source verification quality. Consider using more authoritative and recent sources.")
# Professional tone recommendations
if metrics.get("professional_tone", 0) < 0.7:
recommendations.append("Improve professional tone by using more industry-appropriate language.")
if metrics.get("professional_tone", 0) < 0.5:
recommendations.append("Content tone needs significant improvement for professional audiences.")
# Industry relevance recommendations
if metrics.get("industry_relevance", 0) < 0.6:
recommendations.append("Increase industry relevance by using more industry-specific terminology and examples.")
if metrics.get("industry_relevance", 0) < 0.4:
recommendations.append("Content lacks industry focus. Add more industry-specific content and context.")
# Citation coverage recommendations
if metrics.get("citation_coverage", 0) < 0.8:
recommendations.append("Improve citation coverage by adding more inline citations throughout the content.")
if metrics.get("citation_coverage", 0) < 0.5:
recommendations.append("Low citation coverage. Add citations for all factual claims and data points.")
# General recommendations
if not recommendations:
recommendations.append("Content quality is good. Consider adding more specific examples or expanding on key points.")
return recommendations
def _get_timestamp(self) -> str:
"""Get current timestamp for analysis tracking."""
        from datetime import datetime, timezone
        return datetime.now(timezone.utc).isoformat()
def track_quality_over_time(
self,
content_id: str,
quality_metrics: Dict[str, Any]
) -> Dict[str, Any]:
"""
Track content quality metrics over time for analysis.
Args:
content_id: Unique identifier for the content
quality_metrics: Quality analysis results
Returns:
Tracking information and trends
"""
# This would typically integrate with a database or analytics system
# For now, we'll return the tracking structure
tracking_data = {
"content_id": content_id,
"timestamp": quality_metrics.get("analysis_timestamp"),
"overall_score": quality_metrics.get("overall_score", 0.0),
"metrics": quality_metrics.get("metrics", {}),
"content_length": quality_metrics.get("content_length", 0),
"word_count": quality_metrics.get("word_count", 0)
}
logger.info(f"Quality metrics tracked for content {content_id}: {tracking_data['overall_score']}")
return {
"tracked": True,
"tracking_data": tracking_data,
"message": f"Quality metrics tracked for content {content_id}"
}
def compare_content_quality(
self,
content_a: Dict[str, Any],
content_b: Dict[str, Any]
) -> Dict[str, Any]:
"""
Compare quality between two pieces of content.
Args:
content_a: Quality metrics for first content piece
content_b: Quality metrics for second content piece
Returns:
Comparison analysis and recommendations
"""
comparison = {
"content_a_score": content_a.get("overall_score", 0.0),
"content_b_score": content_b.get("overall_score", 0.0),
"score_difference": 0.0,
"better_content": "content_a",
"improvement_areas": [],
"strength_areas": []
}
# Calculate score difference
score_a = content_a.get("overall_score", 0.0)
score_b = content_b.get("overall_score", 0.0)
comparison["score_difference"] = round(abs(score_a - score_b), 3)
# Determine better content
if score_a > score_b:
comparison["better_content"] = "content_a"
better_metrics = content_a.get("metrics", {})
worse_metrics = content_b.get("metrics", {})
else:
comparison["better_content"] = "content_b"
better_metrics = content_b.get("metrics", {})
worse_metrics = content_a.get("metrics", {})
# Identify improvement areas
for metric_name in better_metrics:
if metric_name in worse_metrics:
if worse_metrics[metric_name] < better_metrics[metric_name] - 0.2:
comparison["improvement_areas"].append(f"Improve {metric_name.replace('_', ' ')}")
# Identify strength areas
for metric_name in better_metrics:
if better_metrics[metric_name] > 0.8:
comparison["strength_areas"].append(f"Strong {metric_name.replace('_', ' ')}")
return comparison
def generate_quality_report(
self,
content: str,
sources: List[Any],
industry: str = "general"
) -> Dict[str, Any]:
"""
Generate a comprehensive quality report for content.
Args:
content: The content to analyze
sources: Research sources used (can be Dict or ResearchSource objects)
industry: Target industry
Returns:
Comprehensive quality report
"""
# Perform full quality analysis
quality_analysis = self.analyze_content_quality(content, sources, industry)
# Generate detailed report
report = {
"summary": {
"overall_score": quality_analysis["overall_score"],
"quality_level": self._get_quality_level(quality_analysis["overall_score"]),
"content_length": quality_analysis["content_length"],
"word_count": quality_analysis["word_count"]
},
"detailed_metrics": quality_analysis["metrics"],
"recommendations": quality_analysis["recommendations"],
"source_analysis": {
"total_sources": len(sources),
"source_types": self._extract_source_types(sources),
"avg_credibility": self._calculate_avg_score(sources, "credibility_score"),
"avg_relevance": self._calculate_avg_score(sources, "relevance_score")
},
"improvement_plan": self._generate_improvement_plan(quality_analysis["metrics"]),
"analysis_timestamp": quality_analysis["analysis_timestamp"]
}
return report
def _get_quality_level(self, score: float) -> str:
"""Convert numerical score to quality level description."""
if score >= 0.9:
return "Excellent"
elif score >= 0.8:
return "Very Good"
elif score >= 0.7:
return "Good"
elif score >= 0.6:
return "Fair"
elif score >= 0.5:
return "Below Average"
else:
return "Poor"
def _generate_improvement_plan(self, metrics: Dict[str, float]) -> Dict[str, Any]:
"""
Generate a structured improvement plan based on quality metrics.
Args:
metrics: Quality metrics dictionary
Returns:
Structured improvement plan
"""
improvement_plan = {
"priority_high": [],
"priority_medium": [],
"priority_low": [],
"estimated_effort": "medium"
}
# Categorize improvements by priority
for metric_name, score in metrics.items():
if score < 0.4:
improvement_plan["priority_high"].append(f"Significantly improve {metric_name.replace('_', ' ')}")
elif score < 0.6:
improvement_plan["priority_medium"].append(f"Improve {metric_name.replace('_', ' ')}")
elif score < 0.8:
improvement_plan["priority_low"].append(f"Enhance {metric_name.replace('_', ' ')}")
# Estimate effort based on number of high-priority items
high_priority_count = len(improvement_plan["priority_high"])
if high_priority_count >= 3:
improvement_plan["estimated_effort"] = "high"
elif high_priority_count >= 1:
improvement_plan["estimated_effort"] = "medium"
else:
improvement_plan["estimated_effort"] = "low"
return improvement_plan
def _extract_source_types(self, sources: List[Any]) -> List[str]:
"""Extract source types from sources, handling both Dict and ResearchSource objects."""
source_types = set()
for s in sources:
if hasattr(s, 'source_type'):
# ResearchSource Pydantic model
source_types.add(s.source_type or "general")
else:
# Dictionary object
source_types.add(s.get("source_type", "general"))
return list(source_types)
def _calculate_avg_score(self, sources: List[Any], score_field: str) -> float:
"""Calculate average score from sources, handling both Dict and ResearchSource objects."""
if not sources:
return 0.0
total_score = 0.0
valid_sources = 0
for s in sources:
if hasattr(s, score_field):
# ResearchSource Pydantic model
score = getattr(s, score_field)
if score is not None:
total_score += score
valid_sources += 1
else:
# Dictionary object
                score = s.get(score_field)
                if score is not None:
total_score += score
valid_sources += 1
return total_score / valid_sources if valid_sources > 0 else 0.0
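
# Minimal usage sketch (illustrative only): the sample content and source
# dictionaries below are made-up placeholders, not real research data, and the
# service is normally imported by other ALwrity components rather than run here.
if __name__ == "__main__":
    analyzer = ContentQualityAnalyzer()
    sample_content = (
        "According to recent research, 72% of organizations report efficiency "
        "gains from AI-assisted content creation [1]. Studies indicate that "
        "adoption grew further in 2024 [2]."
    )
    sample_sources = [
        {"credibility_score": 0.8, "relevance_score": 0.7, "domain_authority": 0.6, "source_type": "report"},
        {"credibility_score": 0.9, "relevance_score": 0.8, "domain_authority": 0.7, "source_type": "academic"},
    ]
    analysis = analyzer.analyze_content_quality(sample_content, sample_sources, industry="Technology")
    print(analysis["overall_score"], analysis["metrics"])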