Files
ALwrity/backend/services/blog_writer/outline/metadata_collector.py

95 lines
4.2 KiB
Python

"""
Metadata Collector - Handles collection and formatting of outline metadata.
Collects source mapping stats, grounding insights, optimization results, and research coverage.
"""
from typing import Dict, Any, List
from loguru import logger
class MetadataCollector:
    """Handles collection and formatting of various metadata types for UI display.

    Each ``collect_*`` method converts raw outline-generation data into one of
    the result models defined in ``models.blog_models``. Those models are
    imported inside the methods rather than at module level.
    """

    # Scores at or above this value are counted as high-confidence mappings.
    _HIGH_CONFIDENCE_THRESHOLD = 0.8
    # Maximum number of competitive advantages reported in research coverage.
    _MAX_COMPETITIVE_ADVANTAGES = 5
    # Keys copied one-to-one from the raw grounding dict onto GroundingInsights.
    _GROUNDING_FIELDS = (
        'confidence_analysis',
        'authority_analysis',
        'temporal_analysis',
        'content_relationships',
        'citation_insights',
        'search_intent_insights',
        'quality_indicators',
    )

    def __init__(self):
        """Initialize the metadata collector (it keeps no state)."""

    @staticmethod
    def _coverage_percentage(total_mapped: int, total_sources: int) -> float:
        """Return mapped/total as a percentage rounded to 1 decimal, capped at 100."""
        if total_sources <= 0:
            return 0.0
        # References are counted per section, so a source cited by several
        # sections is counted more than once; cap the result so the UI never
        # shows more than 100% coverage.
        return round(min(total_mapped / total_sources * 100, 100.0), 1)

    @staticmethod
    def _grounding_kwargs(grounding_insights: Dict[str, Any]) -> Dict[str, Any]:
        """Return the GroundingInsights keyword arguments; ``None`` input acts as ``{}``."""
        insights = grounding_insights or {}
        return {
            field: insights.get(field)
            for field in MetadataCollector._GROUNDING_FIELDS
        }

    @staticmethod
    def _analysis_list(analysis: Dict[str, Any], key: str) -> List[Any]:
        """Return ``analysis[key]``, or ``[]`` when the dict or the value is missing/None."""
        return (analysis or {}).get(key) or []

    def collect_source_mapping_stats(self, mapped_sections: List[Any], research: Any) -> 'SourceMappingStats':
        """Collect source mapping statistics for UI display.

        Args:
            mapped_sections: Sections exposing a ``references`` iterable.
            research: Research result exposing ``sources``.

        Returns:
            A ``SourceMappingStats`` with the number of mapped references, the
            coverage percentage (1 decimal, capped at 100), the average
            credibility score of the references (3 decimals) and the number of
            scores at or above the high-confidence threshold.
        """
        from models.blog_models import SourceMappingStats

        references = [
            ref
            for section in mapped_sections
            for ref in section.references
        ]
        total_mapped = len(references)
        total_sources = len(research.sources or [])

        # Simplified relevance: the references' credibility scores stand in for
        # relevance. Missing, None and zero scores are skipped, so they do not
        # pull the average down.
        scores = []
        for ref in references:
            score = getattr(ref, 'credibility_score', None)
            if score:
                scores.append(score)

        average_relevance = sum(scores) / len(scores) if scores else 0.0
        high_confidence_mappings = sum(
            1 for score in scores if score >= self._HIGH_CONFIDENCE_THRESHOLD
        )

        return SourceMappingStats(
            total_sources_mapped=total_mapped,
            coverage_percentage=self._coverage_percentage(total_mapped, total_sources),
            average_relevance_score=round(average_relevance, 3),
            high_confidence_mappings=high_confidence_mappings
        )

    def collect_grounding_insights(self, grounding_insights: Dict[str, Any]) -> 'GroundingInsights':
        """Collect grounding insights for UI display.

        Args:
            grounding_insights: Raw insights dict. Keys that are absent are
                passed to the model as ``None``; a ``None`` dict is treated
                like an empty one.

        Returns:
            A ``GroundingInsights`` populated from the known insight keys.
        """
        from models.blog_models import GroundingInsights

        return GroundingInsights(**self._grounding_kwargs(grounding_insights))

    def collect_optimization_results(self, optimized_sections: List[Any], focus: Any) -> 'OptimizationResults':
        """Collect optimization results for UI display.

        The quality score is the share of sections that have a heading,
        subheadings and key points, scaled to 0-10 and rounded to 1 decimal.
        ``improvements_made`` is a fixed list of descriptions; it is not
        derived from the sections.

        Args:
            optimized_sections: Sections exposing ``heading``, ``subheadings``
                and ``key_points``.
            focus: Value stored unchanged as ``optimization_focus``.

        Returns:
            An ``OptimizationResults`` instance.
        """
        from models.blog_models import OptimizationResults

        # Calculate a quality score based on section completeness
        total_sections = len(optimized_sections)
        complete_sections = sum(
            1
            for section in optimized_sections
            if section.heading and section.subheadings and section.key_points
        )
        quality_score = (complete_sections / total_sections * 10) if total_sections > 0 else 0.0

        improvements_made = [
            "Enhanced section headings for better SEO",
            "Optimized keyword distribution across sections",
            "Improved content flow and logical progression",
            "Balanced word count distribution",
            "Enhanced subheadings for better readability"
        ]

        return OptimizationResults(
            overall_quality_score=round(quality_score, 1),
            improvements_made=improvements_made,
            optimization_focus=focus
        )

    def collect_research_coverage(self, research: Any) -> 'ResearchCoverage':
        """Collect research coverage metrics for UI display.

        Args:
            research: Research result exposing ``sources``,
                ``keyword_analysis`` and ``competitor_analysis``. A missing or
                ``None`` analysis dict or list counts as empty.

        Returns:
            A ``ResearchCoverage`` with the number of sources, the number of
            content gaps and at most the first five competitive advantages.
        """
        from models.blog_models import ResearchCoverage

        content_gaps = self._analysis_list(research.keyword_analysis, 'content_gaps')
        competitive_advantages = self._analysis_list(
            research.competitor_analysis, 'competitive_advantages'
        )

        return ResearchCoverage(
            sources_utilized=len(research.sources or []),
            content_gaps_identified=len(content_gaps),
            # Limit to top 5
            competitive_advantages=competitive_advantages[:self._MAX_COMPETITIVE_ADVANTAGES]
        )