Added enhanced linguistic analyzer and persona quality improver
This commit is contained in:
42
backend/services/blog_writer/content/source_url_manager.py
Normal file
42
backend/services/blog_writer/content/source_url_manager.py
Normal file
@@ -0,0 +1,42 @@
|
||||
"""
|
||||
SourceURLManager - selects the most relevant source URLs for a section.
|
||||
|
||||
Low-effort heuristic using keywords and titles; safe defaults if no research.
|
||||
"""
|
||||
|
||||
from typing import List, Dict, Any
|
||||
|
||||
|
||||
class SourceURLManager:
|
||||
def pick_relevant_urls(self, section: Any, research: Any, limit: int = 5) -> List[str]:
|
||||
if not research or not getattr(research, 'sources', None):
|
||||
return []
|
||||
|
||||
section_keywords = set([k.lower() for k in getattr(section, 'keywords', [])])
|
||||
scored: List[tuple[float, str]] = []
|
||||
for s in research.sources:
|
||||
url = getattr(s, 'url', None) or getattr(s, 'uri', None) or s.get('url') if isinstance(s, dict) else None
|
||||
title = getattr(s, 'title', None) or s.get('title') if isinstance(s, dict) else ''
|
||||
if not url or not isinstance(url, str):
|
||||
continue
|
||||
title_l = (title or '').lower()
|
||||
# simple overlap score
|
||||
score = 0.0
|
||||
for kw in section_keywords:
|
||||
if kw and kw in title_l:
|
||||
score += 1.0
|
||||
# prefer https and reputable domains lightly
|
||||
if url.startswith('https://'):
|
||||
score += 0.2
|
||||
scored.append((score, url))
|
||||
|
||||
scored.sort(key=lambda x: x[0], reverse=True)
|
||||
dedup: List[str] = []
|
||||
for _, u in scored:
|
||||
if u not in dedup:
|
||||
dedup.append(u)
|
||||
if len(dedup) >= limit:
|
||||
break
|
||||
return dedup
|
||||
|
||||
|
||||
Reference in New Issue
Block a user