- alphaear-deepear-lite: DeepEar Lite API integration
- alphaear-logic-visualizer: Draw.io XML finance diagrams
- alphaear-news: Real-time finance news (10+ sources)
- alphaear-predictor: Kronos time-series forecasting
- alphaear-reporter: Professional financial reports
- alphaear-search: Web search + local RAG
- alphaear-sentiment: FinBERT/LLM sentiment analysis
- alphaear-signal-tracker: Signal evolution tracking
- alphaear-stock: A-Share/HK/US stock data

Updates:
- All scripts updated to use universal .env path
- Added JINA_API_KEY, LLM_*, DEEPSEEK_API_KEY to .env.example
- Updated load_dotenv() to use ~/.config/opencode/.env
168 lines
6.7 KiB
Python
168 lines
6.7 KiB
Python
import hashlib
|
||
import json
|
||
import re
|
||
import pandas as pd
|
||
from typing import List, Dict, Any, Optional
|
||
from loguru import logger
|
||
from types import SimpleNamespace
|
||
|
||
from .utils.database_manager import DatabaseManager
|
||
from .utils.json_utils import extract_json
|
||
|
||
class ReportUtils:
    """Helper toolkit for research-report generation.

    Provides formatting, citation/bibliography management and related
    helpers.  The core generation logic (clustering, writing) has been
    handed over to the Agent; this class only post-processes its output.
    """

    def __init__(self, db: "DatabaseManager"):
        """Keep the database handle used to enrich bibliography entries.

        Args:
            db: Object exposing ``lookup_reference_by_url(url)``.
        """
        self.db = db
        logger.info("📝 ReportUtils initialized")

    @staticmethod
    def _make_cite_key(url: str, title: str = "", source_name: str = "") -> str:
        """Return a stable citation key of the form ``SF-<8 hex chars>``.

        The key is derived from the stripped URL when one is present;
        otherwise from ``"<title>|<source_name>"``.  Identical inputs always
        yield the same key.

        Args:
            url: Reference URL (may be empty or ``None``).
            title: Reference title, used only when ``url`` is empty.
            source_name: Publisher name, used only when ``url`` is empty.

        Returns:
            ``"SF-"`` followed by the first 8 hex digits of the SHA-1 digest.
        """
        basis = (url or "").strip()
        if not basis:
            basis = f"{(title or '').strip()}|{(source_name or '').strip()}"
        digest = hashlib.sha1(basis.encode("utf-8")).hexdigest()[:8]
        return f"SF-{digest}"

    @staticmethod
    def _source_field(src: Any, *names: str) -> Any:
        """Return the first truthy value among ``names`` read from ``src``.

        ``src`` may be a dict (key lookup) or any object (attribute lookup).
        Returns ``None`` when no listed field holds a truthy value.
        """
        for name in names:
            value = src.get(name) if isinstance(src, dict) else getattr(src, name, None)
            if value:
                return value
        return None

    @staticmethod
    def _collect_sources(signal: Any) -> List[Any]:
        """Return the list of source entries attached to one signal.

        Resolution order:
          1. a truthy ``sources`` attribute on the signal object;
          2. a non-empty list under the ``"sources"`` key of a dict signal;
          3. a single entry synthesised from a dict signal's own
             ``url``/``title`` fields.

        A dict whose ``"sources"`` value is truthy but not a list yields no
        sources (there is no fallback to case 3).
        """
        attr_sources = getattr(signal, "sources", None)
        if attr_sources:
            return list(attr_sources)

        if not isinstance(signal, dict):
            return []

        listed = signal.get("sources")
        if listed:
            return list(listed) if isinstance(listed, list) else []

        if signal.get("url") or signal.get("title"):
            return [
                {
                    "title": signal.get("title"),
                    "url": signal.get("url"),
                    "source_name": signal.get("source") or signal.get("source_name"),
                    "publish_time": signal.get("publish_time"),
                }
            ]
        return []

    def build_bibliography(self, signals: List[Any]) -> tuple[list[Dict[str, Any]], Dict[int, list[str]]]:
        """Build stable bibliography entries and a per-signal cite-key mapping.

        Args:
            signals: Signals as dicts or objects.  Each may carry a
                ``sources`` collection whose entries are dicts or objects
                with ``url``/``title``/``source_name`` (or ``source``)/
                ``publish_time`` fields; dict signals without ``sources``
                fall back to their own ``url``/``title``.

        Returns:
            A pair ``(entries, signal_to_keys)``:
              * ``entries``: one dict per unique cite key, in first-seen
                order, with keys ``key``, ``url``, ``title``, ``source``
                and ``publish_time``.
              * ``signal_to_keys``: maps the 1-based signal index to the
                ordered, de-duplicated cite keys of its sources.  Signals
                without sources have no entry.
        """
        bib_by_key: Dict[str, Dict[str, Any]] = {}
        signal_to_keys: Dict[int, list[str]] = {}

        for sig_idx, signal in enumerate(signals, 1):
            for src in self._collect_sources(signal):
                if src is None:
                    continue

                url = (self._source_field(src, "url") or "").strip()
                title = (self._source_field(src, "title") or "").strip()
                source_name = (self._source_field(src, "source_name", "source") or "").strip()
                # Only string timestamps are stripped; other types
                # (e.g. datetime) are passed through untouched.
                publish_time = self._source_field(src, "publish_time")
                if isinstance(publish_time, str):
                    publish_time = publish_time.strip()

                key = self._make_cite_key(url=url, title=title, source_name=source_name)
                keys_for_signal = signal_to_keys.setdefault(sig_idx, [])
                if key not in keys_for_signal:
                    keys_for_signal.append(key)

                if key in bib_by_key:
                    continue

                # Prefer canonical metadata from the DB when a URL is known.
                enriched = self.db.lookup_reference_by_url(url) if url else None
                enriched = enriched or {}
                bib_by_key[key] = {
                    "key": key,
                    "url": url or enriched.get("url") or "",
                    "title": enriched.get("title") or title or "(无标题)",
                    "source": enriched.get("source") or source_name or "(未知来源)",
                    "publish_time": enriched.get("publish_time") or publish_time or "",
                }

        return list(bib_by_key.values()), signal_to_keys

    @staticmethod
    def render_references_section(bib_entries: list[Dict[str, Any]]) -> str:
        """Render bibliography entries as a Markdown references section.

        Each entry becomes one line carrying an HTML anchor
        ``<a id="ref-<key>"></a>`` followed by a 1-based ``[n]`` label,
        the title, the source (plus publish time when present) and, if
        available, the URL.

        Args:
            bib_entries: Entries as produced by ``build_bibliography``.

        Returns:
            The section text, always terminated by a single newline.  An
            empty input yields the heading followed by a "none" marker.
        """
        lines = ["## 参考文献", ""]
        if not bib_entries:
            lines.append("(无)")
            return "\n".join(lines).strip() + "\n"

        for number, entry in enumerate(bib_entries, 1):
            key = entry.get("key")
            title = entry.get("title") or "(无标题)"
            source = entry.get("source") or "(未知来源)"
            url = entry.get("url") or ""
            publish_time = entry.get("publish_time") or ""

            suffix = f",{publish_time}" if publish_time else ""
            rendered = f"<a id=\"ref-{key}\"></a>[{number}] {title} ({source}{suffix})"
            if url:
                rendered += f", {url}"
            lines.append(rendered)

        return "\n".join(lines).strip() + "\n"

    @staticmethod
    def sanitize_json_chart_blocks(text: str) -> str:
        """Best-effort repair for malformed ``json-chart`` fenced blocks.

        Only one repair is performed: when the text contains a
        ``json-chart`` fence and the total number of triple-backtick fences
        is odd, a closing fence is appended on a new line.

        Args:
            text: Markdown text; falsy values are returned unchanged.

        Returns:
            The (possibly extended) text.
        """
        if not text:
            return text
        has_unbalanced_fence = text.count("```") % 2 != 0
        if "```json-chart" in text and has_unbalanced_fence:
            text += "\n```"
        return text

    @staticmethod
    def build_structured_report(report_md: str, signals: List[Dict[str, Any]], clusters: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Build a structured (JSON-friendly) view of a Markdown report.

        Args:
            report_md: Report body in Markdown; ``None``/empty is allowed.
            signals: Accepted for interface compatibility; not used here.
            clusters: Accepted for interface compatibility; not used here.

        Returns:
            A dict with:
              * ``title``: text of the first line starting with ``"# "``,
                or the default ``"研报"`` when there is none;
              * ``summary_bullets``: up to 8 non-empty bullet items
                (``-``, ``*`` or ``•``) found anywhere in the report;
              * ``sections``: ``{"title", "content"}`` dicts split on level
                2-4 headings.  Lines preceding the first such heading
                (including the H1 line itself) are collected under an
                implicit ``"摘要"`` section.
        """
        text = (report_md or "").strip()
        lines = text.splitlines() if text else []

        title = "研报"
        for line in lines:
            if line.startswith("# "):
                # Drop only the leading marker; str.replace would also eat
                # any later "# " inside the title (e.g. "# C# Guide").
                title = line[2:].strip()
                break

        heading_re = re.compile(r"^(#{2,4})\s+(.*)$")
        sections: List[Dict[str, Any]] = []
        current: Optional[Dict[str, Any]] = None
        for line in lines:
            heading = heading_re.match(line.strip())
            if heading:
                if current:
                    sections.append(current)
                current = {"title": heading.group(2).strip(), "content": []}
                continue
            if current is None:
                current = {"title": "摘要", "content": []}
            current["content"].append(line)
        if current:
            sections.append(current)

        bullets = [
            re.sub(r"^[-*•]\s+", "", raw.strip())
            for raw in lines
            if raw.strip().startswith(("- ", "* ", "• "))
        ]
        bullets = [item for item in bullets if item]

        return {
            "title": title,
            "summary_bullets": bullets[:8],
            "sections": [
                {"title": sec["title"], "content": "\n".join(sec["content"]).strip()}
                for sec in sections
            ],
        }

    @staticmethod
    def _clean_ticker(ticker_raw: str) -> str:
        """Reduce a raw ticker string to its digit characters.

        Args:
            ticker_raw: Raw ticker text; ``None``/empty is allowed.

        Returns:
            The digits of the stripped ticker concatenated in order; the
            stripped ticker itself when it contains no digits; ``""`` for
            empty input.
        """
        ticker = (ticker_raw or "").strip()
        if not ticker:
            return ""
        digits = "".join(ch for ch in ticker if ch.isdigit())
        return digits or ticker