Files
opencode-skill/skills/alphaear-reporter/scripts/report_agent.py
Kunthawat Greethong 58f9380ec4 Import 9 alphaear finance skills
- alphaear-deepear-lite: DeepEar Lite API integration
- alphaear-logic-visualizer: Draw.io XML finance diagrams
- alphaear-news: Real-time finance news (10+ sources)
- alphaear-predictor: Kronos time-series forecasting
- alphaear-reporter: Professional financial reports
- alphaear-search: Web search + local RAG
- alphaear-sentiment: FinBERT/LLM sentiment analysis
- alphaear-signal-tracker: Signal evolution tracking
- alphaear-stock: A-Share/HK/US stock data

Updates:
- All scripts updated to use universal .env path
- Added JINA_API_KEY, LLM_*, DEEPSEEK_API_KEY to .env.example
- Updated load_dotenv() to use ~/.config/opencode/.env
2026-03-27 10:11:37 +07:00

168 lines
6.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import hashlib
import json
import re
import pandas as pd
from typing import List, Dict, Any, Optional
from loguru import logger
from types import SimpleNamespace
from .utils.database_manager import DatabaseManager
from .utils.json_utils import extract_json
class ReportUtils:
    """Research-report helper toolkit (ReportUtils).

    Provides formatting, citation management and structured-output helpers.
    The core generation logic (clustering, writing) has been handed over to
    the Agent; this class only prepares and post-processes its data.
    """

    def __init__(self, db: "DatabaseManager"):
        """Store the database handle used to enrich bibliography entries.

        Args:
            db: Database manager; ``build_bibliography`` calls its
                ``lookup_reference_by_url`` method.
        """
        self.db = db
        logger.info("📝 ReportUtils initialized")

    @staticmethod
    def _make_cite_key(url: str, title: str = "", source_name: str = "") -> str:
        """Return a stable citation key of the form ``SF-<8 hex chars>``.

        The key is derived from the stripped URL when one is present,
        otherwise from ``"<title>|<source_name>"`` (both stripped), so the
        same reference always maps to the same key.
        """
        basis = (url or "").strip()
        if not basis:
            basis = f"{(title or '').strip()}|{(source_name or '').strip()}"
        # SHA-1 is used only as a short, stable fingerprint here.
        digest = hashlib.sha1(basis.encode("utf-8")).hexdigest()[:8]
        return f"SF-{digest}"

    @staticmethod
    def _source_field(src: Any, name: str) -> Any:
        """Read ``name`` from a source item given as a dict or as an object."""
        if isinstance(src, dict):
            return src.get(name)
        return getattr(src, name, None)

    @staticmethod
    def _collect_source_items(signal: Any) -> list:
        """Return the list of source items attached to one signal.

        Resolution order:
        1. a truthy ``sources`` attribute on an object signal;
        2. a non-empty ``sources`` list on a dict signal;
        3. for a dict signal without ``sources``, a single synthetic item
           built from its own ``title`` / ``url`` / ``source`` /
           ``publish_time`` fields (only if it has a url or a title).
        An empty list means the signal contributes no references.
        """
        attr_sources = getattr(signal, "sources", None)
        if attr_sources:
            return list(attr_sources)
        if not isinstance(signal, dict):
            return []

        listed = signal.get("sources")
        if listed:
            # A truthy but non-list value is ignored rather than guessed at.
            return list(listed) if isinstance(listed, list) else []

        if signal.get("url") or signal.get("title"):
            return [
                {
                    "title": signal.get("title"),
                    "url": signal.get("url"),
                    "source_name": signal.get("source") or signal.get("source_name"),
                    "publish_time": signal.get("publish_time"),
                }
            ]
        return []

    def build_bibliography(self, signals: List[Any]) -> tuple[list[Dict[str, Any]], Dict[int, list[str]]]:
        """Build stable bibliography entries and per-signal cite key mapping.

        Args:
            signals: Signals given as dicts or as objects exposing a
                ``sources`` attribute. Source items may themselves be dicts
                or objects with ``url`` / ``title`` / ``source_name`` (or
                ``source``) / ``publish_time`` fields.

        Returns:
            A pair ``(entries, signal_to_keys)``:
            * ``entries`` - one dict per unique cite key, in first-seen
              order, with keys ``key``, ``url``, ``title``, ``source`` and
              ``publish_time``;
            * ``signal_to_keys`` - maps the 1-based position of each signal
              that has sources to its de-duplicated list of cite keys.
        """
        bib_by_key: Dict[str, Dict[str, Any]] = {}
        signal_to_keys: Dict[int, list[str]] = {}
        field = self._source_field

        for sig_idx, signal in enumerate(signals, 1):
            for src in self._collect_source_items(signal):
                if src is None:
                    continue

                url = (field(src, "url") or "").strip()
                title = (field(src, "title") or "").strip()
                source_name = (field(src, "source_name") or field(src, "source") or "").strip()
                publish_time = field(src, "publish_time")
                if isinstance(publish_time, str):
                    publish_time = publish_time.strip()

                key = self._make_cite_key(url=url, title=title, source_name=source_name)
                keys_for_signal = signal_to_keys.setdefault(sig_idx, [])
                if key not in keys_for_signal:
                    keys_for_signal.append(key)
                if key in bib_by_key:
                    continue

                # Prefer canonical metadata from the DB when a URL is known.
                # NOTE(review): the lookup is assumed to return a dict-like
                # object (or a falsy value when nothing is found) - confirm.
                meta = (self.db.lookup_reference_by_url(url) if url else None) or {}
                bib_by_key[key] = {
                    "key": key,
                    # The DB is only consulted when url is non-empty, so the
                    # caller-supplied url is always the one recorded.
                    "url": url,
                    "title": meta.get("title") or title or "(无标题)",
                    "source": meta.get("source") or source_name or "(未知来源)",
                    "publish_time": meta.get("publish_time") or publish_time or "",
                }

        return list(bib_by_key.values()), signal_to_keys

    @staticmethod
    def render_references_section(bib_entries: list[Dict[str, Any]]) -> str:
        """Render bibliography entries as a Markdown references section.

        Each entry becomes one numbered line carrying an HTML anchor
        ``<a id="ref-<key>"></a>`` so in-text citations can link to it. The
        publish time, when present, follows the source separated by ``", "``.

        Args:
            bib_entries: Entries as produced by ``build_bibliography``.

        Returns:
            The section text, terminated by exactly one newline.
        """
        lines = ["## 参考文献", ""]
        if not bib_entries:
            lines.append("(无)")
            return "\n".join(lines).strip() + "\n"

        for i, entry in enumerate(bib_entries, 1):
            key = entry.get("key")
            title = entry.get("title") or "(无标题)"
            source = entry.get("source") or "(未知来源)"
            url = entry.get("url") or ""
            publish_time = entry.get("publish_time") or ""
            # Separate source and time; without it they run together
            # (e.g. "Reuters2024-01-01").
            suffix = f", {publish_time}" if publish_time else ""

            line = f'<a id="ref-{key}"></a>[{i}] {title} ({source}{suffix})'
            if url:
                line += f", {url}"
            lines.append(line)

        return "\n".join(lines).strip() + "\n"

    @staticmethod
    def sanitize_json_chart_blocks(text: str) -> str:
        """Best-effort repair for malformed json-chart fenced blocks.

        Only one repair is performed: when the text contains a
        ``json-chart`` fence and the total number of triple-backtick fences
        is odd, a closing fence is appended. Empty input is returned as is.
        """
        if not text:
            return text
        has_unbalanced_fence = text.count("```") % 2 != 0
        if "```json-chart" in text and has_unbalanced_fence:
            text += "\n```"
        return text

    @staticmethod
    def build_structured_report(report_md: str, signals: List[Dict[str, Any]], clusters: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Build a structured report (easy to render in a frontend / dump as JSON).

        Args:
            report_md: The report as Markdown text; ``None`` or empty is
                treated as an empty report.
            signals: Not used by this method; kept for interface
                compatibility.
            clusters: Not used by this method; kept for interface
                compatibility.

        Returns:
            A dict with:
            * ``title`` - text of the first ``# `` heading, or the default
              title when there is none;
            * ``summary_bullets`` - up to 8 bullet items (lines starting
              with ``- ``, ``* `` or ``• ``) with the marker removed;
            * ``sections`` - ``{"title", "content"}`` dicts split on level
              2-4 headings; text before the first such heading goes into a
              leading summary section.
        """
        text = (report_md or "").strip()
        lines = text.splitlines() if text else []

        title = "研报"
        for line in lines:
            if line.startswith("# "):
                # Drop only the leading marker; the title itself may
                # contain "# " (e.g. "C# Notes").
                title = line[2:].strip()
                break

        sections: List[Dict[str, Any]] = []
        current: Optional[Dict[str, Any]] = None
        for line in lines:
            heading = re.match(r"^(#{2,4})\s+(.*)$", line.strip())
            if heading:
                if current:
                    sections.append(current)
                current = {"title": heading.group(2).strip(), "content": []}
                continue
            if current is None:
                current = {"title": "摘要", "content": []}
            current["content"].append(line)
        if current:
            sections.append(current)

        # An empty prefix would match every line, so list real markers only.
        bullet_prefixes = ("- ", "* ", "• ")
        bullets: List[str] = []
        for line in lines:
            stripped = line.strip()
            if not stripped.startswith(bullet_prefixes):
                continue
            item = re.sub(r"^[-*•]\s+", "", stripped)
            if item:
                bullets.append(item)

        return {
            "title": title,
            "summary_bullets": bullets[:8],
            "sections": [
                {"title": s["title"], "content": "\n".join(s["content"]).strip()}
                for s in sections
            ],
        }

    @staticmethod
    def _clean_ticker(ticker_raw: str) -> str:
        """Normalize a raw ticker string.

        Returns only the digit characters of the stripped input when it
        contains any (e.g. ``"600519.SH"`` -> ``"600519"``); otherwise
        returns the stripped input unchanged. ``None`` or blank input
        yields ``""``.
        """
        ticker = (ticker_raw or "").strip()
        if not ticker:
            return ""
        digits = "".join(ch for ch in ticker if ch.isdigit())
        return digits or ticker