Files
opencode-skill/skills/alphaear-sentiment/scripts/sentiment_tools.py
Kunthawat Greethong 58f9380ec4 Import 9 alphaear finance skills
- alphaear-deepear-lite: DeepEar Lite API integration
- alphaear-logic-visualizer: Draw.io XML finance diagrams
- alphaear-news: Real-time finance news (10+ sources)
- alphaear-predictor: Kronos time-series forecasting
- alphaear-reporter: Professional financial reports
- alphaear-search: Web search + local RAG
- alphaear-sentiment: FinBERT/LLM sentiment analysis
- alphaear-signal-tracker: Signal evolution tracking
- alphaear-stock: A-Share/HK/US stock data

Updates:
- All scripts updated to use universal .env path
- Added JINA_API_KEY, LLM_*, DEEPSEEK_API_KEY to .env.example
- Updated load_dotenv() to use ~/.config/opencode/.env
2026-03-27 10:11:37 +07:00

206 lines
8.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os
from typing import Dict, List, Union, Optional
import json
from loguru import logger
# IMPORTS REMOVED: agno.agent, get_model
# Internal LLM logic has been removed to delegate analysis to the calling Agent.
from .database_manager import DatabaseManager
# Default sentiment-analysis mode, read from the environment at import time.
DEFAULT_SENTIMENT_MODE = os.getenv("SENTIMENT_MODE", "auto") # auto, bert, llm
class SentimentTools:
    """Sentiment-analysis tools supporting both LLM and BERT modes.

    Modes:
        - "auto": prefer BERT (fast); fall back to LLM when unavailable.
        - "bert": force the BERT model (requires the ``transformers`` library).
        - "llm":  force LLM analysis (more accurate but slower).

    The default mode can be set via the ``SENTIMENT_MODE`` environment
    variable. Internal LLM calls have been removed — LLM analysis is
    delegated to the calling Agent (see SKILL.md); only BERT runs locally.
    """

    def __init__(self, db: DatabaseManager, mode: Optional[str] = None):
        """Initialize the sentiment tools.

        Args:
            db: Database manager instance used to read and update news rows.
            mode: Analysis mode — "auto", "bert" or "llm". ``None`` falls back
                to the environment default (``SENTIMENT_MODE``, else "auto").
        """
        self.db = db
        self.mode = mode or DEFAULT_SENTIMENT_MODE
        # transformers pipeline; stays None when BERT is unavailable or
        # the mode is "llm".
        self.bert_pipeline = None
        # LLM initialization removed. Agent should perform analysis if needed.
        if self.mode in ("bert", "auto"):
            self._init_bert_pipeline()

    def _init_bert_pipeline(self) -> None:
        """Build the BERT sentiment pipeline; log (never raise) on failure."""
        try:
            from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
            from transformers.utils import logging as transformers_logging

            transformers_logging.set_verbosity_error()  # silence verbose transformers logging
            bert_model = os.getenv("BERT_SENTIMENT_MODEL", "uer/roberta-base-finetuned-chinanews-chinese")

            def build(local_only: bool):
                # Single construction path shared by the cached and
                # downloaded branches (was duplicated inline before).
                tokenizer = AutoTokenizer.from_pretrained(bert_model, local_files_only=local_only)
                model = AutoModelForSequenceClassification.from_pretrained(bert_model, local_files_only=local_only)
                return pipeline(
                    "sentiment-analysis",
                    model=model,
                    tokenizer=tokenizer,
                    device=-1  # CPU
                )

            try:
                # Prefer the local cache to avoid network access.
                self.bert_pipeline = build(local_only=True)
                logger.info(f"✅ BERT pipeline loaded from local cache: {bert_model}")
            except (OSError, ValueError, ImportError):
                # Not cached locally — download from the hub.
                logger.info(f"📡 Downloading BERT model: {bert_model}...")
                self.bert_pipeline = build(local_only=False)
            logger.info(f"✅ BERT Sentiment pipeline ({bert_model}) initialized.")
        except ImportError:
            logger.warning("Transformers library not installed. BERT sentiment analysis disabled.")
        except Exception as e:
            if self.mode == "bert":
                logger.error(f"BERT mode requested but failed: {e}")
            else:
                logger.warning(f"BERT unavailable, using LLM only. Error: {e}")
            self.bert_pipeline = None

    def analyze_sentiment(self, text: str) -> Dict[str, Union[float, str]]:
        """Analyze the sentiment polarity of a single text (BERT only).

        For LLM analysis the calling Agent should execute the prompt from
        SKILL.md itself.

        Args:
            text: Text content to analyze.

        Returns:
            A dict with ``score`` (-1.0..1.0), ``label`` and ``reason``;
            ``label`` is "error" when the BERT pipeline is unavailable.
        """
        if not self.bert_pipeline:
            return {
                "score": 0.0,
                "label": "error",
                "reason": "BERT pipeline not initialized. For LLM analysis, please manually execute the prompt in SKILL.md."
            }
        results = self.analyze_sentiment_bert([text])
        return results[0] if results else {"score": 0.0, "label": "error"}

    def update_single_news_sentiment(self, news_id: Union[str, int], score: float, reason: str = "") -> bool:
        """Persist an externally produced sentiment result to the database.

        Lets the Agent save manually analyzed results.

        Args:
            news_id: News row ID.
            score: Sentiment score, expected in [-1.0, 1.0].
            reason: Analysis rationale, stored in the row's JSON meta_data.

        Returns:
            True on success, False if the update failed (error is logged).
        """
        try:
            cursor = self.db.conn.cursor()
            # json_set keeps any other meta_data keys intact.
            cursor.execute("""
                UPDATE daily_news
                SET sentiment_score = ?, meta_data = json_set(COALESCE(meta_data, '{}'), '$.sentiment_reason', ?)
                WHERE id = ?
            """, (score, reason, news_id))
            self.db.conn.commit()
            return True
        except Exception as e:
            logger.error(f"Failed to update sentiment for {news_id}: {e}")
            return False

    def analyze_sentiment_bert(self, texts: List[str]) -> List[Dict]:
        """Run fast batched sentiment analysis with BERT.

        Args:
            texts: List of texts to analyze.

        Returns:
            A list of result dicts, same length and order as ``texts``.
        """
        if not self.bert_pipeline:
            # One distinct dict per item — a shared `[d] * n` list would let a
            # mutation of one entry leak into all of them.
            return [{"score": 0.0, "label": "error", "reason": "BERT not available"} for _ in texts]
        try:
            results = self.bert_pipeline(texts, truncation=True, max_length=512)
            processed = []
            for r in results:
                label = r['label'].lower()
                score = r['score']
                # Normalize label conventions across different models.
                if 'negative' in label or 'neg' in label:
                    score = -score
                elif 'neutral' in label or 'neu' in label:
                    score = 0.0
                processed.append({
                    "score": float(round(score, 3)),
                    "label": "positive" if score > 0.1 else ("negative" if score < -0.1 else "neutral"),
                    "reason": "BERT automated analysis"
                })
            return processed
        except Exception as e:
            logger.error(f"BERT analysis failed: {e}")
            return [{"score": 0.0, "label": "error", "reason": str(e)} for _ in texts]

    def batch_update_news_sentiment(self, source: Optional[str] = None, limit: int = 50, use_bert: Optional[bool] = None):
        """Batch-update sentiment scores for news rows that have none yet.

        Args:
            source: Restrict to one news source, e.g. "wallstreetcn";
                ``None`` processes all sources.
            limit: Maximum number of news items to fetch.
            use_bert: Kept for backward compatibility; currently ignored —
                BERT is used whenever the pipeline was initialized.

        Returns:
            Number of news rows updated.
        """
        news_items = self.db.get_daily_news(source=source, limit=limit)
        # `is None` (not a falsy check) so a legitimately stored neutral
        # score of 0.0 is not re-analyzed on every run.
        to_analyze = [item for item in news_items if item.get('sentiment_score') is None]
        if not to_analyze:
            return 0
        updated_count = 0
        cursor = self.db.conn.cursor()
        if self.bert_pipeline:
            logger.info(f"🚀 Using BERT for batch analysis of {len(to_analyze)} items...")
            titles = [item['title'] for item in to_analyze]
            results = self.analyze_sentiment_bert(titles)
            for item, analysis in zip(to_analyze, results):
                cursor.execute("""
                    UPDATE daily_news
                    SET sentiment_score = ?, meta_data = json_set(COALESCE(meta_data, '{}'), '$.sentiment_reason', ?)
                    WHERE id = ?
                """, (analysis['score'], analysis['reason'], item['id']))
                updated_count += 1
        else:
            logger.warning("BERT pipeline not available. Batch update skipped. Please use Agentic analysis for high-quality results.")
        self.db.conn.commit()
        return updated_count