Files
opencode-skill/skills/alphaear-sentiment/scripts/sentiment_tools.py
Kunthawat Greethong 58f9380ec4 Import 9 alphaear finance skills
- alphaear-deepear-lite: DeepEar Lite API integration
- alphaear-logic-visualizer: Draw.io XML finance diagrams
- alphaear-news: Real-time finance news (10+ sources)
- alphaear-predictor: Kronos time-series forecasting
- alphaear-reporter: Professional financial reports
- alphaear-search: Web search + local RAG
- alphaear-sentiment: FinBERT/LLM sentiment analysis
- alphaear-signal-tracker: Signal evolution tracking
- alphaear-stock: A-Share/HK/US stock data

Updates:
- All scripts updated to use universal .env path
- Added JINA_API_KEY, LLM_*, DEEPSEEK_API_KEY to .env.example
- Updated load_dotenv() to use ~/.config/opencode/.env
2026-03-27 10:11:37 +07:00

206 lines
8.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os
from typing import Dict, List, Union, Optional
import json
from loguru import logger
# IMPORTS REMOVED: agno.agent, get_model
# Internal LLM logic has been removed to delegate analysis to the calling Agent.
from .database_manager import DatabaseManager
# Default sentiment-analysis mode, read from the environment at import time.
DEFAULT_SENTIMENT_MODE = os.getenv("SENTIMENT_MODE", "auto") # auto, bert, llm
class SentimentTools:
    """Sentiment-analysis tools supporting both LLM and BERT modes.

    Modes:
        - "auto": prefer BERT (fast); fall back to LLM when unavailable.
        - "bert": force the BERT model (requires the ``transformers`` library).
        - "llm":  force LLM analysis (more accurate but slower).

    The default mode can be set via the ``SENTIMENT_MODE`` environment
    variable. Internal LLM calls have been removed — LLM analysis is
    delegated to the calling Agent (see SKILL.md); only BERT runs locally.
    """

    def __init__(self, db: DatabaseManager, mode: Optional[str] = None):
        """Initialize the sentiment tools.

        Args:
            db: Database manager instance used to read and update news rows.
            mode: Analysis mode — "auto", "bert" or "llm". ``None`` falls back
                to the environment default (``SENTIMENT_MODE``, else "auto").
        """
        self.db = db
        self.mode = mode or DEFAULT_SENTIMENT_MODE
        # transformers pipeline; stays None when BERT is unavailable or
        # the mode is "llm".
        self.bert_pipeline = None
        # LLM initialization removed. Agent should perform analysis if needed.
        if self.mode in ("bert", "auto"):
            self._init_bert_pipeline()

    def _init_bert_pipeline(self) -> None:
        """Build the BERT sentiment pipeline; log (never raise) on failure."""
        try:
            from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
            from transformers.utils import logging as transformers_logging

            transformers_logging.set_verbosity_error()  # silence verbose transformers logging
            bert_model = os.getenv("BERT_SENTIMENT_MODEL", "uer/roberta-base-finetuned-chinanews-chinese")

            def build(local_only: bool):
                # Single construction path shared by the cached and
                # downloaded branches (was duplicated inline before).
                tokenizer = AutoTokenizer.from_pretrained(bert_model, local_files_only=local_only)
                model = AutoModelForSequenceClassification.from_pretrained(bert_model, local_files_only=local_only)
                return pipeline(
                    "sentiment-analysis",
                    model=model,
                    tokenizer=tokenizer,
                    device=-1  # CPU
                )

            try:
                # Prefer the local cache to avoid network access.
                self.bert_pipeline = build(local_only=True)
                logger.info(f"✅ BERT pipeline loaded from local cache: {bert_model}")
            except (OSError, ValueError, ImportError):
                # Not cached locally — download from the hub.
                logger.info(f"📡 Downloading BERT model: {bert_model}...")
                self.bert_pipeline = build(local_only=False)
            logger.info(f"✅ BERT Sentiment pipeline ({bert_model}) initialized.")
        except ImportError:
            logger.warning("Transformers library not installed. BERT sentiment analysis disabled.")
        except Exception as e:
            if self.mode == "bert":
                logger.error(f"BERT mode requested but failed: {e}")
            else:
                logger.warning(f"BERT unavailable, using LLM only. Error: {e}")
            self.bert_pipeline = None

    def analyze_sentiment(self, text: str) -> Dict[str, Union[float, str]]:
        """Analyze the sentiment polarity of a single text (BERT only).

        For LLM analysis the calling Agent should execute the prompt from
        SKILL.md itself.

        Args:
            text: Text content to analyze.

        Returns:
            A dict with ``score`` (-1.0..1.0), ``label`` and ``reason``;
            ``label`` is "error" when the BERT pipeline is unavailable.
        """
        if not self.bert_pipeline:
            return {
                "score": 0.0,
                "label": "error",
                "reason": "BERT pipeline not initialized. For LLM analysis, please manually execute the prompt in SKILL.md."
            }
        results = self.analyze_sentiment_bert([text])
        return results[0] if results else {"score": 0.0, "label": "error"}

    def update_single_news_sentiment(self, news_id: Union[str, int], score: float, reason: str = "") -> bool:
        """Persist an externally produced sentiment result to the database.

        Lets the Agent save manually analyzed results.

        Args:
            news_id: News row ID.
            score: Sentiment score, expected in [-1.0, 1.0].
            reason: Analysis rationale, stored in the row's JSON meta_data.

        Returns:
            True on success, False if the update failed (error is logged).
        """
        try:
            cursor = self.db.conn.cursor()
            # json_set keeps any other meta_data keys intact.
            cursor.execute("""
                UPDATE daily_news
                SET sentiment_score = ?, meta_data = json_set(COALESCE(meta_data, '{}'), '$.sentiment_reason', ?)
                WHERE id = ?
            """, (score, reason, news_id))
            self.db.conn.commit()
            return True
        except Exception as e:
            logger.error(f"Failed to update sentiment for {news_id}: {e}")
            return False

    def analyze_sentiment_bert(self, texts: List[str]) -> List[Dict]:
        """Run fast batched sentiment analysis with BERT.

        Args:
            texts: List of texts to analyze.

        Returns:
            A list of result dicts, same length and order as ``texts``.
        """
        if not self.bert_pipeline:
            # One distinct dict per item — a shared `[d] * n` list would let a
            # mutation of one entry leak into all of them.
            return [{"score": 0.0, "label": "error", "reason": "BERT not available"} for _ in texts]
        try:
            results = self.bert_pipeline(texts, truncation=True, max_length=512)
            processed = []
            for r in results:
                label = r['label'].lower()
                score = r['score']
                # Normalize label conventions across different models.
                if 'negative' in label or 'neg' in label:
                    score = -score
                elif 'neutral' in label or 'neu' in label:
                    score = 0.0
                processed.append({
                    "score": float(round(score, 3)),
                    "label": "positive" if score > 0.1 else ("negative" if score < -0.1 else "neutral"),
                    "reason": "BERT automated analysis"
                })
            return processed
        except Exception as e:
            logger.error(f"BERT analysis failed: {e}")
            return [{"score": 0.0, "label": "error", "reason": str(e)} for _ in texts]

    def batch_update_news_sentiment(self, source: Optional[str] = None, limit: int = 50, use_bert: Optional[bool] = None):
        """Batch-update sentiment scores for news rows that have none yet.

        Args:
            source: Restrict to one news source, e.g. "wallstreetcn";
                ``None`` processes all sources.
            limit: Maximum number of news items to fetch.
            use_bert: Kept for backward compatibility; currently ignored —
                BERT is used whenever the pipeline was initialized.

        Returns:
            Number of news rows updated.
        """
        news_items = self.db.get_daily_news(source=source, limit=limit)
        # `is None` (not a falsy check) so a legitimately stored neutral
        # score of 0.0 is not re-analyzed on every run.
        to_analyze = [item for item in news_items if item.get('sentiment_score') is None]
        if not to_analyze:
            return 0
        updated_count = 0
        cursor = self.db.conn.cursor()
        if self.bert_pipeline:
            logger.info(f"🚀 Using BERT for batch analysis of {len(to_analyze)} items...")
            titles = [item['title'] for item in to_analyze]
            results = self.analyze_sentiment_bert(titles)
            for item, analysis in zip(to_analyze, results):
                cursor.execute("""
                    UPDATE daily_news
                    SET sentiment_score = ?, meta_data = json_set(COALESCE(meta_data, '{}'), '$.sentiment_reason', ?)
                    WHERE id = ?
                """, (analysis['score'], analysis['reason'], item['id']))
                updated_count += 1
        else:
            logger.warning("BERT pipeline not available. Batch update skipped. Please use Agentic analysis for high-quality results.")
        self.db.conn.commit()
        return updated_count