Import 9 alphaear finance skills

- alphaear-deepear-lite: DeepEar Lite API integration
- alphaear-logic-visualizer: Draw.io XML finance diagrams
- alphaear-news: Real-time finance news (10+ sources)
- alphaear-predictor: Kronos time-series forecasting
- alphaear-reporter: Professional financial reports
- alphaear-search: Web search + local RAG
- alphaear-sentiment: FinBERT/LLM sentiment analysis
- alphaear-signal-tracker: Signal evolution tracking
- alphaear-stock: A-Share/HK/US stock data

Updates:
- All scripts updated to use universal .env path
- Added JINA_API_KEY, LLM_*, DEEPSEEK_API_KEY to .env.example
- Updated load_dotenv() to use ~/.config/opencode/.env (see the sketch below)
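For reference, the universal .env loading pattern looks roughly like this (a minimal sketch, assuming python-dotenv; the exact call sites in each script may differ):

    from pathlib import Path
    from dotenv import load_dotenv

    # Load the shared OpenCode config instead of a per-skill .env file
    load_dotenv(Path.home() / ".config" / "opencode" / ".env")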
Author: Kunthawat Greethong
Date: 2026-03-27 10:11:37 +07:00
Parent: 7edf5bc4d0
Commit: 58f9380ec4
149 changed files with 26867 additions and 0 deletions

@@ -0,0 +1,122 @@
import requests
from requests.exceptions import RequestException, Timeout, ConnectionError
import os
import time
import json
import threading
from typing import Optional
from loguru import logger


class ContentExtractor:
    """Content extraction utility, primarily backed by the Jina Reader API"""

    JINA_BASE_URL = "https://r.jina.ai/"

    # Rate-limit configuration (20 requests/minute without an API key)
    _rate_limit_no_key = 20   # max requests per minute
    _rate_window = 60.0       # sliding window size (seconds)
    _min_interval = 3.0       # minimum interval between requests (seconds)

    # Class-level rate-limit state
    _request_times = []
    _last_request_time = 0.0
    _lock = threading.Lock()

    @classmethod
    def _wait_for_rate_limit(cls, has_api_key: bool) -> None:
        """Block until the rate limit allows another request"""
        if has_api_key:
            # With an API key, a short fixed delay is enough
            time.sleep(0.5)
            return
        with cls._lock:
            current_time = time.time()
            # 1. Drop request timestamps that fall outside the window
            cls._request_times = [t for t in cls._request_times if current_time - t < cls._rate_window]
            # 2. If the limit is reached, wait for the oldest request to expire
            if len(cls._request_times) >= cls._rate_limit_no_key:
                oldest = cls._request_times[0]
                wait_time = cls._rate_window - (current_time - oldest) + 1.0
                if wait_time > 0:
                    logger.warning(f"⏳ Jina rate limit reached, waiting {wait_time:.1f}s...")
                    time.sleep(wait_time)
                    current_time = time.time()
                    cls._request_times = [t for t in cls._request_times if current_time - t < cls._rate_window]
            # 3. Enforce the minimum interval between consecutive requests
            time_since_last = current_time - cls._last_request_time
            if time_since_last < cls._min_interval:
                time.sleep(cls._min_interval - time_since_last)
            # 4. Record this request
            cls._request_times.append(time.time())
            cls._last_request_time = time.time()

    @classmethod
    def extract_with_jina(cls, url: str, timeout: int = 30, _retry: bool = True) -> Optional[str]:
        """
        Extract the main content of a web page via Jina Reader (Markdown format).
        Without an API key, requests are automatically throttled: at most 20 per
        minute, with at least 3 seconds between requests.
        """
        if not url or not url.startswith("http"):
            return None
        logger.info(f"🕸️ Extracting content from: {url} via Jina...")
        headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
            "Accept": "application/json"
        }
        # Use the shared JINA_API_KEY
        api_key = os.getenv("JINA_API_KEY")
        has_api_key = bool(api_key and api_key.strip())
        if has_api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        # Respect the rate limit before sending the request
        cls._wait_for_rate_limit(has_api_key)
        try:
            # Jina Reader API: prepend the reader endpoint to the target URL
            full_url = f"{cls.JINA_BASE_URL}{url}"
            response = requests.get(full_url, headers=headers, timeout=timeout)
            if response.status_code == 200:
                try:
                    data = response.json()
                    # Jina's JSON responses usually nest the content under data.content
                    if isinstance(data, dict) and "data" in data:
                        return data["data"].get("content", "")
                    return data.get("content", response.text)
                except (json.JSONDecodeError, TypeError):
                    return response.text
            elif response.status_code == 429 and _retry:
                # Rate limited: wait, then retry once (guarded to avoid endless recursion)
                logger.warning("⚠️ Jina rate limit (429), waiting 60s before retry...")
                time.sleep(60)
                return cls.extract_with_jina(url, timeout, _retry=False)
            else:
                logger.warning(f"Jina extraction failed (Status {response.status_code}) for {url}")
                return None
        except Timeout:
            logger.error(f"Timeout during Jina extraction for {url}")
            return None
        except ConnectionError:
            logger.error(f"Connection error during Jina extraction for {url}")
            return None
        except RequestException as e:
            logger.error(f"Request error during Jina extraction: {e}")
            return None
        except Exception as e:
            logger.error(f"Unexpected error during Jina extraction: {e}")
            return None
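Usage is a single classmethod call; a minimal sketch (the example URL is hypothetical):

    # Returns Markdown text, or None on failure; throttles itself when JINA_API_KEY is unset
    markdown = ContentExtractor.extract_with_jina("https://example.com/article")
    if markdown:
        print(markdown[:500])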

@@ -0,0 +1,131 @@
import sqlite3
import json
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Optional
from loguru import logger


class DatabaseManager:
    """
    AlphaEar News Database Manager
    Reduced version for the alphaear-news skill
    """

    def __init__(self, db_path: str = "data/signal_flux.db"):
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self.conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
        self.conn.row_factory = sqlite3.Row
        self._init_db()
        logger.debug(f"💾 Database initialized at {self.db_path}")

    def _init_db(self):
        """Initialize news-related tables only"""
        cursor = self.conn.cursor()
        # Daily news table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS daily_news (
                id TEXT PRIMARY KEY,
                source TEXT,
                rank INTEGER,
                title TEXT,
                url TEXT,
                content TEXT,
                publish_time TEXT,
                crawl_time TEXT,
                sentiment_score REAL,
                analysis TEXT,
                meta_data TEXT
            )
        """)
        # Indexes
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_news_crawl_time ON daily_news(crawl_time)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_news_source ON daily_news(source)")
        self.conn.commit()

    # --- News Operations ---

    def save_daily_news(self, news_list: List[Dict]) -> int:
        """Save hot news items; returns the number of rows written"""
        cursor = self.conn.cursor()
        count = 0
        crawl_time = datetime.now().isoformat()
        for news in news_list:
            try:
                news_id = news.get('id') or f"{news.get('source')}_{news.get('rank')}_{crawl_time[:10]}"
                cursor.execute("""
                    INSERT OR REPLACE INTO daily_news
                    (id, source, rank, title, url, content, publish_time, crawl_time, sentiment_score, meta_data)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """, (
                    news_id,
                    news.get('source'),
                    news.get('rank'),
                    news.get('title'),
                    news.get('url'),
                    news.get('content', ''),
                    news.get('publish_time'),
                    crawl_time,
                    news.get('sentiment_score'),
                    json.dumps(news.get('meta_data', {}))
                ))
                count += 1
            except Exception as e:
                logger.error(f"Error saving news item {news.get('title')}: {e}")
        self.conn.commit()
        return count

    def get_daily_news(self, source: Optional[str] = None, limit: int = 100, days: int = 1) -> List[Dict]:
        """Get recent news"""
        cursor = self.conn.cursor()
        time_threshold = datetime.now().timestamp() - days * 86400
        time_threshold_str = datetime.fromtimestamp(time_threshold).isoformat()
        query = "SELECT * FROM daily_news WHERE crawl_time >= ?"
        params = [time_threshold_str]
        if source:
            query += " AND source = ?"
            params.append(source)
        query += " ORDER BY crawl_time DESC, rank LIMIT ?"
        params.append(limit)
        cursor.execute(query, params)
        return [dict(row) for row in cursor.fetchall()]

    def delete_news(self, news_id: str) -> bool:
        cursor = self.conn.cursor()
        cursor.execute("DELETE FROM daily_news WHERE id = ?", (news_id,))
        self.conn.commit()
        return cursor.rowcount > 0

    def update_news_content(self, news_id: str, content: Optional[str] = None, analysis: Optional[str] = None) -> bool:
        cursor = self.conn.cursor()
        updates = []
        params = []
        if content is not None:
            updates.append("content = ?")
            params.append(content)
        if analysis is not None:
            updates.append("analysis = ?")
            params.append(analysis)
        if not updates:
            return False
        params.append(news_id)
        query = f"UPDATE daily_news SET {', '.join(updates)} WHERE id = ?"
        cursor.execute(query, params)
        self.conn.commit()
        return cursor.rowcount > 0

    def close(self):
        if self.conn:
            self.conn.close()

@@ -0,0 +1,256 @@
import requests
from requests.exceptions import RequestException, Timeout
import json
import time
from datetime import datetime
from typing import List, Dict, Optional
from loguru import logger
from .database_manager import DatabaseManager
from .content_extractor import ContentExtractor


class NewsNowTools:
    """Hot-news fetching tool backed by the NewsNow API, with Jina content extraction"""

    BASE_URL = "https://newsnow.busiyi.world"

    SOURCES = {
        # Finance
        "cls": "财联社",
        "wallstreetcn": "华尔街见闻",
        "xueqiu": "雪球热榜",
        # General / social
        "weibo": "微博热搜",
        "zhihu": "知乎热榜",
        "baidu": "百度热搜",
        "toutiao": "今日头条",
        "douyin": "抖音热榜",
        "thepaper": "澎湃新闻",
        # Tech
        "36kr": "36氪",
        "ithome": "IT之家",
        "v2ex": "V2EX",
        "juejin": "掘金",
        "hackernews": "Hacker News",
    }

    def __init__(self, db: DatabaseManager):
        self.db = db
        self.user_agent = (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
        )
        self.extractor = ContentExtractor()
        # Simple in-memory cache: cache_key -> {"time": timestamp, "data": []}
        self._cache = {}

    def fetch_hot_news(self, source_id: str, count: int = 15, fetch_content: bool = False) -> List[Dict]:
        """
        Fetch the hot-news list from the given source, with a 5-minute cache.
        """
        # 1. Check cache validity (5 minutes); the key includes fetch_content so
        #    entries cached without article bodies are not reused when bodies are requested
        cache_key = f"{source_id}_{count}_{fetch_content}"
        cached = self._cache.get(cache_key)
        now = time.time()
        if cached and (now - cached["time"] < 300):
            logger.info(f"⚡ Using cached news for {source_id} (Age: {int(now - cached['time'])}s)")
            return cached["data"]
        try:
            url = f"{self.BASE_URL}/api/s?id={source_id}"
            response = requests.get(url, headers={"User-Agent": self.user_agent}, timeout=30)
            if response.status_code == 200:
                data = response.json()
                items = data.get("items", [])[:count]
                processed_items = []
                for i, item in enumerate(items, 1):
                    item_url = item.get("url", "")
                    content = ""
                    if fetch_content and item_url:
                        content = self.extractor.extract_with_jina(item_url) or ""
                    processed_items.append({
                        "id": item.get("id") or f"{source_id}_{int(time.time())}_{i}",
                        "source": source_id,
                        "rank": i,
                        "title": item.get("title", ""),
                        "url": item_url,
                        "content": content,
                        "publish_time": item.get("publish_time"),
                        "meta_data": item.get("extra", {})
                    })
                # Update cache
                self._cache[cache_key] = {"time": now, "data": processed_items}
                logger.info(f"✅ Fetched and cached news for {source_id}")
                self.db.save_daily_news(processed_items)
                return processed_items
            else:
                logger.error(f"NewsNow API Error: {response.status_code}")
                # Fall back to a stale cache entry if available
                if cached:
                    logger.warning(f"⚠️ API failed, using stale cache for {source_id}")
                    return cached["data"]
                return []
        except Timeout:
            logger.error(f"Timeout fetching hot news from {source_id}")
            if cached:
                logger.warning(f"⚠️ Timeout, using stale cache for {source_id}")
                return cached["data"]
            return []
        except RequestException as e:
            logger.error(f"Network error fetching hot news from {source_id}: {e}")
            if cached:
                logger.warning(f"⚠️ Network request failed, using stale cache for {source_id}")
                return cached["data"]
            return []
        except json.JSONDecodeError:
            logger.error(f"Failed to parse JSON response from NewsNow for {source_id}")
            return []
        except Exception as e:
            logger.error(f"Unexpected error fetching hot news from {source_id}: {e}")
            return []

    def fetch_news_content(self, url: str) -> Optional[str]:
        """
        Fetch the main body of a web page via Jina Reader.
        Args:
            url: Full URL of the page to fetch; must start with http:// or https://.
        Returns:
            The extracted page content in Markdown, or None on failure.
        """
        return self.extractor.extract_with_jina(url)

    def get_unified_trends(self, sources: Optional[List[str]] = None) -> str:
        """
        Build a cross-platform trending report by aggregating hot items from multiple news sources.
        Args:
            sources: News sources to scan, by category:
                **Finance**: "cls", "wallstreetcn", "xueqiu"
                **General**: "weibo", "zhihu", "baidu", "toutiao", "douyin", "thepaper"
                **Tech**: "36kr", "ithome", "v2ex", "juejin", "hackernews"
        Returns:
            A formatted Markdown report with each platform's Top 10 headlines and links.
        """
        sources = sources or ["weibo", "zhihu", "wallstreetcn"]
        all_news = []
        for src in sources:
            all_news.extend(self.fetch_hot_news(src))
            time.sleep(0.2)
        if not all_news:
            return "❌ Failed to fetch trending data"
        report = f"# Real-Time Trending Topics ({datetime.now().strftime('%Y-%m-%d %H:%M')})\n\n"
        for src in sources:
            src_name = self.SOURCES.get(src, src)
            report += f"### 🔥 {src_name}\n"
            src_news = [n for n in all_news if n['source'] == src]
            for n in src_news[:10]:
                report += f"- {n['title']} ([link]({n['url']}))\n"
            report += "\n"
        return report


class PolymarketTools:
    """Polymarket prediction-market tool: surfaces popular markets as a proxy for public sentiment and expectations"""

    BASE_URL = "https://gamma-api.polymarket.com"

    def __init__(self, db: DatabaseManager):
        self.db = db
        self.user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"

    def get_active_markets(self, limit: int = 20) -> List[Dict]:
        """
        Fetch active prediction markets for analyzing public sentiment and expectations.
        Prediction-market data can reflect:
        - The crowd's implied probability for major events
        - Market sentiment and risk appetite
        - How much attention hot topics are drawing
        Args:
            limit: Number of markets to fetch (default 20).
        Returns:
            A list of market dicts, each containing:
            - question: the market's question
            - outcomes: possible outcomes
            - outcomePrices: probability-style prices per outcome
            - volume: trading volume
        """
        try:
            response = requests.get(
                f"{self.BASE_URL}/markets",
                params={"active": "true", "closed": "false", "limit": limit},
                headers={"User-Agent": self.user_agent, "Accept": "application/json"},
                timeout=30
            )
            if response.status_code == 200:
                markets = response.json()
                result = []
                for m in markets:
                    result.append({
                        "id": m.get("id"),
                        "question": m.get("question"),
                        "slug": m.get("slug"),
                        "outcomes": m.get("outcomes"),
                        "outcomePrices": m.get("outcomePrices"),
                        "volume": m.get("volume"),
                        "liquidity": m.get("liquidity"),
                    })
                logger.info(f"✅ Fetched {len(result)} prediction markets")
                return result
            else:
                logger.warning(f"⚠️ Polymarket API returned {response.status_code}")
                return []
        except Timeout:
            logger.error("Timeout fetching Polymarket markets")
            return []
        except RequestException as e:
            logger.error(f"Network error fetching Polymarket markets: {e}")
            return []
        except json.JSONDecodeError:
            logger.error("Failed to parse JSON response from Polymarket")
            return []
        except Exception as e:
            logger.error(f"Unexpected error fetching Polymarket markets: {e}")
            return []

    def get_market_summary(self, limit: int = 10) -> str:
        """
        Build a summary report of active prediction markets to gauge hot topics and public expectations.
        Args:
            limit: Number of markets to include
        Returns:
            A formatted prediction-market report
        """
        markets = self.get_active_markets(limit)
        if not markets:
            return "❌ Unable to fetch Polymarket data"
        report = f"# 🔮 Polymarket Top Predictions ({datetime.now().strftime('%Y-%m-%d %H:%M')})\n\n"
        for i, m in enumerate(markets, 1):
            question = m.get("question", "Unknown")
            prices = m.get("outcomePrices", [])
            volume = m.get("volume", 0)
            report += f"**{i}. {question}**\n"
            if prices:
                report += f"   Probabilities: {prices}\n"
            if volume:
                report += f"   Volume: ${float(volume):,.0f}\n"
            report += "\n"
        return report
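A combined usage sketch of the two tool classes (assumes the DatabaseManager shown above; network access required):

    db = DatabaseManager()
    news = NewsNowTools(db)
    print(news.get_unified_trends(["cls", "wallstreetcn"]))  # Markdown report; items are also persisted to SQLite
    poly = PolymarketTools(db)
    print(poly.get_market_summary(limit=5))
    db.close()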