Import 9 alphaear finance skills

- alphaear-deepear-lite: DeepEar Lite API integration
- alphaear-logic-visualizer: Draw.io XML finance diagrams
- alphaear-news: Real-time finance news (10+ sources)
- alphaear-predictor: Kronos time-series forecasting
- alphaear-reporter: Professional financial reports
- alphaear-search: Web search + local RAG
- alphaear-sentiment: FinBERT/LLM sentiment analysis
- alphaear-signal-tracker: Signal evolution tracking
- alphaear-stock: A-Share/HK/US stock data

Updates:
- All scripts updated to use universal .env path
- Added JINA_API_KEY, LLM_*, DEEPSEEK_API_KEY to .env.example
- Updated load_dotenv() to use ~/.config/opencode/.env (see the sketch below)
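For reference, the universal .env loading pattern looks roughly like this (a minimal sketch, assuming python-dotenv; the exact call sites in each script may differ):

    from pathlib import Path
    from dotenv import load_dotenv

    # Load the shared OpenCode config instead of a per-skill .env file
    load_dotenv(Path.home() / ".config" / "opencode" / ".env")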
Author: Kunthawat Greethong
Date: 2026-03-27 10:11:37 +07:00
Parent: 7edf5bc4d0
Commit: 58f9380ec4
149 changed files with 26867 additions and 0 deletions

@@ -0,0 +1,122 @@
import requests
from requests.exceptions import RequestException, Timeout, ConnectionError
import os
import time
import json
import threading
from typing import Optional
from loguru import logger


class ContentExtractor:
    """Content extraction utility, primarily backed by the Jina Reader API"""

    JINA_BASE_URL = "https://r.jina.ai/"

    # Rate-limit configuration (20 requests/minute without an API key)
    _rate_limit_no_key = 20   # max requests per minute
    _rate_window = 60.0       # sliding window size (seconds)
    _min_interval = 3.0       # minimum interval between requests (seconds)

    # Class-level rate-limit state
    _request_times = []
    _last_request_time = 0.0
    _lock = threading.Lock()

    @classmethod
    def _wait_for_rate_limit(cls, has_api_key: bool) -> None:
        """Block until the rate limit allows another request"""
        if has_api_key:
            # With an API key, a short fixed delay is enough
            time.sleep(0.5)
            return
        with cls._lock:
            current_time = time.time()
            # 1. Drop request timestamps that fall outside the window
            cls._request_times = [t for t in cls._request_times if current_time - t < cls._rate_window]
            # 2. If the limit is reached, wait for the oldest request to expire
            if len(cls._request_times) >= cls._rate_limit_no_key:
                oldest = cls._request_times[0]
                wait_time = cls._rate_window - (current_time - oldest) + 1.0
                if wait_time > 0:
                    logger.warning(f"⏳ Jina rate limit reached, waiting {wait_time:.1f}s...")
                    time.sleep(wait_time)
                    current_time = time.time()
                    cls._request_times = [t for t in cls._request_times if current_time - t < cls._rate_window]
            # 3. Enforce the minimum interval between consecutive requests
            time_since_last = current_time - cls._last_request_time
            if time_since_last < cls._min_interval:
                time.sleep(cls._min_interval - time_since_last)
            # 4. Record this request
            cls._request_times.append(time.time())
            cls._last_request_time = time.time()

    @classmethod
    def extract_with_jina(cls, url: str, timeout: int = 30, _retry: bool = True) -> Optional[str]:
        """
        Extract the main content of a web page via Jina Reader (Markdown format).
        Without an API key, requests are automatically throttled: at most 20 per
        minute, with at least 3 seconds between requests.
        """
        if not url or not url.startswith("http"):
            return None
        logger.info(f"🕸️ Extracting content from: {url} via Jina...")
        headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
            "Accept": "application/json"
        }
        # Use the shared JINA_API_KEY
        api_key = os.getenv("JINA_API_KEY")
        has_api_key = bool(api_key and api_key.strip())
        if has_api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        # Respect the rate limit before sending the request
        cls._wait_for_rate_limit(has_api_key)
        try:
            # Jina Reader API: prepend the reader endpoint to the target URL
            full_url = f"{cls.JINA_BASE_URL}{url}"
            response = requests.get(full_url, headers=headers, timeout=timeout)
            if response.status_code == 200:
                try:
                    data = response.json()
                    # Jina's JSON responses usually nest the content under data.content
                    if isinstance(data, dict) and "data" in data:
                        return data["data"].get("content", "")
                    return data.get("content", response.text)
                except (json.JSONDecodeError, TypeError):
                    return response.text
            elif response.status_code == 429 and _retry:
                # Rate limited: wait, then retry once (guarded to avoid endless recursion)
                logger.warning("⚠️ Jina rate limit (429), waiting 60s before retry...")
                time.sleep(60)
                return cls.extract_with_jina(url, timeout, _retry=False)
            else:
                logger.warning(f"Jina extraction failed (Status {response.status_code}) for {url}")
                return None
        except Timeout:
            logger.error(f"Timeout during Jina extraction for {url}")
            return None
        except ConnectionError:
            logger.error(f"Connection error during Jina extraction for {url}")
            return None
        except RequestException as e:
            logger.error(f"Request error during Jina extraction: {e}")
            return None
        except Exception as e:
            logger.error(f"Unexpected error during Jina extraction: {e}")
            return None
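Usage is a single classmethod call; a minimal sketch (the example URL is hypothetical):

    # Returns Markdown text, or None on failure; throttles itself when JINA_API_KEY is unset
    markdown = ContentExtractor.extract_with_jina("https://example.com/article")
    if markdown:
        print(markdown[:500])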

@@ -0,0 +1,131 @@
import sqlite3
import json
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Optional
from loguru import logger


class DatabaseManager:
    """
    AlphaEar News Database Manager
    Reduced version for the alphaear-news skill
    """

    def __init__(self, db_path: str = "data/signal_flux.db"):
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self.conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
        self.conn.row_factory = sqlite3.Row
        self._init_db()
        logger.debug(f"💾 Database initialized at {self.db_path}")

    def _init_db(self):
        """Initialize news-related tables only"""
        cursor = self.conn.cursor()
        # Daily news table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS daily_news (
                id TEXT PRIMARY KEY,
                source TEXT,
                rank INTEGER,
                title TEXT,
                url TEXT,
                content TEXT,
                publish_time TEXT,
                crawl_time TEXT,
                sentiment_score REAL,
                analysis TEXT,
                meta_data TEXT
            )
        """)
        # Indexes
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_news_crawl_time ON daily_news(crawl_time)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_news_source ON daily_news(source)")
        self.conn.commit()

    # --- News Operations ---

    def save_daily_news(self, news_list: List[Dict]) -> int:
        """Save hot news items; returns the number of rows written"""
        cursor = self.conn.cursor()
        count = 0
        crawl_time = datetime.now().isoformat()
        for news in news_list:
            try:
                news_id = news.get('id') or f"{news.get('source')}_{news.get('rank')}_{crawl_time[:10]}"
                cursor.execute("""
                    INSERT OR REPLACE INTO daily_news
                    (id, source, rank, title, url, content, publish_time, crawl_time, sentiment_score, meta_data)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """, (
                    news_id,
                    news.get('source'),
                    news.get('rank'),
                    news.get('title'),
                    news.get('url'),
                    news.get('content', ''),
                    news.get('publish_time'),
                    crawl_time,
                    news.get('sentiment_score'),
                    json.dumps(news.get('meta_data', {}))
                ))
                count += 1
            except Exception as e:
                logger.error(f"Error saving news item {news.get('title')}: {e}")
        self.conn.commit()
        return count

    def get_daily_news(self, source: Optional[str] = None, limit: int = 100, days: int = 1) -> List[Dict]:
        """Get recent news"""
        cursor = self.conn.cursor()
        time_threshold = datetime.now().timestamp() - days * 86400
        time_threshold_str = datetime.fromtimestamp(time_threshold).isoformat()
        query = "SELECT * FROM daily_news WHERE crawl_time >= ?"
        params = [time_threshold_str]
        if source:
            query += " AND source = ?"
            params.append(source)
        query += " ORDER BY crawl_time DESC, rank LIMIT ?"
        params.append(limit)
        cursor.execute(query, params)
        return [dict(row) for row in cursor.fetchall()]

    def delete_news(self, news_id: str) -> bool:
        cursor = self.conn.cursor()
        cursor.execute("DELETE FROM daily_news WHERE id = ?", (news_id,))
        self.conn.commit()
        return cursor.rowcount > 0

    def update_news_content(self, news_id: str, content: Optional[str] = None, analysis: Optional[str] = None) -> bool:
        cursor = self.conn.cursor()
        updates = []
        params = []
        if content is not None:
            updates.append("content = ?")
            params.append(content)
        if analysis is not None:
            updates.append("analysis = ?")
            params.append(analysis)
        if not updates:
            return False
        params.append(news_id)
        query = f"UPDATE daily_news SET {', '.join(updates)} WHERE id = ?"
        cursor.execute(query, params)
        self.conn.commit()
        return cursor.rowcount > 0

    def close(self):
        if self.conn:
            self.conn.close()

@@ -0,0 +1,256 @@
import requests
from requests.exceptions import RequestException, Timeout
import json
import time
from datetime import datetime
from typing import List, Dict, Optional
from loguru import logger
from .database_manager import DatabaseManager
from .content_extractor import ContentExtractor


class NewsNowTools:
    """Hot-news fetching tool backed by the NewsNow API, with Jina content extraction"""

    BASE_URL = "https://newsnow.busiyi.world"

    SOURCES = {
        # Finance
        "cls": "财联社",
        "wallstreetcn": "华尔街见闻",
        "xueqiu": "雪球热榜",
        # General / social
        "weibo": "微博热搜",
        "zhihu": "知乎热榜",
        "baidu": "百度热搜",
        "toutiao": "今日头条",
        "douyin": "抖音热榜",
        "thepaper": "澎湃新闻",
        # Tech
        "36kr": "36氪",
        "ithome": "IT之家",
        "v2ex": "V2EX",
        "juejin": "掘金",
        "hackernews": "Hacker News",
    }

    def __init__(self, db: DatabaseManager):
        self.db = db
        self.user_agent = (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
        )
        self.extractor = ContentExtractor()
        # Simple in-memory cache: cache_key -> {"time": timestamp, "data": []}
        self._cache = {}

    def fetch_hot_news(self, source_id: str, count: int = 15, fetch_content: bool = False) -> List[Dict]:
        """
        Fetch the hot-news list from the given source, with a 5-minute cache.
        """
        # 1. Check cache validity (5 minutes); the key includes fetch_content so
        #    entries cached without article bodies are not reused when bodies are requested
        cache_key = f"{source_id}_{count}_{fetch_content}"
        cached = self._cache.get(cache_key)
        now = time.time()
        if cached and (now - cached["time"] < 300):
            logger.info(f"⚡ Using cached news for {source_id} (Age: {int(now - cached['time'])}s)")
            return cached["data"]
        try:
            url = f"{self.BASE_URL}/api/s?id={source_id}"
            response = requests.get(url, headers={"User-Agent": self.user_agent}, timeout=30)
            if response.status_code == 200:
                data = response.json()
                items = data.get("items", [])[:count]
                processed_items = []
                for i, item in enumerate(items, 1):
                    item_url = item.get("url", "")
                    content = ""
                    if fetch_content and item_url:
                        content = self.extractor.extract_with_jina(item_url) or ""
                    processed_items.append({
                        "id": item.get("id") or f"{source_id}_{int(time.time())}_{i}",
                        "source": source_id,
                        "rank": i,
                        "title": item.get("title", ""),
                        "url": item_url,
                        "content": content,
                        "publish_time": item.get("publish_time"),
                        "meta_data": item.get("extra", {})
                    })
                # Update cache
                self._cache[cache_key] = {"time": now, "data": processed_items}
                logger.info(f"✅ Fetched and cached news for {source_id}")
                self.db.save_daily_news(processed_items)
                return processed_items
            else:
                logger.error(f"NewsNow API Error: {response.status_code}")
                # Fall back to a stale cache entry if available
                if cached:
                    logger.warning(f"⚠️ API failed, using stale cache for {source_id}")
                    return cached["data"]
                return []
        except Timeout:
            logger.error(f"Timeout fetching hot news from {source_id}")
            if cached:
                logger.warning(f"⚠️ Timeout, using stale cache for {source_id}")
                return cached["data"]
            return []
        except RequestException as e:
            logger.error(f"Network error fetching hot news from {source_id}: {e}")
            if cached:
                logger.warning(f"⚠️ Network request failed, using stale cache for {source_id}")
                return cached["data"]
            return []
        except json.JSONDecodeError:
            logger.error(f"Failed to parse JSON response from NewsNow for {source_id}")
            return []
        except Exception as e:
            logger.error(f"Unexpected error fetching hot news from {source_id}: {e}")
            return []

    def fetch_news_content(self, url: str) -> Optional[str]:
        """
        Fetch the main body of a web page via Jina Reader.
        Args:
            url: Full URL of the page to fetch; must start with http:// or https://.
        Returns:
            The extracted page content in Markdown, or None on failure.
        """
        return self.extractor.extract_with_jina(url)

    def get_unified_trends(self, sources: Optional[List[str]] = None) -> str:
        """
        Build a cross-platform trending report by aggregating hot items from multiple news sources.
        Args:
            sources: News sources to scan, by category:
                **Finance**: "cls", "wallstreetcn", "xueqiu"
                **General**: "weibo", "zhihu", "baidu", "toutiao", "douyin", "thepaper"
                **Tech**: "36kr", "ithome", "v2ex", "juejin", "hackernews"
        Returns:
            A formatted Markdown report with each platform's Top 10 headlines and links.
        """
        sources = sources or ["weibo", "zhihu", "wallstreetcn"]
        all_news = []
        for src in sources:
            all_news.extend(self.fetch_hot_news(src))
            time.sleep(0.2)
        if not all_news:
            return "❌ Failed to fetch trending data"
        report = f"# Real-Time Trending Topics ({datetime.now().strftime('%Y-%m-%d %H:%M')})\n\n"
        for src in sources:
            src_name = self.SOURCES.get(src, src)
            report += f"### 🔥 {src_name}\n"
            src_news = [n for n in all_news if n['source'] == src]
            for n in src_news[:10]:
                report += f"- {n['title']} ([link]({n['url']}))\n"
            report += "\n"
        return report


class PolymarketTools:
    """Polymarket prediction-market tool: surfaces popular markets as a proxy for public sentiment and expectations"""

    BASE_URL = "https://gamma-api.polymarket.com"

    def __init__(self, db: DatabaseManager):
        self.db = db
        self.user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"

    def get_active_markets(self, limit: int = 20) -> List[Dict]:
        """
        Fetch active prediction markets for analyzing public sentiment and expectations.
        Prediction-market data can reflect:
        - The crowd's implied probability for major events
        - Market sentiment and risk appetite
        - How much attention hot topics are drawing
        Args:
            limit: Number of markets to fetch (default 20).
        Returns:
            A list of market dicts, each containing:
            - question: the market's question
            - outcomes: possible outcomes
            - outcomePrices: probability-style prices per outcome
            - volume: trading volume
        """
        try:
            response = requests.get(
                f"{self.BASE_URL}/markets",
                params={"active": "true", "closed": "false", "limit": limit},
                headers={"User-Agent": self.user_agent, "Accept": "application/json"},
                timeout=30
            )
            if response.status_code == 200:
                markets = response.json()
                result = []
                for m in markets:
                    result.append({
                        "id": m.get("id"),
                        "question": m.get("question"),
                        "slug": m.get("slug"),
                        "outcomes": m.get("outcomes"),
                        "outcomePrices": m.get("outcomePrices"),
                        "volume": m.get("volume"),
                        "liquidity": m.get("liquidity"),
                    })
                logger.info(f"✅ Fetched {len(result)} prediction markets")
                return result
            else:
                logger.warning(f"⚠️ Polymarket API returned {response.status_code}")
                return []
        except Timeout:
            logger.error("Timeout fetching Polymarket markets")
            return []
        except RequestException as e:
            logger.error(f"Network error fetching Polymarket markets: {e}")
            return []
        except json.JSONDecodeError:
            logger.error("Failed to parse JSON response from Polymarket")
            return []
        except Exception as e:
            logger.error(f"Unexpected error fetching Polymarket markets: {e}")
            return []

    def get_market_summary(self, limit: int = 10) -> str:
        """
        Build a summary report of active prediction markets to gauge hot topics and public expectations.
        Args:
            limit: Number of markets to include
        Returns:
            A formatted prediction-market report
        """
        markets = self.get_active_markets(limit)
        if not markets:
            return "❌ Unable to fetch Polymarket data"
        report = f"# 🔮 Polymarket Top Predictions ({datetime.now().strftime('%Y-%m-%d %H:%M')})\n\n"
        for i, m in enumerate(markets, 1):
            question = m.get("question", "Unknown")
            prices = m.get("outcomePrices", [])
            volume = m.get("volume", 0)
            report += f"**{i}. {question}**\n"
            if prices:
                report += f"   Probabilities: {prices}\n"
            if volume:
                report += f"   Volume: ${float(volume):,.0f}\n"
            report += "\n"
        return report
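A combined usage sketch of the two tool classes (assumes the DatabaseManager shown above; network access required):

    db = DatabaseManager()
    news = NewsNowTools(db)
    print(news.get_unified_trends(["cls", "wallstreetcn"]))  # Markdown report; items are also persisted to SQLite
    poly = PolymarketTools(db)
    print(poly.get_market_summary(limit=5))
    db.close()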