import json
import time
from datetime import datetime
from typing import List, Dict, Optional

import requests
from requests.exceptions import RequestException, Timeout
from loguru import logger

from .database_manager import DatabaseManager
from .content_extractor import ContentExtractor


class NewsNowTools:
    """Hot-news fetching tool backed by the NewsNow API and Jina content extraction."""

    BASE_URL = "https://newsnow.busiyi.world"

    # How long (in seconds) a successful fetch is served from the in-memory cache.
    CACHE_TTL_SECONDS = 300

    # Supported source ids mapped to their display names (used in report headings).
    SOURCES = {
        # Finance
        "cls": "财联社",
        "wallstreetcn": "华尔街见闻",
        "xueqiu": "雪球热榜",
        # General / social
        "weibo": "微博热搜",
        "zhihu": "知乎热榜",
        "baidu": "百度热搜",
        "toutiao": "今日头条",
        "douyin": "抖音热榜",
        "thepaper": "澎湃新闻",
        # Technology
        "36kr": "36氪",
        "ithome": "IT之家",
        "v2ex": "V2EX",
        "juejin": "掘金",
        "hackernews": "Hacker News",
    }

    def __init__(self, db: DatabaseManager):
        """Store the database handle and set up the extractor and the cache.

        Args:
            db: Database manager; its ``save_daily_news`` method is called
                with every freshly fetched news list.
        """
        self.db = db
        self.user_agent = (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
        )
        self.extractor = ContentExtractor()
        # Simple in-memory cache:
        # (source_id, count, fetch_content) -> {"time": timestamp, "data": [...]}
        self._cache = {}

    @staticmethod
    def _stale_or_empty(cached: Optional[Dict], source_id: str, reason: str) -> List[Dict]:
        """Return the expired cache entry's data if one exists, else an empty list."""
        if cached:
            logger.warning(f"⚠️ {reason}, using stale cache for {source_id}")
            return cached["data"]
        return []

    def fetch_hot_news(self, source_id: str, count: int = 15, fetch_content: bool = False) -> List[Dict]:
        """Fetch the hot-news list of one source (results are cached for 5 minutes).

        Args:
            source_id: Source identifier sent to the API as the ``id`` query
                parameter (see ``SOURCES`` for the known ids).
            count: Maximum number of items to keep from the response.
            fetch_content: When true, each item's URL is additionally passed to
                the content extractor and the result stored under ``content``.

        Returns:
            A list of dicts with the keys ``id``, ``source``, ``rank``,
            ``title``, ``url``, ``content``, ``publish_time`` and ``meta_data``.
            On HTTP, timeout, network or JSON errors the expired cache entry is
            returned when one exists; otherwise an empty list. Errors are
            logged, not raised.
        """
        # 1. Check cache validity. ``fetch_content`` is part of the key so that
        #    a content-less result is never served to a caller that asked for
        #    article bodies (and vice versa).
        cache_key = (source_id, count, bool(fetch_content))
        cached = self._cache.get(cache_key)
        now = time.time()
        if cached and (now - cached["time"] < self.CACHE_TTL_SECONDS):
            logger.info(f"⚡ Using cached news for {source_id} (Age: {int(now - cached['time'])}s)")
            return cached["data"]

        # 2. Fetch and normalise the items.
        try:
            response = requests.get(
                f"{self.BASE_URL}/api/s",
                # Let requests URL-encode the id instead of splicing it into the URL.
                params={"id": source_id},
                headers={"User-Agent": self.user_agent},
                timeout=30,
            )
            if response.status_code != 200:
                logger.error(f"NewsNow API Error: {response.status_code}")
                # Fallback to stale cache if available
                return self._stale_or_empty(cached, source_id, "API failed")

            items = response.json().get("items", [])[:count]
            processed_items = []
            for i, item in enumerate(items, 1):
                item_url = item.get("url", "")
                content = ""
                if fetch_content and item_url:
                    content = self.extractor.extract_with_jina(item_url) or ""
                processed_items.append({
                    # Synthesize an id when the API does not provide one.
                    "id": item.get("id") or f"{source_id}_{int(time.time())}_{i}",
                    "source": source_id,
                    "rank": i,
                    "title": item.get("title", ""),
                    "url": item_url,
                    "content": content,
                    "publish_time": item.get("publish_time"),
                    "meta_data": item.get("extra", {}),
                })
        except Timeout:
            logger.error(f"Timeout fetching hot news from {source_id}")
            return self._stale_or_empty(cached, source_id, "Timeout")
        except json.JSONDecodeError:
            # Must precede RequestException: since requests 2.27 the error
            # raised by ``response.json()`` subclasses both, and would
            # otherwise be reported as a network error.
            logger.error(f"Failed to parse JSON response from NewsNow for {source_id}")
            return self._stale_or_empty(cached, source_id, "Invalid JSON")
        except RequestException as e:
            logger.error(f"Network error fetching hot news from {source_id}: {e}")
            return self._stale_or_empty(cached, source_id, "Network check failed")
        except Exception as e:
            logger.error(f"Unexpected error fetching hot news from {source_id}: {e}")
            return []

        # 3. Update cache
        self._cache[cache_key] = {"time": now, "data": processed_items}
        logger.info(f"✅ Fetched and cached news for {source_id}")

        # 4. Persist. Best-effort: a storage failure must not throw away news
        #    that was fetched (and cached) successfully.
        try:
            self.db.save_daily_news(processed_items)
        except Exception as e:
            logger.error(f"Failed to save news for {source_id}: {e}")
        return processed_items

    def fetch_news_content(self, url: str) -> Optional[str]:
        """Fetch the main text of a web page through the Jina-based extractor.

        Args:
            url: Full URL of the page; it must start with http:// or https://.

        Returns:
            Whatever ``ContentExtractor.extract_with_jina`` returns — per the
            original documentation, the page body as Markdown, or None when
            extraction fails.
        """
        return self.extractor.extract_with_jina(url)

    def get_unified_trends(self, sources: Optional[List[str]] = None) -> str:
        """Build a combined Markdown report of hot topics across several sources.

        Args:
            sources: Source ids to scan; defaults to
                ``["weibo", "zhihu", "wallstreetcn"]``. Known ids by category:
                finance: "cls", "wallstreetcn", "xueqiu";
                general: "weibo", "zhihu", "baidu", "toutiao", "douyin", "thepaper";
                technology: "36kr", "ithome", "v2ex", "juejin", "hackernews".

        Returns:
            A Markdown report with one section per requested source listing up
            to 10 titles with links, or an error message string when no source
            returned any item.
        """
        sources = sources or ["weibo", "zhihu", "wallstreetcn"]

        # Every item returned by fetch_hot_news(src) carries source == src, so
        # grouping by the fetch itself avoids re-filtering a merged list.
        news_by_source: Dict[str, List[Dict]] = {}
        for src in sources:
            news_by_source[src] = self.fetch_hot_news(src)
            time.sleep(0.2)  # brief pause between consecutive source requests

        if not any(news_by_source.values()):
            return "❌ 未能获取到热点数据"

        parts = [f"# 实时全网热点汇总 ({datetime.now().strftime('%Y-%m-%d %H:%M')})\n\n"]
        for src in sources:
            src_name = self.SOURCES.get(src, src)
            parts.append(f"### 🔥 {src_name}\n")
            parts.extend(
                f"- {n['title']} ([链接]({n['url']}))\n"
                for n in news_by_source[src][:10]
            )
            parts.append("\n")
        return "".join(parts)


class PolymarketTools:
    """Polymarket prediction-market tool: fetches active markets as a proxy for public sentiment and expectations."""

    BASE_URL = "https://gamma-api.polymarket.com"

    def __init__(self, db: DatabaseManager):
        """Store the database handle and the User-Agent used for API requests."""
        self.db = db
        self.user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"

    def get_active_markets(self, limit: int = 20) -> List[Dict]:
        """Fetch active (not closed) prediction markets.

        Prediction-market data can indicate the probability the public assigns
        to major events, overall sentiment and risk appetite, and how much
        attention a topic is getting.

        Args:
            limit: Number of markets to request; defaults to 20.

        Returns:
            A list of dicts, one per market, with the keys ``id``,
            ``question``, ``slug``, ``outcomes``, ``outcomePrices``,
            ``volume`` and ``liquidity`` (each None when absent from the
            response). An empty list is returned on any error; errors are
            logged, not raised.
        """
        try:
            response = requests.get(
                f"{self.BASE_URL}/markets",
                params={"active": "true", "closed": "false", "limit": limit},
                headers={"User-Agent": self.user_agent, "Accept": "application/json"},
                timeout=30,
            )
            if response.status_code != 200:
                logger.warning(f"⚠️ Polymarket API 返回 {response.status_code}")
                return []

            result = [
                {
                    "id": m.get("id"),
                    "question": m.get("question"),
                    "slug": m.get("slug"),
                    "outcomes": m.get("outcomes"),
                    "outcomePrices": m.get("outcomePrices"),
                    "volume": m.get("volume"),
                    "liquidity": m.get("liquidity"),
                }
                for m in response.json()
            ]
            logger.info(f"✅ 获取 {len(result)} 个预测市场")
            return result
        except Timeout:
            logger.error("Timeout fetching Polymarket markets")
            return []
        except json.JSONDecodeError:
            # Must precede RequestException: since requests 2.27 the error
            # raised by ``response.json()`` subclasses both.
            logger.error("Failed to parse JSON response from Polymarket")
            return []
        except RequestException as e:
            logger.error(f"Network error fetching Polymarket markets: {e}")
            return []
        except Exception as e:
            logger.error(f"Unexpected error fetching Polymarket markets: {e}")
            return []

    @staticmethod
    def _format_volume(volume) -> str:
        """Format a volume as whole dollars; fall back to the raw value if it is not numeric."""
        try:
            return f"${float(volume):,.0f}"
        except (TypeError, ValueError):
            return str(volume)

    def get_market_summary(self, limit: int = 10) -> str:
        """Build a Markdown summary of the currently active prediction markets.

        Args:
            limit: Number of markets to request.

        Returns:
            A Markdown report listing each market's question and, when
            present, its outcome prices and trading volume; or an error
            message string when no market data could be fetched.
        """
        markets = self.get_active_markets(limit)
        if not markets:
            return "❌ 无法获取 Polymarket 数据"

        parts = [f"# 🔮 Polymarket 热门预测 ({datetime.now().strftime('%Y-%m-%d %H:%M')})\n\n"]
        for i, m in enumerate(markets, 1):
            # get_active_markets always sets these keys (possibly to None), so
            # a .get() default would never apply; use ``or`` instead.
            question = m.get("question") or "Unknown"
            prices = m.get("outcomePrices") or []
            volume = m.get("volume") or 0

            parts.append(f"**{i}. {question}**\n")
            if prices:
                parts.append(f" 概率: {prices}\n")
            if volume:
                parts.append(f" 交易量: {self._format_volume(volume)}\n")
            parts.append("\n")
        return "".join(parts)