Import 9 alphaear finance skills
- alphaear-deepear-lite: DeepEar Lite API integration - alphaear-logic-visualizer: Draw.io XML finance diagrams - alphaear-news: Real-time finance news (10+ sources) - alphaear-predictor: Kronos time-series forecasting - alphaear-reporter: Professional financial reports - alphaear-search: Web search + local RAG - alphaear-sentiment: FinBERT/LLM sentiment analysis - alphaear-signal-tracker: Signal evolution tracking - alphaear-stock: A-Share/HK/US stock data Updates: - All scripts updated to use universal .env path - Added JINA_API_KEY, LLM_*, DEEPSEEK_API_KEY to .env.example - Updated load_dotenv() to use ~/.config/opencode/.env
This commit is contained in:
180
skills/alphaear-predictor/scripts/json_utils.py
Normal file
180
skills/alphaear-predictor/scripts/json_utils.py
Normal file
@@ -0,0 +1,180 @@
|
||||
import ast
|
||||
import json
|
||||
import re
|
||||
from typing import Optional, Any
|
||||
from loguru import logger
|
||||
|
||||
def _strip_comments(text: str) -> str:
|
||||
"""
|
||||
Safely remove C-style comments (// and /* */) from JSON-like text,
|
||||
preserving strings (including URLs like http://).
|
||||
"""
|
||||
result = []
|
||||
i = 0
|
||||
n = len(text)
|
||||
in_string = False
|
||||
escape = False
|
||||
|
||||
while i < n:
|
||||
char = text[i]
|
||||
|
||||
if in_string:
|
||||
if char == '\\':
|
||||
escape = not escape
|
||||
elif char == '"' and not escape:
|
||||
in_string = False
|
||||
else:
|
||||
escape = False
|
||||
result.append(char)
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Not in string
|
||||
if char == '"':
|
||||
in_string = True
|
||||
result.append(char)
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Check for // comment
|
||||
if i + 1 < n and text[i:i+2] == '//':
|
||||
i += 2
|
||||
while i < n and text[i] != '\n':
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Check for /* comment
|
||||
if i + 1 < n and text[i:i+2] == '/*':
|
||||
i += 2
|
||||
while i + 1 < n and text[i:i+2] != '*/':
|
||||
i += 1
|
||||
i += 2
|
||||
continue
|
||||
|
||||
result.append(char)
|
||||
i += 1
|
||||
|
||||
return ''.join(result)
|
||||
|
||||
def extract_json(text: str) -> Optional[Any]:
|
||||
"""
|
||||
更加鲁棒的 JSON 提取工具。
|
||||
处理:
|
||||
1. Markdown 代码块 (```json ... ```)
|
||||
2. 首尾多余字符
|
||||
3. 同一个文本中多个 JSON 对象 (仅提取第一个)
|
||||
4. 简单的 JSON 修复 (末尾逗号等)
|
||||
5. C 风格注释 (// 和 /* */)
|
||||
"""
|
||||
if not text:
|
||||
return None
|
||||
|
||||
# 1. 清理明显的 Markdown 包装
|
||||
text = text.strip()
|
||||
|
||||
# 先尝试精确匹配 ```json ... ``` 或 ```...```
|
||||
md_match = re.search(r'```(?:json)?\s*\n?(.*?)\n?```', text, re.DOTALL)
|
||||
if md_match:
|
||||
text = md_match.group(1).strip()
|
||||
elif text.startswith("```"):
|
||||
# 回退:如果开头有 ``` 但没完整匹配
|
||||
text = re.sub(r'^```[a-z]*\n?', '', text)
|
||||
text = re.sub(r'\n?```\s*$', '', text)
|
||||
|
||||
# 2. 寻找第一个 JSON 起始符 { 或 [
|
||||
start_brace = text.find('{')
|
||||
start_bracket = text.find('[')
|
||||
|
||||
if start_brace == -1 and start_bracket == -1:
|
||||
return None
|
||||
|
||||
start_idx = start_brace if (start_bracket == -1 or (start_brace != -1 and start_brace < start_bracket)) else start_bracket
|
||||
|
||||
# 2.5 预处理:修复一些极其常见的 LLM 错误
|
||||
potential_json = text[start_idx:].strip()
|
||||
|
||||
# remove comments safely
|
||||
potential_json = _strip_comments(potential_json)
|
||||
|
||||
# b. 修复缺失开头引号的键: nodes": [ -> "nodes": [
|
||||
# 匹配模式: (空白或换行) 单词 紧跟引号和冒号
|
||||
potential_json = re.sub(r'([\{\,]\s*)([a-zA-Z_]\w*)\"\s*:', r'\1"\2":', potential_json)
|
||||
|
||||
# c. 修复缺失末尾引号的键: "nodes: [ -> "nodes": [
|
||||
potential_json = re.sub(r'([\{\,]\s*)\"([a-zA-Z_]\w*)\s*:', r'\1"\2":', potential_json)
|
||||
|
||||
# d. 修复完全缺失引号的键: nodes: [ -> "nodes": [
|
||||
# 注意避免匹配到像 http:// 这种内容,所以限定在 { 或 , 之后
|
||||
potential_json = re.sub(r'([\{\,]\s*)([a-zA-Z_]\w*)\s*:', r'\1"\2":', potential_json)
|
||||
|
||||
# 3. 使用 raw_decode 尝试解析
|
||||
decoder = json.JSONDecoder()
|
||||
|
||||
# 首先尝试直接解析(不做任何预处理)
|
||||
try:
|
||||
obj = json.loads(potential_json)
|
||||
return obj
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# 简单预处理:移除对象/列表末位多余逗号
|
||||
processed_json = re.sub(r',\s*([\]}])', r'\1', potential_json)
|
||||
|
||||
try:
|
||||
obj, end_pos = decoder.raw_decode(processed_json)
|
||||
return obj
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# e. 修复未终止的字符串字面量问题:移除值中的实际换行符
|
||||
# LLM 可能在字符串值中生成包含真实 newline 的内容,导致 JSON 非法
|
||||
def fix_multiline_strings(s):
|
||||
# 简单策略:将字符串值内的换行替换为空格
|
||||
lines = s.split('\n')
|
||||
result = []
|
||||
in_string = False
|
||||
for line in lines:
|
||||
# 计算未转义的引号数
|
||||
quote_count = line.count('"') - line.count('\\"')
|
||||
if in_string:
|
||||
result[-1] += ' ' + line.strip()
|
||||
else:
|
||||
result.append(line)
|
||||
|
||||
if quote_count % 2 == 1:
|
||||
in_string = not in_string
|
||||
return '\n'.join(result)
|
||||
|
||||
fixed_json = fix_multiline_strings(processed_json)
|
||||
|
||||
try:
|
||||
obj, end_pos = decoder.raw_decode(fixed_json)
|
||||
return obj
|
||||
except json.JSONDecodeError:
|
||||
try:
|
||||
# 4. 尝试处理单引号问题 (JSON 规范要求双引号,但 LLM 常输出单引号)
|
||||
# 这是一个简单的替换技巧,仅针对像 {'key': 'value'} 这样的结构
|
||||
# 注意:这可能会破坏包含单引号的字符串值,所以作为较后的回退
|
||||
fix_quotes = re.sub(r"'(.*?)':", r'"\1":', processed_json) # 修复键
|
||||
fix_quotes = re.sub(r":\s*'(.*?)'", r': "\1"', fix_quotes) # 修复简单值
|
||||
obj, end_pos = decoder.raw_decode(fix_quotes)
|
||||
return obj
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
try:
|
||||
# 5. 使用 ast.literal_eval 作为终极回退 (处理 Python 字典格式)
|
||||
# 提取第一个匹配的括号对内容
|
||||
# 寻找匹配的 { }
|
||||
stack = []
|
||||
for i, char in enumerate(potential_json):
|
||||
if char == '{': stack.append('{')
|
||||
elif char == '}':
|
||||
if stack: stack.pop()
|
||||
if not stack:
|
||||
content = potential_json[:i+1]
|
||||
return ast.literal_eval(content)
|
||||
except (ValueError, SyntaxError, MemoryError) as e:
|
||||
logger.warning(f"All JSON extraction attempts failed: {e}")
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error during JSON extraction: {e}")
|
||||
|
||||
return None
|
||||
Reference in New Issue
Block a user