Enhance OASIS simulation capabilities and profile generation

- Updated README.md to include detailed descriptions of new features, including Zep mixed search functionality and detailed persona generation for individual and group entities.
- Implemented a robust mechanism for checking simulation preparation status to avoid redundant profile generation.
- Added support for parallel profile generation, improving efficiency in creating OASIS Agent Profiles.
- Enhanced the simulation configuration generator to adopt a stepwise approach, ensuring better handling of complex configurations.
- Introduced error handling and retry mechanisms for LLM calls, improving the reliability of profile generation.
- Updated simulation management to support new API parameters for controlling profile generation behavior.
This commit is contained in:
666ghj
2025-12-01 19:40:07 +08:00
parent 5f159f6d88
commit af5c235695
5 changed files with 1602 additions and 408 deletions

View File

@@ -213,6 +213,112 @@ def create_simulation():
}), 500
def _check_simulation_prepared(simulation_id: str) -> tuple:
"""
检查模拟是否已经准备完成
检查条件:
1. state.json 存在且 status 为 "ready"
2. 必要文件存在reddit_profiles.json, twitter_profiles.csv, simulation_config.json
Args:
simulation_id: 模拟ID
Returns:
(is_prepared: bool, info: dict)
"""
import os
from ..config import Config
simulation_dir = os.path.join(Config.OASIS_SIMULATION_DATA_DIR, simulation_id)
# 检查目录是否存在
if not os.path.exists(simulation_dir):
return False, {"reason": "模拟目录不存在"}
# 必要文件列表
required_files = [
"state.json",
"simulation_config.json",
"reddit_profiles.json",
"twitter_profiles.csv",
"run_reddit_simulation.py",
"run_twitter_simulation.py",
"run_parallel_simulation.py"
]
# 检查文件是否存在
existing_files = []
missing_files = []
for f in required_files:
file_path = os.path.join(simulation_dir, f)
if os.path.exists(file_path):
existing_files.append(f)
else:
missing_files.append(f)
if missing_files:
return False, {
"reason": "缺少必要文件",
"missing_files": missing_files,
"existing_files": existing_files
}
# 检查state.json中的状态
state_file = os.path.join(simulation_dir, "state.json")
try:
import json
with open(state_file, 'r', encoding='utf-8') as f:
state_data = json.load(f)
status = state_data.get("status", "")
# 如果状态是ready或preparing已有文件认为准备完成
if status in ["ready", "preparing"] and state_data.get("config_generated"):
# 获取文件统计信息
profiles_file = os.path.join(simulation_dir, "reddit_profiles.json")
config_file = os.path.join(simulation_dir, "simulation_config.json")
profiles_count = 0
if os.path.exists(profiles_file):
with open(profiles_file, 'r', encoding='utf-8') as f:
profiles_data = json.load(f)
profiles_count = len(profiles_data) if isinstance(profiles_data, list) else 0
# 如果状态是preparing但文件已完成自动更新状态为ready
if status == "preparing":
try:
state_data["status"] = "ready"
from datetime import datetime
state_data["updated_at"] = datetime.now().isoformat()
with open(state_file, 'w', encoding='utf-8') as f:
json.dump(state_data, f, ensure_ascii=False, indent=2)
logger.info(f"自动更新模拟状态: {simulation_id} preparing -> ready")
status = "ready"
except Exception as e:
logger.warning(f"自动更新状态失败: {e}")
return True, {
"status": status,
"entities_count": state_data.get("entities_count", 0),
"profiles_count": profiles_count,
"entity_types": state_data.get("entity_types", []),
"config_generated": state_data.get("config_generated", False),
"created_at": state_data.get("created_at"),
"updated_at": state_data.get("updated_at"),
"existing_files": existing_files
}
else:
return False, {
"reason": f"状态不是ready: {status}",
"status": status,
"config_generated": state_data.get("config_generated", False)
}
except Exception as e:
return False, {"reason": f"读取状态文件失败: {str(e)}"}
@simulation_bp.route('/prepare', methods=['POST'])
def prepare_simulation():
"""
@@ -221,17 +327,25 @@ def prepare_simulation():
这是一个耗时操作接口会立即返回task_id
使用 GET /api/simulation/prepare/status 查询进度
特性:
- 自动检测已完成的准备工作,避免重复生成
- 如果已准备完成,直接返回已有结果
- 支持强制重新生成force_regenerate=true
步骤:
1. 从Zep图谱读取并过滤实体
2. 为每个实体生成OASIS Agent Profile带重试机制
3. LLM智能生成模拟配置(带重试机制)
4. 保存配置文件和预设脚本
1. 检查是否已有完成的准备工作
2. 从Zep图谱读取并过滤实体
3. 为每个实体生成OASIS Agent Profile(带重试机制)
4. LLM智能生成模拟配置带重试机制
5. 保存配置文件和预设脚本
请求JSON
{
"simulation_id": "sim_xxxx", // 必填模拟ID
"entity_types": ["Student", "PublicFigure"], // 可选,指定实体类型
"use_llm_for_profiles": true // 可选是否用LLM生成人设
"use_llm_for_profiles": true, // 可选是否用LLM生成人设
"parallel_profile_count": 5, // 可选并行生成人设数量默认5
"force_regenerate": false // 可选强制重新生成默认false
}
返回:
@@ -239,14 +353,17 @@ def prepare_simulation():
"success": true,
"data": {
"simulation_id": "sim_xxxx",
"task_id": "task_xxxx",
"status": "preparing",
"message": "准备任务已启动"
"task_id": "task_xxxx", // 新任务时返回
"status": "preparing|ready",
"message": "准备任务已启动|已有完成的准备工作",
"already_prepared": true|false // 是否已准备完成
}
}
"""
import threading
import os
from ..models.task import TaskManager, TaskStatus
from ..config import Config
try:
data = request.get_json() or {}
@@ -267,6 +384,25 @@ def prepare_simulation():
"error": f"模拟不存在: {simulation_id}"
}), 404
# 检查是否强制重新生成
force_regenerate = data.get('force_regenerate', False)
# 检查是否已经准备完成(避免重复生成)
if not force_regenerate:
is_prepared, prepare_info = _check_simulation_prepared(simulation_id)
if is_prepared:
logger.info(f"模拟 {simulation_id} 已准备完成,跳过重复生成")
return jsonify({
"success": True,
"data": {
"simulation_id": simulation_id,
"status": "ready",
"message": "已有完成的准备工作,无需重复生成",
"already_prepared": True,
"prepare_info": prepare_info
}
})
# 从项目获取必要信息
project = ProjectManager.get_project(state.project_id)
if not project:
@@ -288,6 +424,7 @@ def prepare_simulation():
entity_types_list = data.get('entity_types')
use_llm_for_profiles = data.get('use_llm_for_profiles', True)
parallel_profile_count = data.get('parallel_profile_count', 5)
# 创建异步任务
task_manager = TaskManager()
@@ -384,7 +521,8 @@ def prepare_simulation():
document_text=document_text,
defined_entity_types=entity_types_list,
use_llm_for_profiles=use_llm_for_profiles,
progress_callback=progress_callback
progress_callback=progress_callback,
parallel_profile_count=parallel_profile_count
)
# 任务完成
@@ -414,7 +552,8 @@ def prepare_simulation():
"simulation_id": simulation_id,
"task_id": task_id,
"status": "preparing",
"message": "准备任务已启动,请通过 /api/simulation/prepare/status 查询进度"
"message": "准备任务已启动,请通过 /api/simulation/prepare/status 查询进度",
"already_prepared": False
}
})
@@ -438,9 +577,14 @@ def get_prepare_status():
"""
查询准备任务进度
支持两种查询方式:
1. 通过task_id查询正在进行的任务进度
2. 通过simulation_id检查是否已有完成的准备工作
请求JSON
{
"task_id": "task_xxxx" // 必填prepare返回的task_id
"task_id": "task_xxxx", // 可选prepare返回的task_id
"simulation_id": "sim_xxxx" // 可选模拟ID用于检查已完成的准备
}
返回:
@@ -448,21 +592,11 @@ def get_prepare_status():
"success": true,
"data": {
"task_id": "task_xxxx",
"status": "processing", // pending/processing/completed/failed
"progress": 45, // 0-100 总进度
"message": "[2/4] 生成Agent人设: 35/93 - 生成 教授张三 的人设...",
"progress_detail": { // 详细进度信息
"current_stage": "generating_profiles",
"current_stage_name": "生成Agent人设",
"stage_index": 2, // 当前阶段序号
"total_stages": 4, // 总阶段数
"stage_progress": 38, // 阶段内进度 0-100
"current_item": 35, // 当前处理项目序号
"total_items": 93, // 当前阶段总项目数
"item_description": "生成 教授张三 的人设..."
},
"result": null, // 完成后返回结果
"error": null // 失败时返回错误信息
"status": "processing|completed|ready",
"progress": 45,
"message": "...",
"already_prepared": true|false, // 是否已有完成的准备
"prepare_info": {...} // 已准备完成时的详细信息
}
}
"""
@@ -472,24 +606,75 @@ def get_prepare_status():
data = request.get_json() or {}
task_id = data.get('task_id')
simulation_id = data.get('simulation_id')
# 如果提供了simulation_id先检查是否已准备完成
if simulation_id:
is_prepared, prepare_info = _check_simulation_prepared(simulation_id)
if is_prepared:
return jsonify({
"success": True,
"data": {
"simulation_id": simulation_id,
"status": "ready",
"progress": 100,
"message": "已有完成的准备工作",
"already_prepared": True,
"prepare_info": prepare_info
}
})
# 如果没有task_id返回错误
if not task_id:
if simulation_id:
# 有simulation_id但未准备完成
return jsonify({
"success": True,
"data": {
"simulation_id": simulation_id,
"status": "not_started",
"progress": 0,
"message": "尚未开始准备,请调用 /api/simulation/prepare 开始",
"already_prepared": False
}
})
return jsonify({
"success": False,
"error": "请提供 task_id"
"error": "请提供 task_id 或 simulation_id"
}), 400
task_manager = TaskManager()
task = task_manager.get_task(task_id)
if not task:
# 任务不存在但如果有simulation_id检查是否已准备完成
if simulation_id:
is_prepared, prepare_info = _check_simulation_prepared(simulation_id)
if is_prepared:
return jsonify({
"success": True,
"data": {
"simulation_id": simulation_id,
"task_id": task_id,
"status": "ready",
"progress": 100,
"message": "任务已完成(准备工作已存在)",
"already_prepared": True,
"prepare_info": prepare_info
}
})
return jsonify({
"success": False,
"error": f"任务不存在: {task_id}"
}), 404
task_dict = task.to_dict()
task_dict["already_prepared"] = False
return jsonify({
"success": True,
"data": task.to_dict()
"data": task_dict
})
except Exception as e: