Enhance OASIS simulation capabilities and profile generation

- Updated README.md to include detailed descriptions of new features, including Zep mixed search functionality and detailed persona generation for individual and group entities.
- Implemented a robust mechanism for checking simulation preparation status to avoid redundant profile generation.
- Added support for parallel profile generation, improving efficiency in creating OASIS Agent Profiles.
- Enhanced the simulation configuration generator to adopt a stepwise approach, ensuring better handling of complex configurations.
- Introduced error handling and retry mechanisms for LLM calls, improving the reliability of profile generation.
- Updated simulation management to support new API parameters for controlling profile generation behavior.
This commit is contained in:
666ghj
2025-12-01 19:40:07 +08:00
parent 5f159f6d88
commit af5c235695
5 changed files with 1602 additions and 408 deletions

View File

@@ -2,10 +2,17 @@
模拟配置智能生成器
使用LLM根据模拟需求、文档内容、图谱信息自动生成细致的模拟参数
实现全程自动化,无需人工设置参数
采用分步生成策略,避免一次性生成过长内容导致失败:
1. 生成时间配置
2. 生成事件配置
3. 分批生成Agent配置
4. 生成平台配置
"""
import json
from typing import Dict, Any, List, Optional
import math
from typing import Dict, Any, List, Optional, Callable
from dataclasses import dataclass, field, asdict
from datetime import datetime
@@ -17,6 +24,28 @@ from .zep_entity_reader import EntityNode, ZepEntityReader
logger = get_logger('mirofish.simulation_config')
# Hour-of-day activity profile for users following a China (Beijing time) daily schedule.
CHINA_TIMEZONE_CONFIG = {
    # Small hours: almost nobody is active.
    "dead_hours": list(range(0, 6)),
    # Early morning: users gradually wake up.
    "morning_hours": list(range(6, 9)),
    # Working hours.
    "work_hours": list(range(9, 19)),
    # Evening peak: the most active period.
    "peak_hours": list(range(19, 23)),
    # Late evening: activity tapers off.
    "night_hours": [23],
    # Activity multiplier for each of the periods above.
    "activity_multipliers": {
        "dead": 0.05,
        "morning": 0.4,
        "work": 0.7,
        "peak": 1.5,
        "night": 0.5,
    },
}
@dataclass
class AgentActivityConfig:
@@ -52,7 +81,7 @@ class AgentActivityConfig:
@dataclass
class TimeSimulationConfig:
"""时间模拟配置"""
"""时间模拟配置(基于中国人作息习惯)"""
# 模拟总时长(模拟小时数)
total_simulation_hours: int = 72 # 默认模拟72小时3天
@@ -63,13 +92,21 @@ class TimeSimulationConfig:
agents_per_hour_min: int = 5
agents_per_hour_max: int = 20
# 高峰时段(活跃度提升
peak_hours: List[int] = field(default_factory=lambda: [9, 10, 11, 14, 15, 20, 21, 22])
# 高峰时段(晚间19-22点中国人最活跃的时间
peak_hours: List[int] = field(default_factory=lambda: [19, 20, 21, 22])
peak_activity_multiplier: float = 1.5
# 低谷时段(活跃度降低
off_peak_hours: List[int] = field(default_factory=lambda: [0, 1, 2, 3, 4, 5, 6])
off_peak_activity_multiplier: float = 0.3
# 低谷时段(凌晨0-5点几乎无人活动
off_peak_hours: List[int] = field(default_factory=lambda: [0, 1, 2, 3, 4, 5])
off_peak_activity_multiplier: float = 0.05 # 凌晨活跃度极低
# 早间时段
morning_hours: List[int] = field(default_factory=lambda: [6, 7, 8])
morning_activity_multiplier: float = 0.4
# 工作时段
work_hours: List[int] = field(default_factory=lambda: [9, 10, 11, 12, 13, 14, 15, 16, 17, 18])
work_activity_multiplier: float = 0.7
@dataclass
@@ -137,12 +174,13 @@ class SimulationParameters:
def to_dict(self) -> Dict[str, Any]:
"""转换为字典"""
time_dict = asdict(self.time_config)
return {
"simulation_id": self.simulation_id,
"project_id": self.project_id,
"graph_id": self.graph_id,
"simulation_requirement": self.simulation_requirement,
"time_config": asdict(self.time_config),
"time_config": time_dict,
"agent_configs": [asdict(a) for a in self.agent_configs],
"event_config": asdict(self.event_config),
"twitter_config": asdict(self.twitter_config) if self.twitter_config else None,
@@ -164,10 +202,17 @@ class SimulationConfigGenerator:
使用LLM分析模拟需求、文档内容、图谱实体信息
自动生成最佳的模拟参数配置
采用分步生成策略:
1. 生成时间配置和事件配置(轻量级)
2. 分批生成Agent配置每批10-15个
3. 生成平台配置
"""
# 上下文最大字符数
MAX_CONTEXT_LENGTH = 50000
# 每批生成的Agent数量
AGENTS_PER_BATCH = 15
def __init__(
self,
@@ -197,9 +242,10 @@ class SimulationConfigGenerator:
entities: List[EntityNode],
enable_twitter: bool = True,
enable_reddit: bool = True,
progress_callback: Optional[Callable[[int, int, str], None]] = None,
) -> SimulationParameters:
"""
智能生成完整的模拟配置
智能生成完整的模拟配置(分步生成)
Args:
simulation_id: 模拟ID
@@ -210,37 +256,107 @@ class SimulationConfigGenerator:
entities: 过滤后的实体列表
enable_twitter: 是否启用Twitter
enable_reddit: 是否启用Reddit
progress_callback: 进度回调函数(current_step, total_steps, message)
Returns:
SimulationParameters: 完整的模拟参数
"""
logger.info(f"开始智能生成模拟配置: simulation_id={simulation_id}")
logger.info(f"开始智能生成模拟配置: simulation_id={simulation_id}, 实体数={len(entities)}")
# 1. 构建上下文信息截断到50000字符
# 计算总步骤数
num_batches = math.ceil(len(entities) / self.AGENTS_PER_BATCH)
total_steps = 3 + num_batches # 时间配置 + 事件配置 + N批Agent + 平台配置
current_step = 0
def report_progress(step: int, message: str):
nonlocal current_step
current_step = step
if progress_callback:
progress_callback(step, total_steps, message)
logger.info(f"[{step}/{total_steps}] {message}")
# 1. 构建基础上下文信息
context = self._build_context(
simulation_requirement=simulation_requirement,
document_text=document_text,
entities=entities
)
# 2. 调用LLM生成配置
llm_result = self._generate_config_with_llm(
context=context,
entities=entities,
enable_twitter=enable_twitter,
enable_reddit=enable_reddit
)
reasoning_parts = []
# 3. 构建SimulationParameters对象
params = self._build_parameters(
# ========== 步骤1: 生成时间配置 ==========
report_progress(1, "生成时间配置...")
time_config_result = self._generate_time_config(context, len(entities))
time_config = self._parse_time_config(time_config_result)
reasoning_parts.append(f"时间配置: {time_config_result.get('reasoning', '成功')}")
# ========== 步骤2: 生成事件配置 ==========
report_progress(2, "生成事件配置和热点话题...")
event_config_result = self._generate_event_config(context, simulation_requirement)
event_config = self._parse_event_config(event_config_result)
reasoning_parts.append(f"事件配置: {event_config_result.get('reasoning', '成功')}")
# ========== 步骤3-N: 分批生成Agent配置 ==========
all_agent_configs = []
for batch_idx in range(num_batches):
start_idx = batch_idx * self.AGENTS_PER_BATCH
end_idx = min(start_idx + self.AGENTS_PER_BATCH, len(entities))
batch_entities = entities[start_idx:end_idx]
report_progress(
3 + batch_idx,
f"生成Agent配置 ({start_idx + 1}-{end_idx}/{len(entities)})..."
)
batch_configs = self._generate_agent_configs_batch(
context=context,
entities=batch_entities,
start_idx=start_idx,
simulation_requirement=simulation_requirement
)
all_agent_configs.extend(batch_configs)
reasoning_parts.append(f"Agent配置: 成功生成 {len(all_agent_configs)}")
# ========== 最后一步: 生成平台配置 ==========
report_progress(total_steps, "生成平台配置...")
twitter_config = None
reddit_config = None
if enable_twitter:
twitter_config = PlatformConfig(
platform="twitter",
recency_weight=0.4,
popularity_weight=0.3,
relevance_weight=0.3,
viral_threshold=10,
echo_chamber_strength=0.5
)
if enable_reddit:
reddit_config = PlatformConfig(
platform="reddit",
recency_weight=0.3,
popularity_weight=0.4,
relevance_weight=0.3,
viral_threshold=15,
echo_chamber_strength=0.6
)
# 构建最终参数
params = SimulationParameters(
simulation_id=simulation_id,
project_id=project_id,
graph_id=graph_id,
simulation_requirement=simulation_requirement,
entities=entities,
llm_result=llm_result,
enable_twitter=enable_twitter,
enable_reddit=enable_reddit
time_config=time_config,
agent_configs=all_agent_configs,
event_config=event_config,
twitter_config=twitter_config,
reddit_config=reddit_config,
llm_model=self.model_name,
llm_base_url=self.base_url,
generation_reasoning=" | ".join(reasoning_parts)
)
logger.info(f"模拟配置生成完成: {len(params.agent_configs)} 个Agent配置")
@@ -297,288 +413,397 @@ class SimulationConfigGenerator:
return "\n".join(lines)
def _generate_config_with_llm(
def _call_llm_with_retry(self, prompt: str, system_prompt: str) -> Dict[str, Any]:
"""带重试的LLM调用包含JSON修复逻辑"""
import re
max_attempts = 3
last_error = None
for attempt in range(max_attempts):
try:
response = self.client.chat.completions.create(
model=self.model_name,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt}
],
response_format={"type": "json_object"},
temperature=0.7 - (attempt * 0.1) # 每次重试降低温度
# 不设置max_tokens让LLM自由发挥
)
content = response.choices[0].message.content
finish_reason = response.choices[0].finish_reason
# 检查是否被截断
if finish_reason == 'length':
logger.warning(f"LLM输出被截断 (attempt {attempt+1})")
content = self._fix_truncated_json(content)
# 尝试解析JSON
try:
return json.loads(content)
except json.JSONDecodeError as e:
logger.warning(f"JSON解析失败 (attempt {attempt+1}): {str(e)[:80]}")
# 尝试修复JSON
fixed = self._try_fix_config_json(content)
if fixed:
return fixed
last_error = e
except Exception as e:
logger.warning(f"LLM调用失败 (attempt {attempt+1}): {str(e)[:80]}")
last_error = e
import time
time.sleep(2 * (attempt + 1))
raise last_error or Exception("LLM调用失败")
def _fix_truncated_json(self, content: str) -> str:
"""修复被截断的JSON"""
content = content.strip()
# 计算未闭合的括号
open_braces = content.count('{') - content.count('}')
open_brackets = content.count('[') - content.count(']')
# 检查是否有未闭合的字符串
if content and content[-1] not in '",}]':
content += '"'
# 闭合括号
content += ']' * open_brackets
content += '}' * open_braces
return content
def _try_fix_config_json(self, content: str) -> Optional[Dict[str, Any]]:
"""尝试修复配置JSON"""
import re
# 修复被截断的情况
content = self._fix_truncated_json(content)
# 提取JSON部分
json_match = re.search(r'\{[\s\S]*\}', content)
if json_match:
json_str = json_match.group()
# 移除字符串中的换行符
def fix_string(match):
s = match.group(0)
s = s.replace('\n', ' ').replace('\r', ' ')
s = re.sub(r'\s+', ' ', s)
return s
json_str = re.sub(r'"[^"\\]*(?:\\.[^"\\]*)*"', fix_string, json_str)
try:
return json.loads(json_str)
except:
# 尝试移除所有控制字符
json_str = re.sub(r'[\x00-\x1f\x7f-\x9f]', ' ', json_str)
json_str = re.sub(r'\s+', ' ', json_str)
try:
return json.loads(json_str)
except:
pass
return None
def _generate_time_config(self, context: str, num_entities: int) -> Dict[str, Any]:
"""生成时间配置"""
prompt = f"""基于以下模拟需求,生成时间模拟配置。
{context[:5000]}
## 任务
请生成时间配置JSON注意
- 用户群体为中国人,需符合北京时间作息习惯
- 凌晨0-5点几乎无人活动活跃度系数0.05
- 早上6-8点逐渐活跃活跃度系数0.4
- 工作时间9-18点中等活跃活跃度系数0.7
- 晚间19-22点是高峰期活跃度系数1.5
- 23点后活跃度下降活跃度系数0.5
当前实体数量: {num_entities}
返回JSON格式不要markdown
{{
"total_simulation_hours": <72-168根据事件性质决定>,
"minutes_per_round": <15-60>,
"agents_per_hour_min": <每小时最少激活Agent数>,
"agents_per_hour_max": <每小时最多激活Agent数>,
"peak_hours": [19, 20, 21, 22],
"off_peak_hours": [0, 1, 2, 3, 4, 5],
"morning_hours": [6, 7, 8],
"work_hours": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
"reasoning": "<简要说明>"
}}"""
system_prompt = "你是社交媒体模拟专家。返回纯JSON格式时间配置需符合中国人作息习惯。"
try:
return self._call_llm_with_retry(prompt, system_prompt)
except Exception as e:
logger.warning(f"时间配置LLM生成失败: {e}, 使用默认配置")
return self._get_default_time_config(num_entities)
def _get_default_time_config(self, num_entities: int) -> Dict[str, Any]:
"""获取默认时间配置(中国人作息)"""
return {
"total_simulation_hours": 72,
"minutes_per_round": 30,
"agents_per_hour_min": max(1, num_entities // 15),
"agents_per_hour_max": max(5, num_entities // 5),
"peak_hours": [19, 20, 21, 22],
"off_peak_hours": [0, 1, 2, 3, 4, 5],
"morning_hours": [6, 7, 8],
"work_hours": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
"reasoning": "使用默认中国人作息配置"
}
def _parse_time_config(self, result: Dict[str, Any]) -> TimeSimulationConfig:
    """Convert a time-configuration dict into a ``TimeSimulationConfig``.

    Durations, per-hour agent counts and the hour lists are read from
    ``result``, each with a default. The four activity multipliers are always
    set to fixed values and never read from ``result``.

    Args:
        result: Dict returned by the time-configuration generation step.

    Returns:
        The populated ``TimeSimulationConfig``.
    """
    # (field name, default used when the key is missing from ``result``)
    llm_driven_defaults = (
        ("total_simulation_hours", 72),
        ("minutes_per_round", 30),
        ("agents_per_hour_min", 5),
        ("agents_per_hour_max", 20),
        ("peak_hours", [19, 20, 21, 22]),
        ("off_peak_hours", [0, 1, 2, 3, 4, 5]),
        ("morning_hours", [6, 7, 8]),
        ("work_hours", list(range(9, 19))),
    )
    settings = {
        name: result.get(name, fallback)
        for name, fallback in llm_driven_defaults
    }

    # Multipliers are pinned rather than taken from the LLM output.
    settings.update(
        off_peak_activity_multiplier=0.05,
        morning_activity_multiplier=0.4,
        work_activity_multiplier=0.7,
        peak_activity_multiplier=1.5,
    )
    return TimeSimulationConfig(**settings)
def _generate_event_config(self, context: str, simulation_requirement: str) -> Dict[str, Any]:
"""生成事件配置"""
prompt = f"""基于以下模拟需求,生成事件配置。
模拟需求: {simulation_requirement}
{context[:3000]}
## 任务
请生成事件配置JSON
- 提取热点话题关键词
- 描述舆论发展方向
- 设计初始帖子内容
返回JSON格式不要markdown
{{
"hot_topics": ["关键词1", "关键词2", ...],
"narrative_direction": "<舆论发展方向描述>",
"initial_posts": [
{{"content": "帖子内容", "poster_type": "MediaOutlet"}},
...
],
"reasoning": "<简要说明>"
}}"""
system_prompt = "你是舆论分析专家。返回纯JSON格式。"
try:
return self._call_llm_with_retry(prompt, system_prompt)
except Exception as e:
logger.warning(f"事件配置LLM生成失败: {e}, 使用默认配置")
return {
"hot_topics": [],
"narrative_direction": "",
"initial_posts": [],
"reasoning": "使用默认配置"
}
def _parse_event_config(self, result: Dict[str, Any]) -> EventConfig:
    """Convert an event-configuration dict into an ``EventConfig``.

    Args:
        result: Dict returned by the event-configuration generation step.

    Returns:
        An ``EventConfig`` whose posts, topics and narrative come from
        ``result`` (empty when missing). ``scheduled_events`` is always empty.
    """
    posts = result.get("initial_posts", [])
    topics = result.get("hot_topics", [])
    direction = result.get("narrative_direction", "")

    return EventConfig(
        initial_posts=posts,
        scheduled_events=[],
        hot_topics=topics,
        narrative_direction=direction,
    )
def _generate_agent_configs_batch(
self,
context: str,
entities: List[EntityNode],
enable_twitter: bool,
enable_reddit: bool
) -> Dict[str, Any]:
"""调用LLM生成配置"""
start_idx: int,
simulation_requirement: str
) -> List[AgentActivityConfig]:
"""分批生成Agent配置"""
# 构建实体列表用于Agent配置
# 构建实体信息
entity_list = []
for i, e in enumerate(entities):
entity_list.append({
"agent_id": i,
"entity_uuid": e.uuid,
"agent_id": start_idx + i,
"entity_name": e.name,
"entity_type": e.get_entity_type() or "Unknown",
"summary": e.summary[:200] if e.summary else ""
"summary": e.summary[:150] if e.summary else ""
})
prompt = f"""你是一个社交媒体舆论模拟专家。请根据以下信息,生成详细的模拟参数配置。
prompt = f"""基于以下信息,为每个实体生成社交媒体活动配置。
{context}
模拟需求: {simulation_requirement}
## 实体列表(需要为每个实体生成活动配置)
## 实体列表
```json
{json.dumps(entity_list, ensure_ascii=False, indent=2)}
```
## 任务
请生成一个JSON配置包含以下部分
为每个实体生成活动配置,注意
- **时间符合中国人作息**凌晨0-5点几乎不活动晚间19-22点最活跃
- **官方机构**University/GovernmentAgency活跃度低(0.1-0.3),工作时间(9-17)活动,响应慢(60-240分钟),影响力高(2.5-3.0)
- **媒体**MediaOutlet活跃度中(0.4-0.6),全天活动(8-23),响应快(5-30分钟),影响力高(2.0-2.5)
- **个人**Student/Person/Alumni活跃度高(0.6-0.9),主要晚间活动(18-23),响应快(1-15分钟),影响力低(0.8-1.2)
- **公众人物/专家**:活跃度中(0.4-0.6),影响力中高(1.5-2.0)
1. **time_config** - 时间模拟配置
- total_simulation_hours: 模拟总时长小时根据事件性质决定短期热点24-72小时长期舆论168-336小时
- minutes_per_round: 每轮代表的时间分钟建议15-60
- agents_per_hour_min/max: 每小时激活的Agent数量范围
- peak_hours: 高峰时段列表0-23
- off_peak_hours: 低谷时段列表
2. **agent_configs** - 每个Agent的活动配置必须为每个实体生成
对于每个agent_id设置
- activity_level: 活跃度(0.0-1.0)官方机构通常0.1-0.3媒体0.3-0.5个人0.5-0.9
- posts_per_hour: 每小时发帖频率官方机构0.05-0.2媒体0.5-2个人0.1-1
- comments_per_hour: 每小时评论频率
- active_hours: 活跃时间段列表,官方通常工作时间,个人更分散
- response_delay_min/max: 响应延迟(模拟分钟),官方较慢(30-180),个人较快(1-30)
- sentiment_bias: 情感倾向(-1到1),根据实体立场设置
- stance: 立场(supportive/opposing/neutral/observer)
- influence_weight: 影响力权重,知名人物和媒体较高
3. **event_config** - 事件配置
- initial_posts: 初始帖子列表包含content和poster_agent_id
- hot_topics: 热点话题关键词列表
- narrative_direction: 舆论发展方向描述
4. **platform_configs** - 平台配置(如果启用)
- viral_threshold: 病毒传播阈值
- echo_chamber_strength: 回声室效应强度(0-1)
5. **reasoning** - 你的推理说明,解释为什么这样设置参数
## 重要原则
- 官方机构University、GovernmentAgency发言频率低但影响力大
- 媒体MediaOutlet发言频率中等传播速度快
- 个人Student、PublicFigure发言频率高但影响力分散
- 根据模拟需求判断各实体的立场和情感倾向
- 时间配置要符合真实社交媒体的使用规律
请返回JSON格式不要包含markdown代码块标记。"""
返回JSON格式不要markdown
{{
"agent_configs": [
{{
"agent_id": <必须与输入一致>,
"activity_level": <0.0-1.0>,
"posts_per_hour": <发帖频率>,
"comments_per_hour": <评论频率>,
"active_hours": [<活跃小时列表,考虑中国人作息>],
"response_delay_min": <最小响应延迟分钟>,
"response_delay_max": <最大响应延迟分钟>,
"sentiment_bias": <-1.0到1.0>,
"stance": "<supportive/opposing/neutral/observer>",
"influence_weight": <影响力权重>
}},
...
]
}}"""
system_prompt = "你是社交媒体行为分析专家。返回纯JSON配置需符合中国人作息习惯。"
try:
# 使用重试机制调用LLM API
from ..utils.retry import RetryableAPIClient
retry_client = RetryableAPIClient(max_retries=3, initial_delay=2.0, max_delay=60.0)
def call_llm():
return self.client.chat.completions.create(
model=self.model_name,
messages=[
{
"role": "system",
"content": "你是社交媒体舆论模拟专家擅长设计真实的模拟参数。返回纯JSON格式不要markdown。"
},
{"role": "user", "content": prompt}
],
response_format={"type": "json_object"},
temperature=0.7,
max_tokens=8000
)
response = retry_client.call_with_retry(call_llm)
result = json.loads(response.choices[0].message.content)
logger.info(f"LLM配置生成成功")
return result
result = self._call_llm_with_retry(prompt, system_prompt)
llm_configs = {cfg["agent_id"]: cfg for cfg in result.get("agent_configs", [])}
except Exception as e:
logger.error(f"LLM配置生成失败(已重试): {str(e)}")
# 返回默认配置
return self._generate_default_config(entities)
def _generate_default_config(self, entities: List[EntityNode]) -> Dict[str, Any]:
"""生成默认配置LLM失败时的fallback"""
agent_configs = []
for i, e in enumerate(entities):
entity_type = (e.get_entity_type() or "Unknown").lower()
# 根据实体类型设置默认参数
if entity_type in ["university", "governmentagency", "ngo"]:
config = {
"agent_id": i,
"activity_level": 0.2,
"posts_per_hour": 0.1,
"comments_per_hour": 0.05,
"active_hours": list(range(9, 18)),
"response_delay_min": 60,
"response_delay_max": 240,
"sentiment_bias": 0.0,
"stance": "neutral",
"influence_weight": 3.0
}
elif entity_type in ["mediaoutlet"]:
config = {
"agent_id": i,
"activity_level": 0.6,
"posts_per_hour": 1.0,
"comments_per_hour": 0.5,
"active_hours": list(range(6, 24)),
"response_delay_min": 5,
"response_delay_max": 30,
"sentiment_bias": 0.0,
"stance": "observer",
"influence_weight": 2.5
}
elif entity_type in ["publicfigure", "expert"]:
config = {
"agent_id": i,
"activity_level": 0.5,
"posts_per_hour": 0.3,
"comments_per_hour": 0.5,
"active_hours": list(range(8, 23)),
"response_delay_min": 10,
"response_delay_max": 60,
"sentiment_bias": 0.0,
"stance": "neutral",
"influence_weight": 2.0
}
else: # Student, Person, etc.
config = {
"agent_id": i,
"activity_level": 0.7,
"posts_per_hour": 0.5,
"comments_per_hour": 1.0,
"active_hours": list(range(7, 24)),
"response_delay_min": 1,
"response_delay_max": 20,
"sentiment_bias": 0.0,
"stance": "neutral",
"influence_weight": 1.0
}
agent_configs.append(config)
return {
"time_config": {
"total_simulation_hours": 72,
"minutes_per_round": 30,
"agents_per_hour_min": max(1, len(entities) // 10),
"agents_per_hour_max": max(5, len(entities) // 3),
"peak_hours": [9, 10, 11, 14, 15, 20, 21, 22],
"off_peak_hours": [0, 1, 2, 3, 4, 5]
},
"agent_configs": agent_configs,
"event_config": {
"initial_posts": [],
"hot_topics": [],
"narrative_direction": ""
},
"reasoning": "使用默认配置LLM生成失败"
}
def _build_parameters(
self,
simulation_id: str,
project_id: str,
graph_id: str,
simulation_requirement: str,
entities: List[EntityNode],
llm_result: Dict[str, Any],
enable_twitter: bool,
enable_reddit: bool
) -> SimulationParameters:
"""根据LLM结果构建SimulationParameters对象"""
# 时间配置
time_cfg = llm_result.get("time_config", {})
time_config = TimeSimulationConfig(
total_simulation_hours=time_cfg.get("total_simulation_hours", 72),
minutes_per_round=time_cfg.get("minutes_per_round", 30),
agents_per_hour_min=time_cfg.get("agents_per_hour_min", 5),
agents_per_hour_max=time_cfg.get("agents_per_hour_max", 20),
peak_hours=time_cfg.get("peak_hours", [9, 10, 11, 14, 15, 20, 21, 22]),
off_peak_hours=time_cfg.get("off_peak_hours", [0, 1, 2, 3, 4, 5]),
peak_activity_multiplier=time_cfg.get("peak_activity_multiplier", 1.5),
off_peak_activity_multiplier=time_cfg.get("off_peak_activity_multiplier", 0.3)
)
# Agent配置
agent_configs = []
llm_agent_configs = {cfg["agent_id"]: cfg for cfg in llm_result.get("agent_configs", [])}
logger.warning(f"Agent配置批次LLM生成失败: {e}, 使用规则生成")
llm_configs = {}
# 构建AgentActivityConfig对象
configs = []
for i, entity in enumerate(entities):
cfg = llm_agent_configs.get(i, {})
agent_id = start_idx + i
cfg = llm_configs.get(agent_id, {})
agent_config = AgentActivityConfig(
agent_id=i,
# 如果LLM没有生成使用规则生成
if not cfg:
cfg = self._generate_agent_config_by_rule(entity)
config = AgentActivityConfig(
agent_id=agent_id,
entity_uuid=entity.uuid,
entity_name=entity.name,
entity_type=entity.get_entity_type() or "Unknown",
activity_level=cfg.get("activity_level", 0.5),
posts_per_hour=cfg.get("posts_per_hour", 0.5),
comments_per_hour=cfg.get("comments_per_hour", 1.0),
active_hours=cfg.get("active_hours", list(range(8, 23))),
active_hours=cfg.get("active_hours", list(range(9, 23))),
response_delay_min=cfg.get("response_delay_min", 5),
response_delay_max=cfg.get("response_delay_max", 60),
sentiment_bias=cfg.get("sentiment_bias", 0.0),
stance=cfg.get("stance", "neutral"),
influence_weight=cfg.get("influence_weight", 1.0)
)
agent_configs.append(agent_config)
configs.append(config)
# 事件配置
event_cfg = llm_result.get("event_config", {})
event_config = EventConfig(
initial_posts=event_cfg.get("initial_posts", []),
scheduled_events=event_cfg.get("scheduled_events", []),
hot_topics=event_cfg.get("hot_topics", []),
narrative_direction=event_cfg.get("narrative_direction", "")
)
return configs
def _generate_agent_config_by_rule(self, entity: EntityNode) -> Dict[str, Any]:
"""基于规则生成单个Agent配置中国人作息"""
entity_type = (entity.get_entity_type() or "Unknown").lower()
# 平台配置
twitter_config = None
reddit_config = None
platform_cfgs = llm_result.get("platform_configs", {})
if enable_twitter:
tw_cfg = platform_cfgs.get("twitter", {})
twitter_config = PlatformConfig(
platform="twitter",
recency_weight=tw_cfg.get("recency_weight", 0.4),
popularity_weight=tw_cfg.get("popularity_weight", 0.3),
relevance_weight=tw_cfg.get("relevance_weight", 0.3),
viral_threshold=tw_cfg.get("viral_threshold", 10),
echo_chamber_strength=tw_cfg.get("echo_chamber_strength", 0.5)
)
if enable_reddit:
rd_cfg = platform_cfgs.get("reddit", {})
reddit_config = PlatformConfig(
platform="reddit",
recency_weight=rd_cfg.get("recency_weight", 0.3),
popularity_weight=rd_cfg.get("popularity_weight", 0.4),
relevance_weight=rd_cfg.get("relevance_weight", 0.3),
viral_threshold=rd_cfg.get("viral_threshold", 15),
echo_chamber_strength=rd_cfg.get("echo_chamber_strength", 0.6)
)
return SimulationParameters(
simulation_id=simulation_id,
project_id=project_id,
graph_id=graph_id,
simulation_requirement=simulation_requirement,
time_config=time_config,
agent_configs=agent_configs,
event_config=event_config,
twitter_config=twitter_config,
reddit_config=reddit_config,
llm_model=self.model_name,
llm_base_url=self.base_url,
generation_reasoning=llm_result.get("reasoning", "")
)
if entity_type in ["university", "governmentagency", "ngo"]:
# 官方机构:工作时间活动,低频率,高影响力
return {
"activity_level": 0.2,
"posts_per_hour": 0.1,
"comments_per_hour": 0.05,
"active_hours": list(range(9, 18)), # 9:00-17:59
"response_delay_min": 60,
"response_delay_max": 240,
"sentiment_bias": 0.0,
"stance": "neutral",
"influence_weight": 3.0
}
elif entity_type in ["mediaoutlet"]:
# 媒体:全天活动,中等频率,高影响力
return {
"activity_level": 0.5,
"posts_per_hour": 0.8,
"comments_per_hour": 0.3,
"active_hours": list(range(7, 24)), # 7:00-23:59
"response_delay_min": 5,
"response_delay_max": 30,
"sentiment_bias": 0.0,
"stance": "observer",
"influence_weight": 2.5
}
elif entity_type in ["professor", "expert", "official"]:
# 专家/教授:工作+晚间活动,中等频率
return {
"activity_level": 0.4,
"posts_per_hour": 0.3,
"comments_per_hour": 0.5,
"active_hours": list(range(8, 22)), # 8:00-21:59
"response_delay_min": 15,
"response_delay_max": 90,
"sentiment_bias": 0.0,
"stance": "neutral",
"influence_weight": 2.0
}
elif entity_type in ["student"]:
# 学生:晚间为主,高频率
return {
"activity_level": 0.8,
"posts_per_hour": 0.6,
"comments_per_hour": 1.5,
"active_hours": [8, 9, 10, 11, 12, 13, 18, 19, 20, 21, 22, 23], # 上午+晚间
"response_delay_min": 1,
"response_delay_max": 15,
"sentiment_bias": 0.0,
"stance": "neutral",
"influence_weight": 0.8
}
elif entity_type in ["alumni"]:
# 校友:晚间为主
return {
"activity_level": 0.6,
"posts_per_hour": 0.4,
"comments_per_hour": 0.8,
"active_hours": [12, 13, 19, 20, 21, 22, 23], # 午休+晚间
"response_delay_min": 5,
"response_delay_max": 30,
"sentiment_bias": 0.0,
"stance": "neutral",
"influence_weight": 1.0
}
else:
# 普通人:晚间高峰
return {
"activity_level": 0.7,
"posts_per_hour": 0.5,
"comments_per_hour": 1.2,
"active_hours": [9, 10, 11, 12, 13, 18, 19, 20, 21, 22, 23], # 白天+晚间
"response_delay_min": 2,
"response_delay_max": 20,
"sentiment_bias": 0.0,
"stance": "neutral",
"influence_weight": 1.0
}