fix: translate Chinese prompts to English in simulation config and profile generators
- simulation_config_generator.py: translate all LLM prompts and system messages
- oasis_profile_generator.py: translate profile generation prompts

Ensures get_language_instruction() controls output language instead of being overridden by Chinese prompt context.
This commit is contained in:
@@ -671,7 +671,7 @@ class OasisProfileGenerator:
|
|||||||
|
|
||||||
def _get_system_prompt(self, is_individual: bool) -> str:
|
def _get_system_prompt(self, is_individual: bool) -> str:
|
||||||
"""获取系统提示词"""
|
"""获取系统提示词"""
|
||||||
base_prompt = "你是社交媒体用户画像生成专家。生成详细、真实的人设用于舆论模拟,最大程度还原已有现实情况。必须返回有效的JSON格式,所有字符串值不能包含未转义的换行符。"
|
base_prompt = "You are a social media user persona generation expert. Generate detailed, realistic personas for opinion simulation, maximizing fidelity to real-world situations. Must return valid JSON format, all string values must not contain unescaped newlines."
|
||||||
return f"{base_prompt}\n\n{get_language_instruction()}"
|
return f"{base_prompt}\n\n{get_language_instruction()}"
|
||||||
|
|
||||||
def _build_individual_persona_prompt(
|
def _build_individual_persona_prompt(
|
||||||
@@ -687,7 +687,7 @@ class OasisProfileGenerator:
|
|||||||
attrs_str = json.dumps(entity_attributes, ensure_ascii=False) if entity_attributes else "无"
|
attrs_str = json.dumps(entity_attributes, ensure_ascii=False) if entity_attributes else "无"
|
||||||
context_str = context[:3000] if context else "无额外上下文"
|
context_str = context[:3000] if context else "无额外上下文"
|
||||||
|
|
||||||
return f"""为实体生成详细的社交媒体用户人设,最大程度还原已有现实情况。
|
return f"""Generate detailed social media user persona for the entity, maximizing fidelity to real-world situations.
|
||||||
|
|
||||||
实体名称: {entity_name}
|
实体名称: {entity_name}
|
||||||
实体类型: {entity_type}
|
实体类型: {entity_type}
|
||||||
@@ -736,7 +736,7 @@ class OasisProfileGenerator:
|
|||||||
attrs_str = json.dumps(entity_attributes, ensure_ascii=False) if entity_attributes else "无"
|
attrs_str = json.dumps(entity_attributes, ensure_ascii=False) if entity_attributes else "无"
|
||||||
context_str = context[:3000] if context else "无额外上下文"
|
context_str = context[:3000] if context else "无额外上下文"
|
||||||
|
|
||||||
return f"""为机构/群体实体生成详细的社交媒体账号设定,最大程度还原已有现实情况。
|
return f"""Generate detailed social media account settings for organization/group entity, maximizing fidelity to real-world situations.
|
||||||
|
|
||||||
实体名称: {entity_name}
|
实体名称: {entity_name}
|
||||||
实体类型: {entity_type}
|
实体类型: {entity_type}
|
||||||
@@ -769,7 +769,7 @@ class OasisProfileGenerator:
|
|||||||
- persona必须是一段连贯的文字描述,不要使用换行符
|
- persona必须是一段连贯的文字描述,不要使用换行符
|
||||||
- {get_language_instruction()} (gender字段必须用英文"other")
|
- {get_language_instruction()} (gender字段必须用英文"other")
|
||||||
- age必须是整数30,gender必须是字符串"other"
|
- age必须是整数30,gender必须是字符串"other"
|
||||||
- 机构账号发言要符合其身份定位"""
|
- Organization account posts must match its identity positioning"""
|
||||||
|
|
||||||
def _generate_profile_rule_based(
|
def _generate_profile_rule_based(
|
||||||
self,
|
self,
|
||||||
@@ -1076,11 +1076,11 @@ class OasisProfileGenerator:
|
|||||||
- name: 用户真实姓名
|
- name: 用户真实姓名
|
||||||
- username: 系统中的用户名
|
- username: 系统中的用户名
|
||||||
- user_char: 详细人设描述(注入到LLM系统提示中,指导Agent行为)
|
- user_char: 详细人设描述(注入到LLM系统提示中,指导Agent行为)
|
||||||
- description: 简短的公开简介(显示在用户资料页面)
|
- description: Short public bio (displayed on user profile page)
|
||||||
|
|
||||||
user_char vs description 区别:
|
user_char vs description difference:
|
||||||
- user_char: 内部使用,LLM系统提示,决定Agent如何思考和行动
|
- user_char: 内部使用,LLM系统提示,决定Agent如何思考和行动
|
||||||
- description: 外部显示,其他用户可见的简介
|
- description: Externally displayed, visible to other users
|
||||||
"""
|
"""
|
||||||
import csv
|
import csv
|
||||||
|
|
||||||
@@ -1112,7 +1112,7 @@ class OasisProfileGenerator:
|
|||||||
profile.name, # name: 真实姓名
|
profile.name, # name: 真实姓名
|
||||||
profile.user_name, # username: 用户名
|
profile.user_name, # username: 用户名
|
||||||
user_char, # user_char: 完整人设(内部LLM使用)
|
user_char, # user_char: 完整人设(内部LLM使用)
|
||||||
description # description: 简短简介(外部显示)
|
description # description: Short bio (externally displayed)
|
||||||
]
|
]
|
||||||
writer.writerow(row)
|
writer.writerow(row)
|
||||||
|
|
||||||
|
|||||||
@@ -540,15 +540,15 @@ class SimulationConfigGenerator:
|
|||||||
# 计算最大允许值(80%的agent数)
|
# 计算最大允许值(80%的agent数)
|
||||||
max_agents_allowed = max(1, int(num_entities * 0.9))
|
max_agents_allowed = max(1, int(num_entities * 0.9))
|
||||||
|
|
||||||
prompt = f"""基于以下模拟需求,生成时间模拟配置。
|
prompt = f"""Based on the following simulation requirements, generate time simulation config.
|
||||||
|
|
||||||
{context_truncated}
|
{context_truncated}
|
||||||
|
|
||||||
## 任务
|
## 任务
|
||||||
请生成时间配置JSON。
|
Please generate time config JSON.
|
||||||
|
|
||||||
### 基本原则(仅供参考,需根据具体事件和参与群体灵活调整):
|
### 基本原则(仅供参考,需根据具体事件和参与群体灵活调整):
|
||||||
- 请根据模拟场景推断目标用户群体所在时区和作息习惯,以下为东八区(UTC+8)的参考示例
|
- Please infer the target user group's timezone and daily routine based on the simulation scenario. Below is a reference example for UTC+8
|
||||||
- 凌晨0-5点几乎无人活动(活跃度系数0.05)
|
- 凌晨0-5点几乎无人活动(活跃度系数0.05)
|
||||||
- 早上6-8点逐渐活跃(活跃度系数0.4)
|
- 早上6-8点逐渐活跃(活跃度系数0.4)
|
||||||
- 工作时间9-18点中等活跃(活跃度系数0.7)
|
- 工作时间9-18点中等活跃(活跃度系数0.7)
|
||||||
@@ -571,7 +571,7 @@ class SimulationConfigGenerator:
|
|||||||
"off_peak_hours": [0, 1, 2, 3, 4, 5],
|
"off_peak_hours": [0, 1, 2, 3, 4, 5],
|
||||||
"morning_hours": [6, 7, 8],
|
"morning_hours": [6, 7, 8],
|
||||||
"work_hours": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
|
"work_hours": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
|
||||||
"reasoning": "针对该事件的时间配置说明"
|
"reasoning": "Explanation of time config for this event"
|
||||||
}}
|
}}
|
||||||
|
|
||||||
字段说明:
|
字段说明:
|
||||||
@@ -583,15 +583,15 @@ class SimulationConfigGenerator:
|
|||||||
- off_peak_hours (int数组): 低谷时段,通常深夜凌晨
|
- off_peak_hours (int数组): 低谷时段,通常深夜凌晨
|
||||||
- morning_hours (int数组): 早间时段
|
- morning_hours (int数组): 早间时段
|
||||||
- work_hours (int数组): 工作时段
|
- work_hours (int数组): 工作时段
|
||||||
- reasoning (string): 简要说明为什么这样配置"""
|
- reasoning (string): Brief explanation of why this config was chosen"""
|
||||||
|
|
||||||
system_prompt = "你是社交媒体模拟专家。返回纯JSON格式,时间配置需符合模拟场景中目标用户群体的作息习惯。"
|
system_prompt = "You are a social media simulation expert. Return pure JSON format. Time config must match the target user group's daily routine in the simulation scenario."
|
||||||
system_prompt = f"{system_prompt}\n\n{get_language_instruction()}"
|
system_prompt = f"{system_prompt}\n\n{get_language_instruction()}"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return self._call_llm_with_retry(prompt, system_prompt)
|
return self._call_llm_with_retry(prompt, system_prompt)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"时间配置LLM生成失败: {e}, 使用默认配置")
|
logger.warning(f"Time config LLM generation failed: {e}, using default config")
|
||||||
return self._get_default_time_config(num_entities)
|
return self._get_default_time_config(num_entities)
|
||||||
|
|
||||||
def _get_default_time_config(self, num_entities: int) -> Dict[str, Any]:
|
def _get_default_time_config(self, num_entities: int) -> Dict[str, Any]:
|
||||||
@@ -605,7 +605,7 @@ class SimulationConfigGenerator:
|
|||||||
"off_peak_hours": [0, 1, 2, 3, 4, 5],
|
"off_peak_hours": [0, 1, 2, 3, 4, 5],
|
||||||
"morning_hours": [6, 7, 8],
|
"morning_hours": [6, 7, 8],
|
||||||
"work_hours": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
|
"work_hours": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
|
||||||
"reasoning": "使用默认中国人作息配置(每轮1小时)"
|
"reasoning": "Using default UTC+8 daily routine config (1 hour per round)"
|
||||||
}
|
}
|
||||||
|
|
||||||
def _parse_time_config(self, result: Dict[str, Any], num_entities: int) -> TimeSimulationConfig:
|
def _parse_time_config(self, result: Dict[str, Any], num_entities: int) -> TimeSimulationConfig:
|
||||||
@@ -673,9 +673,9 @@ class SimulationConfigGenerator:
|
|||||||
# 使用配置的上下文截断长度
|
# 使用配置的上下文截断长度
|
||||||
context_truncated = context[:self.EVENT_CONFIG_CONTEXT_LENGTH]
|
context_truncated = context[:self.EVENT_CONFIG_CONTEXT_LENGTH]
|
||||||
|
|
||||||
prompt = f"""基于以下模拟需求,生成事件配置。
|
prompt = f"""Based on the following simulation requirements, generate event config.
|
||||||
|
|
||||||
模拟需求: {simulation_requirement}
|
Simulation requirements: {simulation_requirement}
|
||||||
|
|
||||||
{context_truncated}
|
{context_truncated}
|
||||||
|
|
||||||
@@ -683,7 +683,7 @@ class SimulationConfigGenerator:
|
|||||||
{type_info}
|
{type_info}
|
||||||
|
|
||||||
## 任务
|
## 任务
|
||||||
请生成事件配置JSON:
|
Please generate event config JSON:
|
||||||
- 提取热点话题关键词
|
- 提取热点话题关键词
|
||||||
- 描述舆论发展方向
|
- 描述舆论发展方向
|
||||||
- 设计初始帖子内容,**每个帖子必须指定 poster_type(发布者类型)**
|
- 设计初始帖子内容,**每个帖子必须指定 poster_type(发布者类型)**
|
||||||
@@ -691,7 +691,7 @@ class SimulationConfigGenerator:
|
|||||||
**重要**: poster_type 必须从上面的"可用实体类型"中选择,这样初始帖子才能分配给合适的 Agent 发布。
|
**重要**: poster_type 必须从上面的"可用实体类型"中选择,这样初始帖子才能分配给合适的 Agent 发布。
|
||||||
例如:官方声明应由 Official/University 类型发布,新闻由 MediaOutlet 发布,学生观点由 Student 发布。
|
例如:官方声明应由 Official/University 类型发布,新闻由 MediaOutlet 发布,学生观点由 Student 发布。
|
||||||
|
|
||||||
返回JSON格式(不要markdown):
|
Return in JSON format (no markdown):
|
||||||
{{
|
{{
|
||||||
"hot_topics": ["关键词1", "关键词2", ...],
|
"hot_topics": ["关键词1", "关键词2", ...],
|
||||||
"narrative_direction": "<舆论发展方向描述>",
|
"narrative_direction": "<舆论发展方向描述>",
|
||||||
@@ -702,18 +702,18 @@ class SimulationConfigGenerator:
|
|||||||
"reasoning": "<简要说明>"
|
"reasoning": "<简要说明>"
|
||||||
}}"""
|
}}"""
|
||||||
|
|
||||||
system_prompt = "你是舆论分析专家。返回纯JSON格式。注意 poster_type 必须精确匹配可用实体类型。"
|
system_prompt = "You are a public opinion analysis expert. Return pure JSON format. Note: poster_type must exactly match available entity types."
|
||||||
system_prompt = f"{system_prompt}\n\n{get_language_instruction()}\nIMPORTANT: The 'poster_type' field value MUST be in English PascalCase exactly matching the available entity types. Only 'content', 'narrative_direction', 'hot_topics' and 'reasoning' fields should use the specified language."
|
system_prompt = f"{system_prompt}\n\n{get_language_instruction()}\nIMPORTANT: The 'poster_type' field value MUST be in English PascalCase exactly matching the available entity types. Only 'content', 'narrative_direction', 'hot_topics' and 'reasoning' fields should use the specified language."
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return self._call_llm_with_retry(prompt, system_prompt)
|
return self._call_llm_with_retry(prompt, system_prompt)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"事件配置LLM生成失败: {e}, 使用默认配置")
|
logger.warning(f"Event config LLM generation failed: {e}, using default config")
|
||||||
return {
|
return {
|
||||||
"hot_topics": [],
|
"hot_topics": [],
|
||||||
"narrative_direction": "",
|
"narrative_direction": "",
|
||||||
"initial_posts": [],
|
"initial_posts": [],
|
||||||
"reasoning": "使用默认配置"
|
"reasoning": "Using default config"
|
||||||
}
|
}
|
||||||
|
|
||||||
def _parse_event_config(self, result: Dict[str, Any]) -> EventConfig:
|
def _parse_event_config(self, result: Dict[str, Any]) -> EventConfig:
|
||||||
@@ -830,9 +830,9 @@ class SimulationConfigGenerator:
|
|||||||
"summary": e.summary[:summary_len] if e.summary else ""
|
"summary": e.summary[:summary_len] if e.summary else ""
|
||||||
})
|
})
|
||||||
|
|
||||||
prompt = f"""基于以下信息,为每个实体生成社交媒体活动配置。
|
prompt = f"""Based on the following information, generate social media activity config for each entity.
|
||||||
|
|
||||||
模拟需求: {simulation_requirement}
|
Simulation requirements: {simulation_requirement}
|
||||||
|
|
||||||
## 实体列表
|
## 实体列表
|
||||||
```json
|
```json
|
||||||
@@ -840,14 +840,14 @@ class SimulationConfigGenerator:
|
|||||||
```
|
```
|
||||||
|
|
||||||
## 任务
|
## 任务
|
||||||
为每个实体生成活动配置,注意:
|
Generate activity config for each entity, noting:
|
||||||
- **时间符合目标用户群体作息**:以下为参考(东八区),请根据模拟场景调整
|
- **Time must match target user group routine**: Below is a reference (UTC+8), please adjust based on simulation scenario
|
||||||
- **官方机构**(University/GovernmentAgency):活跃度低(0.1-0.3),工作时间(9-17)活动,响应慢(60-240分钟),影响力高(2.5-3.0)
|
- **官方机构**(University/GovernmentAgency):活跃度低(0.1-0.3),工作时间(9-17)活动,响应慢(60-240分钟),影响力高(2.5-3.0)
|
||||||
- **媒体**(MediaOutlet):活跃度中(0.4-0.6),全天活动(8-23),响应快(5-30分钟),影响力高(2.0-2.5)
|
- **媒体**(MediaOutlet):活跃度中(0.4-0.6),全天活动(8-23),响应快(5-30分钟),影响力高(2.0-2.5)
|
||||||
- **个人**(Student/Person/Alumni):活跃度高(0.6-0.9),主要晚间活动(18-23),响应快(1-15分钟),影响力低(0.8-1.2)
|
- **个人**(Student/Person/Alumni):活跃度高(0.6-0.9),主要晚间活动(18-23),响应快(1-15分钟),影响力低(0.8-1.2)
|
||||||
- **公众人物/专家**:活跃度中(0.4-0.6),影响力中高(1.5-2.0)
|
- **公众人物/专家**:活跃度中(0.4-0.6),影响力中高(1.5-2.0)
|
||||||
|
|
||||||
返回JSON格式(不要markdown):
|
Return in JSON format (no markdown):
|
||||||
{{
|
{{
|
||||||
"agent_configs": [
|
"agent_configs": [
|
||||||
{{
|
{{
|
||||||
@@ -866,14 +866,14 @@ class SimulationConfigGenerator:
|
|||||||
]
|
]
|
||||||
}}"""
|
}}"""
|
||||||
|
|
||||||
system_prompt = "你是社交媒体行为分析专家。返回纯JSON,配置需符合模拟场景中目标用户群体的作息习惯。"
|
system_prompt = "You are a social media behavior analysis expert. Return pure JSON. Config must match the target user group's daily routine in the simulation scenario."
|
||||||
system_prompt = f"{system_prompt}\n\n{get_language_instruction()}\nIMPORTANT: The 'stance' field value MUST be one of the English strings: 'supportive', 'opposing', 'neutral', 'observer'. All JSON field names and numeric values must remain unchanged. Only natural language text fields should use the specified language."
|
system_prompt = f"{system_prompt}\n\n{get_language_instruction()}\nIMPORTANT: The 'stance' field value MUST be one of the English strings: 'supportive', 'opposing', 'neutral', 'observer'. All JSON field names and numeric values must remain unchanged. Only natural language text fields should use the specified language."
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = self._call_llm_with_retry(prompt, system_prompt)
|
result = self._call_llm_with_retry(prompt, system_prompt)
|
||||||
llm_configs = {cfg["agent_id"]: cfg for cfg in result.get("agent_configs", [])}
|
llm_configs = {cfg["agent_id"]: cfg for cfg in result.get("agent_configs", [])}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Agent配置批次LLM生成失败: {e}, 使用规则生成")
|
logger.warning(f"Agent config batch LLM generation failed: {e}, using rule-based generation")
|
||||||
llm_configs = {}
|
llm_configs = {}
|
||||||
|
|
||||||
# 构建AgentActivityConfig对象
|
# 构建AgentActivityConfig对象
|
||||||
|
|||||||
Reference in New Issue
Block a user