Enhance OASIS simulation capabilities and profile generation

- Updated README.md to include detailed descriptions of new features, including Zep mixed search functionality and detailed persona generation for individual and group entities. - Implemented a robust mechanism for checking simulation preparation status to avoid redundant profile generation. - Added support for parallel profile generation, improving efficiency in creating OASIS Agent Profiles. - Enhanced the simulation configuration generator to adopt a stepwise approach, ensuring better handling of complex configurations. - Introduced error handling and retry mechanisms for LLM calls, improving the reliability of profile generation. - Updated simulation management to support new API parameters for controlling profile generation behavior.
2025-12-01 19:40:07 +08:00
parent 5f159f6d88
commit af5c235695
5 changed files with 1602 additions and 408 deletions
--- a/backend/app/services/oasis_profile_generator.py
+++ b/backend/app/services/oasis_profile_generator.py
@@ -1,6 +1,11 @@
 """
 OASIS Agent Profile生成器
 将Zep图谱中的实体转换为OASIS模拟平台所需的Agent Profile格式
+
+优化改进：
+1. 调用Zep检索功能二次丰富节点信息
+2. 优化提示词生成非常详细的人设
+3. 区分个人实体和抽象群体实体
 """

 import json
@@ -10,6 +15,7 @@ from dataclasses import dataclass, field
 from datetime import datetime

 from openai import OpenAI
+from zep_cloud.client import Zep

 from ..config import Config
 from ..utils.logger import get_logger
@@ -137,6 +143,11 @@ class OasisProfileGenerator:
    OASIS Profile生成器
    
    将Zep图谱中的实体转换为OASIS模拟所需的Agent Profile
+    
+    优化特性：
+    1. 调用Zep图谱检索功能获取更丰富的上下文
+    2. 生成非常详细的人设（包括基本信息、职业经历、性格特征、社交媒体行为等）
+    3. 区分个人实体和抽象群体实体
    """
    
    # MBTI类型列表
@@ -153,11 +164,25 @@ class OasisProfileGenerator:
        "Canada", "Australia", "Brazil", "India", "South Korea"
    ]
    
+    # 个人类型实体（需要生成具体人设）
+    INDIVIDUAL_ENTITY_TYPES = [
+        "student", "alumni", "professor", "person", "publicfigure", 
+        "expert", "faculty", "official", "journalist", "activist"
+    ]
+    
+    # 群体/机构类型实体（需要生成群体代表人设）
+    GROUP_ENTITY_TYPES = [
+        "university", "governmentagency", "organization", "ngo", 
+        "mediaoutlet", "company", "institution", "group", "community"
+    ]
+    
    def __init__(
        self, 
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
-        model_name: Optional[str] = None
+        model_name: Optional[str] = None,
+        zep_api_key: Optional[str] = None,
+        graph_id: Optional[str] = None
    ):
        self.api_key = api_key or Config.LLM_API_KEY
        self.base_url = base_url or Config.LLM_BASE_URL
@@ -170,6 +195,17 @@ class OasisProfileGenerator:
            api_key=self.api_key,
            base_url=self.base_url
        )
+        
+        # Zep客户端用于检索丰富上下文
+        self.zep_api_key = zep_api_key or Config.ZEP_API_KEY
+        self.zep_client = None
+        self.graph_id = graph_id
+        
+        if self.zep_api_key:
+            try:
+                self.zep_client = Zep(api_key=self.zep_api_key)
+            except Exception as e:
+                logger.warning(f"Zep客户端初始化失败: {e}")
    
    def generate_profile_from_entity(
        self, 
@@ -245,28 +281,195 @@ class OasisProfileGenerator:
        suffix = random.randint(100, 999)
        return f"{username}_{suffix}"
    
+    def _search_zep_for_entity(self, entity: EntityNode) -> Dict[str, Any]:
+        """
+        使用Zep图谱混合搜索功能获取实体相关的丰富信息
+        
+        Zep没有内置混合搜索接口，需要分别搜索edges和nodes然后合并结果。
+        使用并行请求同时搜索，提高效率。
+        
+        Args:
+            entity: 实体节点对象
+            
+        Returns:
+            包含facts, node_summaries, context的字典
+        """
+        import concurrent.futures
+        
+        if not self.zep_client:
+            return {"facts": [], "node_summaries": [], "context": ""}
+        
+        entity_name = entity.name
+        
+        results = {
+            "facts": [],
+            "node_summaries": [],
+            "context": ""
+        }
+        
+        # 必须有graph_id才能进行搜索
+        if not self.graph_id:
+            logger.debug(f"跳过Zep检索：未设置graph_id")
+            return results
+        
+        comprehensive_query = f"关于{entity_name}的所有信息、活动、事件、关系和背景"
+        
+        def search_edges():
+            """搜索边（事实/关系）"""
+            try:
+                return self.zep_client.graph.search(
+                    query=comprehensive_query,
+                    graph_id=self.graph_id,
+                    limit=30,
+                    scope="edges",
+                    reranker="rrf"
+                )
+            except Exception as e:
+                logger.debug(f"Zep边搜索失败: {e}")
+                return None
+        
+        def search_nodes():
+            """搜索节点（实体摘要）"""
+            try:
+                return self.zep_client.graph.search(
+                    query=comprehensive_query,
+                    graph_id=self.graph_id,
+                    limit=20,
+                    scope="nodes",
+                    reranker="rrf"
+                )
+            except Exception as e:
+                logger.debug(f"Zep节点搜索失败: {e}")
+                return None
+        
+        try:
+            # 并行执行edges和nodes搜索
+            with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+                edge_future = executor.submit(search_edges)
+                node_future = executor.submit(search_nodes)
+                
+                # 获取结果
+                edge_result = edge_future.result(timeout=30)
+                node_result = node_future.result(timeout=30)
+            
+            # 处理边搜索结果
+            all_facts = set()
+            if edge_result and hasattr(edge_result, 'edges') and edge_result.edges:
+                for edge in edge_result.edges:
+                    if hasattr(edge, 'fact') and edge.fact:
+                        all_facts.add(edge.fact)
+            results["facts"] = list(all_facts)
+            
+            # 处理节点搜索结果
+            all_summaries = set()
+            if node_result and hasattr(node_result, 'nodes') and node_result.nodes:
+                for node in node_result.nodes:
+                    if hasattr(node, 'summary') and node.summary:
+                        all_summaries.add(node.summary)
+                    if hasattr(node, 'name') and node.name and node.name != entity_name:
+                        all_summaries.add(f"相关实体: {node.name}")
+            results["node_summaries"] = list(all_summaries)
+            
+            # 构建综合上下文
+            context_parts = []
+            if results["facts"]:
+                context_parts.append("事实信息:\n" + "\n".join(f"- {f}" for f in results["facts"][:20]))
+            if results["node_summaries"]:
+                context_parts.append("相关实体:\n" + "\n".join(f"- {s}" for s in results["node_summaries"][:10]))
+            results["context"] = "\n\n".join(context_parts)
+            
+            logger.info(f"Zep混合检索完成: {entity_name}, 获取 {len(results['facts'])} 条事实, {len(results['node_summaries'])} 个相关节点")
+            
+        except concurrent.futures.TimeoutError:
+            logger.warning(f"Zep检索超时 ({entity_name})")
+        except Exception as e:
+            logger.warning(f"Zep检索失败 ({entity_name}): {e}")
+        
+        return results
+    
    def _build_entity_context(self, entity: EntityNode) -> str:
-        """构建实体的上下文信息"""
+        """
+        构建实体的完整上下文信息
+        
+        包括：
+        1. 实体本身的边信息（事实）
+        2. 关联节点的详细信息
+        3. Zep混合检索到的丰富信息
+        """
        context_parts = []
        
-        # 添加相关边信息
+        # 1. 添加实体属性信息
+        if entity.attributes:
+            attrs = []
+            for key, value in entity.attributes.items():
+                if value and str(value).strip():
+                    attrs.append(f"- {key}: {value}")
+            if attrs:
+                context_parts.append("### 实体属性\n" + "\n".join(attrs))
+        
+        # 2. 添加相关边信息（事实/关系）
+        existing_facts = set()
        if entity.related_edges:
            relationships = []
-            for edge in entity.related_edges[:10]:  # 最多取10条
-                if edge.get("fact"):
-                    relationships.append(edge["fact"])
+            for edge in entity.related_edges:  # 不限制数量
+                fact = edge.get("fact", "")
+                edge_name = edge.get("edge_name", "")
+                direction = edge.get("direction", "")
+                
+                if fact:
+                    relationships.append(f"- {fact}")
+                    existing_facts.add(fact)
+                elif edge_name:
+                    if direction == "outgoing":
+                        relationships.append(f"- {entity.name} --[{edge_name}]--> (相关实体)")
+                    else:
+                        relationships.append(f"- (相关实体) --[{edge_name}]--> {entity.name}")
            
            if relationships:
-                context_parts.append("Related facts:\n" + "\n".join(f"- {r}" for r in relationships))
+                context_parts.append("### 相关事实和关系\n" + "\n".join(relationships))
        
-        # 添加关联节点信息
+        # 3. 添加关联节点的详细信息
        if entity.related_nodes:
-            related_names = [n["name"] for n in entity.related_nodes[:5]]
-            if related_names:
-                context_parts.append(f"Related to: {', '.join(related_names)}")
+            related_info = []
+            for node in entity.related_nodes:  # 不限制数量
+                node_name = node.get("name", "")
+                node_labels = node.get("labels", [])
+                node_summary = node.get("summary", "")
+                
+                # 过滤掉默认标签
+                custom_labels = [l for l in node_labels if l not in ["Entity", "Node"]]
+                label_str = f" ({', '.join(custom_labels)})" if custom_labels else ""
+                
+                if node_summary:
+                    related_info.append(f"- **{node_name}**{label_str}: {node_summary}")
+                else:
+                    related_info.append(f"- **{node_name}**{label_str}")
+            
+            if related_info:
+                context_parts.append("### 关联实体信息\n" + "\n".join(related_info))
+        
+        # 4. 使用Zep混合检索获取更丰富的信息
+        zep_results = self._search_zep_for_entity(entity)
+        
+        if zep_results.get("facts"):
+            # 去重：排除已存在的事实
+            new_facts = [f for f in zep_results["facts"] if f not in existing_facts]
+            if new_facts:
+                context_parts.append("### Zep检索到的事实信息\n" + "\n".join(f"- {f}" for f in new_facts[:15]))
+        
+        if zep_results.get("node_summaries"):
+            context_parts.append("### Zep检索到的相关节点\n" + "\n".join(f"- {s}" for s in zep_results["node_summaries"][:10]))
        
        return "\n\n".join(context_parts)
    
+    def _is_individual_entity(self, entity_type: str) -> bool:
+        """判断是否是个人类型实体"""
+        return entity_type.lower() in self.INDIVIDUAL_ENTITY_TYPES
+    
+    def _is_group_entity(self, entity_type: str) -> bool:
+        """判断是否是群体/机构类型实体"""
+        return entity_type.lower() in self.GROUP_ENTITY_TYPES
+    
    def _generate_profile_with_llm(
        self,
        entity_name: str,
@@ -275,63 +478,271 @@ class OasisProfileGenerator:
        entity_attributes: Dict[str, Any],
        context: str
    ) -> Dict[str, Any]:
-        """使用LLM生成详细人设"""
+        """
+        使用LLM生成非常详细的人设
        
-        prompt = f"""Based on the following entity information, generate a detailed social media user profile for simulation purposes.
+        根据实体类型区分：
+        - 个人实体：生成具体的人物设定
+        - 群体/机构实体：生成代表性账号设定
+        """
+        
+        is_individual = self._is_individual_entity(entity_type)
+        
+        if is_individual:
+            prompt = self._build_individual_persona_prompt(
+                entity_name, entity_type, entity_summary, entity_attributes, context
+            )
+        else:
+            prompt = self._build_group_persona_prompt(
+                entity_name, entity_type, entity_summary, entity_attributes, context
+            )

-Entity Information:
- Name: {entity_name}
- Type: {entity_type}
- Summary: {entity_summary}
- Attributes: {json.dumps(entity_attributes, ensure_ascii=False)}
-
-Context:
-{context}
-
-Generate a JSON object with the following fields:
-{{
-    "bio": "A short bio (max 150 chars) suitable for social media",
-    "persona": "A detailed persona description (2-3 sentences) describing personality, interests, and behavior patterns",
-    "age": <integer between 18-65, or null if not applicable>,
-    "gender": "<male/female/other, or null if not applicable>",
-    "mbti": "<MBTI type like INTJ, ENFP, etc., or null>",
-    "country": "<country name, or null>",
-    "profession": "<profession/occupation, or null>",
-    "interested_topics": ["topic1", "topic2", ...]
-}}
-
-Important:
- The profile should be consistent with the entity type and context
- Make the persona feel realistic and suitable for social media simulation
- If the entity is an organization, institution, or non-person, adapt the profile accordingly (e.g., as an official account)
- Return ONLY the JSON object, no additional text"""
-
-        try:
-            # 使用重试机制调用LLM API
-            from ..utils.retry import RetryableAPIClient
-            
-            retry_client = RetryableAPIClient(max_retries=3, initial_delay=1.0)
-            
-            def call_llm():
-                return self.client.chat.completions.create(
+        # 尝试多次生成，直到成功或达到最大重试次数
+        max_attempts = 3
+        last_error = None
+        
+        for attempt in range(max_attempts):
+            try:
+                response = self.client.chat.completions.create(
                    model=self.model_name,
                    messages=[
-                        {"role": "system", "content": "You are a profile generator for social media simulation. Generate realistic user profiles based on entity information."},
+                        {"role": "system", "content": self._get_system_prompt(is_individual)},
                        {"role": "user", "content": prompt}
                    ],
                    response_format={"type": "json_object"},
-                    temperature=0.7
+                    temperature=0.7 - (attempt * 0.1)  # 每次重试降低温度
+                    # 不设置max_tokens，让LLM自由发挥
                )
+                
+                content = response.choices[0].message.content
+                
+                # 检查是否被截断（finish_reason不是'stop'）
+                finish_reason = response.choices[0].finish_reason
+                if finish_reason == 'length':
+                    logger.warning(f"LLM输出被截断 (attempt {attempt+1}), 尝试修复...")
+                    content = self._fix_truncated_json(content)
+                
+                # 尝试解析JSON
+                try:
+                    result = json.loads(content)
+                    
+                    # 验证必需字段
+                    if "bio" not in result or not result["bio"]:
+                        result["bio"] = entity_summary[:200] if entity_summary else f"{entity_type}: {entity_name}"
+                    if "persona" not in result or not result["persona"]:
+                        result["persona"] = entity_summary or f"{entity_name}是一个{entity_type}。"
+                    
+                    return result
+                    
+                except json.JSONDecodeError as je:
+                    logger.warning(f"JSON解析失败 (attempt {attempt+1}): {str(je)[:80]}")
+                    
+                    # 尝试修复JSON
+                    result = self._try_fix_json(content, entity_name, entity_type, entity_summary)
+                    if result.get("_fixed"):
+                        del result["_fixed"]
+                        return result
+                    
+                    last_error = je
+                    
+            except Exception as e:
+                logger.warning(f"LLM调用失败 (attempt {attempt+1}): {str(e)[:80]}")
+                last_error = e
+                import time
+                time.sleep(1 * (attempt + 1))  # 指数退避
+        
+        logger.warning(f"LLM生成人设失败（{max_attempts}次尝试）: {last_error}, 使用规则生成")
+        return self._generate_profile_rule_based(
+            entity_name, entity_type, entity_summary, entity_attributes
+        )
+    
+    def _fix_truncated_json(self, content: str) -> str:
+        """修复被截断的JSON（输出被max_tokens限制截断）"""
+        import re
+        
+        # 如果JSON被截断，尝试闭合它
+        content = content.strip()
+        
+        # 计算未闭合的括号
+        open_braces = content.count('{') - content.count('}')
+        open_brackets = content.count('[') - content.count(']')
+        
+        # 检查是否有未闭合的字符串
+        # 简单检查：如果最后一个引号后没有逗号或闭合括号，可能是字符串被截断
+        if content and content[-1] not in '",}]':
+            # 尝试闭合字符串
+            content += '"'
+        
+        # 闭合括号
+        content += ']' * open_brackets
+        content += '}' * open_braces
+        
+        return content
+    
+    def _try_fix_json(self, content: str, entity_name: str, entity_type: str, entity_summary: str = "") -> Dict[str, Any]:
+        """尝试修复损坏的JSON"""
+        import re
+        
+        # 1. 首先尝试修复被截断的情况
+        content = self._fix_truncated_json(content)
+        
+        # 2. 尝试提取JSON部分
+        json_match = re.search(r'\{[\s\S]*\}', content)
+        if json_match:
+            json_str = json_match.group()
            
-            response = retry_client.call_with_retry(call_llm)
-            result = json.loads(response.choices[0].message.content)
-            return result
+            # 3. 处理字符串中的换行符问题
+            # 找到所有字符串值并替换其中的换行符
+            def fix_string_newlines(match):
+                s = match.group(0)
+                # 替换字符串内的实际换行符为空格
+                s = s.replace('\n', ' ').replace('\r', ' ')
+                # 替换多余空格
+                s = re.sub(r'\s+', ' ', s)
+                return s
            
-        except Exception as e:
-            logger.warning(f"LLM生成人设失败（已重试）: {str(e)}, 使用规则生成")
-            return self._generate_profile_rule_based(
-                entity_name, entity_type, entity_summary, entity_attributes
-            )
+            # 匹配JSON字符串值
+            json_str = re.sub(r'"[^"\\]*(?:\\.[^"\\]*)*"', fix_string_newlines, json_str)
+            
+            # 4. 尝试解析
+            try:
+                result = json.loads(json_str)
+                result["_fixed"] = True
+                return result
+            except json.JSONDecodeError as e:
+                # 5. 如果还是失败，尝试更激进的修复
+                try:
+                    # 移除所有控制字符
+                    json_str = re.sub(r'[\x00-\x1f\x7f-\x9f]', ' ', json_str)
+                    # 替换所有连续空白
+                    json_str = re.sub(r'\s+', ' ', json_str)
+                    result = json.loads(json_str)
+                    result["_fixed"] = True
+                    return result
+                except:
+                    pass
+        
+        # 6. 尝试从内容中提取部分信息
+        bio_match = re.search(r'"bio"\s*:\s*"([^"]*)"', content)
+        persona_match = re.search(r'"persona"\s*:\s*"([^"]*)', content)  # 可能被截断
+        
+        bio = bio_match.group(1) if bio_match else (entity_summary[:200] if entity_summary else f"{entity_type}: {entity_name}")
+        persona = persona_match.group(1) if persona_match else (entity_summary or f"{entity_name}是一个{entity_type}。")
+        
+        # 如果提取到了有意义的内容，标记为已修复
+        if bio_match or persona_match:
+            logger.info(f"从损坏的JSON中提取了部分信息")
+            return {
+                "bio": bio,
+                "persona": persona,
+                "_fixed": True
+            }
+        
+        # 7. 完全失败，返回基础结构
+        logger.warning(f"JSON修复失败，返回基础结构")
+        return {
+            "bio": entity_summary[:200] if entity_summary else f"{entity_type}: {entity_name}",
+            "persona": entity_summary or f"{entity_name}是一个{entity_type}。"
+        }
+    
+    def _get_system_prompt(self, is_individual: bool) -> str:
+        """获取系统提示词"""
+        base_prompt = "你是社交媒体用户画像生成专家。生成详细、真实的人设用于舆论模拟,最大程度还原已有现实情况。必须返回有效的JSON格式，所有字符串值不能包含未转义的换行符。使用中文。"
+        return base_prompt
+    
+    def _build_individual_persona_prompt(
+        self,
+        entity_name: str,
+        entity_type: str,
+        entity_summary: str,
+        entity_attributes: Dict[str, Any],
+        context: str
+    ) -> str:
+        """构建个人实体的详细人设提示词"""
+        
+        attrs_str = json.dumps(entity_attributes, ensure_ascii=False) if entity_attributes else "无"
+        context_str = context[:3000] if context else "无额外上下文"
+        
+        return f"""为实体生成详细的社交媒体用户人设,最大程度还原已有现实情况。
+
+实体名称: {entity_name}
+实体类型: {entity_type}
+实体摘要: {entity_summary}
+实体属性: {attrs_str}
+
+上下文信息:
+{context_str}
+
+请生成JSON，包含以下字段:
+
+1. bio: 社交媒体简介，200字
+2. persona: 详细人设描述（2000字的纯文本），需包含:
+   - 基本信息（年龄、职业、教育背景、所在地）
+   - 人物背景（重要经历、与事件的关联、社会关系）
+   - 性格特征（MBTI类型、核心性格、情绪表达方式）
+   - 社交媒体行为（发帖频率、内容偏好、互动风格、语言特点）
+   - 立场观点（对话题的态度、可能被激怒/感动的内容）
+   - 独特特征（口头禅、特殊经历、个人爱好）
+   - 个人记忆（人设的重要部分，要介绍这个个体与事件的关联，以及这个个体在事件中的已有动作与反应）
+3. age: 年龄数字
+4. gender: 性别（男/女）
+5. mbti: MBTI类型
+6. country: 国家
+7. profession: 职业
+8. interested_topics: 感兴趣话题数组
+
+重要:
+- 所有字段值必须是字符串或数字，不要使用换行符
+- persona必须是一段连贯的文字描述
+- 使用中文
+- 内容要与实体信息保持一致"""
+
+    def _build_group_persona_prompt(
+        self,
+        entity_name: str,
+        entity_type: str,
+        entity_summary: str,
+        entity_attributes: Dict[str, Any],
+        context: str
+    ) -> str:
+        """构建群体/机构实体的详细人设提示词"""
+        
+        attrs_str = json.dumps(entity_attributes, ensure_ascii=False) if entity_attributes else "无"
+        context_str = context[:3000] if context else "无额外上下文"
+        
+        return f"""为机构/群体实体生成详细的社交媒体账号设定,最大程度还原已有现实情况。
+
+实体名称: {entity_name}
+实体类型: {entity_type}
+实体摘要: {entity_summary}
+实体属性: {attrs_str}
+
+上下文信息:
+{context_str}
+
+请生成JSON，包含以下字段:
+
+1. bio: 官方账号简介，200字，专业得体
+2. persona: 详细账号设定描述（2000字的纯文本），需包含:
+   - 机构基本信息（正式名称、机构性质、成立背景、主要职能）
+   - 账号定位（账号类型、目标受众、核心功能）
+   - 发言风格（语言特点、常用表达、禁忌话题）
+   - 发布内容特点（内容类型、发布频率、活跃时间段）
+   - 立场态度（对核心话题的官方立场、面对争议的处理方式）
+   - 特殊说明（代表的群体画像、运营习惯）
+   - 机构记忆（机构人设的重要部分，要介绍这个机构与事件的关联，以及这个机构在事件中的已有动作与反应）
+3. age: null（机构不适用）
+4. gender: null（机构不适用）
+5. mbti: 可选，用于描述账号风格，如ISTJ代表严谨保守
+6. country: 国家
+7. profession: 机构职能描述
+8. interested_topics: 关注领域数组
+
+重要:
+- 所有字段值必须是字符串、数字或null
+- persona必须是一段连贯的文字描述，不要使用换行符
+- 使用中文
+- 机构账号发言要符合其身份定位"""
    
    def _generate_profile_rule_based(
        self,
@@ -398,29 +809,46 @@ Important:
                "interested_topics": ["General", "Social Issues"],
            }
    
+    def set_graph_id(self, graph_id: str):
+        """设置图谱ID用于Zep检索"""
+        self.graph_id = graph_id
+    
    def generate_profiles_from_entities(
        self,
        entities: List[EntityNode],
        use_llm: bool = True,
-        progress_callback: Optional[callable] = None
+        progress_callback: Optional[callable] = None,
+        graph_id: Optional[str] = None,
+        parallel_count: int = 5
    ) -> List[OasisAgentProfile]:
        """
-        批量从实体生成Agent Profile
+        批量从实体生成Agent Profile（支持并行生成）
        
        Args:
            entities: 实体列表
            use_llm: 是否使用LLM生成详细人设
            progress_callback: 进度回调函数 (current, total, message)
+            graph_id: 图谱ID，用于Zep检索获取更丰富上下文
+            parallel_count: 并行生成数量，默认5
            
        Returns:
            Agent Profile列表
        """
-        profiles = []
-        total = len(entities)
+        import concurrent.futures
+        from threading import Lock
        
-        for idx, entity in enumerate(entities):
-            if progress_callback:
-                progress_callback(idx + 1, total, f"生成 {entity.name} 的人设...")
+        # 设置graph_id用于Zep检索
+        if graph_id:
+            self.graph_id = graph_id
+        
+        total = len(entities)
+        profiles = [None] * total  # 预分配列表保持顺序
+        completed_count = [0]  # 使用列表以便在闭包中修改
+        lock = Lock()
+        
+        def generate_single_profile(idx: int, entity: EntityNode) -> tuple:
+            """生成单个profile的工作函数"""
+            entity_type = entity.get_entity_type() or "Entity"
            
            try:
                profile = self.generate_profile_from_entity(
@@ -428,23 +856,115 @@ Important:
                    user_id=idx,
                    use_llm=use_llm
                )
-                profiles.append(profile)
+                
+                # 实时输出生成的人设到控制台和日志
+                self._print_generated_profile(entity.name, entity_type, profile)
+                
+                return idx, profile, None
                
            except Exception as e:
                logger.error(f"生成实体 {entity.name} 的人设失败: {str(e)}")
                # 创建一个基础profile
-                profiles.append(OasisAgentProfile(
+                fallback_profile = OasisAgentProfile(
                    user_id=idx,
                    user_name=self._generate_username(entity.name),
                    name=entity.name,
-                    bio=f"{entity.get_entity_type() or 'Entity'}: {entity.name}",
+                    bio=f"{entity_type}: {entity.name}",
                    persona=entity.summary or f"A participant in social discussions.",
                    source_entity_uuid=entity.uuid,
-                    source_entity_type=entity.get_entity_type(),
-                ))
+                    source_entity_type=entity_type,
+                )
+                return idx, fallback_profile, str(e)
+        
+        logger.info(f"开始并行生成 {total} 个Agent人设（并行数: {parallel_count}）...")
+        print(f"\n{'='*60}")
+        print(f"开始生成Agent人设 - 共 {total} 个实体，并行数: {parallel_count}")
+        print(f"{'='*60}\n")
+        
+        # 使用线程池并行执行
+        with concurrent.futures.ThreadPoolExecutor(max_workers=parallel_count) as executor:
+            # 提交所有任务
+            future_to_entity = {
+                executor.submit(generate_single_profile, idx, entity): (idx, entity)
+                for idx, entity in enumerate(entities)
+            }
+            
+            # 收集结果
+            for future in concurrent.futures.as_completed(future_to_entity):
+                idx, entity = future_to_entity[future]
+                entity_type = entity.get_entity_type() or "Entity"
+                
+                try:
+                    result_idx, profile, error = future.result()
+                    profiles[result_idx] = profile
+                    
+                    with lock:
+                        completed_count[0] += 1
+                        current = completed_count[0]
+                    
+                    if progress_callback:
+                        progress_callback(
+                            current, 
+                            total, 
+                            f"已完成 {current}/{total}: {entity.name}（{entity_type}）"
+                        )
+                    
+                    if error:
+                        logger.warning(f"[{current}/{total}] {entity.name} 使用备用人设: {error}")
+                    else:
+                        logger.info(f"[{current}/{total}] 成功生成人设: {entity.name} ({entity_type})")
+                        
+                except Exception as e:
+                    logger.error(f"处理实体 {entity.name} 时发生异常: {str(e)}")
+                    with lock:
+                        completed_count[0] += 1
+                    profiles[idx] = OasisAgentProfile(
+                        user_id=idx,
+                        user_name=self._generate_username(entity.name),
+                        name=entity.name,
+                        bio=f"{entity_type}: {entity.name}",
+                        persona=entity.summary or "A participant in social discussions.",
+                        source_entity_uuid=entity.uuid,
+                        source_entity_type=entity_type,
+                    )
+        
+        print(f"\n{'='*60}")
+        print(f"人设生成完成！共生成 {len([p for p in profiles if p])} 个Agent")
+        print(f"{'='*60}\n")
        
        return profiles
    
+    def _print_generated_profile(self, entity_name: str, entity_type: str, profile: OasisAgentProfile):
+        """实时输出生成的人设到控制台（完整内容，不截断）"""
+        separator = "-" * 70
+        
+        # 构建完整输出内容（不截断）
+        topics_str = ', '.join(profile.interested_topics) if profile.interested_topics else '无'
+        
+        output_lines = [
+            f"\n{separator}",
+            f"[已生成] {entity_name} ({entity_type})",
+            f"{separator}",
+            f"用户名: {profile.user_name}",
+            f"",
+            f"【简介】",
+            f"{profile.bio}",
+            f"",
+            f"【详细人设】",
+            f"{profile.persona}",
+            f"",
+            f"【基本属性】",
+            f"年龄: {profile.age} | 性别: {profile.gender} | MBTI: {profile.mbti}",
+            f"职业: {profile.profession} | 国家: {profile.country}",
+            f"兴趣话题: {topics_str}",
+            separator
+        ]
+        
+        output = "\n".join(output_lines)
+        
+        # 只输出到控制台（避免重复，logger不再输出完整内容）
+        print(output)
+    
    def save_profiles(
        self,
        profiles: List[OasisAgentProfile],
@@ -470,10 +990,18 @@ Important:
    
    def _save_twitter_csv(self, profiles: List[OasisAgentProfile], file_path: str):
        """
-        保存Twitter Profile为CSV格式
+        保存Twitter Profile为CSV格式（符合OASIS官方要求）
        
        OASIS Twitter要求的CSV字段：
-        user_id, user_name, name, bio, friend_count, follower_count, statuses_count, created_at
+        - user_id: 用户ID（根据CSV顺序从0开始）
+        - name: 用户真实姓名
+        - username: 系统中的用户名
+        - user_char: 详细人设描述（注入到LLM系统提示中，指导Agent行为）
+        - description: 简短的公开简介（显示在用户资料页面）
+        
+        user_char vs description 区别：
+        - user_char: 内部使用，LLM系统提示，决定Agent如何思考和行动
+        - description: 外部显示，其他用户可见的简介
        """
        import csv
        
@@ -484,28 +1012,32 @@ Important:
        with open(file_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            
-            # 写入表头
-            headers = ['user_id', 'user_name', 'name', 'bio', 'friend_count', 
-                      'follower_count', 'statuses_count', 'created_at']
+            # 写入OASIS要求的表头
+            headers = ['user_id', 'name', 'username', 'user_char', 'description']
            writer.writerow(headers)
            
            # 写入数据行
-            for profile in profiles:
-                # bio需要处理换行符和逗号
-                bio = profile.bio.replace('\n', ' ').replace('\r', ' ')
+            for idx, profile in enumerate(profiles):
+                # user_char: 完整人设（bio + persona），用于LLM系统提示
+                user_char = profile.bio
+                if profile.persona and profile.persona != profile.bio:
+                    user_char = f"{profile.bio} {profile.persona}"
+                # 处理换行符（CSV中用空格替代）
+                user_char = user_char.replace('\n', ' ').replace('\r', ' ')
+                
+                # description: 简短简介，用于外部显示
+                description = profile.bio.replace('\n', ' ').replace('\r', ' ')
+                
                row = [
-                    profile.user_id,
-                    profile.user_name,
-                    profile.name,
-                    bio,
-                    profile.friend_count,
-                    profile.follower_count,
-                    profile.statuses_count,
-                    profile.created_at
+                    idx,                    # user_id: 从0开始的顺序ID
+                    profile.name,           # name: 真实姓名
+                    profile.user_name,      # username: 用户名
+                    user_char,              # user_char: 完整人设（内部LLM使用）
+                    description             # description: 简短简介（外部显示）
                ]
                writer.writerow(row)
        
-        logger.info(f"已保存 {len(profiles)} 个Twitter Profile到 {file_path} (CSV格式)")
+        logger.info(f"已保存 {len(profiles)} 个Twitter Profile到 {file_path} (OASIS CSV格式)")
    
    def _save_reddit_json(self, profiles: List[OasisAgentProfile], file_path: str):
        """