From afc7afa2f502dbd36723dfe3f7f70b4488e92325 Mon Sep 17 00:00:00 2001 From: Kunthawat Greethong Date: Mon, 22 Jun 2026 10:57:57 +0700 Subject: [PATCH] fix: 3 fixes for Step 2, Step 4, Step 5 1. Step 5: Use empty model_config_dict={} so camel-ai doesn't spread api_key into create() - AsyncOpenAI reads env vars automatically. Step 3 unaffected (same env vars, just cleaner ModelFactory call). 2. Step 2: Fix Thai text truncation - isalnum() stripped Thai chars. Use re.sub(r'[^\w]', '', username, flags=re.UNICODE) instead. 3. Step 4: Move get_language_instruction() to START of prompt (not end) and strengthen wording with MUST/IMPORTANT prefix. --- .../app/services/oasis_profile_generator.py | 10 +- backend/app/services/report_agent.py | 521 +++++++++--------- backend/scripts/run_parallel_simulation.py | 11 +- backend/scripts/run_reddit_simulation.py | 11 +- backend/scripts/run_twitter_simulation.py | 11 +- locales/languages.json | 8 +- 6 files changed, 279 insertions(+), 293 deletions(-) diff --git a/backend/app/services/oasis_profile_generator.py b/backend/app/services/oasis_profile_generator.py index 3009e10..cac49f7 100644 --- a/backend/app/services/oasis_profile_generator.py +++ b/backend/app/services/oasis_profile_generator.py @@ -274,12 +274,14 @@ class OasisProfileGenerator: ) def _generate_username(self, name: str) -> str: - """生成用户名""" - # 移除特殊字符,转换为小写 + """Generate a username from the entity name""" + # Keep Unicode characters (Thai, Chinese, etc) and convert to lowercase username = name.lower().replace(" ", "_") - username = ''.join(c for c in username if c.isalnum() or c == '_') + # Only remove truly problematic characters for usernames + import re + username = re.sub(r'[^\w]', '', username, flags=re.UNICODE) - # 添加随机后缀避免重复 + # Add random suffix to avoid duplicates suffix = random.randint(100, 999) return f"{username}_{suffix}" diff --git a/backend/app/services/report_agent.py b/backend/app/services/report_agent.py index cecd70b..9819f5c 100644 --- 
a/backend/app/services/report_agent.py +++ b/backend/app/services/report_agent.py @@ -474,387 +474,386 @@ class Report: # ── 工具描述 ── TOOL_DESC_INSIGHT_FORGE = """\ -【深度洞察检索 - 强大的检索工具】 -这是我们强大的检索函数,专为深度分析设计。它会: -1. 自动将你的问题分解为多个子问题 -2. 从多个维度检索模拟图谱中的信息 -3. 整合语义搜索、实体分析、关系链追踪的结果 -4. 返回最全面、最深度的检索内容 +[Deep Insight Retrieval - Powerful Analysis Tool] +This is our powerful retrieval function designed for deep analysis. It will: +1. Automatically decompose your question into multiple sub-questions +2. Retrieve information from multiple dimensions in the simulation graph +3. Integrate results from semantic search, entity analysis, and relationship chain tracking +4. Return the most comprehensive and in-depth retrieval content -【使用场景】 -- 需要深入分析某个话题 -- 需要了解事件的多个方面 -- 需要获取支撑报告章节的丰富素材 +[Use Cases] +- Deep analysis of a specific topic +- Understanding multiple aspects of an event +- Gathering rich material to support report sections -【返回内容】 -- 相关事实原文(可直接引用) -- 核心实体洞察 -- 关系链分析""" +[Returned Content] +- Relevant original facts (can be directly quoted) +- Core entity insights +- Relationship chain analysis""" TOOL_DESC_PANORAMA_SEARCH = """\ -【广度搜索 - 获取全貌视图】 -这个工具用于获取模拟结果的完整全貌,特别适合了解事件演变过程。它会: -1. 获取所有相关节点和关系 -2. 区分当前有效的事实和历史/过期的事实 -3. 帮助你了解舆情是如何演变的 +[Panorama Search - Get the Full Picture] +This tool retrieves the complete picture of simulation results, especially useful for understanding event evolution. It will: +1. Retrieve all related nodes and relationships +2. Distinguish between currently valid facts and historical/expired facts +3. 
Help you understand how public sentiment evolved -【使用场景】 -- 需要了解事件的完整发展脉络 -- 需要对比不同阶段的舆情变化 -- 需要获取全面的实体和关系信息 +[Use Cases] +- Understanding the full development timeline of an event +- Comparing sentiment changes across different stages +- Getting comprehensive entity and relationship information -【返回内容】 -- 当前有效事实(模拟最新结果) -- 历史/过期事实(演变记录) -- 所有涉及的实体""" +[Returned Content] +- Currently valid facts (latest simulation results) +- Historical/expired facts (evolution records) +- All involved entities""" TOOL_DESC_QUICK_SEARCH = """\ -【简单搜索 - 快速检索】 -轻量级的快速检索工具,适合简单、直接的信息查询。 +[Quick Search - Fast Retrieval] +A lightweight fast retrieval tool suitable for simple, direct information queries. -【使用场景】 -- 需要快速查找某个具体信息 -- 需要验证某个事实 -- 简单的信息检索 +[Use Cases] +- Quickly finding specific information +- Verifying a fact +- Simple information retrieval -【返回内容】 -- 与查询最相关的事实列表""" +[Returned Content] +- List of facts most relevant to the query""" TOOL_DESC_INTERVIEW_AGENTS = """\ -【深度采访 - 真实Agent采访(双平台)】 -调用OASIS模拟环境的采访API,对正在运行的模拟Agent进行真实采访! -这不是LLM模拟,而是调用真实的采访接口获取模拟Agent的原始回答。 -默认在Twitter和Reddit两个平台同时采访,获取更全面的观点。 +[Deep Interview - Real Agent Interview (Dual Platform)] +Calls the OASIS simulation environment's interview API to conduct real interviews with running simulation agents! +This is NOT LLM simulation - it calls the real interview API to get original responses from simulation agents. +By default, interviews are conducted simultaneously on both Twitter and Reddit platforms for more comprehensive perspectives. -功能流程: -1. 自动读取人设文件,了解所有模拟Agent -2. 智能选择与采访主题最相关的Agent(如学生、媒体、官方等) -3. 自动生成采访问题 -4. 调用 /api/simulation/interview/batch 接口在双平台进行真实采访 -5. 整合所有采访结果,提供多视角分析 +Workflow: +1. Automatically reads persona files to understand all simulation agents +2. Intelligently selects agents most relevant to the interview topic (e.g., students, media, officials) +3. Automatically generates interview questions +4. 
Calls the /api/simulation/interview/batch endpoint for real interviews on both platforms +5. Consolidates all interview results and provides multi-perspective analysis -【使用场景】 -- 需要从不同角色视角了解事件看法(学生怎么看?媒体怎么看?官方怎么说?) -- 需要收集多方意见和立场 -- 需要获取模拟Agent的真实回答(来自OASIS模拟环境) -- 想让报告更生动,包含"采访实录" +[Use Cases] +- Understanding event perspectives from different role viewpoints (What do students think? What about media? What do officials say?) +- Collecting opinions and positions from multiple parties +- Getting real responses from simulation agents (from OASIS simulation environment) +- Making reports more vivid with "interview transcripts" -【返回内容】 -- 被采访Agent的身份信息 -- 各Agent在Twitter和Reddit两个平台的采访回答 -- 关键引言(可直接引用) -- 采访摘要和观点对比 +[Returned Content] +- Identity information of interviewed agents +- Interview responses from each agent on both Twitter and Reddit platforms +- Key quotes (can be directly quoted) +- Interview summary and perspective comparison -【重要】需要OASIS模拟环境正在运行才能使用此功能!""" +[Important] Requires the OASIS simulation environment to be running!""" # ── 大纲规划 prompt ── PLAN_SYSTEM_PROMPT = """\ -你是一个「未来预测报告」的撰写专家,拥有对模拟世界的「上帝视角」——你可以洞察模拟中每一位Agent的行为、言论和互动。 +You are a "Future Prediction Report" writing expert with a "God's-eye view" of the simulation world — you can observe the behavior, speech, and interactions of every Agent in the simulation. -【核心理念】 -我们构建了一个模拟世界,并向其中注入了特定的「模拟需求」作为变量。模拟世界的演化结果,就是对未来可能发生情况的预测。你正在观察的不是"实验数据",而是"未来的预演"。 +[Core Philosophy] +We have built a simulation world and injected a specific "simulation requirement" as a variable. The evolution of the simulation world represents a prediction of what may happen in the future. What you are observing is not "experimental data" but a "rehearsal of the future." -【你的任务】 -撰写一份「未来预测报告」,回答: -1. 在我们设定的条件下,未来发生了什么? -2. 各类Agent(人群)是如何反应和行动? -3. 这个模拟揭示了哪些值得关注的未来趋势和风险? +[Your Task] +Write a "Future Prediction Report" that answers: +1. Under the conditions we set, what happened in the future? +2. 
How did various groups of Agents (people) react and act? +3. What noteworthy future trends and risks does this simulation reveal? -【报告定位】 -- ✅ 这是一份基于模拟的未来预测报告,揭示"如果这样,未来会怎样" -- ✅ 聚焦于预测结果:事件走向、群体反应、涌现现象、潜在风险 -- ✅ 模拟世界中的Agent言行就是对未来人群行为的预测 -- ❌ 不是对现实世界现状的分析 -- ❌ 不是泛泛而谈的舆情综述 +[Report Positioning] +- ✅ This is a simulation-based future prediction report, revealing "if this, then what happens in the future" +- ✅ Focus on prediction outcomes: event trajectory, group reactions, emergent phenomena, potential risks +- ✅ Agent behavior and speech in the simulation world represent predictions of future crowd behavior +- ❌ NOT an analysis of the current state of the real world +- ❌ NOT a generic public sentiment overview -【章节数量限制】 -- 最少2个章节,最多5个章节 -- 不需要子章节,每个章节直接撰写完整内容 -- 内容要精炼,聚焦于核心预测发现 -- 章节结构由你根据预测结果自主设计 +[Chapter Count Limits] +- Minimum 2 chapters, maximum 5 chapters +- No sub-chapters needed; each chapter should directly write complete content +- Content should be concise, focused on core prediction findings +- Chapter structure should be designed by you based on prediction results -请输出JSON格式的报告大纲,格式如下: +Please output the report outline in JSON format as follows: { - "title": "报告标题", - "summary": "报告摘要(一句话概括核心预测发现)", + "title": "Report Title", + "summary": "Report Summary (one sentence summarizing core prediction findings)", "sections": [ { - "title": "章节标题", - "description": "章节内容描述" + "title": "Section Title", + "description": "Section Content Description" } ] } -注意:sections数组最少2个,最多5个元素!""" +Note: The sections array must have a minimum of 2 and maximum of 5 elements!""" PLAN_USER_PROMPT_TEMPLATE = """\ -【预测场景设定】 -我们向模拟世界注入的变量(模拟需求):{simulation_requirement} +[Prediction Scenario Setup] +The variable (simulation requirement) we injected into the simulation world: {simulation_requirement} -【模拟世界规模】 -- 参与模拟的实体数量: {total_nodes} -- 实体间产生的关系数量: {total_edges} -- 实体类型分布: {entity_types} -- 活跃Agent数量: {total_entities} +[Simulation World Scale] +- Number of entities 
participating in simulation: {total_nodes} +- Number of relationships generated between entities: {total_edges} +- Entity type distribution: {entity_types} +- Number of active Agents: {total_entities} -【模拟预测到的部分未来事实样本】 +[Sample of Predicted Future Facts from the Simulation] {related_facts_json} -请以「上帝视角」审视这个未来预演: -1. 在我们设定的条件下,未来呈现出了什么样的状态? -2. 各类人群(Agent)是如何反应和行动的? -3. 这个模拟揭示了哪些值得关注的未来趋势? +Please examine this future rehearsal from a "God's-eye view": +1. Under the conditions we set, what state has the future taken on? +2. How are various groups of people (Agents) reacting and acting? +3. What noteworthy future trends does this simulation reveal? -根据预测结果,设计最合适的报告章节结构。 +Design the most appropriate report section structure based on the prediction results. -【再次提醒】报告章节数量:最少2个,最多5个,内容要精炼聚焦于核心预测发现。""" +[Reminder] Report chapter count: minimum 2, maximum 5. Content should be concise and focused on core prediction findings.""" # ── 章节生成 prompt ── SECTION_SYSTEM_PROMPT_TEMPLATE = """\ -你是一个「未来预测报告」的撰写专家,正在撰写报告的一个章节。 +You are a "Future Prediction Report" writing expert, currently writing a section of the report. -报告标题: {report_title} -报告摘要: {report_summary} -预测场景(模拟需求): {simulation_requirement} +Report Title: {report_title} +Report Summary: {report_summary} +Prediction Scenario (Simulation Requirement): {simulation_requirement} -当前要撰写的章节: {section_title} +Section currently being written: {section_title} ═══════════════════════════════════════════════════════════════ -【核心理念】 +[Core Philosophy] ═══════════════════════════════════════════════════════════════ -模拟世界是对未来的预演。我们向模拟世界注入了特定条件(模拟需求), -模拟中Agent的行为和互动,就是对未来人群行为的预测。 +The simulation world is a rehearsal of the future. We injected specific conditions (simulation requirement) into the simulation world. The behavior and interactions of Agents in the simulation represent predictions of future crowd behavior. 
-你的任务是: -- 揭示在设定条件下,未来发生了什么 -- 预测各类人群(Agent)是如何反应和行动的 -- 发现值得关注的未来趋势、风险和机会 +Your task is to: +- Reveal what happened in the future under the set conditions +- Predict how various groups of people (Agents) react and act +- Discover noteworthy future trends, risks, and opportunities -❌ 不要写成对现实世界现状的分析 -✅ 要聚焦于"未来会怎样"——模拟结果就是预测的未来 +❌ Do NOT write this as an analysis of the current state of the real world +✅ Focus on "what will happen in the future" — the simulation results ARE the predicted future ═══════════════════════════════════════════════════════════════ -【最重要的规则 - 必须遵守】 +[Most Important Rules - Must Be Followed] ═══════════════════════════════════════════════════════════════ -1. 【必须调用工具观察模拟世界】 - - 你正在以「上帝视角」观察未来的预演 - - 所有内容必须来自模拟世界中发生的事件和Agent言行 - - 禁止使用你自己的知识来编写报告内容 - - 每个章节至少调用3次工具(最多5次)来观察模拟的世界,它代表了未来 +1. [Must Call Tools to Observe the Simulation World] + - You are observing the future rehearsal from a "God's-eye view" + - All content must come from events and Agent behavior occurring in the simulation world + - Do NOT use your own knowledge to write report content + - Each section must call tools at least 3 times (maximum 5) to observe the simulated world, which represents the future -2. 【必须引用Agent的原始言行】 - - Agent的发言和行为是对未来人群行为的预测 - - 在报告中使用引用格式展示这些预测,例如: - > "某类人群会表示:原文内容..." - - 这些引用是模拟预测的核心证据 +2. [Must Quote Agent's Original Words and Actions] + - Agent speech and behavior are predictions of future crowd behavior + - Display these predictions using quote format in the report, for example: + > "A certain group of people would say: original content..." + - These quotes are core evidence for simulation predictions -3. 【语言一致性 - 引用内容必须翻译为报告语言】 - - 工具返回的内容可能包含与报告语言不同的表述 - - 报告必须全部使用与用户指定语言一致的语言撰写 - - 当你引用工具返回的其他语言内容时,必须将其翻译为报告语言后再写入 - - 翻译时保持原意不变,确保表述自然通顺 - - 这一规则同时适用于正文和引用块(> 格式)中的内容 +3. 
[Language Consistency - Quoted Content Must Be Translated to Report Language] + - Content returned by tools may contain expressions in a different language than the report + - The entire report must be written in the language specified by the user + - When quoting content returned by tools in other languages, you must translate it into the report language before including it + - Maintain the original meaning during translation and ensure natural expression + - This rule applies to both body text and quote blocks (> format) -4. 【忠实呈现预测结果】 - - 报告内容必须反映模拟世界中的代表未来的模拟结果 - - 不要添加模拟中不存在的信息 - - 如果某方面信息不足,如实说明 +4. [Faithfully Present Prediction Results] + - Report content must reflect the simulation results representing the future in the simulation world + - Do not add information that does not exist in the simulation + - If information on a certain aspect is insufficient, state so honestly ═══════════════════════════════════════════════════════════════ -【⚠️ 格式规范 - 极其重要!】 +[⚠️ Format Requirements - Extremely Important!] ═══════════════════════════════════════════════════════════════ -【一个章节 = 最小内容单位】 -- 每个章节是报告的最小分块单位 -- ❌ 禁止在章节内使用任何 Markdown 标题(#、##、###、#### 等) -- ❌ 禁止在内容开头添加章节主标题 -- ✅ 章节标题由系统自动添加,你只需撰写纯正文内容 -- ✅ 使用**粗体**、段落分隔、引用、列表来组织内容,但不要用标题 +[One Section = Minimum Content Unit] +- Each section is the minimum chunk unit of the report +- ❌ Do NOT use any Markdown headings (#, ##, ###, #### etc.) within a section +- ❌ Do NOT add the section main title at the beginning of the content +- ✅ Section titles are automatically added by the system; you only need to write plain body content +- ✅ Use **bold text**, paragraph breaks, quotes, and lists to organize content, but do NOT use headings -【正确示例】 +[Correct Example] ``` -本章节分析了事件的舆论传播态势。通过对模拟数据的深入分析,我们发现... +This section analyzes the public sentiment propagation dynamics of the event. Through in-depth analysis of simulation data, we found... 
-**首发引爆阶段** +**Initial Ignition Phase** -微博作为舆情的第一现场,承担了信息首发的核心功能: +Platform A served as the first site of public sentiment, carrying the core function of information release: -> "微博贡献了68%的首发声量..." +> "Platform A contributed 68% of the initial volume..." -**情绪放大阶段** +**Emotion Amplification Phase** -抖音平台进一步放大了事件影响力: +Platform B further amplified the event's impact: -- 视觉冲击力强 -- 情绪共鸣度高 +- Strong visual impact +- High emotional resonance ``` -【错误示例】 +[Incorrect Example] ``` -## 执行摘要 ← 错误!不要添加任何标题 -### 一、首发阶段 ← 错误!不要用###分小节 -#### 1.1 详细分析 ← 错误!不要用####细分 +## Executive Summary ← WRONG! Do not add any headings +### Part 1: Initial Phase ← WRONG! Do not use ### for sub-sections +#### 1.1 Detailed Analysis ← WRONG! Do not use #### for sub-divisions -本章节分析了... +This section analyzes... ``` ═══════════════════════════════════════════════════════════════ -【可用检索工具】(每章节调用3-5次) +[Available Retrieval Tools] (Call 3-5 times per section) ═══════════════════════════════════════════════════════════════ {tools_description} -【工具使用建议 - 请混合使用不同工具,不要只用一种】 -- insight_forge: 深度洞察分析,自动分解问题并多维度检索事实和关系 -- panorama_search: 广角全景搜索,了解事件全貌、时间线和演变过程 -- quick_search: 快速验证某个具体信息点 -- interview_agents: 采访模拟Agent,获取不同角色的第一人称观点和真实反应 +[Tool Usage Tips - Please mix different tools, do not use only one] +- insight_forge: Deep insight analysis, automatically decomposes questions and retrieves facts and relationships from multiple dimensions +- panorama_search: Panoramic search to understand event overview, timeline, and evolution process +- quick_search: Quickly verify a specific piece of information +- interview_agents: Interview simulation agents to get first-person perspectives and real reactions from different roles ═══════════════════════════════════════════════════════════════ -【工作流程】 +[Workflow] ═══════════════════════════════════════════════════════════════ -每次回复你只能做以下两件事之一(不可同时做): +Each response you can only do ONE of the following two things (NOT both): -选项A - 调用工具: -输出你的思考,然后用以下格式调用一个工具: +Option A 
- Call a tool: +Output your thoughts, then call a tool using the following format: -{{"name": "工具名称", "parameters": {{"参数名": "参数值"}}}} +{{"name": "tool_name", "parameters": {{"param_name": "param_value"}}}} -系统会执行工具并把结果返回给你。你不需要也不能自己编写工具返回结果。 +The system will execute the tool and return the result to you. You do not need to and cannot write tool results yourself. -选项B - 输出最终内容: -当你已通过工具获取了足够信息,以 "Final Answer:" 开头输出章节内容。 +Option B - Output final content: +When you have gathered enough information through tools, output the section content starting with "Final Answer:". -⚠️ 严格禁止: -- 禁止在一次回复中同时包含工具调用和 Final Answer -- 禁止自己编造工具返回结果(Observation),所有工具结果由系统注入 -- 每次回复最多调用一个工具 +⚠️ Strictly prohibited: +- Do NOT include both a tool call and Final Answer in the same response +- Do NOT fabricate tool results (Observation); all tool results are injected by the system +- Each response can call at most one tool ═══════════════════════════════════════════════════════════════ -【章节内容要求】 +[Section Content Requirements] ═══════════════════════════════════════════════════════════════ -1. 内容必须基于工具检索到的模拟数据 -2. 大量引用原文来展示模拟效果 -3. 使用Markdown格式(但禁止使用标题): - - 使用 **粗体文字** 标记重点(代替子标题) - - 使用列表(-或1.2.3.)组织要点 - - 使用空行分隔不同段落 - - ❌ 禁止使用 #、##、###、#### 等任何标题语法 -4. 【引用格式规范 - 必须单独成段】 - 引用必须独立成段,前后各有一个空行,不能混在段落中: +1. Content must be based on simulation data retrieved by tools +2. Extensively quote original text to demonstrate simulation results +3. Use Markdown format (but headings are prohibited): + - Use **bold text** to highlight key points (replacing sub-headings) + - Use lists (- or 1.2.3.) to organize key points + - Use blank lines to separate paragraphs + - ❌ Do NOT use #, ##, ###, #### or any heading syntax +4. [Quote Format Requirements - Must Be Standalone Paragraphs] + Quotes must be standalone paragraphs with blank lines before and after, not mixed into regular paragraphs: - ✅ 正确格式: + ✅ Correct format: ``` - 校方的回应被认为缺乏实质内容。 + The institution's response was considered lacking in substance. 
- > "校方的应对模式在瞬息万变的社交媒体环境中显得僵化和迟缓。" + > "The institution's response mode appears rigid and slow in the rapidly changing social media environment." - 这一评价反映了公众的普遍不满。 + This assessment reflects widespread public dissatisfaction. ``` - ❌ 错误格式: + ❌ Incorrect format: ``` - 校方的回应被认为缺乏实质内容。> "校方的应对模式..." 这一评价反映了... + The institution's response was considered lacking in substance.> "The institution's response mode..." This assessment reflects... ``` -5. 保持与其他章节的逻辑连贯性 -6. 【避免重复】仔细阅读下方已完成的章节内容,不要重复描述相同的信息 -7. 【再次强调】不要添加任何标题!用**粗体**代替小节标题""" +5. Maintain logical coherence with other sections +6. [Avoid Repetition] Carefully read the content of completed sections below and do not repeat the same information +7. [Emphasize Again] Do NOT add any headings! Use **bold text** instead of sub-section headings""" SECTION_USER_PROMPT_TEMPLATE = """\ -已完成的章节内容(请仔细阅读,避免重复): +Completed section content (please read carefully to avoid repetition): {previous_content} ═══════════════════════════════════════════════════════════════ -【当前任务】撰写章节: {section_title} +[Current Task] Write Section: {section_title} ═══════════════════════════════════════════════════════════════ -【重要提醒】 -1. 仔细阅读上方已完成的章节,避免重复相同的内容! -2. 开始前必须先调用工具获取模拟数据 -3. 请混合使用不同工具,不要只用一种 -4. 报告内容必须来自检索结果,不要使用自己的知识 +[Important Reminders] +1. Carefully read the completed sections above to avoid repeating the same content! +2. You must call tools to retrieve simulation data before starting +3. Please mix different tools, do not use only one +4. 
Report content must come from retrieval results, do not use your own knowledge -【⚠️ 格式警告 - 必须遵守】 -- ❌ 不要写任何标题(#、##、###、####都不行) -- ❌ 不要写"{section_title}"作为开头 -- ✅ 章节标题由系统自动添加 -- ✅ 直接写正文,用**粗体**代替小节标题 +[⚠️ Format Warning - Must Be Followed] +- ❌ Do not write any headings (#, ##, ###, #### are all prohibited) +- ❌ Do not write "{section_title}" as the beginning +- ✅ Section titles are automatically added by the system +- ✅ Write body text directly, use **bold text** instead of sub-section headings -请开始: -1. 首先思考(Thought)这个章节需要什么信息 -2. 然后调用工具(Action)获取模拟数据 -3. 收集足够信息后输出 Final Answer(纯正文,无任何标题)""" +Please begin: +1. First think (Thought) about what information this section needs +2. Then call a tool (Action) to retrieve simulation data +3. After gathering enough information, output Final Answer (plain body text, no headings)""" # ── ReACT 循环内消息模板 ── REACT_OBSERVATION_TEMPLATE = """\ -Observation(检索结果): +Observation (Retrieval Result): -═══ 工具 {tool_name} 返回 ═══ +═══ Tool {tool_name} Returned ═══ {result} ═══════════════════════════════════════════════════════════════ -已调用工具 {tool_calls_count}/{max_tool_calls} 次(已用: {used_tools_str}){unused_hint} -- 如果信息充分:以 "Final Answer:" 开头输出章节内容(必须引用上述原文) -- 如果需要更多信息:调用一个工具继续检索 +Called tools {tool_calls_count}/{max_tool_calls} times (used: {used_tools_str}){unused_hint} +- If information is sufficient: output section content starting with "Final Answer:" (must quote the above original text) +- If more information is needed: call a tool to continue retrieving ═══════════════════════════════════════════════════════════════""" REACT_INSUFFICIENT_TOOLS_MSG = ( - "【注意】你只调用了{tool_calls_count}次工具,至少需要{min_tool_calls}次。" - "请再调用工具获取更多模拟数据,然后再输出 Final Answer。{unused_hint}" + "[Note] You have only called tools {tool_calls_count} time(s), at least {min_tool_calls} required. " + "Please call tools to get more simulation data before outputting Final Answer. 
{unused_hint}" ) REACT_INSUFFICIENT_TOOLS_MSG_ALT = ( - "当前只调用了 {tool_calls_count} 次工具,至少需要 {min_tool_calls} 次。" - "请调用工具获取模拟数据。{unused_hint}" + "Currently only called tools {tool_calls_count} time(s), at least {min_tool_calls} required. " + "Please call a tool to retrieve simulation data. {unused_hint}" ) REACT_TOOL_LIMIT_MSG = ( - "工具调用次数已达上限({tool_calls_count}/{max_tool_calls}),不能再调用工具。" - '请立即基于已获取的信息,以 "Final Answer:" 开头输出章节内容。' + "Tool call limit reached ({tool_calls_count}/{max_tool_calls}), no more tools can be called. " + 'Please immediately output section content starting with "Final Answer:" based on the information already gathered.' ) -REACT_UNUSED_TOOLS_HINT = "\n💡 你还没有使用过: {unused_list},建议尝试不同工具获取多角度信息" +REACT_UNUSED_TOOLS_HINT = "\n💡 You haven't used: {unused_list}, try different tools for multi-angle information" -REACT_FORCE_FINAL_MSG = "已达到工具调用限制,请直接输出 Final Answer: 并生成章节内容。" +REACT_FORCE_FINAL_MSG = "Tool call limit reached. Please directly output Final Answer: and generate section content." # ── Chat prompt ── CHAT_SYSTEM_PROMPT_TEMPLATE = """\ -你是一个简洁高效的模拟预测助手。 +You are a concise and efficient simulation prediction assistant. -【背景】 -预测条件: {simulation_requirement} +[Background] +Prediction Condition: {simulation_requirement} -【已生成的分析报告】 +[Generated Analysis Report] {report_content} -【规则】 -1. 优先基于上述报告内容回答问题 -2. 直接回答问题,避免冗长的思考论述 -3. 仅在报告内容不足以回答时,才调用工具检索更多数据 -4. 回答要简洁、清晰、有条理 +[Rules] +1. Prioritize answering questions based on the report content above +2. Answer directly, avoid lengthy reasoning +3. Only call tools to retrieve more data when the report content is insufficient to answer +4. 
Answers should be concise, clear, and well-organized -【可用工具】(仅在需要时使用,最多调用1-2次) +[Available Tools] (use only when needed, maximum 1-2 calls) {tools_description} -【工具调用格式】 +[Tool Call Format] -{{"name": "工具名称", "parameters": {{"参数名": "参数值"}}}} +{{"name": "tool_name", "parameters": {{"param_name": "param_value"}}}} -【回答风格】 -- 简洁直接,不要长篇大论 -- 使用 > 格式引用关键内容 -- 优先给出结论,再解释原因""" +[Response Style] +- Concise and direct, avoid lengthy explanations +- Use > format to quote key content +- Lead with conclusions, then explain reasoning""" -CHAT_OBSERVATION_SUFFIX = "\n\n请简洁回答问题。" +CHAT_OBSERVATION_SUFFIX = "\n\nPlease answer the question concisely." # ═══════════════════════════════════════════════════════════════ @@ -923,32 +922,32 @@ class ReportAgent: "name": "insight_forge", "description": TOOL_DESC_INSIGHT_FORGE, "parameters": { - "query": "你想深入分析的问题或话题", - "report_context": "当前报告章节的上下文(可选,有助于生成更精准的子问题)" + "query": "The question or topic you want to analyze in depth", + "report_context": "Current report section context (optional, helps generate more precise sub-questions)" } }, "panorama_search": { "name": "panorama_search", "description": TOOL_DESC_PANORAMA_SEARCH, "parameters": { - "query": "搜索查询,用于相关性排序", - "include_expired": "是否包含过期/历史内容(默认True)" + "query": "Search query, used for relevance ranking", + "include_expired": "Whether to include expired/historical content (default True)" } }, "quick_search": { "name": "quick_search", "description": TOOL_DESC_QUICK_SEARCH, "parameters": { - "query": "搜索查询字符串", - "limit": "返回结果数量(可选,默认10)" + "query": "Search query string", + "limit": "Number of results to return (optional, default 10)" } }, "interview_agents": { "name": "interview_agents", "description": TOOL_DESC_INTERVIEW_AGENTS, "parameters": { - "interview_topic": "采访主题或需求描述(如:'了解学生对宿舍甲醛事件的看法')", - "max_agents": "最多采访的Agent数量(可选,默认5,最大10)" + "interview_topic": "Interview topic or requirement description (e.g., 'understand student views on the dormitory incident')", + 
"max_agents": "Maximum number of agents to interview (optional, default 5, max 10)" } } } @@ -1055,11 +1054,11 @@ class ReportAgent: return json.dumps(result, ensure_ascii=False, indent=2) else: - return f"未知工具: {tool_name}。请使用以下工具之一: insight_forge, panorama_search, quick_search" + return f"Unknown tool: {tool_name}. Please use one of the following tools: insight_forge, panorama_search, quick_search" except Exception as e: logger.error(t('report.toolExecFailed', toolName=tool_name, error=str(e))) - return f"工具执行失败: {str(e)}" + return f"Tool execution failed: {str(e)}" # 合法的工具名称集合,用于裸 JSON 兜底解析时校验 VALID_TOOL_NAMES = {"insight_forge", "panorama_search", "quick_search", "interview_agents"} @@ -1126,12 +1125,12 @@ class ReportAgent: def _get_tools_description(self) -> str: """生成工具描述文本""" - desc_parts = ["可用工具:"] + desc_parts = ["Available tools:"] for name, tool in self.tools.items(): params_desc = ", ".join([f"{k}: {v}" for k, v in tool["parameters"].items()]) desc_parts.append(f"- {name}: {tool['description']}") if params_desc: - desc_parts.append(f" 参数: {params_desc}") + desc_parts.append(f" Parameters: {params_desc}") return "\n".join(desc_parts) def plan_outline( @@ -1163,7 +1162,7 @@ class ReportAgent: if progress_callback: progress_callback("planning", 30, t('progress.generatingOutline')) - system_prompt = f"{PLAN_SYSTEM_PROMPT}\n\n{get_language_instruction()}" + system_prompt = f"{get_language_instruction()}\n\n{PLAN_SYSTEM_PROMPT}" user_prompt = PLAN_USER_PROMPT_TEMPLATE.format( simulation_requirement=self.simulation_requirement, total_nodes=context.get('graph_statistics', {}).get('total_nodes', 0), @@ -1209,12 +1208,12 @@ class ReportAgent: logger.error(t('report.outlinePlanFailed', error=str(e))) # 返回默认大纲(3个章节,作为fallback) return ReportOutline( - title="未来预测报告", - summary="基于模拟预测的未来趋势与风险分析", + title="Future Prediction Report", + summary="Future trends and risk analysis based on simulation predictions", sections=[ - ReportSection(title="预测场景与核心发现"), - 
ReportSection(title="人群行为预测分析"), - ReportSection(title="趋势展望与风险提示") + ReportSection(title="Prediction Scenario and Core Findings"), + ReportSection(title="Crowd Behavior Prediction Analysis"), + ReportSection(title="Trend Outlook and Risk Warnings") ] ) @@ -1259,7 +1258,7 @@ class ReportAgent: section_title=section.title, tools_description=self._get_tools_description(), ) - system_prompt = f"{system_prompt}\n\n{get_language_instruction()}" + system_prompt = f"{get_language_instruction()}\n\n{system_prompt}" # 构建用户prompt - 每个已完成章节各传入最大4000字 if previous_sections: @@ -1270,7 +1269,7 @@ class ReportAgent: previous_parts.append(truncated) previous_content = "\n\n---\n\n".join(previous_parts) else: - previous_content = "(这是第一个章节)" + previous_content = "(This is the first section)" user_prompt = SECTION_USER_PROMPT_TEMPLATE.format( previous_content=previous_content, @@ -1314,7 +1313,7 @@ class ReportAgent: # 如果还有迭代次数,添加消息并重试 if iteration < max_iterations - 1: messages.append({"role": "assistant", "content": "(响应为空)"}) - messages.append({"role": "user", "content": "请继续生成内容。"}) + messages.append({"role": "user", "content": "Please continue generating content."}) continue # 最后一次迭代也返回 None,跳出循环进入强制收尾 break @@ -1339,11 +1338,11 @@ class ReportAgent: messages.append({ "role": "user", "content": ( - "【格式错误】你在一次回复中同时包含了工具调用和 Final Answer,这是不允许的。\n" - "每次回复只能做以下两件事之一:\n" - "- 调用一个工具(输出一个 块,不要写 Final Answer)\n" - "- 输出最终内容(以 'Final Answer:' 开头,不要包含 )\n" - "请重新回复,只做其中一件事。" + "[Format Error] You included both a tool call and Final Answer in the same response, which is not allowed.\n" + "Each response can only do ONE of the following two things:\n" + "- Call a tool (output a block, do not write Final Answer)\n" + "- Output final content (start with 'Final Answer:', do not include )\n" + "Please reply again, doing only one of the two." 
), }) continue @@ -1805,7 +1804,7 @@ class ReportAgent: report_content=report_content if report_content else "(暂无报告)", tools_description=self._get_tools_description(), ) - system_prompt = f"{system_prompt}\n\n{get_language_instruction()}" + system_prompt = f"{get_language_instruction()}\n\n{system_prompt}" # 构建消息 messages = [{"role": "system", "content": system_prompt}] diff --git a/backend/scripts/run_parallel_simulation.py b/backend/scripts/run_parallel_simulation.py index bc352c6..e3774e1 100644 --- a/backend/scripts/run_parallel_simulation.py +++ b/backend/scripts/run_parallel_simulation.py @@ -1032,18 +1032,13 @@ def create_model(config: Dict[str, Any], use_boost: bool = False): print(f"{config_label} model={llm_model}, base_url={llm_base_url[:40] if llm_base_url else '默认'}...") # camel-ai reads OPENAI_API_KEY from env automatically - # Pass api_key and base_url via model_config_dict - # camel-ai extracts these for the OpenAI client constructor - model_config = {} - if llm_api_key: - model_config["api_key"] = llm_api_key - if llm_base_url: - model_config["base_url"] = llm_base_url + # AsyncOpenAI reads OPENAI_API_KEY and OPENAI_BASE_URL from env automatically + # Pass empty model_config_dict so nothing is spread to create() call return ModelFactory.create( model_platform=ModelPlatformType.OPENAI, model_type=llm_model, - model_config_dict=model_config, + model_config_dict={}, ) diff --git a/backend/scripts/run_reddit_simulation.py b/backend/scripts/run_reddit_simulation.py index b2fe977..803bc66 100644 --- a/backend/scripts/run_reddit_simulation.py +++ b/backend/scripts/run_reddit_simulation.py @@ -461,18 +461,13 @@ class RedditSimulationRunner: print(f"LLM配置: model={llm_model}, base_url={llm_base_url[:40] if llm_base_url else '默认'}...") - # Pass api_key and base_url via model_config_dict - # camel-ai extracts these for the OpenAI client constructor - model_config = {} - if llm_api_key: - model_config["api_key"] = llm_api_key - if llm_base_url: - 
model_config["base_url"] = llm_base_url + # AsyncOpenAI reads OPENAI_API_KEY and OPENAI_BASE_URL from env automatically + # Pass empty model_config_dict so nothing is spread to create() call return ModelFactory.create( model_platform=ModelPlatformType.OPENAI, model_type=llm_model, - model_config_dict=model_config, + model_config_dict={}, ) def _get_active_agents_for_round( diff --git a/backend/scripts/run_twitter_simulation.py b/backend/scripts/run_twitter_simulation.py index 71d6368..34d50dd 100644 --- a/backend/scripts/run_twitter_simulation.py +++ b/backend/scripts/run_twitter_simulation.py @@ -454,18 +454,13 @@ class TwitterSimulationRunner: print(f"LLM配置: model={llm_model}, base_url={llm_base_url[:40] if llm_base_url else '默认'}...") - # Pass api_key and base_url via model_config_dict - # camel-ai extracts these for the OpenAI client constructor - model_config = {} - if llm_api_key: - model_config["api_key"] = llm_api_key - if llm_base_url: - model_config["base_url"] = llm_base_url + # AsyncOpenAI reads OPENAI_API_KEY and OPENAI_BASE_URL from env automatically + # Pass empty model_config_dict so nothing is spread to create() call return ModelFactory.create( model_platform=ModelPlatformType.OPENAI, model_type=llm_model, - model_config_dict=model_config, + model_config_dict={}, ) def _get_active_agents_for_round( diff --git a/locales/languages.json b/locales/languages.json index e791ff1..4bd404c 100644 --- a/locales/languages.json +++ b/locales/languages.json @@ -1,11 +1,11 @@ { "zh": { "label": "中文", - "llmInstruction": "请使用中文回答。" + "llmInstruction": "重要:你必须使用中文撰写所有内容。请使用中文回答。" }, "en": { "label": "English", - "llmInstruction": "Please respond in English." + "llmInstruction": "IMPORTANT: You MUST write ALL content exclusively in English. Do NOT use any other language in your response." 
}, "es": { "label": "Español", @@ -29,6 +29,6 @@ }, "th": { "label": "ไทย", - "llmInstruction": "กรุณาตอบเป็นภาษาไทย" + "llmInstruction": "IMPORTANT: You MUST write ALL content exclusively in Thai language (ภาษาไทย). Do NOT use any other language in your response. กรุณาตอบเป็นภาษาไทยเท่านั้น ห้ามใช้ภาษาอื่น" } -} +} \ No newline at end of file