From c9f76babeb10599769bc6b17ebd7dc780d3215d4 Mon Sep 17 00:00:00 2001 From: Kunthawat Greethong Date: Fri, 26 Jun 2026 11:46:37 +0700 Subject: [PATCH] Phase 4.4: Context-aware entity filtering in Step 1 - OntologyGenerator.generate() now accepts template_filter_rules parameter - When template_id is provided, API loads filter rules from templates.json - Filter rules injected into ontology system prompt: - exclude_self: don't create entity for the business/brand that uploaded data - exclude_types: don't create specific entity types - focus: guide LLM to focus on specific entity categories - API endpoint accepts template_id in form data --- backend/app/api/graph.py | 21 +++++++++++- backend/app/services/ontology_generator.py | 40 ++++++++++++++++++---- 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/backend/app/api/graph.py b/backend/app/api/graph.py index 86e3ebc..3907f97 100644 --- a/backend/app/api/graph.py +++ b/backend/app/api/graph.py @@ -154,9 +154,27 @@ def generate_ontology(): simulation_requirement = request.form.get('simulation_requirement', '') project_name = request.form.get('project_name', 'Unnamed Project') additional_context = request.form.get('additional_context', '') + template_id = request.form.get('template_id', '') logger.debug(f"项目名称: {project_name}") logger.debug(f"模拟需求: {simulation_requirement[:100]}...") + if template_id: + logger.debug(f"Template: {template_id}") + + # Load template filter rules if template_id provided + template_filter_rules = None + if template_id: + import json as _json + _templates_path = os.path.join(os.path.dirname(__file__), '..', 'templates.json') + try: + with open(_templates_path, 'r', encoding='utf-8') as _f: + _templates = _json.load(_f)['templates'] + for _tmpl in _templates: + if _tmpl['id'] == template_id: + template_filter_rules = _tmpl.get('entity_filter', {}) + break + except Exception: + pass if not simulation_requirement: return jsonify({ @@ -218,7 +236,8 @@ def generate_ontology(): ontology = generator.generate( document_texts=document_texts, simulation_requirement=simulation_requirement, - additional_context=additional_context if additional_context else None + additional_context=additional_context if additional_context else None, + template_filter_rules=template_filter_rules ) # 保存本体到项目 diff --git a/backend/app/services/ontology_generator.py b/backend/app/services/ontology_generator.py index ac9f795..7ae82f0 100644 --- a/backend/app/services/ontology_generator.py +++ b/backend/app/services/ontology_generator.py @@ -186,20 +186,22 @@ class OntologyGenerator: self, document_texts: List[str], simulation_requirement: str, - additional_context: Optional[str] = None + additional_context: Optional[str] = None, + template_filter_rules: Optional[Dict[str, Any]] = None ) -> Dict[str, Any]: """ - 生成本体定义 + Generate ontology definition Args: - document_texts: 文档文本列表 - simulation_requirement: 模拟需求描述 - additional_context: 额外上下文 + document_texts: Document text list + simulation_requirement: Simulation requirement description + additional_context: Additional context + template_filter_rules: Entity filter rules from template (e.g., exclude_types, exclude_self) Returns: - 本体定义(entity_types, edge_types等) + Ontology definition (entity_types, edge_types, etc.) """ - # 构建用户消息 + # Build user message user_message = self._build_user_message( document_texts, simulation_requirement, @@ -208,6 +210,30 @@ class OntologyGenerator: lang_instruction = get_language_instruction() system_prompt = f"{ONTOLOGY_SYSTEM_PROMPT}\n\n{lang_instruction}\nIMPORTANT: Entity type names MUST be in English PascalCase (e.g., 'PersonEntity', 'MediaOrganization'). Relationship type names MUST be in English UPPER_SNAKE_CASE (e.g., 'WORKS_FOR'). Attribute names MUST be in English snake_case. Only description fields and analysis_summary should use the specified language above." + + # Add template-aware entity filtering rules + if template_filter_rules: + exclude_types = template_filter_rules.get('exclude_types', []) + exclude_self = template_filter_rules.get('exclude_self', False) + focus = template_filter_rules.get('focus', '') + + filter_instruction = "\n\n## Context-Aware Entity Filtering\n" + + if exclude_self: + filter_instruction += ( + "- IMPORTANT: The uploaded data is from a business/brand/advertiser. " + "Do NOT create entity types for the business/brand that created this content. " + "Only create entities for the TARGET AUDIENCE, competitors, influencers, media, etc.\n" + ) + + if exclude_types: + filter_instruction += f"- Do NOT create entity types matching: {', '.join(exclude_types)}\n" + + if focus: + filter_instruction += f"- Focus entity types on: {focus}\n" + + system_prompt += filter_instruction + messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_message}