Phase 4.4: Context-aware entity filtering in Step 1
- OntologyGenerator.generate() now accepts a template_filter_rules parameter
- When template_id is provided, the API loads filter rules from templates.json
- Filter rules are injected into the ontology system prompt:
  - exclude_self: don't create an entity for the business/brand that uploaded the data
  - exclude_types: don't create specific entity types
  - focus: guide the LLM to focus on specific entity categories
- API endpoint accepts template_id in form data
This commit is contained in:
@@ -154,9 +154,27 @@ def generate_ontology():
|
|||||||
simulation_requirement = request.form.get('simulation_requirement', '')
|
simulation_requirement = request.form.get('simulation_requirement', '')
|
||||||
project_name = request.form.get('project_name', 'Unnamed Project')
|
project_name = request.form.get('project_name', 'Unnamed Project')
|
||||||
additional_context = request.form.get('additional_context', '')
|
additional_context = request.form.get('additional_context', '')
|
||||||
|
template_id = request.form.get('template_id', '')
|
||||||
|
|
||||||
logger.debug(f"项目名称: {project_name}")
|
logger.debug(f"项目名称: {project_name}")
|
||||||
logger.debug(f"模拟需求: {simulation_requirement[:100]}...")
|
logger.debug(f"模拟需求: {simulation_requirement[:100]}...")
|
||||||
|
if template_id:
|
||||||
|
logger.debug(f"Template: {template_id}")
|
||||||
|
|
||||||
|
# Load template filter rules if template_id provided
|
||||||
|
template_filter_rules = None
|
||||||
|
if template_id:
|
||||||
|
import json as _json
|
||||||
|
_templates_path = os.path.join(os.path.dirname(__file__), '..', 'templates.json')
|
||||||
|
try:
|
||||||
|
with open(_templates_path, 'r', encoding='utf-8') as _f:
|
||||||
|
_templates = _json.load(_f)['templates']
|
||||||
|
for _tmpl in _templates:
|
||||||
|
if _tmpl['id'] == template_id:
|
||||||
|
template_filter_rules = _tmpl.get('entity_filter', {})
|
||||||
|
break
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
if not simulation_requirement:
|
if not simulation_requirement:
|
||||||
return jsonify({
|
return jsonify({
|
||||||
@@ -218,7 +236,8 @@ def generate_ontology():
|
|||||||
ontology = generator.generate(
|
ontology = generator.generate(
|
||||||
document_texts=document_texts,
|
document_texts=document_texts,
|
||||||
simulation_requirement=simulation_requirement,
|
simulation_requirement=simulation_requirement,
|
||||||
additional_context=additional_context if additional_context else None
|
additional_context=additional_context if additional_context else None,
|
||||||
|
template_filter_rules=template_filter_rules
|
||||||
)
|
)
|
||||||
|
|
||||||
# 保存本体到项目
|
# 保存本体到项目
|
||||||
|
|||||||
@@ -186,20 +186,22 @@ class OntologyGenerator:
|
|||||||
self,
|
self,
|
||||||
document_texts: List[str],
|
document_texts: List[str],
|
||||||
simulation_requirement: str,
|
simulation_requirement: str,
|
||||||
additional_context: Optional[str] = None
|
additional_context: Optional[str] = None,
|
||||||
|
template_filter_rules: Optional[Dict[str, Any]] = None
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
生成本体定义
|
Generate ontology definition
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
document_texts: 文档文本列表
|
document_texts: Document text list
|
||||||
simulation_requirement: 模拟需求描述
|
simulation_requirement: Simulation requirement description
|
||||||
additional_context: 额外上下文
|
additional_context: Additional context
|
||||||
|
template_filter_rules: Entity filter rules from template (e.g., exclude_types, exclude_self)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
本体定义(entity_types, edge_types等)
|
Ontology definition (entity_types, edge_types, etc.)
|
||||||
"""
|
"""
|
||||||
# 构建用户消息
|
# Build user message
|
||||||
user_message = self._build_user_message(
|
user_message = self._build_user_message(
|
||||||
document_texts,
|
document_texts,
|
||||||
simulation_requirement,
|
simulation_requirement,
|
||||||
@@ -208,6 +210,30 @@ class OntologyGenerator:
|
|||||||
|
|
||||||
lang_instruction = get_language_instruction()
|
lang_instruction = get_language_instruction()
|
||||||
system_prompt = f"{ONTOLOGY_SYSTEM_PROMPT}\n\n{lang_instruction}\nIMPORTANT: Entity type names MUST be in English PascalCase (e.g., 'PersonEntity', 'MediaOrganization'). Relationship type names MUST be in English UPPER_SNAKE_CASE (e.g., 'WORKS_FOR'). Attribute names MUST be in English snake_case. Only description fields and analysis_summary should use the specified language above."
|
system_prompt = f"{ONTOLOGY_SYSTEM_PROMPT}\n\n{lang_instruction}\nIMPORTANT: Entity type names MUST be in English PascalCase (e.g., 'PersonEntity', 'MediaOrganization'). Relationship type names MUST be in English UPPER_SNAKE_CASE (e.g., 'WORKS_FOR'). Attribute names MUST be in English snake_case. Only description fields and analysis_summary should use the specified language above."
|
||||||
|
|
||||||
|
# Add template-aware entity filtering rules
|
||||||
|
if template_filter_rules:
|
||||||
|
exclude_types = template_filter_rules.get('exclude_types', [])
|
||||||
|
exclude_self = template_filter_rules.get('exclude_self', False)
|
||||||
|
focus = template_filter_rules.get('focus', '')
|
||||||
|
|
||||||
|
filter_instruction = "\n\n## Context-Aware Entity Filtering\n"
|
||||||
|
|
||||||
|
if exclude_self:
|
||||||
|
filter_instruction += (
|
||||||
|
"- IMPORTANT: The uploaded data is from a business/brand/advertiser. "
|
||||||
|
"Do NOT create entity types for the business/brand that created this content. "
|
||||||
|
"Only create entities for the TARGET AUDIENCE, competitors, influencers, media, etc.\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
if exclude_types:
|
||||||
|
filter_instruction += f"- Do NOT create entity types matching: {', '.join(exclude_types)}\n"
|
||||||
|
|
||||||
|
if focus:
|
||||||
|
filter_instruction += f"- Focus entity types on: {focus}\n"
|
||||||
|
|
||||||
|
system_prompt += filter_instruction
|
||||||
|
|
||||||
messages = [
|
messages = [
|
||||||
{"role": "system", "content": system_prompt},
|
{"role": "system", "content": system_prompt},
|
||||||
{"role": "user", "content": user_message}
|
{"role": "user", "content": user_message}
|
||||||
|
|||||||
Reference in New Issue
Block a user