Introduce Project ID for context management, finalizing the stateful API pipeline from file submission to graph construction.
This commit is contained in:
9
backend/app/models/__init__.py
Normal file
9
backend/app/models/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""
|
||||
数据模型模块
|
||||
"""
|
||||
|
||||
from .task import TaskManager, TaskStatus
|
||||
from .project import Project, ProjectStatus, ProjectManager
|
||||
|
||||
__all__ = ['TaskManager', 'TaskStatus', 'Project', 'ProjectStatus', 'ProjectManager']
|
||||
|
||||
305
backend/app/models/project.py
Normal file
305
backend/app/models/project.py
Normal file
@@ -0,0 +1,305 @@
|
||||
"""
|
||||
项目上下文管理
|
||||
用于在服务端持久化项目状态,避免前端在接口间传递大量数据
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import uuid
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, List, Optional
|
||||
from enum import Enum
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from ..config import Config
|
||||
|
||||
|
||||
class ProjectStatus(str, Enum):
    """Lifecycle states of a project.

    The enum also subclasses ``str``, so each member compares equal to its
    plain string value.
    """

    # Just created; files have been uploaded.
    CREATED = "created"
    # The ontology has been generated.
    ONTOLOGY_GENERATED = "ontology_generated"
    # The graph is currently being built.
    GRAPH_BUILDING = "graph_building"
    # Graph construction has finished.
    GRAPH_COMPLETED = "graph_completed"
    # Processing failed.
    FAILED = "failed"
|
||||
|
||||
|
||||
@dataclass
class Project:
    """Project data model holding the state kept on the server for one project.

    Instances are converted to and from plain dictionaries with
    :meth:`to_dict` / :meth:`from_dict` so they can be stored as JSON.
    """

    project_id: str
    name: str
    status: ProjectStatus
    # Timestamps are stored as strings (ProjectManager writes
    # ``datetime.now().isoformat()`` values).
    created_at: str
    updated_at: str

    # File information: one record per uploaded file. Values are not all
    # strings (``size`` is an int in the records that
    # ProjectManager.save_file_to_project returns), hence ``Dict[str, Any]``.
    # NOTE(review): presumably populated from save_file_to_project results
    # (original_filename, saved_filename, path, size) -- confirm with caller.
    files: List[Dict[str, Any]] = field(default_factory=list)
    total_text_length: int = 0

    # Ontology information (filled in after API step 1 has run).
    ontology: Optional[Dict[str, Any]] = None
    analysis_summary: Optional[str] = None

    # Graph information (filled in after API step 2 has completed).
    graph_id: Optional[str] = None
    graph_build_task_id: Optional[str] = None

    # Configuration.
    simulation_requirement: Optional[str] = None
    chunk_size: int = 500
    chunk_overlap: int = 50

    # Error information.
    error: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert the project to a JSON-serializable dictionary.

        Returns:
            A dict with one key per field. ``status`` is emitted as its string
            value when it is a ``ProjectStatus`` and passed through unchanged
            otherwise.
        """
        return {
            "project_id": self.project_id,
            "name": self.name,
            "status": self.status.value if isinstance(self.status, ProjectStatus) else self.status,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
            "files": self.files,
            "total_text_length": self.total_text_length,
            "ontology": self.ontology,
            "analysis_summary": self.analysis_summary,
            "graph_id": self.graph_id,
            "graph_build_task_id": self.graph_build_task_id,
            "simulation_requirement": self.simulation_requirement,
            "chunk_size": self.chunk_size,
            "chunk_overlap": self.chunk_overlap,
            "error": self.error
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'Project':
        """Build a project from a dictionary such as the one ``to_dict`` emits.

        Args:
            data: Mapping of field names to values. Only ``project_id`` is
                required; every other key falls back to a default.

        Returns:
            The reconstructed ``Project``.

        Raises:
            KeyError: If ``project_id`` is missing.
            ValueError: If ``status`` is a string that is not a valid
                ``ProjectStatus`` value.
        """
        status = data.get('status', 'created')
        if isinstance(status, str):
            status = ProjectStatus(status)

        return cls(
            project_id=data['project_id'],
            name=data.get('name', 'Unnamed Project'),
            status=status,
            created_at=data.get('created_at', ''),
            updated_at=data.get('updated_at', ''),
            # Copy the list so the project does not share it with ``data``,
            # and treat a missing or null entry as "no files".
            files=list(data.get('files') or []),
            total_text_length=data.get('total_text_length', 0),
            ontology=data.get('ontology'),
            analysis_summary=data.get('analysis_summary'),
            graph_id=data.get('graph_id'),
            graph_build_task_id=data.get('graph_build_task_id'),
            simulation_requirement=data.get('simulation_requirement'),
            chunk_size=data.get('chunk_size', 500),
            chunk_overlap=data.get('chunk_overlap', 50),
            error=data.get('error')
        )
|
||||
|
||||
|
||||
class ProjectManager:
    """Project manager: stores and retrieves projects on the local filesystem.

    Each project gets its own directory below ``PROJECTS_DIR``::

        <PROJECTS_DIR>/<project_id>/project.json        metadata
        <PROJECTS_DIR>/<project_id>/files/              uploaded files
        <PROJECTS_DIR>/<project_id>/extracted_text.txt  extracted text

    Project IDs become path components, so every path helper rejects IDs
    that could address anything outside ``PROJECTS_DIR``.
    """

    # Root directory for project storage.
    PROJECTS_DIR = os.path.join(Config.UPLOAD_FOLDER, 'projects')

    @staticmethod
    def _is_valid_project_id(project_id: Any) -> bool:
        """Return True if ``project_id`` is safe to use as one directory name.

        Rejects non-strings, the empty string, ``.``/``..``, NUL bytes, drive
        prefixes and anything containing a path separator.
        """
        if not isinstance(project_id, str) or not project_id:
            return False
        if project_id in (os.curdir, os.pardir) or '\x00' in project_id:
            return False
        if os.path.splitdrive(project_id)[0]:
            return False
        # basename() differs from its input exactly when a separator is present.
        return os.path.basename(project_id) == project_id

    @classmethod
    def _ensure_projects_dir(cls):
        """Create the projects root directory if it does not exist yet."""
        os.makedirs(cls.PROJECTS_DIR, exist_ok=True)

    @classmethod
    def _get_project_dir(cls, project_id: str) -> str:
        """Return the directory path of a project.

        Raises:
            ValueError: If ``project_id`` is not a safe single path component.
        """
        if not cls._is_valid_project_id(project_id):
            raise ValueError(f"Invalid project id: {project_id!r}")
        return os.path.join(cls.PROJECTS_DIR, project_id)

    @classmethod
    def _get_project_meta_path(cls, project_id: str) -> str:
        """Return the path of the project's metadata file (``project.json``)."""
        return os.path.join(cls._get_project_dir(project_id), 'project.json')

    @classmethod
    def _get_project_files_dir(cls, project_id: str) -> str:
        """Return the directory that stores the project's uploaded files."""
        return os.path.join(cls._get_project_dir(project_id), 'files')

    @classmethod
    def _get_project_text_path(cls, project_id: str) -> str:
        """Return the path of the project's extracted-text file."""
        return os.path.join(cls._get_project_dir(project_id), 'extracted_text.txt')

    @classmethod
    def create_project(cls, name: str = "Unnamed Project") -> Project:
        """Create a new project, its directory structure and its metadata file.

        Args:
            name: Project name.

        Returns:
            The newly created ``Project`` (status ``CREATED``).
        """
        cls._ensure_projects_dir()

        project_id = f"proj_{uuid.uuid4().hex[:12]}"
        now = datetime.now().isoformat()

        project = Project(
            project_id=project_id,
            name=name,
            status=ProjectStatus.CREATED,
            created_at=now,
            updated_at=now
        )

        # Create the project directory structure; makedirs also creates the
        # parent project directory.
        os.makedirs(cls._get_project_files_dir(project_id), exist_ok=True)

        # Persist the project metadata.
        cls.save_project(project)

        return project

    @classmethod
    def save_project(cls, project: Project) -> None:
        """Persist the project's metadata and refresh ``project.updated_at``.

        The JSON is written to a temporary file in the project directory and
        then moved over ``project.json`` with ``os.replace``, so an interrupted
        write never leaves a truncated metadata file behind.

        Raises:
            ValueError: If ``project.project_id`` is not a valid project ID.
            OSError: If the project directory does not exist or is not writable.
        """
        project.updated_at = datetime.now().isoformat()
        meta_path = cls._get_project_meta_path(project.project_id)
        tmp_path = f"{meta_path}.{uuid.uuid4().hex}.tmp"

        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(project.to_dict(), f, ensure_ascii=False, indent=2)
            os.replace(tmp_path, meta_path)
        finally:
            # Only still present when the write or the replace failed.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    @classmethod
    def get_project(cls, project_id: str) -> Optional[Project]:
        """Load a project from its metadata file.

        Args:
            project_id: Project ID.

        Returns:
            The ``Project``, or None if the ID is invalid or no metadata file
            exists for it.

        Raises:
            ValueError: If the metadata file is not valid JSON or holds an
                unknown status (``json.JSONDecodeError`` is a ``ValueError``).
            KeyError: If the metadata lacks ``project_id``.
        """
        if not cls._is_valid_project_id(project_id):
            return None

        meta_path = cls._get_project_meta_path(project_id)
        try:
            with open(meta_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (FileNotFoundError, NotADirectoryError):
            return None

        return Project.from_dict(data)

    @classmethod
    def list_projects(cls, limit: int = 50) -> List[Project]:
        """List stored projects.

        Args:
            limit: Maximum number of projects to return.

        Returns:
            Projects sorted by ``created_at`` descending, truncated to
            ``limit``. Entries whose metadata cannot be read or parsed are
            skipped.
        """
        cls._ensure_projects_dir()

        projects = []
        for entry in os.listdir(cls.PROJECTS_DIR):
            try:
                project = cls.get_project(entry)
            except (OSError, ValueError, KeyError, TypeError, AttributeError):
                # One unreadable or corrupt project.json must not make the
                # whole listing fail.
                continue
            if project is not None:
                projects.append(project)

        # Newest first.
        projects.sort(key=lambda p: p.created_at, reverse=True)

        return projects[:limit]

    @classmethod
    def delete_project(cls, project_id: str) -> bool:
        """Delete a project and all of its files.

        Args:
            project_id: Project ID.

        Returns:
            True if the project directory was removed; False if the ID is
            invalid or no such directory exists.
        """
        if not cls._is_valid_project_id(project_id):
            return False

        project_dir = cls._get_project_dir(project_id)
        if not os.path.isdir(project_dir):
            return False

        shutil.rmtree(project_dir)
        return True

    @classmethod
    def save_file_to_project(cls, project_id: str, file_storage, original_filename: str) -> Dict[str, Any]:
        """Save an uploaded file into the project's files directory.

        The file is stored under a random name; only the lower-cased extension
        of ``original_filename`` is reused on disk.

        Args:
            project_id: Project ID.
            file_storage: Object exposing ``save(path)`` (Flask's FileStorage).
            original_filename: File name as supplied by the uploader.

        Returns:
            File info dict with keys ``original_filename``, ``saved_filename``,
            ``path`` and ``size`` (bytes, int).

        Raises:
            ValueError: If ``project_id`` is not a valid project ID.
        """
        files_dir = cls._get_project_files_dir(project_id)
        os.makedirs(files_dir, exist_ok=True)

        # Generate a safe file name; a missing name simply yields no extension.
        ext = os.path.splitext(original_filename or '')[1].lower()
        safe_filename = f"{uuid.uuid4().hex[:8]}{ext}"
        file_path = os.path.join(files_dir, safe_filename)

        # Save the file.
        file_storage.save(file_path)

        return {
            "original_filename": original_filename,
            "saved_filename": safe_filename,
            "path": file_path,
            "size": os.path.getsize(file_path)
        }

    @classmethod
    def save_extracted_text(cls, project_id: str, text: str) -> None:
        """Write the extracted text of a project, replacing any previous text.

        Raises:
            ValueError: If ``project_id`` is not a valid project ID.
        """
        text_path = cls._get_project_text_path(project_id)
        with open(text_path, 'w', encoding='utf-8') as f:
            f.write(text)

    @classmethod
    def get_extracted_text(cls, project_id: str) -> Optional[str]:
        """Return the extracted text of a project.

        Returns:
            The stored text, or None if the ID is invalid or no text has been
            saved.
        """
        if not cls._is_valid_project_id(project_id):
            return None

        text_path = cls._get_project_text_path(project_id)
        try:
            with open(text_path, 'r', encoding='utf-8') as f:
                return f.read()
        except (FileNotFoundError, NotADirectoryError):
            return None

    @classmethod
    def get_project_files(cls, project_id: str) -> List[str]:
        """Return the paths of all regular files in the project's files directory.

        Returns:
            Paths sorted by file name; an empty list if the ID is invalid or
            the directory does not exist.
        """
        if not cls._is_valid_project_id(project_id):
            return []

        files_dir = cls._get_project_files_dir(project_id)
        if not os.path.isdir(files_dir):
            return []

        candidates = (os.path.join(files_dir, entry) for entry in sorted(os.listdir(files_dir)))
        return [path for path in candidates if os.path.isfile(path)]
|
||||
|
||||
178
backend/app/models/task.py
Normal file
178
backend/app/models/task.py
Normal file
@@ -0,0 +1,178 @@
|
||||
"""
|
||||
任务状态管理
|
||||
用于跟踪长时间运行的任务(如图谱构建)
|
||||
"""
|
||||
|
||||
import uuid
|
||||
import threading
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Dict, Any, Optional
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
class TaskStatus(str, Enum):
    """Task status enumeration.

    The enum also subclasses ``str``, so each member compares equal to its
    plain string value.
    """

    # Waiting to be processed.
    PENDING = "pending"
    # Currently being processed.
    PROCESSING = "processing"
    # Finished.
    COMPLETED = "completed"
    # Failed.
    FAILED = "failed"
|
||||
|
||||
|
||||
@dataclass
class Task:
    """Record describing one tracked task."""

    task_id: str
    task_type: str
    status: TaskStatus
    created_at: datetime
    updated_at: datetime
    # Progress percentage, 0-100.
    progress: int = 0
    # Status message.
    message: str = ""
    # Task result.
    result: Optional[Dict] = None
    # Error information.
    error: Optional[str] = None
    # Extra metadata.
    metadata: Dict = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the task as a dictionary.

        ``status`` is emitted as its ``.value`` and both timestamps as
        ``isoformat()`` strings; all other fields are passed through as-is.
        """
        created_iso = self.created_at.isoformat()
        updated_iso = self.updated_at.isoformat()
        return dict(
            task_id=self.task_id,
            task_type=self.task_type,
            status=self.status.value,
            created_at=created_iso,
            updated_at=updated_iso,
            progress=self.progress,
            message=self.message,
            result=self.result,
            error=self.error,
            metadata=self.metadata,
        )
|
||||
|
||||
|
||||
class TaskManager:
    """Process-wide singleton that tracks task state in memory.

    Tasks live in a dict keyed by task ID; every access to that dict is
    guarded by ``_task_lock``. ``TaskManager()`` always returns the same
    instance.
    """

    _instance = None
    # Guards creation of the singleton instance.
    _lock = threading.Lock()

    def __new__(cls):
        """Return the singleton instance, creating it on first use."""
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    instance = super().__new__(cls)
                    instance._tasks: Dict[str, Task] = {}
                    instance._task_lock = threading.Lock()
                    # Publish only after the attributes exist: the check above
                    # runs without the lock, so publishing earlier could hand
                    # another thread an instance lacking _tasks/_task_lock.
                    cls._instance = instance
        return cls._instance

    def create_task(self, task_type: str, metadata: Optional[Dict] = None) -> str:
        """Create and register a new task in ``PENDING`` state.

        Args:
            task_type: Task type.
            metadata: Extra metadata; an empty dict is used when omitted.

        Returns:
            The new task's ID (a UUID4 string).
        """
        task_id = str(uuid.uuid4())
        now = datetime.now()

        task = Task(
            task_id=task_id,
            task_type=task_type,
            status=TaskStatus.PENDING,
            created_at=now,
            updated_at=now,
            metadata=metadata or {}
        )

        with self._task_lock:
            self._tasks[task_id] = task

        return task_id

    def get_task(self, task_id: str) -> Optional[Task]:
        """Return the task with the given ID, or None if it is unknown.

        The returned object is the instance stored in the manager, not a copy.
        """
        with self._task_lock:
            return self._tasks.get(task_id)

    def update_task(
        self,
        task_id: str,
        status: Optional[TaskStatus] = None,
        progress: Optional[int] = None,
        message: Optional[str] = None,
        result: Optional[Dict] = None,
        error: Optional[str] = None
    ):
        """Update selected fields of a task and refresh its ``updated_at``.

        Arguments left as None are not changed, so a field cannot be reset
        to None through this method. Unknown task IDs are ignored.

        Args:
            task_id: Task ID.
            status: New status.
            progress: Progress.
            message: Message.
            result: Result.
            error: Error information.
        """
        changes = {
            "status": status,
            "progress": progress,
            "message": message,
            "result": result,
            "error": error,
        }

        with self._task_lock:
            task = self._tasks.get(task_id)
            if task is None:
                return

            task.updated_at = datetime.now()
            for attr, value in changes.items():
                if value is not None:
                    setattr(task, attr, value)

    def complete_task(self, task_id: str, result: Dict):
        """Mark a task as completed with 100% progress and store its result."""
        self.update_task(
            task_id,
            status=TaskStatus.COMPLETED,
            progress=100,
            message="任务完成",
            result=result
        )

    def fail_task(self, task_id: str, error: str):
        """Mark a task as failed and record the error."""
        self.update_task(
            task_id,
            status=TaskStatus.FAILED,
            message="任务失败",
            error=error
        )

    def list_tasks(self, task_type: Optional[str] = None) -> list:
        """List tasks as dictionaries, newest first.

        Args:
            task_type: When given (and non-empty), only tasks of this type
                are returned.

        Returns:
            ``Task.to_dict()`` results sorted by ``created_at`` descending.
        """
        with self._task_lock:
            tasks = list(self._tasks.values())
            if task_type:
                tasks = [t for t in tasks if t.task_type == task_type]
            tasks.sort(key=lambda t: t.created_at, reverse=True)
            # Serialise while holding the lock so a concurrent update_task
            # cannot change a task halfway through its snapshot.
            return [t.to_dict() for t in tasks]

    def cleanup_old_tasks(self, max_age_hours: int = 24):
        """Remove finished tasks created more than ``max_age_hours`` ago.

        Only tasks in ``COMPLETED`` or ``FAILED`` state are removed.
        """
        from datetime import timedelta
        cutoff = datetime.now() - timedelta(hours=max_age_hours)

        with self._task_lock:
            old_ids = [
                tid for tid, task in self._tasks.items()
                if task.created_at < cutoff and task.status in [TaskStatus.COMPLETED, TaskStatus.FAILED]
            ]
            for tid in old_ids:
                del self._tasks[tid]
|
||||
|
||||
Reference in New Issue
Block a user