Merge_PR_414_standardize_tenant_db_directory

This commit is contained in:
ajaysi
2026-03-12 15:55:00 +05:30
8 changed files with 86 additions and 44 deletions

View File

@@ -86,16 +86,56 @@ def _ensure_daily_workflow_schema(engine, user_id: str) -> None:
except Exception as e:
logger.error(f"Failed daily_workflow_plans schema compatibility check for user {user_id}: {e}")
def _sanitize_user_id(user_id: str) -> str:
"""Sanitize user_id to be safe for filesystem."""
return "".join(c for c in user_id if c.isalnum() or c in ('-', '_'))
def ensure_user_workspace_db_directory(user_id: str) -> str:
"""Ensure modern `db/` directory exists, migrating legacy `database/` when safe."""
safe_user_id = _sanitize_user_id(user_id)
user_workspace = os.path.join(WORKSPACE_DIR, f"workspace_{safe_user_id}")
db_dir = os.path.join(user_workspace, 'db')
legacy_db_dir = os.path.join(user_workspace, 'database')
if os.path.isdir(legacy_db_dir) and not os.path.exists(db_dir):
try:
os.rename(legacy_db_dir, db_dir)
logger.info(f"Migrated legacy database directory to db/: {user_workspace}")
except OSError as rename_error:
logger.warning(
f"Could not rename legacy database directory for {user_workspace}: {rename_error}"
)
os.makedirs(db_dir, exist_ok=True)
for filename in os.listdir(legacy_db_dir):
src = os.path.join(legacy_db_dir, filename)
dst = os.path.join(db_dir, filename)
if os.path.isfile(src) and not os.path.exists(dst):
try:
os.link(src, dst)
except OSError:
# Fall back to copy when hard-linking is not possible.
import shutil
shutil.copy2(src, dst)
else:
os.makedirs(db_dir, exist_ok=True)
return db_dir
def get_user_db_path(user_id: str) -> str:
"""Get the database path for a specific user."""
# Sanitize user_id to be safe for filesystem
safe_user_id = "".join(c for c in user_id if c.isalnum() or c in ('-', '_'))
safe_user_id = _sanitize_user_id(user_id)
user_workspace = os.path.join(WORKSPACE_DIR, f"workspace_{safe_user_id}")
db_dir = ensure_user_workspace_db_directory(user_id)
# Check for legacy naming convention first (to support existing data)
# Some older workspaces might have 'alwrity.db' instead of 'alwrity_{user_id}.db'
legacy_db_path = os.path.join(user_workspace, 'db', 'alwrity.db')
specific_db_path = os.path.join(user_workspace, 'db', f'alwrity_{safe_user_id}.db')
legacy_db_path = os.path.join(db_dir, 'alwrity.db')
specific_db_path = os.path.join(db_dir, f'alwrity_{safe_user_id}.db')
# Backward compatibility when filesystem migration couldn't run yet.
legacy_dir_path = os.path.join(user_workspace, 'database', f'alwrity_{safe_user_id}.db')
legacy_dir_default = os.path.join(user_workspace, 'database', 'alwrity.db')
# If the specific one exists, use it (preferred)
if os.path.exists(specific_db_path):
@@ -104,6 +144,12 @@ def get_user_db_path(user_id: str) -> str:
# If legacy exists and specific doesn't, use legacy
if os.path.exists(legacy_db_path):
return legacy_db_path
if os.path.exists(legacy_dir_path):
return legacy_dir_path
if os.path.exists(legacy_dir_default):
return legacy_dir_default
# Default to specific for new databases
return specific_db_path

View File

@@ -100,7 +100,7 @@ class DeepCrawlService:
# Note: Path logic should be consistent with project structure
# Assuming workspace path is available via env or config, or constructing it.
# Using relative path for now, adjusted to project root.
# The memory says: workspace/workspace_{user_id}/db/alwrity.db
# Database files are stored under workspace/workspace_{user_id}/db/.
# So workspace root is workspace/workspace_{user_id}/
workspace_dir = f"workspace/workspace_{user_id}/crawled_content"
os.makedirs(workspace_dir, exist_ok=True)

View File

@@ -13,7 +13,7 @@ from loguru import logger
from sqlalchemy.orm import Session
from sqlalchemy import text
from services.database import init_user_database
from services.database import init_user_database, ensure_user_workspace_db_directory
class UserWorkspaceManager:
"""Manages user-specific workspaces and progressive setup."""
@@ -36,6 +36,11 @@ class UserWorkspaceManager:
"""Sanitize user_id to be safe for filesystem (matches database.py logic)."""
return "".join(c for c in user_id if c.isalnum() or c in ('-', '_'))
def _ensure_workspace_db_directory(self, user_id: str) -> None:
"""Ensure workspace uses canonical `db/` layout via database service authority."""
ensure_user_workspace_db_directory(user_id)
def create_user_workspace(self, user_id: str) -> Dict[str, Any]:
"""Create a complete user workspace with progressive setup."""
try:
@@ -59,6 +64,9 @@ class UserWorkspaceManager:
# Format: workspaces/workspace_{user_id}
user_dir = self.user_workspaces_dir / f"workspace_{safe_user_id}"
user_dir.mkdir(parents=True, exist_ok=True)
# Ensure canonical DB directory and migrate legacy layout if needed
self._ensure_workspace_db_directory(user_id)
# Create subdirectories
subdirs = [
@@ -74,8 +82,7 @@ class UserWorkspaceManager:
"cache",
"exports",
"templates",
"database",
"db", # Requested 'db' folder
"db", # Official database folder
"media", # Requested 'media' folder
"data" # User specific data folder
]
@@ -358,8 +365,8 @@ class UserWorkspaceManager:
if user_dir.exists():
shutil.rmtree(user_dir)
# Note: We do not drop tables here because each user has their own DB file
# (alwrity.db) inside their workspace. Deleting the workspace folder
# Note: We do not drop tables here because each user has their own DB file
# inside workspace/workspace_{id}/db/. Deleting the workspace folder
# deletes the DB file as well.
logger.info(f"✅ User workspace cleaned up for user {user_id}")