Enforce runtime-only workspace directory creation policy

This commit is contained in:
ي
2026-03-12 15:00:59 +05:30
parent bc49329ed6
commit 3ebe884a37
12 changed files with 241 additions and 105 deletions

View File

@@ -7,6 +7,8 @@ import os
from pathlib import Path
from typing import List, Dict, Any
from services.workspace_dirs import ensure_global_operational_dirs
class EnvironmentSetup:
"""Manages environment setup for ALwrity backend."""
@@ -15,16 +17,11 @@ class EnvironmentSetup:
self.production_mode = production_mode
# Use safer directory paths that don't conflict with deployment platforms
if production_mode:
# In production, use temp directories or skip directory creation
self.required_directories = []
# In production, only create operational directories
self.required_directories = ["logs", "temp"]
else:
# In development, use local directories
self.required_directories = [
"lib/workspace/alwrity_content",
"lib/workspace/alwrity_web_research",
"lib/workspace/alwrity_prompts",
"lib/workspace/alwrity_config"
]
# In development, only create operational directories
self.required_directories = ["logs", "temp"]
def setup_directories(self) -> bool:
"""Create necessary directories for ALwrity."""
@@ -39,15 +36,15 @@ class EnvironmentSetup:
print(" ⚠️ Skipping directory creation in production mode")
return True
for directory in self.required_directories:
try:
Path(directory).mkdir(parents=True, exist_ok=True)
if verbose:
try:
ensure_global_operational_dirs(self.required_directories)
if verbose:
for directory in self.required_directories:
print(f" ✅ Created: {directory}")
except Exception as e:
if verbose:
print(f" ❌ Failed to create {directory}: {e}")
return False
except Exception as e:
if verbose:
print(f" ❌ Failed to create operational directories: {e}")
return False
if verbose:
print("✅ All directories created successfully")

View File

@@ -12,17 +12,13 @@ from api.story_writer.utils.auth import require_authenticated_user
from utils.asset_tracker import save_asset_to_library
from models.story_models import StoryAudioResult
from services.story_writer.audio_generation_service import StoryAudioGenerationService
from pathlib import Path
from utils.logger_utils import get_service_logger
router = APIRouter(tags=["youtube-audio"])
logger = get_service_logger("api.youtube.audio")
# Audio output directory
# api/youtube/handlers/audio.py -> handlers -> youtube -> api -> backend -> root
base_dir = Path(__file__).resolve().parents[4]
YOUTUBE_AUDIO_DIR = base_dir / "workspace" / "media" / "youtube_audio"
YOUTUBE_AUDIO_DIR.mkdir(parents=True, exist_ok=True)
from ..paths import YOUTUBE_AUDIO_DIR, ensure_youtube_media_dirs
# Initialize audio service
audio_service = StoryAudioGenerationService(output_dir=str(YOUTUBE_AUDIO_DIR))
@@ -266,6 +262,7 @@ async def generate_youtube_scene_audio(
Similar to Podcast's audio generation endpoint.
"""
user_id = require_authenticated_user(current_user)
ensure_youtube_media_dirs(user_id)
if not request.text or not request.text.strip():
raise HTTPException(status_code=400, detail="Text is required")

View File

@@ -1,6 +1,5 @@
"""YouTube Creator avatar upload and AI optimization handlers."""
from pathlib import Path
import uuid
from typing import Dict, Any, Optional
@@ -18,12 +17,7 @@ from utils.logger_utils import get_service_logger
router = APIRouter(prefix="/avatar", tags=["youtube-avatar"])
logger = get_service_logger("api.youtube.avatar")
# Directories
# api/youtube/handlers/avatar.py -> handlers -> youtube -> api -> backend -> root
base_dir = Path(__file__).parent.parent.parent.parent.parent
DATA_MEDIA_DIR = base_dir / "data" / "media"
YOUTUBE_AVATARS_DIR = DATA_MEDIA_DIR / "youtube_avatars"
YOUTUBE_AVATARS_DIR.mkdir(parents=True, exist_ok=True)
from ..paths import YOUTUBE_AVATARS_DIR, ensure_youtube_media_dirs
def require_authenticated_user(current_user: Dict[str, Any]) -> str:
@@ -256,6 +250,7 @@ async def upload_youtube_avatar(
):
"""Upload a YouTube creator avatar image."""
user_id = require_authenticated_user(current_user)
ensure_youtube_media_dirs(user_id)
if not file:
raise HTTPException(status_code=400, detail="No file uploaded")
@@ -328,6 +323,7 @@ async def make_avatar_presentable(
Uses AI image editing with enhanced prompts to optimize the uploaded photo.
"""
user_id = require_authenticated_user(current_user)
ensure_youtube_media_dirs(user_id)
try:
avatar_bytes = _load_youtube_image_bytes(avatar_url)
@@ -488,6 +484,7 @@ async def generate_creator_avatar(
the video type, audience, tone, and brand style.
"""
user_id = require_authenticated_user(current_user)
ensure_youtube_media_dirs(user_id)
try:
return await _generate_avatar_from_context(
@@ -518,6 +515,7 @@ async def regenerate_creator_avatar(
to provide variation while maintaining the same optimization based on plan data.
"""
user_id = require_authenticated_user(current_user)
ensure_youtube_media_dirs(user_id)
try:
# Parse video plan to extract context

View File

@@ -1,6 +1,6 @@
from pathlib import Path
"""YouTube Creator scene image generation handlers."""
from pathlib import Path
from typing import Dict, Any, Optional
import uuid
from concurrent.futures import ThreadPoolExecutor
@@ -23,13 +23,7 @@ from ..task_manager import task_manager
router = APIRouter(tags=["youtube-image"])
logger = get_service_logger("api.youtube.image")
# Directories
# api/youtube/handlers/images.py -> handlers -> youtube -> api -> backend -> root
base_dir = Path(__file__).parent.parent.parent.parent.parent
DATA_MEDIA_DIR = base_dir / "data" / "media"
YOUTUBE_IMAGES_DIR = DATA_MEDIA_DIR / "youtube_images"
YOUTUBE_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
YOUTUBE_AVATARS_DIR = DATA_MEDIA_DIR / "youtube_avatars"
from ..paths import YOUTUBE_IMAGES_DIR, YOUTUBE_AVATARS_DIR, ensure_youtube_media_dirs
# Thread pool for background image generation
_image_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="youtube_image")
@@ -102,6 +96,7 @@ async def generate_youtube_scene_image(
"""Generate a YouTube scene image with background task processing."""
logger.info(f"[YouTube] Image generation request received: scene='{request.scene_title}', user={current_user.get('id')}")
user_id = require_authenticated_user(current_user)
ensure_youtube_media_dirs(user_id)
logger.info(f"[YouTube] User authenticated: {user_id}")
if not request.scene_title:
@@ -312,7 +307,6 @@ def _execute_image_generation_task(task_id: str, request_data: dict, user_id: st
image_metadata = _save_scene_image(image_bytes, request.scene_id)
# Verify file was saved correctly
from pathlib import Path
saved_path = Path(image_metadata["image_path"])
if not saved_path.exists() or saved_path.stat().st_size == 0:
raise IOError(f"Image file was not saved correctly: {saved_path}")

View File

@@ -0,0 +1,21 @@
"""Centralized YouTube media paths and runtime directory creation."""
from pathlib import Path
from typing import Iterable, Optional
from services.workspace_dirs import ensure_user_workspace_dirs
# Directory three levels above the directory that contains this file.
# NOTE(review): presumably the repository root — confirm against the package layout.
BASE_DIR = Path(__file__).resolve().parents[3]
# Parent of the per-asset YouTube directories defined below.
DATA_MEDIA_DIR = BASE_DIR / "workspace" / "media"
# One directory per YouTube asset type. These are path constants only; nothing is
# created on disk by defining them.
YOUTUBE_VIDEO_DIR = DATA_MEDIA_DIR / "youtube_videos"
YOUTUBE_AVATARS_DIR = DATA_MEDIA_DIR / "youtube_avatars"
YOUTUBE_IMAGES_DIR = DATA_MEDIA_DIR / "youtube_images"
YOUTUBE_AUDIO_DIR = DATA_MEDIA_DIR / "youtube_audio"
def ensure_youtube_media_dirs(user_id: str, capabilities: Optional[Iterable[str]] = None) -> None:
    """Create the user's workspace directories and the shared YouTube media directories.

    Intended to be called from request handlers so that no directory is
    created as a side effect of importing this module.

    Args:
        user_id: identifier forwarded to ``ensure_user_workspace_dirs``.
        capabilities: capability keys forwarded to ``ensure_user_workspace_dirs``;
            when omitted or empty, ``{"media", "content"}`` is used.
    """
    requested = capabilities if capabilities else {"media", "content"}
    ensure_user_workspace_dirs(user_id, capabilities=requested)

    youtube_dirs = (
        YOUTUBE_VIDEO_DIR,
        YOUTUBE_AVATARS_DIR,
        YOUTUBE_IMAGES_DIR,
        YOUTUBE_AUDIO_DIR,
    )
    for media_dir in youtube_dirs:
        media_dir.mkdir(parents=True, exist_ok=True)

View File

@@ -34,17 +34,12 @@ from .handlers import audio as audio_handlers
router = APIRouter(prefix="/youtube", tags=["youtube"])
logger = get_service_logger("api.youtube")
# Video output and image directories
# api/youtube/router.py -> youtube -> api -> backend -> root
base_dir = Path(__file__).resolve().parents[3]
DATA_MEDIA_DIR = base_dir / "workspace" / "media"
YOUTUBE_VIDEO_DIR = DATA_MEDIA_DIR / "youtube_videos"
YOUTUBE_AVATARS_DIR = DATA_MEDIA_DIR / "youtube_avatars"
YOUTUBE_IMAGES_DIR = DATA_MEDIA_DIR / "youtube_images"
# Ensure directories exist
for directory in [YOUTUBE_VIDEO_DIR, YOUTUBE_AVATARS_DIR, YOUTUBE_IMAGES_DIR]:
directory.mkdir(parents=True, exist_ok=True)
from .paths import (
YOUTUBE_VIDEO_DIR,
YOUTUBE_AVATARS_DIR,
YOUTUBE_IMAGES_DIR,
ensure_youtube_media_dirs,
)
# Include sub-routers for avatar, images, and audio
router.include_router(avatar_handlers.router)

View File

@@ -16,15 +16,20 @@ from loguru import logger
import os
import time
from services.workspace_dirs import ensure_global_operational_dirs
# Logging configuration - Store in root workspace to avoid uvicorn reloads
# backend/middleware/logging_middleware.py -> middleware -> backend -> root
ROOT_DIR = Path(__file__).parent.parent.parent
LOG_BASE_DIR = ROOT_DIR / "workspace" / "logs"
os.makedirs(LOG_BASE_DIR, exist_ok=True)
# Ensure subdirectories exist
for subdir in ["seo_tools", "api_calls", "errors", "performance"]:
os.makedirs(f"{LOG_BASE_DIR}/{subdir}", exist_ok=True)
def ensure_logging_dirs() -> None:
    """Create the log directory tree on demand instead of at import time.

    Ensures the global "logs" operational directory, then ``LOG_BASE_DIR`` and
    its fixed set of per-category sub-directories.
    """
    ensure_global_operational_dirs({"logs"})

    categories = ("seo_tools", "api_calls", "errors", "performance")
    # Base directory first, then each category directory beneath it.
    log_dirs = [LOG_BASE_DIR]
    log_dirs += [LOG_BASE_DIR / category for category in categories]
    for log_dir in log_dirs:
        log_dir.mkdir(parents=True, exist_ok=True)
class PerformanceLogger:
"""Performance monitoring and logging for SEO operations"""
@@ -41,6 +46,7 @@ class PerformanceLogger:
"metadata": metadata or {}
}
ensure_logging_dirs()
await save_to_file(f"{LOG_BASE_DIR}/performance/metrics.jsonl", performance_log)
# Log performance warnings for slow operations
@@ -61,6 +67,7 @@ async def save_to_file(filepath: str, data: Dict[str, Any]) -> None:
"""
try:
# Ensure directory exists
ensure_logging_dirs()
Path(filepath).parent.mkdir(parents=True, exist_ok=True)
# Convert data to JSON string

View File

@@ -17,6 +17,8 @@ import os
import tempfile
import asyncio
from services.workspace_dirs import ensure_global_operational_dirs
# Import services
from services.llm_providers.main_text_generation import llm_text_gen
from services.seo_tools.meta_description_service import MetaDescriptionService
@@ -37,7 +39,13 @@ router = APIRouter(prefix="/api/seo", tags=["AI SEO Tools"])
# Configuration for intelligent logging
LOG_DIR = "logs/seo_tools"
os.makedirs(LOG_DIR, exist_ok=True)
def ensure_seo_logging_dir() -> str:
    """Create the SEO log directory when first needed and return its path.

    Kept as a function so that importing this module performs no filesystem
    writes.

    Returns:
        The value of ``LOG_DIR``.
    """
    ensure_global_operational_dirs({"logs"})
    seo_log_dir = LOG_DIR
    os.makedirs(seo_log_dir, exist_ok=True)
    return seo_log_dir
# Request/Response Models
class BaseResponse(BaseModel):

View File

@@ -14,6 +14,7 @@ from sqlalchemy.orm import Session
from sqlalchemy import text
from services.database import init_user_database
from services.workspace_dirs import ensure_user_workspace_dirs
class UserWorkspaceManager:
"""Manages user-specific workspaces and progressive setup."""
@@ -55,33 +56,11 @@ class UserWorkspaceManager:
"production_mode": True
}
# Create user-specific directories
# Format: workspaces/workspace_{user_id}
user_dir = self.user_workspaces_dir / f"workspace_{safe_user_id}"
user_dir.mkdir(parents=True, exist_ok=True)
# Create subdirectories
subdirs = [
"content",
"content/images",
"content/videos",
"content/audio",
"content/text",
"content/youtube", # Consolidated
"content/story", # Consolidated
"research",
"config",
"cache",
"exports",
"templates",
"database",
"db", # Requested 'db' folder
"media", # Requested 'media' folder
"data" # User specific data folder
]
for subdir in subdirs:
(user_dir / subdir).mkdir(parents=True, exist_ok=True)
# Create user-specific directories lazily via centralized helper
user_dir = ensure_user_workspace_dirs(
user_id,
capabilities={"core", "content", "research", "media", "assets"},
)
# Create user-specific configuration
config = self._create_user_config(user_id)
@@ -273,9 +252,8 @@ class UserWorkspaceManager:
def _setup_ai_services(self, user_id: str):
"""Set up AI services for the user."""
safe_user_id = self._sanitize_user_id(user_id)
# Create user-specific AI service configuration
user_dir = self.user_workspaces_dir / f"workspace_{safe_user_id}"
user_dir = ensure_user_workspace_dirs(user_id, capabilities={"ai_services"})
ai_config = user_dir / "config" / "ai_services.json"
ai_services = {
@@ -284,9 +262,6 @@ class UserWorkspaceManager:
"copilotkit": {"enabled": True, "assistant_type": "content"}
}
# Ensure config directory exists
ai_config.parent.mkdir(parents=True, exist_ok=True)
with open(ai_config, 'w') as f:
json.dump(ai_services, f, indent=2)
@@ -307,9 +282,8 @@ class UserWorkspaceManager:
def _setup_integrations(self, user_id: str):
"""Set up external integrations."""
safe_user_id = self._sanitize_user_id(user_id)
# Create integrations configuration
user_dir = self.user_workspaces_dir / f"workspace_{safe_user_id}"
user_dir = ensure_user_workspace_dirs(user_id, capabilities={"integrations"})
integrations_config = user_dir / "config" / "integrations.json"
integrations = {
@@ -318,28 +292,18 @@ class UserWorkspaceManager:
"wordpress": {"enabled": False, "connected": False}
}
# Ensure config directory exists
integrations_config.parent.mkdir(parents=True, exist_ok=True)
with open(integrations_config, 'w') as f:
json.dump(integrations, f, indent=2)
def _setup_complete_features(self, user_id: str):
"""Set up complete feature set."""
safe_user_id = self._sanitize_user_id(user_id)
# Create comprehensive workspace
user_dir = self.user_workspaces_dir / f"workspace_{safe_user_id}"
user_dir = ensure_user_workspace_dirs(user_id, capabilities={"core", "content", "research", "media", "assets"})
# Create additional directories for complete setup
complete_dirs = [
"ai_models",
"content_templates",
"export_templates",
"backup"
]
complete_dirs = ["ai_models", "content_templates", "export_templates", "backup"]
for dir_name in complete_dirs:
(user_dir / dir_name).mkdir(exist_ok=True)
(user_dir / dir_name).mkdir(parents=True, exist_ok=True)
# Create final configuration
final_config = {

View File

@@ -0,0 +1,80 @@
"""Workspace directory helpers.
Centralizes directory creation so API/service imports stay side-effect free.
"""
from pathlib import Path
from typing import Iterable, Optional, Set
from services.database import WORKSPACE_DIR
# Global (not per-user) operational directories, keyed by the name that
# ensure_global_operational_dirs() accepts. The paths are relative, so they
# resolve against the process's current working directory when created.
GLOBAL_OPERATIONAL_DIRS = {
"logs": Path("logs"),
"temp": Path("temp"),
}
# Maps a capability key to the set of sub-directories (relative to a user's
# workspace directory) that ensure_user_workspace_dirs() creates for it.
# "core" is always included by ensure_user_workspace_dirs(), whatever the caller requests.
USER_CAPABILITY_DIRS = {
"core": {
"config",
"cache",
"exports",
"templates",
"database",
"db",
"data",
},
"content": {
"content",
"content/images",
"content/videos",
"content/audio",
"content/text",
"content/youtube",
"content/story",
},
"research": {"research"},
"media": {"media"},
"assets": {"assets", "assets/avatars", "assets/voice_samples"},
"integrations": {"integrations"},
# Only needs "config", which "core" already provides.
"ai_services": {"config"},
}
def _sanitize_user_id(user_id: str) -> str:
    """Return *user_id* with every character except alphanumerics, "-" and "_" removed.

    The result may be an empty string when no character of *user_id* is kept.
    """

    def _is_allowed(char: str) -> bool:
        return char in ("-", "_") or char.isalnum()

    return "".join(filter(_is_allowed, user_id))
def ensure_global_operational_dirs(dir_names: Optional[Iterable[str]] = None) -> None:
    """Create the selected global operational directories on demand.

    Args:
        dir_names: keys of ``GLOBAL_OPERATIONAL_DIRS`` to create. ``None`` or an
            empty value selects every registered directory. A single name may
            also be given as a plain string. Names that are not registered are
            skipped.
    """
    if isinstance(dir_names, str):
        # A str is itself an iterable of characters: without this guard
        # set("logs") would become {"l", "o", "g", "s"} and nothing would be
        # created, silently. An empty string keeps the "select all" meaning.
        dir_names = [dir_names] if dir_names else None

    targets = set(dir_names or GLOBAL_OPERATIONAL_DIRS)
    for name in targets:
        directory = GLOBAL_OPERATIONAL_DIRS.get(name)
        if directory is not None:
            directory.mkdir(parents=True, exist_ok=True)
def ensure_user_workspace_dirs(user_id: str, capabilities: Optional[Iterable[str]] = None) -> Path:
    """Ensure the user workspace directories required by the given capabilities.

    Args:
        user_id: tenant/user identifier. It is sanitized before being used as
            part of the directory name.
        capabilities: capability keys from ``USER_CAPABILITY_DIRS``. ``"core"``
            is always included. A single key may be given as a plain string.
            Keys that are not registered contribute no directories.

    Returns:
        The user's workspace directory, ``WORKSPACE_DIR/workspace_<sanitized id>``.

    Raises:
        ValueError: if *user_id* contains no usable characters. Without this
            check every such id would resolve to the same ``workspace_``
            directory and unrelated users would share one workspace.
    """
    safe_user_id = _sanitize_user_id(user_id)
    if not safe_user_id:
        raise ValueError(
            "user_id must contain at least one alphanumeric, '-' or '_' character"
        )

    if isinstance(capabilities, str):
        # A bare string would otherwise be split into single characters by set().
        capabilities = [capabilities] if capabilities else None

    requested = set(capabilities or ())
    requested.add("core")

    subdirs: Set[str] = set()
    for capability in requested:
        subdirs.update(USER_CAPABILITY_DIRS.get(capability, ()))

    user_dir = Path(WORKSPACE_DIR) / f"workspace_{safe_user_id}"
    user_dir.mkdir(parents=True, exist_ok=True)
    # Sorted so parents such as "content" are created before "content/images";
    # parents=True makes either order work, sorting just keeps it deterministic.
    for subdir in sorted(subdirs):
        (user_dir / subdir).mkdir(parents=True, exist_ok=True)
    return user_dir

View File

@@ -0,0 +1,72 @@
"""Guardrail: startup-loaded modules must not write filesystem at import time."""
from __future__ import annotations
import ast
from pathlib import Path
# Source files that the guardrail test parses and checks for import-time
# directory creation. Paths are opened as given, i.e. relative to the current
# working directory of the test run.
STARTUP_MODULES = [
"backend/app.py",
"backend/alwrity_utils/router_manager.py",
"backend/routers/seo_tools.py",
"backend/middleware/logging_middleware.py",
"backend/api/youtube/router.py",
"backend/api/youtube/handlers/avatar.py",
"backend/api/youtube/handlers/images.py",
"backend/api/youtube/handlers/audio.py",
"backend/utils/media_utils.py",
]
def _is_forbidden_call(node: ast.Call) -> bool:
    """Return True when *node* is a directory-creating call.

    A call is forbidden when the called name is ``mkdir`` or ``makedirs``,
    either as an attribute (``os.makedirs(...)``, ``path.mkdir(...)``) or as a
    bare name (``makedirs(...)`` after ``from os import makedirs``). The check
    is purely syntactic: the receiver object is not inspected, so aliased
    imports such as ``import os as _os`` are covered too.
    """
    forbidden_names = ("mkdir", "makedirs")
    func = node.func
    if isinstance(func, ast.Attribute):
        return func.attr in forbidden_names
    if isinstance(func, ast.Name):
        return func.id in forbidden_names
    # Anything else (e.g. calling the result of another call) cannot be matched by name.
    return False
def _top_level_forbidden_calls(tree: ast.AST) -> list[tuple[int, str]]:
    """Return forbidden calls that execute while the module is being imported.

    What counts as import time:
      * module-level statements, including the bodies of ``if``/``for``/``try``/
        ``with`` blocks at module level;
      * class bodies, because a class suite runs when the ``class`` statement
        is executed;
      * decorators and default argument values of functions, which are
        evaluated when the ``def`` statement is executed.

    Function bodies are not inspected: they only run when the function is
    called, which is a runtime operation and allowed by this policy.

    Args:
        tree: parsed module; nodes without a ``body`` attribute yield no results.

    Returns:
        ``(line number, unparsed callee)`` pairs, sorted by line number.
    """
    violations: list[tuple[int, str]] = []
    pending = list(getattr(tree, "body", []))
    while pending:
        node = pending.pop()
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            # Skip the deferred body, but keep the parts evaluated at definition time.
            pending.extend(node.decorator_list)
            pending.extend(node.args.defaults)
            # kw_defaults holds None for keyword-only arguments without a default.
            pending.extend(d for d in node.args.kw_defaults if d is not None)
            continue
        if isinstance(node, ast.Call) and _is_forbidden_call(node):
            violations.append((node.lineno, ast.unparse(node.func)))
        # ClassDef nodes are descended into like any other statement; methods
        # inside them are handled by the function branch above.
        pending.extend(ast.iter_child_nodes(node))
    return sorted(violations)
def test_no_import_time_mkdir_calls_in_startup_modules() -> None:
    """Fail when any module in ``STARTUP_MODULES`` creates directories at import time.

    Each listed file is read and parsed; every forbidden call found by
    ``_top_level_forbidden_calls`` is reported as ``path:line -> callee``.
    """
    findings: list[str] = [
        f"{rel_path}:{lineno} -> {callee}"
        for rel_path in STARTUP_MODULES
        for lineno, callee in _top_level_forbidden_calls(
            ast.parse(Path(rel_path).read_text(encoding="utf-8"))
        )
    ]
    assert not findings, "Top-level filesystem writes found:\n" + "\n".join(findings)

View File

@@ -27,9 +27,10 @@ YOUTUBE_IMAGES_DIR = DATA_MEDIA_DIR / "youtube_images"
PODCAST_IMAGES_DIR = DATA_MEDIA_DIR / "podcast_images"
PODCAST_AVATARS_DIR = PODCAST_IMAGES_DIR / "avatars"
# Ensure directories exist
for directory in [YOUTUBE_AVATARS_DIR, YOUTUBE_IMAGES_DIR, PODCAST_IMAGES_DIR, PODCAST_AVATARS_DIR]:
directory.mkdir(parents=True, exist_ok=True)
def ensure_media_dirs() -> None:
    """Create the shared YouTube and podcast media directories on demand.

    Called at runtime so that importing this module does not touch the
    filesystem.
    """
    shared_dirs = (
        YOUTUBE_AVATARS_DIR,
        YOUTUBE_IMAGES_DIR,
        PODCAST_IMAGES_DIR,
        PODCAST_AVATARS_DIR,
    )
    for media_dir in shared_dirs:
        media_dir.mkdir(parents=True, exist_ok=True)
def resolve_media_path(media_url_or_path: str) -> Optional[Path]:
@@ -46,6 +47,8 @@ def resolve_media_path(media_url_or_path: str) -> Optional[Path]:
"""
if not media_url_or_path:
return None
ensure_media_dirs()
try:
# Extract filename from URL/path