diff --git a/backend/alwrity_utils/environment_setup.py b/backend/alwrity_utils/environment_setup.py index 404ffbba..35e2ba0d 100644 --- a/backend/alwrity_utils/environment_setup.py +++ b/backend/alwrity_utils/environment_setup.py @@ -7,6 +7,8 @@ import os from pathlib import Path from typing import List, Dict, Any +from services.workspace_dirs import ensure_global_operational_dirs + class EnvironmentSetup: """Manages environment setup for ALwrity backend.""" @@ -15,16 +17,11 @@ class EnvironmentSetup: self.production_mode = production_mode # Use safer directory paths that don't conflict with deployment platforms if production_mode: - # In production, use temp directories or skip directory creation - self.required_directories = [] + # In production, only create operational directories + self.required_directories = ["logs", "temp"] else: - # In development, use local directories - self.required_directories = [ - "lib/workspace/alwrity_content", - "lib/workspace/alwrity_web_research", - "lib/workspace/alwrity_prompts", - "lib/workspace/alwrity_config" - ] + # In development, only create operational directories + self.required_directories = ["logs", "temp"] def setup_directories(self) -> bool: """Create necessary directories for ALwrity.""" @@ -39,15 +36,15 @@ class EnvironmentSetup: print(" ⚠️ Skipping directory creation in production mode") return True - for directory in self.required_directories: - try: - Path(directory).mkdir(parents=True, exist_ok=True) - if verbose: + try: + ensure_global_operational_dirs(self.required_directories) + if verbose: + for directory in self.required_directories: print(f" ✅ Created: {directory}") - except Exception as e: - if verbose: - print(f" ❌ Failed to create {directory}: {e}") - return False + except Exception as e: + if verbose: + print(f" ❌ Failed to create operational directories: {e}") + return False if verbose: print("✅ All directories created successfully") diff --git a/backend/api/youtube/handlers/audio.py b/backend/api/youtube/handlers/audio.py index 301c0e69..84e8c3ff 100644 --- a/backend/api/youtube/handlers/audio.py +++ b/backend/api/youtube/handlers/audio.py @@ -12,17 +12,13 @@ from api.story_writer.utils.auth import require_authenticated_user from utils.asset_tracker import save_asset_to_library from models.story_models import StoryAudioResult from services.story_writer.audio_generation_service import StoryAudioGenerationService -from pathlib import Path from utils.logger_utils import get_service_logger router = APIRouter(tags=["youtube-audio"]) logger = get_service_logger("api.youtube.audio") # Audio output directory -# api/youtube/handlers/audio.py -> handlers -> youtube -> api -> backend -> root -base_dir = Path(__file__).resolve().parents[4] -YOUTUBE_AUDIO_DIR = base_dir / "workspace" / "media" / "youtube_audio" -YOUTUBE_AUDIO_DIR.mkdir(parents=True, exist_ok=True) +from ..paths import YOUTUBE_AUDIO_DIR, ensure_youtube_media_dirs # Initialize audio service audio_service = StoryAudioGenerationService(output_dir=str(YOUTUBE_AUDIO_DIR)) @@ -266,6 +262,7 @@ async def generate_youtube_scene_audio( Similar to Podcast's audio generation endpoint. """ user_id = require_authenticated_user(current_user) + ensure_youtube_media_dirs(user_id) if not request.text or not request.text.strip(): raise HTTPException(status_code=400, detail="Text is required") diff --git a/backend/api/youtube/handlers/avatar.py b/backend/api/youtube/handlers/avatar.py index 2fe18747..8fe63461 100644 --- a/backend/api/youtube/handlers/avatar.py +++ b/backend/api/youtube/handlers/avatar.py @@ -1,6 +1,5 @@ """YouTube Creator avatar upload and AI optimization handlers.""" -from pathlib import Path import uuid from typing import Dict, Any, Optional @@ -18,12 +17,7 @@ from utils.logger_utils import get_service_logger router = APIRouter(prefix="/avatar", tags=["youtube-avatar"]) logger = get_service_logger("api.youtube.avatar") -# Directories -# api/youtube/handlers/avatar.py -> handlers -> youtube -> api -> backend -> root -base_dir = Path(__file__).parent.parent.parent.parent.parent -DATA_MEDIA_DIR = base_dir / "data" / "media" -YOUTUBE_AVATARS_DIR = DATA_MEDIA_DIR / "youtube_avatars" -YOUTUBE_AVATARS_DIR.mkdir(parents=True, exist_ok=True) +from ..paths import YOUTUBE_AVATARS_DIR, ensure_youtube_media_dirs def require_authenticated_user(current_user: Dict[str, Any]) -> str: @@ -256,6 +250,7 @@ async def upload_youtube_avatar( ): """Upload a YouTube creator avatar image.""" user_id = require_authenticated_user(current_user) + ensure_youtube_media_dirs(user_id) if not file: raise HTTPException(status_code=400, detail="No file uploaded") @@ -328,6 +323,7 @@ async def make_avatar_presentable( Uses AI image editing with enhanced prompts to optimize the uploaded photo. """ user_id = require_authenticated_user(current_user) + ensure_youtube_media_dirs(user_id) try: avatar_bytes = _load_youtube_image_bytes(avatar_url) @@ -488,6 +484,7 @@ async def generate_creator_avatar( the video type, audience, tone, and brand style. """ user_id = require_authenticated_user(current_user) + ensure_youtube_media_dirs(user_id) try: return await _generate_avatar_from_context( @@ -518,6 +515,7 @@ async def regenerate_creator_avatar( to provide variation while maintaining the same optimization based on plan data. """ user_id = require_authenticated_user(current_user) + ensure_youtube_media_dirs(user_id) try: # Parse video plan to extract context diff --git a/backend/api/youtube/handlers/images.py b/backend/api/youtube/handlers/images.py index 8403ab94..f0ac712b 100644 --- a/backend/api/youtube/handlers/images.py +++ b/backend/api/youtube/handlers/images.py @@ -1,6 +1,6 @@ +from pathlib import Path """YouTube Creator scene image generation handlers.""" -from pathlib import Path from typing import Dict, Any, Optional import uuid from concurrent.futures import ThreadPoolExecutor @@ -23,13 +23,7 @@ from ..task_manager import task_manager router = APIRouter(tags=["youtube-image"]) logger = get_service_logger("api.youtube.image") -# Directories -# api/youtube/handlers/images.py -> handlers -> youtube -> api -> backend -> root -base_dir = Path(__file__).parent.parent.parent.parent.parent -DATA_MEDIA_DIR = base_dir / "data" / "media" -YOUTUBE_IMAGES_DIR = DATA_MEDIA_DIR / "youtube_images" -YOUTUBE_IMAGES_DIR.mkdir(parents=True, exist_ok=True) -YOUTUBE_AVATARS_DIR = DATA_MEDIA_DIR / "youtube_avatars" +from ..paths import YOUTUBE_IMAGES_DIR, YOUTUBE_AVATARS_DIR, ensure_youtube_media_dirs # Thread pool for background image generation _image_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="youtube_image") @@ -102,6 +96,7 @@ async def generate_youtube_scene_image( """Generate a YouTube scene image with background task processing.""" logger.info(f"[YouTube] Image generation request received: scene='{request.scene_title}', user={current_user.get('id')}") user_id = require_authenticated_user(current_user) + ensure_youtube_media_dirs(user_id) logger.info(f"[YouTube] User authenticated: {user_id}") if not request.scene_title: @@ -312,7 +307,6 @@ def _execute_image_generation_task(task_id: str, request_data: dict, user_id: st image_metadata = _save_scene_image(image_bytes, request.scene_id) # Verify file was saved correctly - from pathlib import Path saved_path = Path(image_metadata["image_path"]) if not saved_path.exists() or saved_path.stat().st_size == 0: raise IOError(f"Image file was not saved correctly: {saved_path}") diff --git a/backend/api/youtube/paths.py b/backend/api/youtube/paths.py new file mode 100644 index 00000000..6c121975 --- /dev/null +++ b/backend/api/youtube/paths.py @@ -0,0 +1,21 @@ +"""Centralized YouTube media paths and runtime directory creation.""" + +from pathlib import Path +from typing import Iterable, Optional + +from services.workspace_dirs import ensure_user_workspace_dirs + + +BASE_DIR = Path(__file__).resolve().parents[3] +DATA_MEDIA_DIR = BASE_DIR / "workspace" / "media" +YOUTUBE_VIDEO_DIR = DATA_MEDIA_DIR / "youtube_videos" +YOUTUBE_AVATARS_DIR = DATA_MEDIA_DIR / "youtube_avatars" +YOUTUBE_IMAGES_DIR = DATA_MEDIA_DIR / "youtube_images" +YOUTUBE_AUDIO_DIR = DATA_MEDIA_DIR / "youtube_audio" + + +def ensure_youtube_media_dirs(user_id: str, capabilities: Optional[Iterable[str]] = None) -> None: + """Ensure YouTube-related media directories at request/runtime.""" + ensure_user_workspace_dirs(user_id, capabilities=capabilities or {"media", "content"}) + for directory in [YOUTUBE_VIDEO_DIR, YOUTUBE_AVATARS_DIR, YOUTUBE_IMAGES_DIR, YOUTUBE_AUDIO_DIR]: + directory.mkdir(parents=True, exist_ok=True) diff --git a/backend/api/youtube/router.py b/backend/api/youtube/router.py index 28e22a0a..9a6ea9d1 100644 --- a/backend/api/youtube/router.py +++ b/backend/api/youtube/router.py @@ -34,17 +34,12 @@ from .handlers import audio as audio_handlers router = APIRouter(prefix="/youtube", tags=["youtube"]) logger = get_service_logger("api.youtube") -# Video output and image directories -# api/youtube/router.py -> youtube -> api -> backend -> root -base_dir = Path(__file__).resolve().parents[3] -DATA_MEDIA_DIR = base_dir / "workspace" / "media" -YOUTUBE_VIDEO_DIR = DATA_MEDIA_DIR / "youtube_videos" -YOUTUBE_AVATARS_DIR = DATA_MEDIA_DIR / "youtube_avatars" -YOUTUBE_IMAGES_DIR = DATA_MEDIA_DIR / "youtube_images" - -# Ensure directories exist -for directory in [YOUTUBE_VIDEO_DIR, YOUTUBE_AVATARS_DIR, YOUTUBE_IMAGES_DIR]: - directory.mkdir(parents=True, exist_ok=True) +from .paths import ( + YOUTUBE_VIDEO_DIR, + YOUTUBE_AVATARS_DIR, + YOUTUBE_IMAGES_DIR, + ensure_youtube_media_dirs, +) # Include sub-routers for avatar, images, and audio router.include_router(avatar_handlers.router) diff --git a/backend/middleware/logging_middleware.py b/backend/middleware/logging_middleware.py index 1490b41e..baaf6254 100644 --- a/backend/middleware/logging_middleware.py +++ b/backend/middleware/logging_middleware.py @@ -16,15 +16,20 @@ from loguru import logger import os import time +from services.workspace_dirs import ensure_global_operational_dirs + # Logging configuration - Store in root workspace to avoid uvicorn reloads # backend/middleware/logging_middleware.py -> middleware -> backend -> root ROOT_DIR = Path(__file__).parent.parent.parent LOG_BASE_DIR = ROOT_DIR / "workspace" / "logs" -os.makedirs(LOG_BASE_DIR, exist_ok=True) -# Ensure subdirectories exist -for subdir in ["seo_tools", "api_calls", "errors", "performance"]: - os.makedirs(f"{LOG_BASE_DIR}/{subdir}", exist_ok=True) + +def ensure_logging_dirs() -> None: + """Create log directories lazily at runtime.""" + ensure_global_operational_dirs({"logs"}) + LOG_BASE_DIR.mkdir(parents=True, exist_ok=True) + for subdir in ["seo_tools", "api_calls", "errors", "performance"]: + (LOG_BASE_DIR / subdir).mkdir(parents=True, exist_ok=True) class PerformanceLogger: """Performance monitoring and logging for SEO operations""" @@ -41,6 +46,7 @@ class PerformanceLogger: "metadata": metadata or {} } + ensure_logging_dirs() await save_to_file(f"{LOG_BASE_DIR}/performance/metrics.jsonl", performance_log) # Log performance warnings for slow operations @@ -61,6 +67,7 @@ async def save_to_file(filepath: str, data: Dict[str, Any]) -> None: """ try: # Ensure directory exists + ensure_logging_dirs() Path(filepath).parent.mkdir(parents=True, exist_ok=True) # Convert data to JSON string diff --git a/backend/routers/seo_tools.py b/backend/routers/seo_tools.py index 0b7e3181..9c149b74 100644 --- a/backend/routers/seo_tools.py +++ b/backend/routers/seo_tools.py @@ -17,6 +17,8 @@ import os import tempfile import asyncio +from services.workspace_dirs import ensure_global_operational_dirs + # Import services from services.llm_providers.main_text_generation import llm_text_gen from services.seo_tools.meta_description_service import MetaDescriptionService @@ -37,7 +39,13 @@ router = APIRouter(prefix="/api/seo", tags=["AI SEO Tools"]) # Configuration for intelligent logging LOG_DIR = "logs/seo_tools" -os.makedirs(LOG_DIR, exist_ok=True) + + +def ensure_seo_logging_dir() -> str: + """Create SEO log directory at runtime (no import-time writes).""" + ensure_global_operational_dirs({"logs"}) + os.makedirs(LOG_DIR, exist_ok=True) + return LOG_DIR # Request/Response Models class BaseResponse(BaseModel): diff --git a/backend/services/user_workspace_manager.py b/backend/services/user_workspace_manager.py index 6c404db3..09878012 100644 --- a/backend/services/user_workspace_manager.py +++ b/backend/services/user_workspace_manager.py @@ -14,6 +14,7 @@ from sqlalchemy.orm import Session from sqlalchemy import text from services.database import init_user_database +from services.workspace_dirs import ensure_user_workspace_dirs class UserWorkspaceManager: """Manages user-specific workspaces and progressive setup.""" @@ -55,33 +56,11 @@ class UserWorkspaceManager: "production_mode": True } - # Create user-specific directories - # Format: workspaces/workspace_{user_id} - user_dir = self.user_workspaces_dir / f"workspace_{safe_user_id}" - user_dir.mkdir(parents=True, exist_ok=True) - - # Create subdirectories - subdirs = [ - "content", - "content/images", - "content/videos", - "content/audio", - "content/text", - "content/youtube", # Consolidated - "content/story", # Consolidated - "research", - "config", - "cache", - "exports", - "templates", - "database", - "db", # Requested 'db' folder - "media", # Requested 'media' folder - "data" # User specific data folder - ] - - for subdir in subdirs: - (user_dir / subdir).mkdir(parents=True, exist_ok=True) + # Create user-specific directories lazily via centralized helper + user_dir = ensure_user_workspace_dirs( + user_id, + capabilities={"core", "content", "research", "media", "assets"}, + ) # Create user-specific configuration config = self._create_user_config(user_id) @@ -273,9 +252,8 @@ class UserWorkspaceManager: def _setup_ai_services(self, user_id: str): """Set up AI services for the user.""" - safe_user_id = self._sanitize_user_id(user_id) # Create user-specific AI service configuration - user_dir = self.user_workspaces_dir / f"workspace_{safe_user_id}" + user_dir = ensure_user_workspace_dirs(user_id, capabilities={"ai_services"}) ai_config = user_dir / "config" / "ai_services.json" ai_services = { @@ -284,9 +262,6 @@ class UserWorkspaceManager: "copilotkit": {"enabled": True, "assistant_type": "content"} } - # Ensure config directory exists - ai_config.parent.mkdir(parents=True, exist_ok=True) - with open(ai_config, 'w') as f: json.dump(ai_services, f, indent=2) @@ -307,9 +282,8 @@ class UserWorkspaceManager: def _setup_integrations(self, user_id: str): """Set up external integrations.""" - safe_user_id = self._sanitize_user_id(user_id) # Create integrations configuration - user_dir = self.user_workspaces_dir / f"workspace_{safe_user_id}" + user_dir = ensure_user_workspace_dirs(user_id, capabilities={"integrations"}) integrations_config = user_dir / "config" / "integrations.json" integrations = { @@ -318,28 +292,18 @@ class UserWorkspaceManager: "wordpress": {"enabled": False, "connected": False} } - # Ensure config directory exists - integrations_config.parent.mkdir(parents=True, exist_ok=True) - with open(integrations_config, 'w') as f: json.dump(integrations, f, indent=2) def _setup_complete_features(self, user_id: str): """Set up complete feature set.""" - safe_user_id = self._sanitize_user_id(user_id) # Create comprehensive workspace - user_dir = self.user_workspaces_dir / f"workspace_{safe_user_id}" - + user_dir = ensure_user_workspace_dirs(user_id, capabilities={"core", "content", "research", "media", "assets"}) + # Create additional directories for complete setup - complete_dirs = [ - "ai_models", - "content_templates", - "export_templates", - "backup" - ] - + complete_dirs = ["ai_models", "content_templates", "export_templates", "backup"] for dir_name in complete_dirs: - (user_dir / dir_name).mkdir(exist_ok=True) + (user_dir / dir_name).mkdir(parents=True, exist_ok=True) # Create final configuration final_config = { diff --git a/backend/services/workspace_dirs.py b/backend/services/workspace_dirs.py new file mode 100644 index 00000000..182fc8d4 --- /dev/null +++ b/backend/services/workspace_dirs.py @@ -0,0 +1,80 @@ +"""Workspace directory helpers. + +Centralizes directory creation so API/service imports stay side-effect free. +""" + +from pathlib import Path +from typing import Iterable, Optional, Set + +from services.database import WORKSPACE_DIR + + +GLOBAL_OPERATIONAL_DIRS = { + "logs": Path("logs"), + "temp": Path("temp"), +} + + +USER_CAPABILITY_DIRS = { + "core": { + "config", + "cache", + "exports", + "templates", + "database", + "db", + "data", + }, + "content": { + "content", + "content/images", + "content/videos", + "content/audio", + "content/text", + "content/youtube", + "content/story", + }, + "research": {"research"}, + "media": {"media"}, + "assets": {"assets", "assets/avatars", "assets/voice_samples"}, + "integrations": {"integrations"}, + "ai_services": {"config"}, +} + + +def _sanitize_user_id(user_id: str) -> str: + return "".join(c for c in user_id if c.isalnum() or c in ("-", "_")) + + +def ensure_global_operational_dirs(dir_names: Optional[Iterable[str]] = None) -> None: + """Create only operational global directories (logs/temp), on demand.""" + targets = set(dir_names or GLOBAL_OPERATIONAL_DIRS.keys()) + for name in targets: + directory = GLOBAL_OPERATIONAL_DIRS.get(name) + if directory: + directory.mkdir(parents=True, exist_ok=True) + + +def ensure_user_workspace_dirs(user_id: str, capabilities: Optional[Iterable[str]] = None) -> Path: + """Ensure user workspace directories required by capabilities. + + Args: + user_id: tenant/user identifier. + capabilities: iterable of capability keys from USER_CAPABILITY_DIRS. + """ + safe_user_id = _sanitize_user_id(user_id) + user_dir = Path(WORKSPACE_DIR) / f"workspace_{safe_user_id}" + + requested = set(capabilities or {"core"}) + requested.add("core") + + subdirs: Set[str] = set() + for capability in requested: + subdirs.update(USER_CAPABILITY_DIRS.get(capability, set())) + + user_dir.mkdir(parents=True, exist_ok=True) + for subdir in sorted(subdirs): + (user_dir / subdir).mkdir(parents=True, exist_ok=True) + + return user_dir + diff --git a/backend/tests/test_no_import_time_mkdir.py b/backend/tests/test_no_import_time_mkdir.py new file mode 100644 index 00000000..2773e232 --- /dev/null +++ b/backend/tests/test_no_import_time_mkdir.py @@ -0,0 +1,72 @@ +"""Guardrail: startup-loaded modules must not write filesystem at import time.""" + +from __future__ import annotations + +import ast +from pathlib import Path + + +STARTUP_MODULES = [ + "backend/app.py", + "backend/alwrity_utils/router_manager.py", + "backend/routers/seo_tools.py", + "backend/middleware/logging_middleware.py", + "backend/api/youtube/router.py", + "backend/api/youtube/handlers/avatar.py", + "backend/api/youtube/handlers/images.py", + "backend/api/youtube/handlers/audio.py", + "backend/utils/media_utils.py", +] + + +def _is_forbidden_call(node: ast.Call) -> bool: + func = node.func + if isinstance(func, ast.Attribute): + if isinstance(func.value, ast.Name) and func.value.id == "os" and func.attr == "makedirs": + return True + if func.attr == "mkdir": + return True + return False + + +def _top_level_forbidden_calls(tree: ast.AST) -> list[tuple[int, str]]: + """Return forbidden calls that execute during module import. + + We intentionally do not flag calls inside function/class bodies, because those + are runtime operations and safe for this policy. + """ + violations: list[tuple[int, str]] = [] + + for statement in getattr(tree, "body", []): + # Skip function/class definitions: their bodies are not executed at import time. + if isinstance(statement, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)): + continue + + nodes_to_visit = [statement] + while nodes_to_visit: + node = nodes_to_visit.pop() + + # Do not descend into nested function/class definitions. + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)): + continue + + if isinstance(node, ast.Call) and _is_forbidden_call(node): + violations.append((node.lineno, ast.unparse(node.func))) + + nodes_to_visit.extend(ast.iter_child_nodes(node)) + + return violations + + +def test_no_import_time_mkdir_calls_in_startup_modules() -> None: + all_violations: list[str] = [] + + for rel in STARTUP_MODULES: + module_file = Path(rel) + source = module_file.read_text(encoding="utf-8") + tree = ast.parse(source) + violations = _top_level_forbidden_calls(tree) + for line, call in violations: + all_violations.append(f"{rel}:{line} -> {call}") + + assert not all_violations, "Top-level filesystem writes found:\n" + "\n".join(all_violations) diff --git a/backend/utils/media_utils.py b/backend/utils/media_utils.py index 57a07234..d97cf90b 100644 --- a/backend/utils/media_utils.py +++ b/backend/utils/media_utils.py @@ -27,9 +27,10 @@ YOUTUBE_IMAGES_DIR = DATA_MEDIA_DIR / "youtube_images" PODCAST_IMAGES_DIR = DATA_MEDIA_DIR / "podcast_images" PODCAST_AVATARS_DIR = PODCAST_IMAGES_DIR / "avatars" -# Ensure directories exist -for directory in [YOUTUBE_AVATARS_DIR, YOUTUBE_IMAGES_DIR, PODCAST_IMAGES_DIR, PODCAST_AVATARS_DIR]: - directory.mkdir(parents=True, exist_ok=True) +def ensure_media_dirs() -> None: + """Create shared media directories at runtime.""" + for directory in [YOUTUBE_AVATARS_DIR, YOUTUBE_IMAGES_DIR, PODCAST_IMAGES_DIR, PODCAST_AVATARS_DIR]: + directory.mkdir(parents=True, exist_ok=True) def resolve_media_path(media_url_or_path: str) -> Optional[Path]: @@ -46,6 +47,8 @@ def resolve_media_path(media_url_or_path: str) -> Optional[Path]: """ if not media_url_or_path: return None + + ensure_media_dirs() try: # Extract filename from URL/path