Fix podcast-only mode: skip seo_analyzer imports to prevent bs4/beautifulsoup4 loading

- Conditionally import component_logic_router only when NOT in podcast mode
- Conditionally import seo_tools_router only when NOT in podcast mode
- Both use seo_analyzer, which requires beautifulsoup4
- Also added debug logging to render-build.sh to verify ALWRITY_ENABLED_FEATURES
- Added beautifulsoup4 to requirements-podcast.txt (was missing)
2026-04-06 13:16:32 +05:30
parent 03d43fb54b
commit 0d0d964605
4 changed files with 222 additions and 182 deletions
--- a/backend/app.py
+++ b/backend/app.py
@@ -119,7 +119,9 @@ setup_clean_logging()
 # Import middleware
 from middleware.auth_middleware import get_current_user
-# Import component logic endpoints (needs OnboardingSession, so import after models)
+# Import component logic endpoints (skip in podcast-only mode - uses seo_analyzer)
 component_logic_router = None
 if not PODCAST_ONLY_DEMO_MODE:
    from api.component_logic import router as component_logic_router
 # Import subscription API endpoints
@@ -130,7 +132,9 @@ step3_routes = None
 if not PODCAST_ONLY_DEMO_MODE:
    from api.onboarding_utils.step3_routes import router as step3_routes
-# Import SEO tools router
+# Import SEO tools router (skip in podcast-only mode - uses seo_analyzer)
 seo_tools_router = None
 if not PODCAST_ONLY_DEMO_MODE:
    from routers.seo_tools import router as seo_tools_router
 # Skip Facebook Writer, LinkedIn, and other non-podcast routes in podcast-only mode
@@ -162,8 +166,11 @@ else:
    hallucination_detector_router = None
    writing_assistant_router = None
-# Import research configuration router
+# Import research configuration router (skip in podcast-only mode)
 if not is_podcast_only_demo_mode():
    from api.research_config import router as research_config_router
 else:
    research_config_router = None
 # Import user data endpoints
 # Import content planning endpoints (skip in podcast-only mode)
@@ -174,7 +181,11 @@ else:
    content_planning_router = None
    strategy_copilot_router = None
 # Import user data endpoints (skip in podcast-only mode to save memory)
 if not is_podcast_only_demo_mode():
    from api.user_data import router as user_data_router
 else:
    user_data_router = None
 # Import database service
 from services.database import close_database
@@ -192,7 +203,8 @@ if not is_podcast_only_demo_mode():
 else:
    oauth_token_monitoring_router = None
-# Import SEO Dashboard endpoints
+# Import SEO Dashboard endpoints (skip in podcast-only mode to save memory)
 if not is_podcast_only_demo_mode():
    from api.seo_dashboard import (
        get_seo_dashboard_data,
        get_seo_health_score,
@@ -222,6 +234,34 @@ from api.seo_dashboard import (
        get_sif_indexing_health,
        get_onboarding_task_health,
    )
 else:
    get_seo_dashboard_data = None
    get_seo_health_score = None
    get_seo_metrics = None
    get_platform_status = None
    get_ai_insights = None
    seo_dashboard_health_check = None
    analyze_seo_comprehensive = None
    analyze_seo_full = None
    get_seo_metrics_detailed = None
    get_analysis_summary = None
    batch_analyze_urls = None
    SEOAnalysisRequest = None
    get_seo_dashboard_overview = None
    get_gsc_raw_data = None
    get_bing_raw_data = None
    get_competitive_insights = None
    get_deep_competitor_analysis = None
    run_strategic_insights = None
    get_strategic_insights_history = None
    refresh_analytics_data = None
    analyze_urls_ai = None
    AnalyzeURLsRequest = None
    get_analyzed_pages = None
    get_semantic_health = None
    get_semantic_cache_stats = None
    get_sif_indexing_health = None
    get_onboarding_task_health = None
 # Initialize FastAPI app
@@ -413,7 +453,8 @@ router_group_status["assets_serving"] = {
    "reason": "Required for podcast media assets",
 }
-# SEO Dashboard endpoints
+# SEO Dashboard endpoints (skip in podcast-only mode)
 if not is_podcast_only_demo_mode():
    @app.get("/api/seo-dashboard/data")
    async def seo_dashboard_data():
        """Get complete SEO dashboard data."""
@@ -439,7 +480,6 @@ async def seo_insights():
        """Get AI insights."""
        return await get_ai_insights()
 # New SEO Dashboard endpoints with real data
    @app.get("/api/seo-dashboard/overview")
    async def seo_dashboard_overview_endpoint(current_user: dict = Depends(get_current_user), site_url: str = None):
        """Get comprehensive SEO dashboard overview with real GSC/Bing data."""
@@ -481,7 +521,6 @@ async def refresh_analytics_data_endpoint(current_user: dict = Depends(get_curre
        return await refresh_analytics_data(current_user, site_url)
    @app.get("/api/seo-dashboard/onboarding-task-health")
    async def onboarding_task_health_endpoint(current_user: dict = Depends(get_current_user), site_url: str = None):
        """Get consolidated health for onboarding-scheduled SEO tasks."""
@@ -492,7 +531,6 @@ async def seo_dashboard_health():
        """Health check for SEO dashboard."""
        return await seo_dashboard_health_check()
 # Phase 2B: Semantic health monitoring endpoint (24-hour polling)
    @app.get("/api/seo-dashboard/semantic-health")
    async def semantic_health_endpoint(current_user: dict = Depends(get_current_user)):
        """
--- a/backend/render-build.sh
+++ b/backend/render-build.sh
@@ -9,8 +9,9 @@ python -m pip install --upgrade pip setuptools wheel
 # 2. Install requirements based on mode
 echo "📦 Checking ALWRITY_ENABLED_FEATURES..."
 ENABLED_FEATURES="${ALWRITY_ENABLED_FEATURES:-all}"
 echo "DEBUG: ENABLED_FEATURES='$ENABLED_FEATURES'"
-if [[ "$ENABLED_FEATURES" == *"podcast"* ]]; then
+if [[ "$ENABLED_FEATURES" == "podcast" ]]; then
    echo "🔊 Podcast-only mode: Installing lean requirements..."
    python -m pip install --no-cache-dir -r requirements-podcast.txt --only-binary :all: --retries 10 --timeout 120
 else
--- a/backend/requirements-podcast.txt
+++ b/backend/requirements-podcast.txt
@@ -38,6 +38,7 @@ exa-py==1.9.1
 # Text processing (minimal)
 markdown>=3.5.0
 beautifulsoup4>=4.12.0
 # Data processing (numpy needed for moviepy)
 numpy>=1.24.0
--- a/backend/start_alwrity_backend.py
+++ b/backend/start_alwrity_backend.py
@@ -100,7 +100,7 @@ def bootstrap_linguistic_models() -> BootstrapResult:
    verbose = os.getenv("ALWRITY_VERBOSE", "false").lower() == "true"
    if verbose:
-        print("🔍 Bootstrapping linguistic models...")
+        print("[DEBUG] Bootstrapping linguistic models...")
    # Check and download spaCy model
    try:
@@ -108,7 +108,7 @@ def bootstrap_linguistic_models() -> BootstrapResult:
        try:
            nlp = spacy.load("en_core_web_sm")
            if verbose:
-                print("   ✅ spaCy model 'en_core_web_sm' available")
+                print("   [OK] spaCy model 'en_core_web_sm' available")
        except OSError:
            if verbose:
                print("   ⚠️  spaCy model 'en_core_web_sm' not found, downloading...")
@@ -117,10 +117,10 @@ def bootstrap_linguistic_models() -> BootstrapResult:
                    sys.executable, "-m", "spacy", "download", "en_core_web_sm"
                ])
                if verbose:
-                    print("   ✅ spaCy model downloaded successfully")
+                    print("   [OK] spaCy model downloaded successfully")
            except subprocess.CalledProcessError as e:
                if verbose:
-                    print(f"   ❌ Failed to download spaCy model: {e}")
+                    print(f"   [FAIL] Failed to download spaCy model: {e}")
                    print("   Please run: python -m spacy download en_core_web_sm")
                return BootstrapResult(name="linguistic_models", success=False, skipped=False, reason="spacy_download_failed")
    except ImportError:
@@ -140,14 +140,14 @@ def bootstrap_linguistic_models() -> BootstrapResult:
            try:
                nltk.data.find(path)
                if verbose:
-                    print(f"   ✅ NLTK {data_package} available")
+                    print(f"   [OK] NLTK {data_package} available")
            except LookupError:
                if verbose:
                    print(f"   ⚠️  NLTK {data_package} not found, downloading...")
                try:
                    nltk.download(data_package, quiet=True)
                    if verbose:
-                        print(f"   ✅ NLTK {data_package} downloaded")
+                        print(f"   [OK] NLTK {data_package} downloaded")
                except Exception as e:
                    if verbose:
                        print(f"   ⚠️  Failed to download {data_package}: {e}")
@@ -155,7 +155,7 @@ def bootstrap_linguistic_models() -> BootstrapResult:
                        try:
                            nltk.download('punkt', quiet=True)
                            if verbose:
-                                print(f"   ✅ NLTK punkt (fallback) downloaded")
+                                print(f"   [OK] NLTK punkt (fallback) downloaded")
                        except:
                            pass
    except ImportError:
@@ -163,7 +163,7 @@ def bootstrap_linguistic_models() -> BootstrapResult:
            print("   ⚠️  NLTK not installed - skipping")
    if verbose:
-        print("✅ Linguistic model bootstrap complete")
+        print("[OK] Linguistic model bootstrap complete")
    return BootstrapResult(name="linguistic_models", success=True, skipped=False)
@@ -207,7 +207,7 @@ def bootstrap_local_llm_models() -> BootstrapResult:
            # This checks cache and downloads if missing
            snapshot_download(repo_id=target_model, repo_type="model")
            if verbose:
-                print(f"   ✅ Local LLM '{target_model}' available")
+                print(f"   [OK] Local LLM '{target_model}' available")
        except Exception as e:
            if verbose:
                print(f"   ⚠️  Failed to download/check local LLM: {e}")
@@ -244,7 +244,7 @@ if __name__ == "__main__":
    features_str = ",".join(sorted(enabled_features))
    os.environ["ALWRITY_ENABLED_FEATURES"] = features_str
-    print(f"\n📋 Enabled features: {features_str}")
+    print(f"\n[OK] Enabled features: {features_str}")
    if should_bootstrap_linguistic_models():
        result = bootstrap_linguistic_models()
@@ -252,7 +252,7 @@ if __name__ == "__main__":
    else:
        verbose = os.getenv("ALWRITY_VERBOSE", "false").lower() == "true"
        if verbose:
-            print("⏭️  Skipping linguistic model bootstrap (profile-gated)")
+            print("[SKIP]  Skipping linguistic model bootstrap (profile-gated)")
        BOOTSTRAP_RESULTS.append(BootstrapResult(name="linguistic_models", success=True, skipped=True, reason="profile_gated"))
    if should_bootstrap_local_llm_models():
@@ -261,7 +261,7 @@ if __name__ == "__main__":
    else:
        verbose = os.getenv("ALWRITY_VERBOSE", "false").lower() == "true"
        if verbose:
-            print("⏭️  Skipping local LLM model bootstrap (feature-gated)")
+            print("[SKIP]  Skipping local LLM model bootstrap (feature-gated)")
        BOOTSTRAP_RESULTS.append(BootstrapResult(name="local_llm_models", success=True, skipped=True, reason="feature_gated"))
    summary = {
@@ -270,9 +270,9 @@ if __name__ == "__main__":
    }
    os.environ["ALWRITY_BOOTSTRAP_SUMMARY"] = json.dumps(summary)
-    print(f"\n📋 Bootstrap Summary:")
+    print(f"\n[INFO] Bootstrap Summary:")
    for r in BOOTSTRAP_RESULTS:
-        status = "⏭️  Skipped" if r.skipped else ("✅ Enabled" if r.success else "❌ Failed")
+        status = "[SKIP]  Skipped" if r.skipped else ("[OK] Enabled" if r.success else "[FAIL] Failed")
        print(f"   {r.name}: {status}" + (f" ({r.reason})" if r.reason else ""))
 # NOW import modular utilities (after bootstrap)
@@ -286,12 +286,12 @@ from alwrity_utils import (
 def start_backend(enable_reload=False, production_mode=False):
    """Start the backend server."""
-    print("🚀 Starting ALwrity Backend...")
+    print("==> Starting ALwrity Backend...")
    podcast_only_demo_mode = os.getenv("ALWRITY_PODCAST_ONLY_DEMO_MODE", os.getenv("PODCAST_ONLY_DEMO_MODE", "false")).lower() in {"1", "true", "yes", "on"}
    if podcast_only_demo_mode:
        print("\n" + "=" * 60)
-        print("🎙️  PODCAST-ONLY DEMO MODE ACTIVE")
+        print("==> PODCAST-ONLY DEMO MODE ACTIVE")
        print("   Non-podcast router groups are intentionally skipped.")
        print("=" * 60)
@@ -315,10 +315,10 @@ def start_backend(enable_reload=False, production_mode=False):
    # Set reload based on argument or environment variable
    if enable_reload and not production_mode:
        os.environ.setdefault("RELOAD", "true")
-        print("   🔄 Development mode: Auto-reload enabled")
+        print("   [DEV] Development mode: Auto-reload enabled")
    else:
        os.environ.setdefault("RELOAD", "false")
-        print("   🏭 Production mode: Auto-reload disabled")
+        print("   [PROD] Production mode: Auto-reload disabled")
    host = os.getenv("HOST", "0.0.0.0")
    port = int(os.getenv("PORT", "8000"))
@@ -326,9 +326,9 @@ def start_backend(enable_reload=False, production_mode=False):
    print(f"[DEBUG] Bind prepared - host={host}, port={port}, reload={reload}", flush=True)
    print(f"[DEBUG] ENV check - ALWRITY_ENABLED_FEATURES={os.getenv('ALWRITY_ENABLED_FEATURES')}", flush=True)
-    print(f"   📍 Host: {host}", flush=True)
+    print(f"   ==> Host: {host}", flush=True)
-    print(f"   🔌 Port: {port}", flush=True)
+    print(f"   ==> Port: {port}", flush=True)
-    print(f"   🔄 Reload: {reload}", flush=True)
+    print(f"   [DEV] Reload: {reload}", flush=True)
    print(f"[DEBUG] About to import app module...", flush=True)
    print("[DEBUG] >>> START APP IMPORT <<<", flush=True)
@@ -342,7 +342,7 @@ def start_backend(enable_reload=False, production_mode=False):
        # Note: Database already initialized by DatabaseSetup in main()
-        print("\n🌐 ALwrity Backend Server", flush=True)
+        print("\n[WORLD] ALwrity Backend Server", flush=True)
        print("=" * 50, flush=True)
        print(f"   📖 API Documentation: http://localhost:{os.getenv('PORT', '8000')}/api/docs", flush=True)
        print(f"   🔍 Health Check: http://localhost:{os.getenv('PORT', '8000')}/health", flush=True)
@@ -501,12 +501,12 @@ def main():
        "Starting server"
    ]
-    print("🔧 Initializing ALwrity...")
+    print("==> Initializing ALwrity...")
    # Apply production optimizations if needed
    if production_mode:
        if not production_optimizer.apply_production_optimizations():
-            print("❌ Production optimization failed")
+            print("[FAIL] Production optimization failed")
            return False
    # Step 1: Dependencies
@@ -515,11 +515,11 @@ def main():
    if not critical_ok:
        print("installing...", end=" ", flush=True)
        if not dependency_manager.install_requirements():
-            print("❌ Failed")
+            print("[FAIL] Failed")
            return False
-        print("✅ Done")
+        print("[OK] Done")
    else:
-        print("✅ Done")
+        print("[OK] Done")
    # Check optional dependencies (non-critical) - only in verbose mode
    if verbose_mode:
@@ -528,24 +528,24 @@ def main():
    # Step 2: Environment
    print(f"   🔧 {setup_steps[1]}...", end=" ", flush=True)
    if not environment_setup.setup_directories():
-        print("❌ Directory setup failed")
+        print("[FAIL] Directory setup failed")
        return False
    if not environment_setup.setup_environment_variables():
-        print("❌ Environment setup failed")
+        print("[FAIL] Environment setup failed")
        return False
    # Create .env file only in development
    if not production_mode:
        environment_setup.create_env_file()
-    print("✅ Done")
+    print("[OK] Done")
    # Step 3: Database
    print(f"   📊 {setup_steps[2]}...", end=" ", flush=True)
    if not database_setup.setup_essential_tables():
        print("⚠️  Issues detected, continuing...")
    else:
-        print("✅ Done")
+        print("[OK] Done")
    # Setup advanced features in development, verify in all modes
    if not production_mode: