Research component integration, CopilotKit implementation, SEO CopilotKit implementation, Wix SEO metadata complete, Wix SEO metadata review

2025-11-03 16:01:44 +05:30
parent de4328175d
commit e69107b07c
94 changed files with 9748 additions and 1565 deletions
--- a/backend/services/blog_writer/outline/outline_generator.py
+++ b/backend/services/blog_writer/outline/outline_generator.py
@@ -42,10 +42,20 @@ class OutlineGenerator:
        self.response_processor = ResponseProcessor()
        self.parallel_processor = ParallelProcessor(self.source_mapper, self.grounding_engine)
    
-    async def generate(self, request: BlogOutlineRequest) -> BlogOutlineResponse:
+    async def generate(self, request: BlogOutlineRequest, user_id: str) -> BlogOutlineResponse:
        """
-        Generate AI-powered outline using research results
+        Generate AI-powered outline using research results.
+        
+        Args:
+            request: Outline generation request with research data
+            user_id: User ID (required for subscription checks and usage tracking)
+            
+        Raises:
+            ValueError: If user_id is not provided
        """
+        if not user_id:
+            raise ValueError("user_id is required for outline generation (subscription checks and usage tracking)")
+        
        # Extract research insights
        research = request.research
        primary_keywords = research.keyword_analysis.get('primary', [])
@@ -68,15 +78,15 @@ class OutlineGenerator:
        # Define schema with proper property ordering (critical for Gemini API)
        outline_schema = self.prompt_builder.get_outline_schema()
        
-        # Generate outline using structured JSON response with retry logic
-        outline_data = await self.response_processor.generate_with_retry(outline_prompt, outline_schema)
+        # Generate outline using structured JSON response with retry logic (user_id required)
+        outline_data = await self.response_processor.generate_with_retry(outline_prompt, outline_schema, user_id)
        
        # Convert to BlogOutlineSection objects
        outline_sections = self.response_processor.convert_to_sections(outline_data, sources)
        
-        # Run parallel processing for speed optimization
+        # Run parallel processing for speed optimization (user_id required)
        mapped_sections, grounding_insights = await self.parallel_processor.run_parallel_processing_async(
-            outline_sections, research
+            outline_sections, research, user_id
        )
        
        # Enhance sections with grounding insights
@@ -85,9 +95,9 @@ class OutlineGenerator:
            mapped_sections, research.grounding_metadata, grounding_insights
        )
        
-        # Optimize outline for better flow, SEO, and engagement
+        # Optimize outline for better flow, SEO, and engagement (user_id required)
        logger.info("Optimizing outline for better flow and engagement...")
-        optimized_sections = await self.outline_optimizer.optimize(grounding_enhanced_sections, "comprehensive optimization")
+        optimized_sections = await self.outline_optimizer.optimize(grounding_enhanced_sections, "comprehensive optimization", user_id)
        
        # Rebalance word counts for optimal distribution
        target_words = request.word_count or 1500
@@ -118,10 +128,21 @@ class OutlineGenerator:
            research_coverage=research_coverage
        )
    
-    async def generate_with_progress(self, request: BlogOutlineRequest, task_id: str) -> BlogOutlineResponse:
+    async def generate_with_progress(self, request: BlogOutlineRequest, task_id: str, user_id: str) -> BlogOutlineResponse:
        """
        Outline generation method with progress updates for real-time feedback.
+        
+        Args:
+            request: Outline generation request with research data
+            task_id: Task ID for progress updates
+            user_id: User ID (required for subscription checks and usage tracking)
+            
+        Raises:
+            ValueError: If user_id is not provided
        """
+        if not user_id:
+            raise ValueError("user_id is required for outline generation (subscription checks and usage tracking)")
+        
        from api.blog_writer.task_manager import task_manager
        
        # Extract research insights
@@ -150,17 +171,17 @@ class OutlineGenerator:
        
        await task_manager.update_progress(task_id, "🔄 Making AI request to generate structured outline...")
        
-        # Generate outline using structured JSON response with retry logic
-        outline_data = await self.response_processor.generate_with_retry(outline_prompt, outline_schema, task_id)
+        # Generate outline using structured JSON response with retry logic (user_id required for subscription checks)
+        outline_data = await self.response_processor.generate_with_retry(outline_prompt, outline_schema, user_id, task_id)
        
        await task_manager.update_progress(task_id, "📝 Processing outline structure and validating sections...")
        
        # Convert to BlogOutlineSection objects
        outline_sections = self.response_processor.convert_to_sections(outline_data, sources)
        
-        # Run parallel processing for speed optimization
+        # Run parallel processing for speed optimization (user_id required for subscription checks)
        mapped_sections, grounding_insights = await self.parallel_processor.run_parallel_processing(
-            outline_sections, research, task_id
+            outline_sections, research, user_id, task_id
        )
        
        # Enhance sections with grounding insights (depends on both previous tasks)
@@ -169,9 +190,9 @@ class OutlineGenerator:
            mapped_sections, research.grounding_metadata, grounding_insights
        )
        
-        # Optimize outline for better flow, SEO, and engagement
+        # Optimize outline for better flow, SEO, and engagement (user_id required for subscription checks)
        await task_manager.update_progress(task_id, "🎯 Optimizing outline for better flow and engagement...")
-        optimized_sections = await self.outline_optimizer.optimize(grounding_enhanced_sections, "comprehensive optimization")
+        optimized_sections = await self.outline_optimizer.optimize(grounding_enhanced_sections, "comprehensive optimization", user_id)
        
        # Rebalance word counts for optimal distribution
        await task_manager.update_progress(task_id, "⚖️ Rebalancing word count distribution...")
--- a/backend/services/blog_writer/outline/outline_optimizer.py
+++ b/backend/services/blog_writer/outline/outline_optimizer.py
@@ -13,8 +13,23 @@ from models.blog_models import BlogOutlineSection
 class OutlineOptimizer:
    """Optimizes outlines for better flow, SEO, and engagement."""
    
-    async def optimize(self, outline: List[BlogOutlineSection], focus: str = "general optimization") -> List[BlogOutlineSection]:
-        """Optimize entire outline for better flow, SEO, and engagement."""
+    async def optimize(self, outline: List[BlogOutlineSection], focus: str, user_id: str) -> List[BlogOutlineSection]:
+        """Optimize entire outline for better flow, SEO, and engagement.
+        
+        Args:
+            outline: List of outline sections to optimize
+            focus: Optimization focus (e.g., "general optimization")
+            user_id: User ID (required for subscription checks and usage tracking)
+            
+        Returns:
+            List of optimized outline sections
+            
+        Raises:
+            ValueError: If user_id is not provided
+        """
+        if not user_id:
+            raise ValueError("user_id is required for outline optimization (subscription checks and usage tracking)")
+        
        outline_text = "\n".join([f"{i+1}. {s.heading}" for i, s in enumerate(outline)])
        
        optimization_prompt = f"""Optimize this blog outline for better flow, engagement, and SEO:
@@ -67,7 +82,8 @@ Return JSON format:
            optimized_data = llm_text_gen(
                prompt=optimization_prompt,
                json_struct=optimization_schema,
-                system_prompt=None
+                system_prompt=None,
+                user_id=user_id
            )
            
            # Handle the new schema format with "outline" wrapper
--- a/backend/services/blog_writer/outline/outline_service.py
+++ b/backend/services/blog_writer/outline/outline_service.py
@@ -29,11 +29,21 @@ class OutlineService:
        self.outline_optimizer = OutlineOptimizer()
        self.section_enhancer = SectionEnhancer()
    
-    async def generate_outline(self, request: BlogOutlineRequest) -> BlogOutlineResponse:
+    async def generate_outline(self, request: BlogOutlineRequest, user_id: str) -> BlogOutlineResponse:
        """
-        Stage 2: Content Planning with AI-generated outline using research results
-        Uses Gemini with research data to create comprehensive, SEO-optimized outline
+        Stage 2: Content Planning with AI-generated outline using research results.
+        Uses Gemini with research data to create comprehensive, SEO-optimized outline.
+        
+        Args:
+            request: Outline generation request with research data
+            user_id: User ID (required for subscription checks and usage tracking)
+            
+        Raises:
+            ValueError: If user_id is not provided
        """
+        if not user_id:
+            raise ValueError("user_id is required for outline generation (subscription checks and usage tracking)")
+        
        # Extract cache parameters - use original user keywords for consistent caching
        keywords = request.research.original_keywords or request.research.keyword_analysis.get('primary', [])
        industry = getattr(request.persona, 'industry', 'general') if request.persona else 'general'
@@ -56,9 +66,9 @@ class OutlineService:
            logger.info(f"Using cached outline for keywords: {keywords}")
            return BlogOutlineResponse(**cached_result)
        
-        # Generate new outline if not cached
+        # Generate new outline if not cached (user_id required)
        logger.info(f"Generating new outline for keywords: {keywords}")
-        result = await self.outline_generator.generate(request)
+        result = await self.outline_generator.generate(request, user_id)
        
        # Cache the result
        persistent_outline_cache.cache_outline(
@@ -73,7 +83,7 @@ class OutlineService:
        
        return result
    
-    async def generate_outline_with_progress(self, request: BlogOutlineRequest, task_id: str) -> BlogOutlineResponse:
+    async def generate_outline_with_progress(self, request: BlogOutlineRequest, task_id: str, user_id: str) -> BlogOutlineResponse:
        """
        Outline generation method with progress updates for real-time feedback.
        """
@@ -104,7 +114,7 @@ class OutlineService:
        
        # Generate new outline if not cached
        logger.info(f"Generating new outline for keywords: {keywords} (with progress updates)")
-        result = await self.outline_generator.generate_with_progress(request, task_id)
+        result = await self.outline_generator.generate_with_progress(request, task_id, user_id)
        
        # Cache the result
        persistent_outline_cache.cache_outline(
--- a/backend/services/blog_writer/outline/parallel_processor.py
+++ b/backend/services/blog_writer/outline/parallel_processor.py
@@ -17,18 +17,25 @@ class ParallelProcessor:
        self.source_mapper = source_mapper
        self.grounding_engine = grounding_engine
    
-    async def run_parallel_processing(self, outline_sections, research, task_id: str = None) -> Tuple[Any, Any]:
+    async def run_parallel_processing(self, outline_sections, research, user_id: str, task_id: str = None) -> Tuple[Any, Any]:
        """
        Run source mapping and grounding insights extraction in parallel.
        
        Args:
            outline_sections: List of outline sections to process
            research: Research data object
+            user_id: User ID (required for subscription checks and usage tracking)
            task_id: Optional task ID for progress updates
            
        Returns:
            Tuple of (mapped_sections, grounding_insights)
+            
+        Raises:
+            ValueError: If user_id is not provided
        """
+        if not user_id:
+            raise ValueError("user_id is required for parallel processing (subscription checks and usage tracking)")
+        
        if task_id:
            from api.blog_writer.task_manager import task_manager
            await task_manager.update_progress(task_id, "⚡ Running parallel processing for maximum speed...")
@@ -37,7 +44,7 @@ class ParallelProcessor:
        
        # Run these tasks in parallel to save time
        source_mapping_task = asyncio.create_task(
-            self._run_source_mapping(outline_sections, research, task_id)
+            self._run_source_mapping(outline_sections, research, task_id, user_id)
        )
        
        grounding_insights_task = asyncio.create_task(
@@ -52,22 +59,29 @@ class ParallelProcessor:
        
        return mapped_sections, grounding_insights
    
-    async def run_parallel_processing_async(self, outline_sections, research) -> Tuple[Any, Any]:
+    async def run_parallel_processing_async(self, outline_sections, research, user_id: str) -> Tuple[Any, Any]:
        """
        Run parallel processing without progress updates (for non-progress methods).
        
        Args:
            outline_sections: List of outline sections to process
            research: Research data object
+            user_id: User ID (required for subscription checks and usage tracking)
            
        Returns:
            Tuple of (mapped_sections, grounding_insights)
+            
+        Raises:
+            ValueError: If user_id is not provided
        """
+        if not user_id:
+            raise ValueError("user_id is required for parallel processing (subscription checks and usage tracking)")
+        
        logger.info("Running parallel processing for maximum speed...")
        
        # Run these tasks in parallel to save time
        source_mapping_task = asyncio.create_task(
-            self._run_source_mapping_async(outline_sections, research)
+            self._run_source_mapping_async(outline_sections, research, user_id)
        )
        
        grounding_insights_task = asyncio.create_task(
@@ -82,12 +96,12 @@ class ParallelProcessor:
        
        return mapped_sections, grounding_insights
    
-    async def _run_source_mapping(self, outline_sections, research, task_id):
+    async def _run_source_mapping(self, outline_sections, research, task_id, user_id: str):
        """Run source mapping in parallel."""
        if task_id:
            from api.blog_writer.task_manager import task_manager
            await task_manager.update_progress(task_id, "🔗 Applying intelligent source-to-section mapping...")
-        return self.source_mapper.map_sources_to_sections(outline_sections, research)
+        return self.source_mapper.map_sources_to_sections(outline_sections, research, user_id)
    
    async def _run_grounding_insights_extraction(self, research, task_id):
        """Run grounding insights extraction in parallel."""
@@ -96,10 +110,10 @@ class ParallelProcessor:
            await task_manager.update_progress(task_id, "🧠 Extracting grounding metadata insights...")
        return self.grounding_engine.extract_contextual_insights(research.grounding_metadata)
    
-    async def _run_source_mapping_async(self, outline_sections, research):
+    async def _run_source_mapping_async(self, outline_sections, research, user_id: str):
        """Run source mapping in parallel (async version without progress updates)."""
        logger.info("Applying intelligent source-to-section mapping...")
-        return self.source_mapper.map_sources_to_sections(outline_sections, research)
+        return self.source_mapper.map_sources_to_sections(outline_sections, research, user_id)
    
    async def _run_grounding_insights_extraction_async(self, research):
        """Run grounding insights extraction in parallel (async version without progress updates)."""
--- a/backend/services/blog_writer/outline/response_processor.py
+++ b/backend/services/blog_writer/outline/response_processor.py
@@ -18,8 +18,21 @@ class ResponseProcessor:
        """Initialize the response processor."""
        pass
    
-    async def generate_with_retry(self, prompt: str, schema: Dict[str, Any], task_id: str = None) -> Dict[str, Any]:
-        """Generate outline with retry logic for API failures."""
+    async def generate_with_retry(self, prompt: str, schema: Dict[str, Any], user_id: str, task_id: str = None) -> Dict[str, Any]:
+        """Generate outline with retry logic for API failures.
+        
+        Args:
+            prompt: The prompt for outline generation
+            schema: JSON schema for structured response
+            user_id: User ID (required for subscription checks and usage tracking)
+            task_id: Optional task ID for progress updates
+            
+        Raises:
+            ValueError: If user_id is not provided
+        """
+        if not user_id:
+            raise ValueError("user_id is required for outline generation (subscription checks and usage tracking)")
+        
        from services.llm_providers.main_text_generation import llm_text_gen
        from api.blog_writer.task_manager import task_manager
        
@@ -34,7 +47,8 @@ class ResponseProcessor:
                outline_data = llm_text_gen(
                    prompt=prompt,
                    json_struct=schema,
-                    system_prompt=None
+                    system_prompt=None,
+                    user_id=user_id
                )
                
                # Log response for debugging
--- a/backend/services/blog_writer/outline/section_enhancer.py
+++ b/backend/services/blog_writer/outline/section_enhancer.py
@@ -12,8 +12,23 @@ from models.blog_models import BlogOutlineSection
 class SectionEnhancer:
    """Enhances individual outline sections using AI."""
    
-    async def enhance(self, section: BlogOutlineSection, focus: str = "general improvement") -> BlogOutlineSection:
-        """Enhance a section using AI with research context."""
+    async def enhance(self, section: BlogOutlineSection, focus: str, user_id: str) -> BlogOutlineSection:
+        """Enhance a section using AI with research context.
+        
+        Args:
+            section: Outline section to enhance
+            focus: Enhancement focus (e.g., "general improvement")
+            user_id: User ID (required for subscription checks and usage tracking)
+            
+        Returns:
+            Enhanced outline section
+            
+        Raises:
+            ValueError: If user_id is not provided
+        """
+        if not user_id:
+            raise ValueError("user_id is required for section enhancement (subscription checks and usage tracking)")
+        
        enhancement_prompt = f"""
        Enhance the following blog section to make it more engaging, comprehensive, and valuable:
        
@@ -61,7 +76,8 @@ class SectionEnhancer:
            enhanced_data = llm_text_gen(
                prompt=enhancement_prompt,
                json_struct=enhancement_schema,
-                system_prompt=None
+                system_prompt=None,
+                user_id=user_id
            )
            
            if isinstance(enhanced_data, dict) and 'error' not in enhanced_data:
--- a/backend/services/blog_writer/outline/source_mapper.py
+++ b/backend/services/blog_writer/outline/source_mapper.py
@@ -52,7 +52,8 @@ class SourceToSectionMapper:
    def map_sources_to_sections(
        self, 
        sections: List[BlogOutlineSection], 
-        research_data: BlogResearchResponse
+        research_data: BlogResearchResponse,
+        user_id: str
    ) -> List[BlogOutlineSection]:
        """
        Map research sources to outline sections using intelligent algorithms.
@@ -60,10 +61,17 @@ class SourceToSectionMapper:
        Args:
            sections: List of outline sections to map sources to
            research_data: Research data containing sources and metadata
+            user_id: User ID (required for subscription checks and usage tracking)
            
        Returns:
            List of outline sections with intelligently mapped sources
+            
+        Raises:
+            ValueError: If user_id is not provided
        """
+        if not user_id:
+            raise ValueError("user_id is required for source mapping (subscription checks and usage tracking)")
+        
        if not sections or not research_data.sources:
            logger.warning("No sections or sources to map")
            return sections
@@ -73,8 +81,8 @@ class SourceToSectionMapper:
        # Step 1: Algorithmic mapping
        mapping_results = self._algorithmic_source_mapping(sections, research_data)
        
-        # Step 2: AI validation and improvement (single prompt)
-        validated_mapping = self._ai_validate_mapping(mapping_results, research_data)
+        # Step 2: AI validation and improvement (single prompt, user_id required for subscription checks)
+        validated_mapping = self._ai_validate_mapping(mapping_results, research_data, user_id)
        
        # Step 3: Apply validated mapping to sections
        mapped_sections = self._apply_mapping_to_sections(sections, validated_mapping)
@@ -261,7 +269,8 @@ class SourceToSectionMapper:
    def _ai_validate_mapping(
        self, 
        mapping_results: Dict[str, List[Tuple[ResearchSource, float]]], 
-        research_data: BlogResearchResponse
+        research_data: BlogResearchResponse,
+        user_id: str
    ) -> Dict[str, List[Tuple[ResearchSource, float]]]:
        """
        Use AI to validate and improve the algorithmic mapping results.
@@ -269,18 +278,25 @@ class SourceToSectionMapper:
        Args:
            mapping_results: Algorithmic mapping results
            research_data: Research data for context
+            user_id: User ID (required for subscription checks and usage tracking)
            
        Returns:
            AI-validated and improved mapping results
+            
+        Raises:
+            ValueError: If user_id is not provided
        """
+        if not user_id:
+            raise ValueError("user_id is required for AI validation (subscription checks and usage tracking)")
+        
        try:
            logger.info("Starting AI validation of source-to-section mapping...")
            
            # Build AI validation prompt
            validation_prompt = self._build_validation_prompt(mapping_results, research_data)
            
-            # Get AI validation response
-            validation_response = self._get_ai_validation_response(validation_prompt)
+            # Get AI validation response (user_id required for subscription checks)
+            validation_response = self._get_ai_validation_response(validation_prompt, user_id)
            
            # Parse and apply AI validation results
            validated_mapping = self._parse_validation_response(validation_response, mapping_results, research_data)
@@ -548,23 +564,31 @@ Analyze the mapping and provide your recommendations.
        
        return prompt
    
-    def _get_ai_validation_response(self, prompt: str) -> str:
+    def _get_ai_validation_response(self, prompt: str, user_id: str) -> str:
        """
        Get AI validation response using LLM provider.
        
        Args:
            prompt: Validation prompt
+            user_id: User ID (required for subscription checks and usage tracking)
            
        Returns:
            AI validation response
+            
+        Raises:
+            ValueError: If user_id is not provided
        """
+        if not user_id:
+            raise ValueError("user_id is required for AI validation response (subscription checks and usage tracking)")
+        
        try:
            from services.llm_providers.main_text_generation import llm_text_gen
            
            response = llm_text_gen(
                prompt=prompt,
                json_struct=None,
-                system_prompt=None
+                system_prompt=None,
+                user_id=user_id
            )
            
            return response