ALwrity Version 0.5.0 (Fastapi + React )

2025-08-06 12:48:02 +05:30
parent f28a919caa
commit 32f97fa6b3
476 changed files with 115544 additions and 28747 deletions
--- a/backend/services/component_logic/style_detection_logic.py
+++ b/backend/services/component_logic/style_detection_logic.py
@@ -0,0 +1,499 @@
+"""Style Detection Logic Service for ALwrity Backend.
+
+This service handles business logic for content style detection and analysis,
+migrated from the legacy StyleAnalyzer functionality.
+"""
+
+from typing import Dict, Any, List, Optional
+from loguru import logger
+from datetime import datetime
+import json
+import re
+import sys
+import os
+
+# Add the backend directory to Python path for absolute imports
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+
+# Import the new backend LLM providers from services
+from ..llm_providers.main_text_generation import llm_text_gen
+
+class StyleDetectionLogic:
+    """Business logic for content style detection and analysis."""
+    
+    def __init__(self):
+        """Initialize the Style Detection Logic service."""
+        logger.info("[StyleDetectionLogic.__init__] Initializing style detection service")
+        
+    def _clean_json_response(self, text: str) -> str:
+        """
+        Clean the LLM response to extract valid JSON.
+        
+        Args:
+            text (str): Raw response from LLM
+            
+        Returns:
+            str: Cleaned JSON string
+        """
+        try:
+            # Remove markdown code block markers
+            cleaned_string = text.replace("```json", "").replace("```", "").strip()
+            
+            # Log the cleaned JSON for debugging
+            logger.debug(f"[StyleDetectionLogic._clean_json_response] Cleaned JSON: {cleaned_string}")
+            
+            return cleaned_string
+            
+        except Exception as e:
+            logger.error(f"[StyleDetectionLogic._clean_json_response] Error cleaning response: {str(e)}")
+            return ""
+    
+    def analyze_content_style(self, content: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Analyze the style of the provided content using AI with enhanced prompts.
+        
+        Args:
+            content (Dict): Content to analyze, containing main_content, title, etc.
+            
+        Returns:
+            Dict: Analysis results with writing style, characteristics, and recommendations
+        """
+        try:
+            logger.info("[StyleDetectionLogic.analyze_content_style] Starting enhanced style analysis")
+            
+            # Extract content components
+            title = content.get('title', '')
+            description = content.get('description', '')
+            main_content = content.get('main_content', '')
+            headings = content.get('headings', [])
+            domain_info = content.get('domain_info', {})
+            brand_info = content.get('brand_info', {})
+            social_media = content.get('social_media', {})
+            content_structure = content.get('content_structure', {})
+            
+            # Construct the enhanced analysis prompt
+            prompt = f"""Analyze the following website content for comprehensive writing style, tone, and characteristics. 
+            This is a detailed analysis for content personalization and AI-powered content generation.
+
+            WEBSITE INFORMATION:
+            - Domain: {domain_info.get('domain_name', 'Unknown')}
+            - Website Type: {self._determine_website_type(domain_info)}
+            - Brand Name: {brand_info.get('company_name', 'Not specified')}
+            - Tagline: {brand_info.get('tagline', 'Not specified')}
+            - Social Media Presence: {', '.join(social_media.keys()) if social_media else 'None detected'}
+
+            CONTENT STRUCTURE:
+            - Headings: {len(headings)} total ({content_structure.get('headings', {}).get('h1', 0)} H1, {content_structure.get('headings', {}).get('h2', 0)} H2)
+            - Paragraphs: {content_structure.get('paragraphs', 0)}
+            - Images: {content_structure.get('images', 0)}
+            - Links: {content_structure.get('links', 0)}
+            - Has Navigation: {content_structure.get('has_navigation', False)}
+            - Has Call-to-Action: {content_structure.get('has_call_to_action', False)}
+
+            CONTENT TO ANALYZE:
+            Title: {title}
+            Description: {description}
+            Main Content: {main_content[:5000]}  # Enhanced content length
+            Key Headings: {headings[:10]}  # First 10 headings for context
+
+            ANALYSIS REQUIREMENTS:
+            1. Analyze the writing style, tone, and voice characteristics
+            2. Identify target audience demographics and expertise level
+            3. Determine content type and purpose
+            4. Assess content structure and organization patterns
+            5. Evaluate brand voice consistency and personality
+            6. Identify unique style elements and patterns
+            7. Consider the website type and industry context
+            8. Analyze social media presence impact on content style
+
+            IMPORTANT: Respond ONLY with a JSON object in the following format. Do not include any additional text, explanations, or markdown formatting:
+            {{
+                "writing_style": {{
+                    "tone": "detailed tone description with context",
+                    "voice": "active/passive with explanation",
+                    "complexity": "simple/moderate/complex with reasoning",
+                    "engagement_level": "low/medium/high with justification",
+                    "brand_personality": "detailed brand personality analysis",
+                    "formality_level": "casual/semi-formal/formal/professional",
+                    "emotional_appeal": "rational/emotional/mixed with examples"
+                }},
+                "content_characteristics": {{
+                    "sentence_structure": "detailed analysis of sentence patterns",
+                    "vocabulary_level": "basic/intermediate/advanced with examples",
+                    "paragraph_organization": "detailed structure analysis",
+                    "content_flow": "detailed flow analysis",
+                    "readability_score": "estimated readability level",
+                    "content_density": "high/medium/low with reasoning",
+                    "visual_elements_usage": "analysis of how visual elements complement text"
+                }},
+                "target_audience": {{
+                    "demographics": ["detailed demographic analysis"],
+                    "expertise_level": "beginner/intermediate/advanced with reasoning",
+                    "industry_focus": "detailed industry analysis",
+                    "geographic_focus": "detailed geographic analysis",
+                    "psychographic_profile": "detailed psychographic analysis",
+                    "pain_points": ["identified audience pain points"],
+                    "motivations": ["identified audience motivations"]
+                }},
+                "content_type": {{
+                    "primary_type": "detailed content type analysis",
+                    "secondary_types": ["list of secondary content types"],
+                    "purpose": "detailed content purpose analysis",
+                    "call_to_action": "detailed CTA analysis",
+                    "conversion_focus": "high/medium/low with reasoning",
+                    "educational_value": "high/medium/low with reasoning"
+                }},
+                "brand_analysis": {{
+                    "brand_voice": "detailed brand voice analysis",
+                    "brand_values": ["identified brand values"],
+                    "brand_positioning": "detailed positioning analysis",
+                    "competitive_differentiation": "detailed differentiation analysis",
+                    "trust_signals": ["identified trust elements"],
+                    "authority_indicators": ["identified authority elements"]
+                }},
+                "content_strategy_insights": {{
+                    "strengths": ["content strengths"],
+                    "weaknesses": ["content weaknesses"],
+                    "opportunities": ["content opportunities"],
+                    "threats": ["content threats"],
+                    "recommended_improvements": ["specific improvement suggestions"],
+                    "content_gaps": ["identified content gaps"]
+                }},
+                "recommended_settings": {{
+                    "writing_tone": "recommended tone for AI generation",
+                    "target_audience": "recommended audience focus",
+                    "content_type": "recommended content type",
+                    "creativity_level": "low/medium/high with reasoning",
+                    "geographic_location": "recommended geographic focus",
+                    "industry_context": "recommended industry approach",
+                    "brand_alignment": "recommended brand alignment strategy"
+                }}
+            }}
+            """
+            
+            # Call the LLM for analysis
+            logger.debug("[StyleDetectionLogic.analyze_content_style] Sending enhanced prompt to LLM")
+            analysis_text = llm_text_gen(prompt)
+            
+            # Clean and parse the response
+            cleaned_json = self._clean_json_response(analysis_text)
+            
+            try:
+                analysis_results = json.loads(cleaned_json)
+                logger.info("[StyleDetectionLogic.analyze_content_style] Successfully parsed enhanced analysis results")
+                return {
+                    'success': True,
+                    'analysis': analysis_results
+                }
+            except json.JSONDecodeError as e:
+                logger.error(f"[StyleDetectionLogic.analyze_content_style] Failed to parse JSON response: {e}")
+                logger.debug(f"[StyleDetectionLogic.analyze_content_style] Raw response: {analysis_text}")
+                return {
+                    'success': False,
+                    'error': 'Failed to parse analysis response'
+                }
+                
+        except Exception as e:
+            logger.error(f"[StyleDetectionLogic.analyze_content_style] Error in enhanced analysis: {str(e)}")
+            return {
+                'success': False,
+                'error': str(e)
+            }
+
+    def _determine_website_type(self, domain_info: Dict[str, Any]) -> str:
+        """Determine the type of website based on domain and content analysis."""
+        if domain_info.get('is_blog'):
+            return 'Blog/Content Platform'
+        elif domain_info.get('is_ecommerce'):
+            return 'E-commerce/Online Store'
+        elif domain_info.get('is_corporate'):
+            return 'Corporate/Business Website'
+        elif domain_info.get('has_blog_section'):
+            return 'Business with Blog'
+        elif domain_info.get('has_about_page') and domain_info.get('has_contact_page'):
+            return 'Professional Services'
+        else:
+            return 'General Website'
+    
+    def _get_fallback_analysis(self, content: Dict[str, Any]) -> Dict[str, Any]:
+        """Get fallback analysis when LLM analysis fails."""
+        main_content = content.get("main_content", "")
+        title = content.get("title", "")
+        
+        # Simple content analysis based on content characteristics
+        content_length = len(main_content)
+        word_count = len(main_content.split())
+        
+        # Determine tone based on content characteristics
+        if any(word in main_content.lower() for word in ['professional', 'business', 'industry', 'company']):
+            tone = "professional"
+        elif any(word in main_content.lower() for word in ['casual', 'fun', 'enjoy', 'exciting']):
+            tone = "casual"
+        else:
+            tone = "neutral"
+        
+        # Determine complexity based on sentence length and vocabulary
+        avg_sentence_length = word_count / max(len([s for s in main_content.split('.') if s.strip()]), 1)
+        if avg_sentence_length > 20:
+            complexity = "complex"
+        elif avg_sentence_length > 15:
+            complexity = "moderate"
+        else:
+            complexity = "simple"
+        
+        return {
+            "writing_style": {
+                "tone": tone,
+                "voice": "active",
+                "complexity": complexity,
+                "engagement_level": "medium"
+            },
+            "content_characteristics": {
+                "sentence_structure": "standard",
+                "vocabulary_level": "intermediate",
+                "paragraph_organization": "logical",
+                "content_flow": "smooth"
+            },
+            "target_audience": {
+                "demographics": ["general audience"],
+                "expertise_level": "intermediate",
+                "industry_focus": "general",
+                "geographic_focus": "global"
+            },
+            "content_type": {
+                "primary_type": "article",
+                "secondary_types": ["blog", "content"],
+                "purpose": "inform",
+                "call_to_action": "minimal"
+            },
+            "recommended_settings": {
+                "writing_tone": tone,
+                "target_audience": "general audience",
+                "content_type": "article",
+                "creativity_level": "medium",
+                "geographic_location": "global"
+            }
+        }
+    
+    def analyze_style_patterns(self, content: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Analyze recurring patterns in the content style.
+        
+        Args:
+            content (Dict): Content to analyze
+            
+        Returns:
+            Dict: Pattern analysis results
+        """
+        try:
+            logger.info("[StyleDetectionLogic.analyze_style_patterns] Starting pattern analysis")
+            
+            main_content = content.get("main_content", "")
+            
+            prompt = f"""Analyze the following content for recurring writing patterns and style characteristics.
+            Focus on identifying patterns in sentence structure, vocabulary usage, and writing techniques.
+
+            Content: {main_content[:3000]}
+
+            IMPORTANT: Respond ONLY with a JSON object in the following format:
+            {{
+                "patterns": {{
+                    "sentence_length": "short/medium/long",
+                    "vocabulary_patterns": ["list of patterns"],
+                    "rhetorical_devices": ["list of devices used"],
+                    "paragraph_structure": "description",
+                    "transition_phrases": ["list of common transitions"]
+                }},
+                "style_consistency": "high/medium/low",
+                "unique_elements": ["list of unique style elements"]
+            }}
+            """
+            
+            analysis_text = llm_text_gen(prompt)
+            cleaned_json = self._clean_json_response(analysis_text)
+            
+            try:
+                pattern_results = json.loads(cleaned_json)
+                return {
+                    'success': True,
+                    'patterns': pattern_results
+                }
+            except json.JSONDecodeError as e:
+                logger.error(f"[StyleDetectionLogic.analyze_style_patterns] Failed to parse JSON response: {e}")
+                return {
+                    'success': False,
+                    'error': 'Failed to parse pattern analysis response'
+                }
+                
+        except Exception as e:
+            logger.error(f"[StyleDetectionLogic.analyze_style_patterns] Error during analysis: {str(e)}")
+            return {
+                'success': False,
+                'error': str(e)
+            }
+    
+    def generate_style_guidelines(self, analysis_results: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Generate comprehensive content guidelines based on enhanced style analysis.
+        
+        Args:
+            analysis_results (Dict): Results from enhanced style analysis
+            
+        Returns:
+            Dict: Generated comprehensive guidelines
+        """
+        try:
+            logger.info("[StyleDetectionLogic.generate_style_guidelines] Generating comprehensive style guidelines")
+            
+            # Extract key information from analysis
+            writing_style = analysis_results.get('writing_style', {})
+            content_characteristics = analysis_results.get('content_characteristics', {})
+            target_audience = analysis_results.get('target_audience', {})
+            brand_analysis = analysis_results.get('brand_analysis', {})
+            content_strategy_insights = analysis_results.get('content_strategy_insights', {})
+            
+            prompt = f"""Based on the following comprehensive style analysis, generate detailed content creation guidelines for AI-powered content generation.
+
+            ANALYSIS DATA:
+            Writing Style: {writing_style}
+            Content Characteristics: {content_characteristics}
+            Target Audience: {target_audience}
+            Brand Analysis: {brand_analysis}
+            Content Strategy Insights: {content_strategy_insights}
+
+            REQUIREMENTS:
+            1. Create actionable guidelines for AI content generation
+            2. Provide specific recommendations for maintaining brand voice
+            3. Include strategies for audience engagement
+            4. Address content gaps and opportunities
+            5. Consider competitive positioning
+            6. Provide technical writing recommendations
+            7. Include SEO and conversion optimization tips
+            8. Address content structure and formatting
+
+            IMPORTANT: Respond ONLY with a JSON object in the following format:
+            {{
+                "guidelines": {{
+                    "tone_recommendations": [
+                        "specific tone guidelines with examples",
+                        "brand voice consistency tips",
+                        "emotional appeal strategies"
+                    ],
+                    "structure_guidelines": [
+                        "content structure recommendations",
+                        "formatting best practices",
+                        "organization strategies"
+                    ],
+                    "vocabulary_suggestions": [
+                        "specific vocabulary recommendations",
+                        "industry terminology guidance",
+                        "language complexity advice"
+                    ],
+                    "engagement_tips": [
+                        "audience engagement strategies",
+                        "interaction techniques",
+                        "conversion optimization tips"
+                    ],
+                    "audience_considerations": [
+                        "specific audience targeting advice",
+                        "pain point addressing strategies",
+                        "motivation-based content tips"
+                    ],
+                    "brand_alignment": [
+                        "brand voice consistency guidelines",
+                        "brand value integration tips",
+                        "competitive differentiation strategies"
+                    ],
+                    "seo_optimization": [
+                        "keyword integration strategies",
+                        "content optimization tips",
+                        "search visibility recommendations"
+                    ],
+                    "conversion_optimization": [
+                        "call-to-action strategies",
+                        "conversion funnel optimization",
+                        "lead generation techniques"
+                    ]
+                }},
+                "best_practices": [
+                    "comprehensive best practices list",
+                    "industry-specific recommendations",
+                    "quality assurance guidelines"
+                ],
+                "avoid_elements": [
+                    "elements to avoid with explanations",
+                    "common pitfalls to prevent",
+                    "brand-inappropriate content types"
+                ],
+                "content_strategy": "comprehensive content strategy recommendation with specific action items",
+                "ai_generation_tips": [
+                    "specific tips for AI content generation",
+                    "prompt optimization strategies",
+                    "quality control measures"
+                ],
+                "competitive_advantages": [
+                    "identified competitive advantages",
+                    "differentiation strategies",
+                    "market positioning recommendations"
+                ],
+                "content_calendar_suggestions": [
+                    "content frequency recommendations",
+                    "topic planning strategies",
+                    "seasonal content opportunities"
+                ]
+            }}
+            """
+            
+            guidelines_text = llm_text_gen(prompt)
+            cleaned_json = self._clean_json_response(guidelines_text)
+            
+            try:
+                guidelines = json.loads(cleaned_json)
+                return {
+                    'success': True,
+                    'guidelines': guidelines
+                }
+            except json.JSONDecodeError as e:
+                logger.error(f"[StyleDetectionLogic.generate_style_guidelines] Failed to parse JSON response: {e}")
+                return {
+                    'success': False,
+                    'error': 'Failed to parse guidelines response'
+                }
+                
+        except Exception as e:
+            logger.error(f"[StyleDetectionLogic.generate_style_guidelines] Error generating guidelines: {str(e)}")
+            return {
+                'success': False,
+                'error': str(e)
+            }
+    
+    def validate_style_analysis_request(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Validate style analysis request data.
+        
+        Args:
+            request_data (Dict): Request data to validate
+            
+        Returns:
+            Dict: Validation results
+        """
+        errors = []
+        
+        # Check if content is provided
+        if not request_data.get('content') and not request_data.get('url') and not request_data.get('text_sample'):
+            errors.append("Content is required for style analysis")
+        
+        # Check content length
+        content = request_data.get('content', {})
+        main_content = content.get('main_content', '')
+        if len(main_content) < 50:
+            errors.append("Content must be at least 50 characters long for meaningful analysis")
+        
+        # Check for required fields
+        if not content.get('title') and not content.get('main_content'):
+            errors.append("Either title or main content must be provided")
+        
+        return {
+            'valid': len(errors) == 0,
+            'errors': errors
+        }