499 lines
22 KiB
Python
499 lines
22 KiB
Python
"""Style Detection Logic Service for ALwrity Backend.

This service handles business logic for content style detection and analysis,
migrated from the legacy StyleAnalyzer functionality.
"""
|
|
|
|
from typing import Dict, Any, List, Optional
|
|
from loguru import logger
|
|
from datetime import datetime
|
|
import json
|
|
import re
|
|
import sys
|
|
import os
|
|
|
|
# Add the backend directory to Python path for absolute imports
|
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
|
|
|
# Import the new backend LLM providers from services
|
|
from ..llm_providers.main_text_generation import llm_text_gen
|
|
|
|
class StyleDetectionLogic:
    """Business logic for content style detection and analysis.

    The LLM-backed methods (``analyze_content_style``,
    ``analyze_style_patterns`` and ``generate_style_guidelines``) build a
    prompt, send it through ``llm_text_gen`` and return the parsed JSON reply
    in a ``{'success': bool, ...}`` envelope. Failures are reported through
    the envelope's ``'error'`` key rather than raised.
    """

    # Upper bounds on how much content is embedded in a prompt.
    MAX_STYLE_CONTENT_CHARS = 5000
    MAX_PATTERN_CONTENT_CHARS = 3000
    MAX_PROMPT_HEADINGS = 10

    # Minimum ``main_content`` length accepted by request validation.
    MIN_CONTENT_LENGTH = 50

    # (domain_info flag, website type label) pairs, checked in priority order.
    _WEBSITE_TYPE_FLAGS = (
        ('is_blog', 'Blog/Content Platform'),
        ('is_ecommerce', 'E-commerce/Online Store'),
        ('is_corporate', 'Corporate/Business Website'),
        ('has_blog_section', 'Business with Blog'),
    )

    # Tone cues for the heuristic fallback. They are anchored at a word start
    # so that e.g. "refund" or "function" is not read as the casual cue "fun".
    _PROFESSIONAL_CUES = re.compile(r"\b(?:professional|business|industry|company)")
    _CASUAL_CUES = re.compile(r"\b(?:casual|fun\b|enjoy|exciting)")
    _SENTENCE_BREAK = re.compile(r"[.!?]+")

    # Response templates appended verbatim to the prompts. They are plain
    # (non-f) strings so the JSON braces need no escaping.
    _STYLE_ANALYSIS_RESPONSE_FORMAT = """{
"writing_style": {
"tone": "detailed tone description with context",
"voice": "active/passive with explanation",
"complexity": "simple/moderate/complex with reasoning",
"engagement_level": "low/medium/high with justification",
"brand_personality": "detailed brand personality analysis",
"formality_level": "casual/semi-formal/formal/professional",
"emotional_appeal": "rational/emotional/mixed with examples"
},
"content_characteristics": {
"sentence_structure": "detailed analysis of sentence patterns",
"vocabulary_level": "basic/intermediate/advanced with examples",
"paragraph_organization": "detailed structure analysis",
"content_flow": "detailed flow analysis",
"readability_score": "estimated readability level",
"content_density": "high/medium/low with reasoning",
"visual_elements_usage": "analysis of how visual elements complement text"
},
"target_audience": {
"demographics": ["detailed demographic analysis"],
"expertise_level": "beginner/intermediate/advanced with reasoning",
"industry_focus": "detailed industry analysis",
"geographic_focus": "detailed geographic analysis",
"psychographic_profile": "detailed psychographic analysis",
"pain_points": ["identified audience pain points"],
"motivations": ["identified audience motivations"]
},
"content_type": {
"primary_type": "detailed content type analysis",
"secondary_types": ["list of secondary content types"],
"purpose": "detailed content purpose analysis",
"call_to_action": "detailed CTA analysis",
"conversion_focus": "high/medium/low with reasoning",
"educational_value": "high/medium/low with reasoning"
},
"brand_analysis": {
"brand_voice": "detailed brand voice analysis",
"brand_values": ["identified brand values"],
"brand_positioning": "detailed positioning analysis",
"competitive_differentiation": "detailed differentiation analysis",
"trust_signals": ["identified trust elements"],
"authority_indicators": ["identified authority elements"]
},
"content_strategy_insights": {
"strengths": ["content strengths"],
"weaknesses": ["content weaknesses"],
"opportunities": ["content opportunities"],
"threats": ["content threats"],
"recommended_improvements": ["specific improvement suggestions"],
"content_gaps": ["identified content gaps"]
},
"recommended_settings": {
"writing_tone": "recommended tone for AI generation",
"target_audience": "recommended audience focus",
"content_type": "recommended content type",
"creativity_level": "low/medium/high with reasoning",
"geographic_location": "recommended geographic focus",
"industry_context": "recommended industry approach",
"brand_alignment": "recommended brand alignment strategy"
}
}
"""

    _PATTERN_RESPONSE_FORMAT = """{
"patterns": {
"sentence_length": "short/medium/long",
"vocabulary_patterns": ["list of patterns"],
"rhetorical_devices": ["list of devices used"],
"paragraph_structure": "description",
"transition_phrases": ["list of common transitions"]
},
"style_consistency": "high/medium/low",
"unique_elements": ["list of unique style elements"]
}
"""

    _GUIDELINES_RESPONSE_FORMAT = """{
"guidelines": {
"tone_recommendations": [
"specific tone guidelines with examples",
"brand voice consistency tips",
"emotional appeal strategies"
],
"structure_guidelines": [
"content structure recommendations",
"formatting best practices",
"organization strategies"
],
"vocabulary_suggestions": [
"specific vocabulary recommendations",
"industry terminology guidance",
"language complexity advice"
],
"engagement_tips": [
"audience engagement strategies",
"interaction techniques",
"conversion optimization tips"
],
"audience_considerations": [
"specific audience targeting advice",
"pain point addressing strategies",
"motivation-based content tips"
],
"brand_alignment": [
"brand voice consistency guidelines",
"brand value integration tips",
"competitive differentiation strategies"
],
"seo_optimization": [
"keyword integration strategies",
"content optimization tips",
"search visibility recommendations"
],
"conversion_optimization": [
"call-to-action strategies",
"conversion funnel optimization",
"lead generation techniques"
]
},
"best_practices": [
"comprehensive best practices list",
"industry-specific recommendations",
"quality assurance guidelines"
],
"avoid_elements": [
"elements to avoid with explanations",
"common pitfalls to prevent",
"brand-inappropriate content types"
],
"content_strategy": "comprehensive content strategy recommendation with specific action items",
"ai_generation_tips": [
"specific tips for AI content generation",
"prompt optimization strategies",
"quality control measures"
],
"competitive_advantages": [
"identified competitive advantages",
"differentiation strategies",
"market positioning recommendations"
],
"content_calendar_suggestions": [
"content frequency recommendations",
"topic planning strategies",
"seasonal content opportunities"
]
}
"""

    def __init__(self):
        """Initialize the Style Detection Logic service."""
        logger.info("[StyleDetectionLogic.__init__] Initializing style detection service")

    @staticmethod
    def _extract_json_payload(text: str) -> str:
        """Strip Markdown code fences and surrounding prose from an LLM reply.

        Args:
            text (str): Raw response from LLM

        Returns:
            str: The text between the first ``{`` and the last ``}`` when both
                exist; otherwise the fence-stripped, whitespace-trimmed text.
                A reply that starts with ``[`` is returned untrimmed so a
                top-level JSON array is not cut apart.
        """
        stripped = re.sub(r"```(?:json)?", "", text, flags=re.IGNORECASE).strip()
        if stripped.startswith("["):
            return stripped

        # Models sometimes add an introduction or sign-off despite the
        # "respond ONLY with JSON" instruction; keep just the object.
        first_brace = stripped.find("{")
        last_brace = stripped.rfind("}")
        if first_brace != -1 and last_brace > first_brace:
            return stripped[first_brace:last_brace + 1]
        return stripped

    def _clean_json_response(self, text: str) -> str:
        """
        Clean the LLM response to extract valid JSON.

        Args:
            text (str): Raw response from LLM

        Returns:
            str: Cleaned JSON string, or "" when ``text`` is not a string
        """
        if not isinstance(text, str):
            logger.error(
                "[StyleDetectionLogic._clean_json_response] Error cleaning response: "
                f"expected str, got {type(text).__name__}"
            )
            return ""

        cleaned_string = self._extract_json_payload(text)

        # Log the cleaned JSON for debugging
        logger.debug(f"[StyleDetectionLogic._clean_json_response] Cleaned JSON: {cleaned_string}")
        return cleaned_string

    def _generate_json(self, prompt: str, *, caller: str, result_key: str, parse_error: str) -> Dict[str, Any]:
        """Send ``prompt`` to the LLM and wrap the parsed JSON reply.

        Args:
            prompt (str): Prompt passed to ``llm_text_gen``
            caller (str): Name of the public method, used in log messages
            result_key (str): Envelope key under which the parsed reply is stored
            parse_error (str): Error text returned when the reply is unusable

        Returns:
            Dict: ``{'success': True, result_key: parsed}`` when the reply is a
                JSON object, else ``{'success': False, 'error': parse_error}``.
                Exceptions raised by ``llm_text_gen`` propagate to the caller.
        """
        tag = f"[StyleDetectionLogic.{caller}]"
        raw_reply = llm_text_gen(prompt)

        try:
            parsed = json.loads(self._clean_json_response(raw_reply))
        except json.JSONDecodeError as e:
            logger.error(f"{tag} Failed to parse JSON response: {e}")
            logger.debug(f"{tag} Raw response: {raw_reply}")
            return {'success': False, 'error': parse_error}

        # Every prompt asks for a JSON object; a bare list/string/null would
        # break consumers that call .get() on the result.
        if not isinstance(parsed, dict):
            logger.error(f"{tag} Failed to parse JSON response: expected an object, got {type(parsed).__name__}")
            logger.debug(f"{tag} Raw response: {raw_reply}")
            return {'success': False, 'error': parse_error}

        return {'success': True, result_key: parsed}

    def _build_style_analysis_prompt(self, content: Dict[str, Any]) -> str:
        """Build the prompt used by ``analyze_content_style``.

        Args:
            content (Dict): Content to analyze. Keys read: title, description,
                main_content, headings, domain_info, brand_info, social_media,
                content_structure. Missing or None entries fall back to empty
                defaults.

        Returns:
            str: Prompt text ending with the expected JSON response format
        """
        title = content.get('title', '')
        description = content.get('description', '')
        main_content = content.get('main_content') or ''
        headings = content.get('headings') or []
        domain_info = content.get('domain_info') or {}
        brand_info = content.get('brand_info') or {}
        social_media = content.get('social_media') or {}
        content_structure = content.get('content_structure') or {}
        heading_counts = content_structure.get('headings') or {}

        social_summary = ', '.join(map(str, social_media)) if social_media else 'None detected'
        website_type = self._determine_website_type(domain_info)
        # Truncate here rather than inside the template so no code comment can
        # end up in the text sent to the LLM.
        content_excerpt = main_content[:self.MAX_STYLE_CONTENT_CHARS]
        key_headings = headings[:self.MAX_PROMPT_HEADINGS]

        header = f"""Analyze the following website content for comprehensive writing style, tone, and characteristics.
This is a detailed analysis for content personalization and AI-powered content generation.

WEBSITE INFORMATION:
- Domain: {domain_info.get('domain_name', 'Unknown')}
- Website Type: {website_type}
- Brand Name: {brand_info.get('company_name', 'Not specified')}
- Tagline: {brand_info.get('tagline', 'Not specified')}
- Social Media Presence: {social_summary}

CONTENT STRUCTURE:
- Headings: {len(headings)} total ({heading_counts.get('h1', 0)} H1, {heading_counts.get('h2', 0)} H2)
- Paragraphs: {content_structure.get('paragraphs', 0)}
- Images: {content_structure.get('images', 0)}
- Links: {content_structure.get('links', 0)}
- Has Navigation: {content_structure.get('has_navigation', False)}
- Has Call-to-Action: {content_structure.get('has_call_to_action', False)}

CONTENT TO ANALYZE:
Title: {title}
Description: {description}
Main Content: {content_excerpt}
Key Headings: {key_headings}

ANALYSIS REQUIREMENTS:
1. Analyze the writing style, tone, and voice characteristics
2. Identify target audience demographics and expertise level
3. Determine content type and purpose
4. Assess content structure and organization patterns
5. Evaluate brand voice consistency and personality
6. Identify unique style elements and patterns
7. Consider the website type and industry context
8. Analyze social media presence impact on content style

IMPORTANT: Respond ONLY with a JSON object in the following format. Do not include any additional text, explanations, or markdown formatting:
"""
        return header + self._STYLE_ANALYSIS_RESPONSE_FORMAT

    def analyze_content_style(self, content: Dict[str, Any]) -> Dict[str, Any]:
        """
        Analyze the style of the provided content using AI with enhanced prompts.

        Args:
            content (Dict): Content to analyze, containing main_content, title, etc.

        Returns:
            Dict: ``{'success': True, 'analysis': <parsed JSON object>}`` on
                success, otherwise ``{'success': False, 'error': <message>}``
        """
        try:
            logger.info("[StyleDetectionLogic.analyze_content_style] Starting enhanced style analysis")
            prompt = self._build_style_analysis_prompt(content)

            logger.debug("[StyleDetectionLogic.analyze_content_style] Sending enhanced prompt to LLM")
            result = self._generate_json(
                prompt,
                caller="analyze_content_style",
                result_key='analysis',
                parse_error='Failed to parse analysis response',
            )
            if result['success']:
                logger.info("[StyleDetectionLogic.analyze_content_style] Successfully parsed enhanced analysis results")
            return result

        except Exception as e:
            # Service boundary: any provider or input failure is reported in
            # the envelope instead of propagating to the caller.
            logger.error(f"[StyleDetectionLogic.analyze_content_style] Error in enhanced analysis: {str(e)}")
            return {
                'success': False,
                'error': str(e)
            }

    def _determine_website_type(self, domain_info: Dict[str, Any]) -> str:
        """Determine the type of website from the flags in ``domain_info``.

        Args:
            domain_info (Dict): Flags such as is_blog, is_ecommerce,
                is_corporate, has_blog_section, has_about_page and
                has_contact_page. None is treated as an empty dict.

        Returns:
            str: Label of the first matching type, or 'General Website'
        """
        domain_info = domain_info or {}

        for flag, label in self._WEBSITE_TYPE_FLAGS:
            if domain_info.get(flag):
                return label

        if domain_info.get('has_about_page') and domain_info.get('has_contact_page'):
            return 'Professional Services'
        return 'General Website'

    def _get_fallback_analysis(self, content: Dict[str, Any]) -> Dict[str, Any]:
        """Get a heuristic fallback analysis that does not need the LLM.

        Nothing in this class calls it; it is available to callers for the
        case where the LLM analysis reports failure.

        Args:
            content (Dict): Content to analyze; only main_content is read.
                None or a missing/None main_content is treated as empty text.

        Returns:
            Dict: Analysis with writing_style, content_characteristics,
                target_audience, content_type and recommended_settings. Only
                the tone and complexity values are derived from the text; all
                other values are fixed defaults.
        """
        main_content = (content or {}).get("main_content") or ""
        lowered = main_content.lower()

        # Determine tone from cue words (professional cues take precedence)
        if self._PROFESSIONAL_CUES.search(lowered):
            tone = "professional"
        elif self._CASUAL_CUES.search(lowered):
            tone = "casual"
        else:
            tone = "neutral"

        # Determine complexity from the average number of words per sentence;
        # '!' and '?' end a sentence just like '.'.
        word_count = len(main_content.split())
        sentence_count = sum(1 for part in self._SENTENCE_BREAK.split(main_content) if part.strip())
        avg_sentence_length = word_count / max(sentence_count, 1)
        if avg_sentence_length > 20:
            complexity = "complex"
        elif avg_sentence_length > 15:
            complexity = "moderate"
        else:
            complexity = "simple"

        return {
            "writing_style": {
                "tone": tone,
                "voice": "active",
                "complexity": complexity,
                "engagement_level": "medium"
            },
            "content_characteristics": {
                "sentence_structure": "standard",
                "vocabulary_level": "intermediate",
                "paragraph_organization": "logical",
                "content_flow": "smooth"
            },
            "target_audience": {
                "demographics": ["general audience"],
                "expertise_level": "intermediate",
                "industry_focus": "general",
                "geographic_focus": "global"
            },
            "content_type": {
                "primary_type": "article",
                "secondary_types": ["blog", "content"],
                "purpose": "inform",
                "call_to_action": "minimal"
            },
            "recommended_settings": {
                "writing_tone": tone,
                "target_audience": "general audience",
                "content_type": "article",
                "creativity_level": "medium",
                "geographic_location": "global"
            }
        }

    def _build_pattern_prompt(self, content: Dict[str, Any]) -> str:
        """Build the prompt used by ``analyze_style_patterns``.

        Args:
            content (Dict): Content to analyze; only main_content is read and
                a missing/None value is treated as empty text.

        Returns:
            str: Prompt text ending with the expected JSON response format
        """
        main_content = content.get("main_content") or ""
        content_excerpt = main_content[:self.MAX_PATTERN_CONTENT_CHARS]

        header = f"""Analyze the following content for recurring writing patterns and style characteristics.
Focus on identifying patterns in sentence structure, vocabulary usage, and writing techniques.

Content: {content_excerpt}

IMPORTANT: Respond ONLY with a JSON object in the following format:
"""
        return header + self._PATTERN_RESPONSE_FORMAT

    def analyze_style_patterns(self, content: Dict[str, Any]) -> Dict[str, Any]:
        """
        Analyze recurring patterns in the content style.

        Args:
            content (Dict): Content to analyze

        Returns:
            Dict: ``{'success': True, 'patterns': <parsed JSON object>}`` on
                success, otherwise ``{'success': False, 'error': <message>}``
        """
        try:
            logger.info("[StyleDetectionLogic.analyze_style_patterns] Starting pattern analysis")
            return self._generate_json(
                self._build_pattern_prompt(content),
                caller="analyze_style_patterns",
                result_key='patterns',
                parse_error='Failed to parse pattern analysis response',
            )

        except Exception as e:
            # Service boundary: report the failure in the envelope.
            logger.error(f"[StyleDetectionLogic.analyze_style_patterns] Error during analysis: {str(e)}")
            return {
                'success': False,
                'error': str(e)
            }

    def _build_guidelines_prompt(self, analysis_results: Dict[str, Any]) -> str:
        """Build the prompt used by ``generate_style_guidelines``.

        Args:
            analysis_results (Dict): Style analysis; the sections
                writing_style, content_characteristics, target_audience,
                brand_analysis and content_strategy_insights are embedded in
                the prompt (missing sections appear as empty dicts).

        Returns:
            str: Prompt text ending with the expected JSON response format
        """
        # Extract key information from analysis
        writing_style = analysis_results.get('writing_style', {})
        content_characteristics = analysis_results.get('content_characteristics', {})
        target_audience = analysis_results.get('target_audience', {})
        brand_analysis = analysis_results.get('brand_analysis', {})
        content_strategy_insights = analysis_results.get('content_strategy_insights', {})

        header = f"""Based on the following comprehensive style analysis, generate detailed content creation guidelines for AI-powered content generation.

ANALYSIS DATA:
Writing Style: {writing_style}
Content Characteristics: {content_characteristics}
Target Audience: {target_audience}
Brand Analysis: {brand_analysis}
Content Strategy Insights: {content_strategy_insights}

REQUIREMENTS:
1. Create actionable guidelines for AI content generation
2. Provide specific recommendations for maintaining brand voice
3. Include strategies for audience engagement
4. Address content gaps and opportunities
5. Consider competitive positioning
6. Provide technical writing recommendations
7. Include SEO and conversion optimization tips
8. Address content structure and formatting

IMPORTANT: Respond ONLY with a JSON object in the following format:
"""
        return header + self._GUIDELINES_RESPONSE_FORMAT

    def generate_style_guidelines(self, analysis_results: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generate comprehensive content guidelines based on enhanced style analysis.

        Args:
            analysis_results (Dict): Results from enhanced style analysis

        Returns:
            Dict: ``{'success': True, 'guidelines': <parsed JSON object>}`` on
                success, otherwise ``{'success': False, 'error': <message>}``
        """
        try:
            logger.info("[StyleDetectionLogic.generate_style_guidelines] Generating comprehensive style guidelines")
            return self._generate_json(
                self._build_guidelines_prompt(analysis_results),
                caller="generate_style_guidelines",
                result_key='guidelines',
                parse_error='Failed to parse guidelines response',
            )

        except Exception as e:
            # Service boundary: report the failure in the envelope.
            logger.error(f"[StyleDetectionLogic.generate_style_guidelines] Error generating guidelines: {str(e)}")
            return {
                'success': False,
                'error': str(e)
            }

    def validate_style_analysis_request(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Validate style analysis request data.

        A request must carry ``content``, ``url`` or ``text_sample``. The
        content-specific checks (minimum length, title/main content present)
        are skipped only when ``content`` is absent and ``url`` or
        ``text_sample`` is given, so those requests can pass validation.

        Args:
            request_data (Dict): Request data to validate

        Returns:
            Dict: ``{'valid': bool, 'errors': [messages]}``
        """
        request_data = request_data or {}
        errors = []

        # A None or non-dict content value is treated as "no content supplied"
        raw_content = request_data.get('content')
        content = raw_content if isinstance(raw_content, dict) else {}
        has_alternative_source = bool(request_data.get('url') or request_data.get('text_sample'))

        # Check if content is provided
        if not content and not has_alternative_source:
            errors.append("Content is required for style analysis")

        if content or not has_alternative_source:
            # Check content length
            main_content = content.get('main_content') or ''
            if len(main_content) < self.MIN_CONTENT_LENGTH:
                errors.append(
                    f"Content must be at least {self.MIN_CONTENT_LENGTH} characters long for meaningful analysis"
                )

            # Check for required fields
            if not content.get('title') and not main_content:
                errors.append("Either title or main content must be provided")

        return {
            'valid': not errors,
            'errors': errors
        }