# File: ALwrity/backend/services/persona_analysis_service.py
# Snapshot: 2025-08-31 08:26:51 +00:00
# Size: 668 lines, 28 KiB
# Language: Python

"""
Persona Analysis Service
Uses Gemini structured responses to analyze onboarding data and create writing personas.
"""
from typing import Dict, Any, List, Optional
from sqlalchemy.orm import Session
from loguru import logger
from datetime import datetime
import json
from services.database import get_db_session
from models.onboarding import OnboardingSession, WebsiteAnalysis, ResearchPreferences
from models.persona_models import WritingPersona, PlatformPersona, PersonaAnalysisResult
from services.llm_providers.gemini_provider import gemini_structured_json_response
class PersonaAnalysisService:
    """Service for analyzing onboarding data and generating writing personas using Gemini AI.

    The public entry points are:

    * ``generate_persona_from_onboarding`` - collect onboarding rows, ask
      Gemini for a core persona plus one adaptation per platform, persist
      everything and return the result as a plain dict.
    * ``get_user_personas`` / ``get_persona_for_platform`` - read back
      previously stored personas.

    Every public method reports failure through its return value (an
    ``{"error": ...}`` dict, ``[]`` or ``None``) instead of raising.
    """

    def __init__(self):
        """Initialize the persona analysis service."""
        logger.info("PersonaAnalysisService initialized")

    def generate_persona_from_onboarding(self, user_id: int, onboarding_session_id: Optional[int] = None) -> Dict[str, Any]:
        """
        Generate a comprehensive writing persona from user's onboarding data.

        Args:
            user_id: User ID to generate persona for
            onboarding_session_id: Optional specific onboarding session ID

        Returns:
            Generated persona data with platform adaptations, or a dict with a
            single ``"error"`` key when any step fails.
        """
        try:
            logger.info(f"Generating persona for user {user_id}")

            # Get onboarding data
            onboarding_data = self._collect_onboarding_data(user_id, onboarding_session_id)
            if not onboarding_data:
                logger.warning(f"No onboarding data found for user {user_id}")
                return {"error": "No onboarding data available for persona generation"}

            # Generate core persona using Gemini
            core_persona = self._generate_core_persona(onboarding_data)
            if "error" in core_persona:
                return core_persona

            # Generate platform-specific adaptations (platforms that fail are skipped)
            platform_personas = self._generate_platform_adaptations(core_persona, onboarding_data)

            # Save to database
            saved_persona = self._save_persona_to_db(user_id, core_persona, platform_personas, onboarding_data)

            return {
                "persona_id": saved_persona.id,
                "core_persona": core_persona,
                "platform_personas": platform_personas,
                "analysis_metadata": {
                    "confidence_score": core_persona.get("confidence_score", 0.0),
                    "data_sufficiency": self._calculate_data_sufficiency(onboarding_data),
                    "generated_at": datetime.utcnow().isoformat()
                }
            }
        except Exception as e:
            # Top-level boundary: callers receive an error dict, never an exception.
            logger.error(f"Error generating persona for user {user_id}: {str(e)}")
            return {"error": f"Failed to generate persona: {str(e)}"}

    def _collect_onboarding_data(self, user_id: int, session_id: Optional[int] = None) -> Optional[Dict[str, Any]]:
        """Collect comprehensive onboarding data for persona analysis.

        Args:
            user_id: Owner of the onboarding session.
            session_id: Specific onboarding session to load; when falsy the
                user's most recently updated session is used.

        Returns:
            A dict with ``session_info``, ``website_analysis`` and
            ``research_preferences`` keys (the latter two are ``None`` when no
            matching row exists), or ``None`` when no onboarding session is
            found or the lookup fails.
        """
        session = None
        try:
            session = get_db_session()

            # Find onboarding session
            if session_id:
                onboarding_session = session.query(OnboardingSession).filter(
                    OnboardingSession.id == session_id,
                    OnboardingSession.user_id == user_id
                ).first()
            else:
                onboarding_session = session.query(OnboardingSession).filter(
                    OnboardingSession.user_id == user_id
                ).order_by(OnboardingSession.updated_at.desc()).first()

            if not onboarding_session:
                return None

            # Get website analysis
            website_analysis = session.query(WebsiteAnalysis).filter(
                WebsiteAnalysis.session_id == onboarding_session.id
            ).first()

            # Get research preferences
            research_prefs = session.query(ResearchPreferences).filter(
                ResearchPreferences.session_id == onboarding_session.id
            ).first()

            # Compile comprehensive data
            return {
                "session_info": {
                    "session_id": onboarding_session.id,
                    "current_step": onboarding_session.current_step,
                    "progress": onboarding_session.progress,
                    "started_at": onboarding_session.started_at.isoformat() if onboarding_session.started_at else None
                },
                "website_analysis": website_analysis.to_dict() if website_analysis else None,
                "research_preferences": research_prefs.to_dict() if research_prefs else None
            }
        except Exception as e:
            logger.error(f"Error collecting onboarding data: {str(e)}")
            return None
        finally:
            # Close on every path, including the early "not found" return and errors.
            if session is not None:
                session.close()

    def _generate_core_persona(self, onboarding_data: Dict[str, Any]) -> Dict[str, Any]:
        """Generate core writing persona using Gemini structured response.

        Args:
            onboarding_data: Data produced by ``_collect_onboarding_data``.

        Returns:
            The structured response from Gemini, or a dict with an ``"error"``
            key when the provider reports an error or the call raises.
        """
        # Build analysis prompt
        prompt = self._build_persona_analysis_prompt(onboarding_data)

        # Define schema for structured response
        persona_schema = {
            "type": "object",
            "properties": {
                "identity": {
                    "type": "object",
                    "properties": {
                        "persona_name": {"type": "string"},
                        "archetype": {"type": "string"},
                        "core_belief": {"type": "string"},
                        "brand_voice_description": {"type": "string"}
                    },
                    "required": ["persona_name", "archetype", "core_belief"]
                },
                "linguistic_fingerprint": {
                    "type": "object",
                    "properties": {
                        "sentence_metrics": {
                            "type": "object",
                            "properties": {
                                "average_sentence_length_words": {"type": "number"},
                                "preferred_sentence_type": {"type": "string"},
                                "active_to_passive_ratio": {"type": "string"},
                                "complexity_level": {"type": "string"}
                            }
                        },
                        "lexical_features": {
                            "type": "object",
                            "properties": {
                                "go_to_words": {"type": "array", "items": {"type": "string"}},
                                "go_to_phrases": {"type": "array", "items": {"type": "string"}},
                                "avoid_words": {"type": "array", "items": {"type": "string"}},
                                "contractions": {"type": "string"},
                                "filler_words": {"type": "string"},
                                "vocabulary_level": {"type": "string"}
                            }
                        },
                        "rhetorical_devices": {
                            "type": "object",
                            "properties": {
                                "metaphors": {"type": "string"},
                                "analogies": {"type": "string"},
                                "rhetorical_questions": {"type": "string"},
                                "storytelling_style": {"type": "string"}
                            }
                        }
                    }
                },
                "tonal_range": {
                    "type": "object",
                    "properties": {
                        "default_tone": {"type": "string"},
                        "permissible_tones": {"type": "array", "items": {"type": "string"}},
                        "forbidden_tones": {"type": "array", "items": {"type": "string"}},
                        "emotional_range": {"type": "string"}
                    }
                },
                "stylistic_constraints": {
                    "type": "object",
                    "properties": {
                        "punctuation": {
                            "type": "object",
                            "properties": {
                                "ellipses": {"type": "string"},
                                "em_dash": {"type": "string"},
                                "exclamation_points": {"type": "string"}
                            }
                        },
                        "formatting": {
                            "type": "object",
                            "properties": {
                                "paragraphs": {"type": "string"},
                                "lists": {"type": "string"},
                                "markdown": {"type": "string"}
                            }
                        }
                    }
                },
                "confidence_score": {"type": "number"},
                "analysis_notes": {"type": "string"}
            },
            "required": ["identity", "linguistic_fingerprint", "tonal_range", "confidence_score"]
        }

        try:
            # Generate structured response using Gemini
            response = gemini_structured_json_response(
                prompt=prompt,
                schema=persona_schema,
                temperature=0.2,  # Low temperature for consistent analysis
                max_tokens=8192,
                system_prompt="You are an expert writing style analyst and persona developer. Analyze the provided data to create a precise, actionable writing persona."
            )

            if "error" in response:
                logger.error(f"Gemini API error: {response['error']}")
                return {"error": f"AI analysis failed: {response['error']}"}

            logger.info("✅ Core persona generated successfully")
            return response
        except Exception as e:
            logger.error(f"Error generating core persona: {str(e)}")
            return {"error": f"Failed to generate core persona: {str(e)}"}

    def _generate_platform_adaptations(self, core_persona: Dict[str, Any], onboarding_data: Dict[str, Any]) -> Dict[str, Any]:
        """Generate platform-specific persona adaptations.

        Args:
            core_persona: Core persona returned by ``_generate_core_persona``.
            onboarding_data: Data produced by ``_collect_onboarding_data``.

        Returns:
            Mapping of platform name to its adaptation. Platforms whose
            generation fails are logged and left out of the mapping.
        """
        platforms = ["twitter", "linkedin", "instagram", "facebook", "blog", "medium", "substack"]
        platform_personas = {}

        for platform in platforms:
            try:
                platform_persona = self._generate_single_platform_persona(core_persona, platform, onboarding_data)
                if "error" not in platform_persona:
                    platform_personas[platform] = platform_persona
                else:
                    logger.warning(f"Failed to generate {platform} persona: {platform_persona['error']}")
            except Exception as e:
                # One failing platform must not abort the remaining ones.
                logger.error(f"Error generating {platform} persona: {str(e)}")

        return platform_personas

    def _generate_single_platform_persona(self, core_persona: Dict[str, Any], platform: str, onboarding_data: Dict[str, Any]) -> Dict[str, Any]:
        """Generate persona adaptation for a specific platform.

        Args:
            core_persona: Core persona to adapt.
            platform: Platform key, e.g. ``"twitter"``.
            onboarding_data: Data produced by ``_collect_onboarding_data``.

        Returns:
            The structured response from Gemini as returned by the provider,
            or a dict with an ``"error"`` key when the call raises.
        """
        prompt = self._build_platform_adaptation_prompt(core_persona, platform, onboarding_data)

        # Platform-specific schema
        platform_schema = {
            "type": "object",
            "properties": {
                "platform_type": {"type": "string"},
                "sentence_metrics": {
                    "type": "object",
                    "properties": {
                        "max_sentence_length": {"type": "number"},
                        "optimal_sentence_length": {"type": "number"},
                        "sentence_variety": {"type": "string"}
                    }
                },
                "lexical_adaptations": {
                    "type": "object",
                    "properties": {
                        "platform_specific_words": {"type": "array", "items": {"type": "string"}},
                        "hashtag_strategy": {"type": "string"},
                        "emoji_usage": {"type": "string"},
                        "mention_strategy": {"type": "string"}
                    }
                },
                "content_format_rules": {
                    "type": "object",
                    "properties": {
                        "character_limit": {"type": "number"},
                        "paragraph_structure": {"type": "string"},
                        "call_to_action_style": {"type": "string"},
                        "link_placement": {"type": "string"}
                    }
                },
                "engagement_patterns": {
                    "type": "object",
                    "properties": {
                        "posting_frequency": {"type": "string"},
                        "optimal_posting_times": {"type": "array", "items": {"type": "string"}},
                        "engagement_tactics": {"type": "array", "items": {"type": "string"}},
                        "community_interaction": {"type": "string"}
                    }
                },
                "platform_best_practices": {
                    "type": "array",
                    "items": {"type": "string"}
                }
            },
            "required": ["platform_type", "sentence_metrics", "content_format_rules", "engagement_patterns"]
        }

        try:
            response = gemini_structured_json_response(
                prompt=prompt,
                schema=platform_schema,
                temperature=0.2,
                max_tokens=4096,
                system_prompt=f"You are an expert in {platform} content strategy and platform-specific writing optimization."
            )
            return response
        except Exception as e:
            logger.error(f"Error generating {platform} persona: {str(e)}")
            return {"error": f"Failed to generate {platform} persona: {str(e)}"}

    def _build_persona_analysis_prompt(self, onboarding_data: Dict[str, Any]) -> str:
        """Build the main persona analysis prompt.

        Args:
            onboarding_data: Data produced by ``_collect_onboarding_data``. The
                ``website_analysis`` and ``research_preferences`` entries may be
                missing or ``None``; both cases render the prompt's fallback
                values.

        Returns:
            The prompt text sent to Gemini for core persona generation.
        """
        # `_collect_onboarding_data` stores an explicit None when a row is absent,
        # so a `.get(key, {})` default is not enough here.
        website_analysis = onboarding_data.get("website_analysis") or {}
        research_prefs = onboarding_data.get("research_preferences") or {}

        # The prompt text is intentionally flush-left so its content is not
        # padded with the method's indentation.
        prompt = f"""
PERSONA GENERATION TASK: Create a comprehensive writing persona based on user onboarding data.
ONBOARDING DATA ANALYSIS:
Website Analysis:
- URL: {website_analysis.get('website_url', 'Not provided')}
- Writing Style: {json.dumps(website_analysis.get('writing_style', {}), indent=2)}
- Content Characteristics: {json.dumps(website_analysis.get('content_characteristics', {}), indent=2)}
- Target Audience: {json.dumps(website_analysis.get('target_audience', {}), indent=2)}
- Content Type: {json.dumps(website_analysis.get('content_type', {}), indent=2)}
- Style Patterns: {json.dumps(website_analysis.get('style_patterns', {}), indent=2)}
Research Preferences:
- Research Depth: {research_prefs.get('research_depth', 'Not set')}
- Content Types: {research_prefs.get('content_types', [])}
- Auto Research: {research_prefs.get('auto_research', False)}
- Factual Content: {research_prefs.get('factual_content', False)}
PERSONA GENERATION REQUIREMENTS:
1. IDENTITY CREATION:
- Create a memorable persona name that captures the essence of the writing style
- Define a clear archetype (e.g., "The Pragmatic Futurist", "The Thoughtful Educator")
- Articulate a core belief that drives the writing philosophy
- Write a comprehensive brand voice description
2. LINGUISTIC FINGERPRINT (Quantitative Analysis):
- Calculate average sentence length based on website analysis
- Determine preferred sentence types (simple, compound, complex)
- Analyze active vs passive voice ratio
- Identify go-to words and phrases from the content analysis
- List words and phrases to avoid
- Determine contraction usage patterns
- Assess vocabulary complexity level
3. RHETORICAL ANALYSIS:
- Identify metaphor patterns and themes
- Analyze analogy usage
- Assess rhetorical question frequency and style
- Determine storytelling approach
4. TONAL RANGE:
- Define the default tone
- List permissible tones for different contexts
- Identify forbidden tones that don't match the brand
- Describe emotional range and expression
5. STYLISTIC CONSTRAINTS:
- Define punctuation preferences and rules
- Set formatting guidelines
- Establish paragraph structure preferences
ANALYSIS INSTRUCTIONS:
- Base your analysis on the actual data provided from the website analysis
- If data is limited, make reasonable inferences but note the confidence level
- Ensure the persona is actionable and specific enough for AI content generation
- Provide a confidence score (0-100) based on data availability and quality
- Include analysis notes explaining your reasoning
Generate a comprehensive persona profile that can be used to replicate this writing style across different platforms.
"""
        return prompt

    def _build_platform_adaptation_prompt(self, core_persona: Dict[str, Any], platform: str, onboarding_data: Dict[str, Any]) -> str:
        """Build prompt for platform-specific persona adaptation.

        Args:
            core_persona: Core persona, embedded in the prompt as JSON.
            platform: Platform key used to look up constraints.
            onboarding_data: Accepted for signature parity with the other
                builders; not used when rendering this prompt.

        Returns:
            The prompt text sent to Gemini for the given platform.
        """
        platform_constraints = self._get_platform_constraints(platform)

        # Flush-left for the same reason as the core analysis prompt.
        prompt = f"""
PLATFORM ADAPTATION TASK: Adapt the core writing persona for {platform.upper()}.
CORE PERSONA:
{json.dumps(core_persona, indent=2)}
PLATFORM: {platform.upper()}
PLATFORM CONSTRAINTS:
{json.dumps(platform_constraints, indent=2)}
ADAPTATION REQUIREMENTS:
1. SENTENCE METRICS:
- Adjust sentence length for platform optimal performance
- Adapt sentence variety for platform engagement
- Consider platform reading patterns
2. LEXICAL ADAPTATIONS:
- Identify platform-specific vocabulary and slang
- Define hashtag strategy (if applicable)
- Set emoji usage guidelines
- Establish mention and tagging strategy
3. CONTENT FORMAT RULES:
- Respect character/word limits
- Optimize paragraph structure for platform
- Define call-to-action style
- Set link placement strategy
4. ENGAGEMENT PATTERNS:
- Determine optimal posting frequency
- Identify best posting times for audience
- Define engagement tactics
- Set community interaction guidelines
5. PLATFORM BEST PRACTICES:
- List platform-specific optimization techniques
- Consider algorithm preferences
- Include trending format adaptations
INSTRUCTIONS:
- Maintain the core persona identity while optimizing for platform performance
- Ensure all adaptations align with the original brand voice
- Consider platform-specific audience behavior
- Provide actionable, specific guidelines
Generate a platform-optimized persona adaptation that maintains brand consistency while maximizing platform performance.
"""
        return prompt

    def _get_platform_constraints(self, platform: str) -> Dict[str, Any]:
        """Get platform-specific constraints and best practices.

        Args:
            platform: Platform key, e.g. ``"twitter"`` or ``"blog"``.

        Returns:
            The constraint dict for the platform, or ``{}`` for an unknown key.
        """
        constraints = {
            "twitter": {
                "character_limit": 280,
                "optimal_length": "120-150 characters",
                "hashtag_limit": 3,
                "image_support": True,
                "thread_support": True,
                "link_shortening": True
            },
            "linkedin": {
                "character_limit": 3000,
                "optimal_length": "150-300 words",
                "professional_tone": True,
                "hashtag_limit": 5,
                "rich_media": True,
                "long_form": True
            },
            "instagram": {
                "caption_limit": 2200,
                "optimal_length": "125-150 words",
                "hashtag_limit": 30,
                "visual_first": True,
                "story_support": True,
                "emoji_friendly": True
            },
            "facebook": {
                "character_limit": 63206,
                "optimal_length": "40-80 words",
                "algorithm_favors": "engagement",
                "link_preview": True,
                "event_support": True,
                "group_sharing": True
            },
            "blog": {
                "word_count": "800-2000 words",
                "seo_important": True,
                "header_structure": True,
                "internal_linking": True,
                "meta_descriptions": True,
                "readability_score": True
            },
            "medium": {
                "word_count": "1000-3000 words",
                "storytelling_focus": True,
                "subtitle_support": True,
                "publication_support": True,
                "clap_optimization": True,
                "follower_building": True
            },
            "substack": {
                "newsletter_format": True,
                "email_optimization": True,
                "subscription_focus": True,
                "long_form": True,
                "personal_connection": True,
                "monetization_support": True
            }
        }
        return constraints.get(platform, {})

    def _save_persona_to_db(self, user_id: int, core_persona: Dict[str, Any], platform_personas: Dict[str, Any], onboarding_data: Dict[str, Any]) -> "WritingPersona":
        """Save generated persona to database.

        Writes one ``WritingPersona``, one ``PlatformPersona`` per generated
        platform and one ``PersonaAnalysisResult`` in a single transaction.

        Args:
            user_id: Owner of the persona.
            core_persona: Core persona returned by ``_generate_core_persona``.
            platform_personas: Mapping of platform name to adaptation dict.
            onboarding_data: Source data, stored alongside the persona.

        Returns:
            The persisted ``WritingPersona`` with its attributes loaded.

        Raises:
            Exception: Any error from the database layer is logged, the
                transaction is rolled back and the error is re-raised.
        """
        # Bound before the try so the except/finally clauses can safely test it
        # even when get_db_session() itself fails.
        session = None
        try:
            session = get_db_session()

            identity = core_persona.get("identity", {})
            linguistic_fingerprint = core_persona.get("linguistic_fingerprint", {})

            # Create main persona record
            writing_persona = WritingPersona(
                user_id=user_id,
                persona_name=identity.get("persona_name", "Generated Persona"),
                archetype=identity.get("archetype"),
                core_belief=identity.get("core_belief"),
                brand_voice_description=identity.get("brand_voice_description"),
                linguistic_fingerprint=linguistic_fingerprint,
                platform_adaptations={"platforms": list(platform_personas.keys())},
                onboarding_session_id=onboarding_data.get("session_info", {}).get("session_id"),
                source_website_analysis=onboarding_data.get("website_analysis"),
                source_research_preferences=onboarding_data.get("research_preferences"),
                ai_analysis_version="gemini_v1.0",
                confidence_score=core_persona.get("confidence_score", 0.0)
            )
            session.add(writing_persona)
            # Flush (not commit) to obtain the primary key while keeping the whole
            # save atomic: a later failure rolls the persona back as well instead
            # of leaving a persona row without its platform/analysis rows.
            session.flush()

            # Create platform-specific persona records
            for platform, platform_data in platform_personas.items():
                session.add(PlatformPersona(
                    writing_persona_id=writing_persona.id,
                    platform_type=platform,
                    sentence_metrics=platform_data.get("sentence_metrics", {}),
                    lexical_features=platform_data.get("lexical_adaptations", {}),
                    rhetorical_devices=linguistic_fingerprint.get("rhetorical_devices", {}),
                    tonal_range=core_persona.get("tonal_range", {}),
                    stylistic_constraints=core_persona.get("stylistic_constraints", {}),
                    content_format_rules=platform_data.get("content_format_rules", {}),
                    engagement_patterns=platform_data.get("engagement_patterns", {}),
                    platform_best_practices={"practices": platform_data.get("platform_best_practices", [])}
                ))

            # Save analysis result
            analysis_result = PersonaAnalysisResult(
                user_id=user_id,
                writing_persona_id=writing_persona.id,
                analysis_prompt=self._build_persona_analysis_prompt(onboarding_data)[:5000],  # Truncate for storage
                input_data=onboarding_data,
                linguistic_analysis=linguistic_fingerprint,
                personality_analysis=identity,
                platform_recommendations=platform_personas,
                style_guidelines=core_persona.get("stylistic_constraints", {}),
                analysis_confidence=core_persona.get("confidence_score", 0.0),
                data_sufficiency_score=self._calculate_data_sufficiency(onboarding_data),
                ai_provider="gemini",
                model_version="gemini-2.5-flash"
            )
            session.add(analysis_result)
            session.commit()

            # Reload attributes while the session is still open. If the session
            # expires objects on commit (SQLAlchemy's default), reading `.id`
            # after close() would otherwise hit a detached, expired instance.
            session.refresh(writing_persona)

            logger.info(f"✅ Persona saved to database with ID: {writing_persona.id}")
            return writing_persona
        except Exception as e:
            logger.error(f"Error saving persona to database: {str(e)}")
            if session is not None:
                session.rollback()
            raise
        finally:
            if session is not None:
                session.close()

    def _calculate_data_sufficiency(self, onboarding_data: Dict[str, Any]) -> float:
        """Calculate how sufficient the onboarding data is for persona generation.

        Each populated field adds a fixed weight; website analysis fields
        contribute up to 70 points and research preference fields up to 30.

        Args:
            onboarding_data: Data produced by ``_collect_onboarding_data``. The
                ``website_analysis`` and ``research_preferences`` entries may be
                missing or ``None``, in which case they contribute nothing.

        Returns:
            A score between 0.0 and 100.0.
        """
        # Explicit None values are stored when a row is absent, so fall back
        # with `or {}` rather than relying on the `.get` default.
        website_analysis = onboarding_data.get("website_analysis") or {}
        research_prefs = onboarding_data.get("research_preferences") or {}

        # Website analysis components (70% of score)
        website_weights = (
            ("writing_style", 25),
            ("content_characteristics", 20),
            ("target_audience", 15),
            ("style_patterns", 10),
        )
        # Research preferences components (30% of score)
        research_weights = (
            ("research_depth", 10),
            ("content_types", 10),
            ("writing_style", 10),
        )

        score = 0.0
        score += sum(weight for key, weight in website_weights if website_analysis.get(key))
        score += sum(weight for key, weight in research_weights if research_prefs.get(key))
        return min(score, 100.0)

    def get_user_personas(self, user_id: int) -> List[Dict[str, Any]]:
        """Get all personas for a user.

        Args:
            user_id: Owner of the personas.

        Returns:
            One dict per active persona, each with a ``"platforms"`` list of
            its active platform adaptations; ``[]`` when the lookup fails.
        """
        session = None
        try:
            session = get_db_session()

            personas = session.query(WritingPersona).filter(
                WritingPersona.user_id == user_id,
                WritingPersona.is_active == True
            ).all()

            result = []
            for persona in personas:
                persona_dict = persona.to_dict()

                # Get platform personas
                platform_personas = session.query(PlatformPersona).filter(
                    PlatformPersona.writing_persona_id == persona.id,
                    PlatformPersona.is_active == True
                ).all()

                persona_dict["platforms"] = [pp.to_dict() for pp in platform_personas]
                result.append(persona_dict)

            return result
        except Exception as e:
            logger.error(f"Error getting user personas: {str(e)}")
            return []
        finally:
            # Close on the error path too, not only on success.
            if session is not None:
                session.close()

    def get_persona_for_platform(self, user_id: int, platform: str) -> Optional[Dict[str, Any]]:
        """Get the best persona for a specific platform.

        Args:
            user_id: Owner of the persona.
            platform: Platform key, e.g. ``"linkedin"``.

        Returns:
            A dict with ``core_persona`` (the most recently created active
            persona) and ``platform_adaptation`` (``None`` when that persona
            has no active adaptation for the platform), or ``None`` when the
            user has no active persona or the lookup fails.
        """
        session = None
        try:
            session = get_db_session()

            # Get the most recent active persona
            persona = session.query(WritingPersona).filter(
                WritingPersona.user_id == user_id,
                WritingPersona.is_active == True
            ).order_by(WritingPersona.created_at.desc()).first()

            if not persona:
                return None

            # Get platform-specific adaptation
            platform_persona = session.query(PlatformPersona).filter(
                PlatformPersona.writing_persona_id == persona.id,
                PlatformPersona.platform_type == platform,
                PlatformPersona.is_active == True
            ).first()

            return {
                "core_persona": persona.to_dict(),
                "platform_adaptation": platform_persona.to_dict() if platform_persona else None
            }
        except Exception as e:
            logger.error(f"Error getting persona for platform {platform}: {str(e)}")
            return None
        finally:
            # Covers the early "no persona" return as well as errors.
            if session is not None:
                session.close()