ALwrity persona system

This commit is contained in:
ajaysi
2025-09-05 15:22:43 +05:30
parent ccbdc9e8c6
commit f82ada0361
38 changed files with 5673 additions and 1240 deletions

View File

@@ -0,0 +1,16 @@
"""
Core Persona Generation Module
This module contains the core persona generation logic extracted from persona_analysis_service.py
to improve maintainability and modularity.
"""
from .core_persona_service import CorePersonaService
from .data_collector import OnboardingDataCollector
from .prompt_builder import PersonaPromptBuilder
# Public API of this package: the names exported by a star-import.
__all__ = [
'CorePersonaService',
'OnboardingDataCollector',
'PersonaPromptBuilder'
]

View File

@@ -0,0 +1,159 @@
"""
Core Persona Service
Handles the core persona generation logic using Gemini AI.
"""
from typing import Dict, Any, List
from loguru import logger
from datetime import datetime
from services.llm_providers.gemini_provider import gemini_structured_json_response
from .data_collector import OnboardingDataCollector
from .prompt_builder import PersonaPromptBuilder
from services.persona.linkedin.linkedin_persona_service import LinkedInPersonaService
class CorePersonaService:
    """Core service for generating writing personas using Gemini AI.

    Prompts and response schemas come from ``PersonaPromptBuilder``; the
    structured response is requested through
    ``gemini_structured_json_response``. LinkedIn adaptations are delegated
    to ``LinkedInPersonaService``. Failures are reported to callers as
    ``{"error": "<message>"}`` dictionaries rather than raised.
    """

    # Constraints for every platform handled generically. LinkedIn is absent
    # on purpose: its constraints are owned by ``LinkedInPersonaService`` and
    # are fetched lazily in ``_get_platform_constraints``.
    _GENERIC_PLATFORM_CONSTRAINTS = {
        "twitter": {
            "character_limit": 280,
            "optimal_length": "120-150 characters",
            "hashtag_limit": 3,
            "image_support": True,
            "thread_support": True,
            "link_shortening": True,
        },
        "instagram": {
            "caption_limit": 2200,
            "optimal_length": "125-150 words",
            "hashtag_limit": 30,
            "visual_first": True,
            "story_support": True,
            "emoji_friendly": True,
        },
        "facebook": {
            "character_limit": 63206,
            "optimal_length": "40-80 words",
            "algorithm_favors": "engagement",
            "link_preview": True,
            "event_support": True,
            "group_sharing": True,
        },
        "blog": {
            "word_count": "800-2000 words",
            "seo_important": True,
            "header_structure": True,
            "internal_linking": True,
            "meta_descriptions": True,
            "readability_score": True,
        },
        "medium": {
            "word_count": "1000-3000 words",
            "storytelling_focus": True,
            "subtitle_support": True,
            "publication_support": True,
            "clap_optimization": True,
            "follower_building": True,
        },
        "substack": {
            "newsletter_format": True,
            "email_optimization": True,
            "subscription_focus": True,
            "long_form": True,
            "personal_connection": True,
            "monetization_support": True,
        },
    }

    def __init__(self):
        """Initialize the core persona service and its collaborators."""
        self.data_collector = OnboardingDataCollector()
        self.prompt_builder = PersonaPromptBuilder()
        self.linkedin_service = LinkedInPersonaService()
        logger.info("CorePersonaService initialized")

    def generate_core_persona(self, onboarding_data: Dict[str, Any]) -> Dict[str, Any]:
        """Generate core writing persona using Gemini structured response.

        Args:
            onboarding_data: Data passed to
                ``PersonaPromptBuilder.build_persona_analysis_prompt``.

        Returns:
            The structured response from Gemini, or ``{"error": ...}`` when
            prompt construction, the API call, or the response itself fails.
        """
        try:
            # Prompt/schema construction lives inside the ``try`` so that a
            # builder failure is reported through the same error-dict
            # contract as an API failure instead of escaping as an exception.
            prompt = self.prompt_builder.build_persona_analysis_prompt(onboarding_data)
            persona_schema = self.prompt_builder.get_persona_schema()

            response = gemini_structured_json_response(
                prompt=prompt,
                schema=persona_schema,
                temperature=0.2,  # Low temperature for consistent analysis
                max_tokens=8192,
                system_prompt="You are an expert writing style analyst and persona developer. Analyze the provided data to create a precise, actionable writing persona."
            )

            if "error" in response:
                logger.error(f"Gemini API error: {response['error']}")
                return {"error": f"AI analysis failed: {response['error']}"}

            logger.info("✅ Core persona generated successfully")
            return response
        except Exception as e:
            logger.error(f"Error generating core persona: {str(e)}")
            return {"error": f"Failed to generate core persona: {str(e)}"}

    def generate_platform_adaptations(self, core_persona: Dict[str, Any], onboarding_data: Dict[str, Any]) -> Dict[str, Any]:
        """Generate platform-specific persona adaptations.

        Args:
            core_persona: Persona produced by ``generate_core_persona``.
            onboarding_data: Onboarding data forwarded to each platform prompt.

        Returns:
            Mapping of platform name to its adaptation. Platforms whose
            generation failed (error dict or exception) are logged and
            omitted, so the result may be partial or empty.
        """
        platforms = ["twitter", "linkedin", "instagram", "facebook", "blog", "medium", "substack"]
        platform_personas = {}

        for platform in platforms:
            try:
                platform_persona = self._generate_single_platform_persona(core_persona, platform, onboarding_data)
                if "error" not in platform_persona:
                    platform_personas[platform] = platform_persona
                else:
                    logger.warning(f"Failed to generate {platform} persona: {platform_persona['error']}")
            except Exception as e:
                # One failing platform must not abort the remaining ones.
                logger.error(f"Error generating {platform} persona: {str(e)}")

        return platform_personas

    def _generate_single_platform_persona(self, core_persona: Dict[str, Any], platform: str, onboarding_data: Dict[str, Any]) -> Dict[str, Any]:
        """Generate persona adaptation for a specific platform.

        LinkedIn is delegated to ``LinkedInPersonaService`` (exceptions from
        that call propagate to the caller). Every other platform goes through
        the generic prompt/schema pair and returns either the Gemini response
        or ``{"error": ...}``.
        """
        # Use LinkedIn service for LinkedIn platform
        if platform.lower() == "linkedin":
            return self.linkedin_service.generate_linkedin_persona(core_persona, onboarding_data)

        try:
            # Use generic platform adaptation for other platforms. Built
            # inside the ``try`` so builder errors become an error dict too.
            platform_constraints = self._get_platform_constraints(platform)
            prompt = self.prompt_builder.build_platform_adaptation_prompt(core_persona, platform, onboarding_data, platform_constraints)
            platform_schema = self.prompt_builder.get_platform_schema()

            response = gemini_structured_json_response(
                prompt=prompt,
                schema=platform_schema,
                temperature=0.2,
                max_tokens=4096,
                system_prompt=f"You are an expert in {platform} content strategy and platform-specific writing optimization."
            )
            return response
        except Exception as e:
            logger.error(f"Error generating {platform} persona: {str(e)}")
            return {"error": f"Failed to generate {platform} persona: {str(e)}"}

    def _get_platform_constraints(self, platform: str) -> Dict[str, Any]:
        """Get platform-specific constraints and best practices.

        The lookup is case-insensitive, matching the ``platform.lower()``
        comparison in ``_generate_single_platform_persona``.

        Args:
            platform: Platform name, e.g. ``"twitter"`` or ``"LinkedIn"``.

        Returns:
            The constraint dictionary for the platform, or ``{}`` when the
            platform is unknown. Generic constraints are returned as a fresh
            copy so callers may mutate the result safely.
        """
        key = platform.lower() if isinstance(platform, str) else platform

        # Only consult the LinkedIn service when LinkedIn is actually
        # requested; a failure there must not break other platforms.
        if key == "linkedin":
            return self.linkedin_service.get_linkedin_constraints()

        return dict(self._GENERIC_PLATFORM_CONSTRAINTS.get(key, {}))

View File

@@ -0,0 +1,306 @@
"""
Onboarding Data Collector
Handles comprehensive collection of onboarding data for persona generation.
"""
from typing import Dict, Any, List, Optional
from sqlalchemy.orm import Session
from loguru import logger
from services.database import get_db_session
from models.onboarding import OnboardingSession, WebsiteAnalysis, ResearchPreferences, APIKey
class OnboardingDataCollector:
    """Collects comprehensive onboarding data for persona analysis.

    Reads the onboarding session and its related rows from the database,
    reshapes the stored website analysis into the ``enhanced_analysis``
    structure consumed by the prompt builder, and scores how complete that
    data is.
    """

    # (enhanced_analysis section, points per populated field, scored fields).
    # Section totals: 25 + 20 + 15 + 15 + 15 = 90; research preferences add 10.
    _ENHANCED_SECTION_WEIGHTS = (
        ("comprehensive_style_analysis", 5, (
            "tone_analysis", "voice_characteristics", "brand_personality",
            "formality_level", "emotional_appeal",
        )),
        ("content_insights", 4, (
            "sentence_structure_analysis", "vocabulary_level", "readability_score",
            "content_flow", "visual_elements_usage",
        )),
        ("audience_intelligence", 3, (
            "demographics", "expertise_level", "industry_focus",
            "psychographic_profile", "pain_points",
        )),
        ("technical_writing_metrics", 3, (
            "vocabulary_patterns", "rhetorical_devices", "paragraph_structure",
            "style_consistency", "unique_elements",
        )),
        ("content_strategy_insights", 3, (
            "tone_recommendations", "best_practices", "competitive_advantages",
            "content_strategy", "ai_generation_tips",
        )),
    )
    _ENHANCED_RESEARCH_WEIGHTS = (("research_depth", 5), ("content_types", 5))

    # Legacy scoring: website analysis contributes up to 70, research
    # preferences up to 30.
    _LEGACY_WEBSITE_WEIGHTS = (
        ("writing_style", 25),
        ("content_characteristics", 20),
        ("target_audience", 15),
        ("style_patterns", 10),
    )
    _LEGACY_RESEARCH_WEIGHTS = (
        ("research_depth", 10),
        ("content_types", 10),
        ("writing_style", 10),
    )

    def collect_onboarding_data(self, user_id: int, session_id: Optional[int] = None) -> Optional[Dict[str, Any]]:
        """Collect comprehensive onboarding data for persona analysis.

        Args:
            user_id: Owner of the onboarding session.
            session_id: Specific onboarding session to load. When omitted
                (or falsy) the user's most recently updated session is used.

        Returns:
            A dictionary with ``session_info``, ``api_keys``,
            ``website_analyses``, ``research_preferences``,
            ``website_analysis`` (latest, legacy key) and
            ``enhanced_analysis``; ``None`` when no session exists or any
            error occurs (the error is logged).
        """
        try:
            session = get_db_session()
            try:
                # Find onboarding session
                if session_id:
                    onboarding_session = session.query(OnboardingSession).filter(
                        OnboardingSession.id == session_id,
                        OnboardingSession.user_id == user_id
                    ).first()
                else:
                    onboarding_session = session.query(OnboardingSession).filter(
                        OnboardingSession.user_id == user_id
                    ).order_by(OnboardingSession.updated_at.desc()).first()

                if not onboarding_session:
                    return None

                # Get ALL website analyses (there might be multiple), newest first
                website_analyses = session.query(WebsiteAnalysis).filter(
                    WebsiteAnalysis.session_id == onboarding_session.id
                ).order_by(WebsiteAnalysis.updated_at.desc()).all()

                # Get research preferences
                research_prefs = session.query(ResearchPreferences).filter(
                    ResearchPreferences.session_id == onboarding_session.id
                ).first()

                # Get API keys
                # NOTE(review): these dicts travel with the persona data;
                # confirm APIKey.to_dict() does not expose raw secrets.
                api_keys = session.query(APIKey).filter(
                    APIKey.session_id == onboarding_session.id
                ).all()

                started_at = onboarding_session.started_at
                updated_at = onboarding_session.updated_at

                # Compile comprehensive data with ALL available information
                return {
                    "session_info": {
                        "session_id": onboarding_session.id,
                        "user_id": onboarding_session.user_id,
                        "current_step": onboarding_session.current_step,
                        "progress": onboarding_session.progress,
                        "started_at": started_at.isoformat() if started_at else None,
                        "updated_at": updated_at.isoformat() if updated_at else None
                    },
                    "api_keys": [key.to_dict() for key in api_keys] if api_keys else [],
                    "website_analyses": [analysis.to_dict() for analysis in website_analyses] if website_analyses else [],
                    "research_preferences": research_prefs.to_dict() if research_prefs else None,
                    # Legacy compatibility - use the latest website analysis
                    "website_analysis": website_analyses[0].to_dict() if website_analyses else None,
                    # Enhanced data extraction for persona generation
                    "enhanced_analysis": self._extract_enhanced_analysis_data(website_analyses, research_prefs)
                }
            finally:
                # Always release the session: the early "no session found"
                # return and every error path must not leak a connection.
                if session is not None:
                    session.close()
        except Exception as e:
            logger.error(f"Error collecting onboarding data: {str(e)}")
            return None

    def _extract_enhanced_analysis_data(self, website_analyses: List, research_prefs) -> Dict[str, Any]:
        """Extract and structure all the rich AI analysis data for persona generation.

        Args:
            website_analyses: Website analysis records, newest first; only
                the first one is used.
            research_prefs: Research preferences record, or ``None``.

        Returns:
            A dictionary with seven sections (each ``{}`` when its source
            field is empty) plus ``research_preferences`` when
            ``research_prefs`` is provided. With no website analyses the
            seven empty sections are returned as-is.
        """
        enhanced_data = {
            "comprehensive_style_analysis": {},
            "content_insights": {},
            "audience_intelligence": {},
            "brand_voice_analysis": {},
            "technical_writing_metrics": {},
            "competitive_analysis": {},
            "content_strategy_insights": {}
        }

        if not website_analyses:
            return enhanced_data

        # Use the latest (most comprehensive) website analysis
        latest_analysis = website_analyses[0]

        # Extract comprehensive style analysis
        writing_style = latest_analysis.writing_style
        if writing_style:
            enhanced_data["comprehensive_style_analysis"] = {
                "tone_analysis": writing_style.get("tone", ""),
                "voice_characteristics": writing_style.get("voice", ""),
                "complexity_assessment": writing_style.get("complexity", ""),
                "engagement_level": writing_style.get("engagement_level", ""),
                "brand_personality": writing_style.get("brand_personality", ""),
                "formality_level": writing_style.get("formality_level", ""),
                "emotional_appeal": writing_style.get("emotional_appeal", "")
            }

        # Extract content insights
        characteristics = latest_analysis.content_characteristics
        if characteristics:
            enhanced_data["content_insights"] = {
                "sentence_structure_analysis": characteristics.get("sentence_structure", ""),
                "vocabulary_level": characteristics.get("vocabulary_level", ""),
                "paragraph_organization": characteristics.get("paragraph_organization", ""),
                "content_flow": characteristics.get("content_flow", ""),
                "readability_score": characteristics.get("readability_score", ""),
                "content_density": characteristics.get("content_density", ""),
                "visual_elements_usage": characteristics.get("visual_elements_usage", "")
            }

        # Extract audience intelligence
        audience = latest_analysis.target_audience
        if audience:
            enhanced_data["audience_intelligence"] = {
                "demographics": audience.get("demographics", []),
                "expertise_level": audience.get("expertise_level", ""),
                "industry_focus": audience.get("industry_focus", ""),
                "geographic_focus": audience.get("geographic_focus", ""),
                "psychographic_profile": audience.get("psychographic_profile", ""),
                "pain_points": audience.get("pain_points", []),
                "motivations": audience.get("motivations", [])
            }

        # Extract brand voice analysis
        content_type = latest_analysis.content_type
        if content_type:
            enhanced_data["brand_voice_analysis"] = {
                "primary_content_type": content_type.get("primary_type", ""),
                "secondary_content_types": content_type.get("secondary_types", []),
                "content_purpose": content_type.get("purpose", ""),
                "call_to_action_style": content_type.get("call_to_action", ""),
                "conversion_focus": content_type.get("conversion_focus", ""),
                "educational_value": content_type.get("educational_value", "")
            }

        # Extract technical writing metrics
        style_patterns = latest_analysis.style_patterns
        if style_patterns:
            # ``or {}`` also covers a stored ``"patterns": null``.
            patterns = style_patterns.get("patterns") or {}
            enhanced_data["technical_writing_metrics"] = {
                "sentence_length_preference": patterns.get("sentence_length", ""),
                "vocabulary_patterns": patterns.get("vocabulary_patterns", []),
                "rhetorical_devices": patterns.get("rhetorical_devices", []),
                "paragraph_structure": patterns.get("paragraph_structure", ""),
                "transition_phrases": patterns.get("transition_phrases", []),
                "style_consistency": style_patterns.get("style_consistency", ""),
                "unique_elements": style_patterns.get("unique_elements", [])
            }

        # Extract competitive analysis from crawl results
        crawl_data = latest_analysis.crawl_result
        if crawl_data:
            enhanced_data["competitive_analysis"] = {
                "domain_info": crawl_data.get("domain_info", {}),
                "social_media_presence": crawl_data.get("social_media", {}),
                "brand_info": crawl_data.get("brand_info", {}),
                "content_structure": crawl_data.get("content_structure", {}),
                "meta_optimization": crawl_data.get("meta_tags", {})
            }

        # Extract content strategy insights from style guidelines
        guidelines = latest_analysis.style_guidelines
        if guidelines:
            # ``or {}`` also covers a stored ``"guidelines": null``.
            nested = guidelines.get("guidelines") or {}
            enhanced_data["content_strategy_insights"] = {
                "tone_recommendations": nested.get("tone_recommendations", []),
                "structure_guidelines": nested.get("structure_guidelines", []),
                "vocabulary_suggestions": nested.get("vocabulary_suggestions", []),
                "engagement_tips": nested.get("engagement_tips", []),
                "audience_considerations": nested.get("audience_considerations", []),
                "brand_alignment": nested.get("brand_alignment", []),
                "seo_optimization": nested.get("seo_optimization", []),
                "conversion_optimization": nested.get("conversion_optimization", []),
                "best_practices": guidelines.get("best_practices", []),
                "avoid_elements": guidelines.get("avoid_elements", []),
                "content_strategy": guidelines.get("content_strategy", ""),
                "ai_generation_tips": guidelines.get("ai_generation_tips", []),
                "competitive_advantages": guidelines.get("competitive_advantages", []),
                "content_calendar_suggestions": guidelines.get("content_calendar_suggestions", [])
            }

        # Add research preferences insights
        if research_prefs:
            enhanced_data["research_preferences"] = {
                "research_depth": research_prefs.research_depth,
                "content_types": research_prefs.content_types,
                "auto_research": research_prefs.auto_research,
                "factual_content": research_prefs.factual_content
            }

        return enhanced_data

    def calculate_data_sufficiency(self, onboarding_data: Dict[str, Any]) -> float:
        """Calculate how sufficient the onboarding data is for persona generation.

        Each populated field of ``enhanced_analysis`` and
        ``research_preferences`` earns the points listed in the class weight
        tables (100 in total). When that score is below 50, a legacy score
        based on the raw ``website_analysis`` and ``research_preferences``
        keys is computed as well and the higher of the two is used.

        Args:
            onboarding_data: Dictionary shaped like the result of
                ``collect_onboarding_data``; missing or ``None`` sections
                count as empty.

        Returns:
            A score between 0.0 and 100.0.
        """
        enhanced_analysis = onboarding_data.get("enhanced_analysis") or {}
        website_analysis = onboarding_data.get("website_analysis") or {}
        research_prefs = onboarding_data.get("research_preferences") or {}

        # Enhanced scoring based on comprehensive data availability
        score = 0.0
        for section, points, fields in self._ENHANCED_SECTION_WEIGHTS:
            section_data = enhanced_analysis.get(section) or {}
            score += points * sum(1 for field in fields if section_data.get(field))
        score += sum(
            points for field, points in self._ENHANCED_RESEARCH_WEIGHTS
            if research_prefs.get(field)
        )

        # Legacy compatibility - fall back to basic data if enhanced data is thin
        if score < 50:
            legacy_score = 0.0
            legacy_score += sum(
                points for field, points in self._LEGACY_WEBSITE_WEIGHTS
                if website_analysis.get(field)
            )
            legacy_score += sum(
                points for field, points in self._LEGACY_RESEARCH_WEIGHTS
                if research_prefs.get(field)
            )
            # Use the higher of enhanced or legacy score
            score = max(score, legacy_score)

        return min(score, 100.0)

View File

@@ -0,0 +1,313 @@
"""
Persona Prompt Builder
Handles building comprehensive prompts for persona generation.
"""
from typing import Dict, Any
import json
from loguru import logger
class PersonaPromptBuilder:
    """Builds comprehensive prompts for persona generation.

    Provides the two prompt templates (core persona analysis and
    platform adaptation) and the JSON schemas describing the structured
    responses expected for each.
    """

    def build_persona_analysis_prompt(self, onboarding_data: Dict[str, Any]) -> str:
        """Build the main persona analysis prompt with comprehensive data.

        Args:
            onboarding_data: Onboarding data; the ``enhanced_analysis`` and
                ``website_analysis`` entries are read. Either may be missing
                or ``None``, in which case empty data is rendered.

        Returns:
            The prompt text with each analysis section embedded as
            indented JSON.

        Raises:
            TypeError: If a section contains values ``json.dumps`` cannot
                serialize.
        """
        # ``or {}`` guards against keys that are present but set to None.
        enhanced_analysis = onboarding_data.get("enhanced_analysis") or {}
        website_analysis = onboarding_data.get("website_analysis") or {}

        # The template lines are flush-left on purpose: any leading
        # whitespace would become part of the prompt text.
        prompt = f"""
COMPREHENSIVE PERSONA GENERATION TASK: Create a highly detailed, data-driven writing persona based on extensive AI analysis of user's website and content strategy.
=== COMPREHENSIVE ONBOARDING DATA ANALYSIS ===
WEBSITE ANALYSIS OVERVIEW:
- URL: {website_analysis.get('website_url', 'Not provided')}
- Analysis Date: {website_analysis.get('analysis_date', 'Not provided')}
- Status: {website_analysis.get('status', 'Not provided')}
=== DETAILED STYLE ANALYSIS ===
{json.dumps(enhanced_analysis.get('comprehensive_style_analysis', {}), indent=2)}
=== CONTENT INSIGHTS ===
{json.dumps(enhanced_analysis.get('content_insights', {}), indent=2)}
=== AUDIENCE INTELLIGENCE ===
{json.dumps(enhanced_analysis.get('audience_intelligence', {}), indent=2)}
=== BRAND VOICE ANALYSIS ===
{json.dumps(enhanced_analysis.get('brand_voice_analysis', {}), indent=2)}
=== TECHNICAL WRITING METRICS ===
{json.dumps(enhanced_analysis.get('technical_writing_metrics', {}), indent=2)}
=== COMPETITIVE ANALYSIS ===
{json.dumps(enhanced_analysis.get('competitive_analysis', {}), indent=2)}
=== CONTENT STRATEGY INSIGHTS ===
{json.dumps(enhanced_analysis.get('content_strategy_insights', {}), indent=2)}
=== RESEARCH PREFERENCES ===
{json.dumps(enhanced_analysis.get('research_preferences', {}), indent=2)}
=== LEGACY DATA (for compatibility) ===
Website Analysis: {json.dumps(website_analysis.get('writing_style', {}), indent=2)}
Content Characteristics: {json.dumps(website_analysis.get('content_characteristics', {}) or {}, indent=2)}
Target Audience: {json.dumps(website_analysis.get('target_audience', {}), indent=2)}
Style Patterns: {json.dumps(website_analysis.get('style_patterns', {}), indent=2)}
=== COMPREHENSIVE PERSONA GENERATION REQUIREMENTS ===
1. IDENTITY CREATION (Based on Brand Analysis):
- Create a memorable persona name that captures the essence of the brand personality and writing style
- Define a clear archetype that reflects the brand's positioning and audience appeal
- Articulate a core belief that drives the writing philosophy and brand values
- Write a comprehensive brand voice description incorporating all style elements
2. LINGUISTIC FINGERPRINT (Quantitative Analysis from Technical Metrics):
- Calculate precise average sentence length from sentence structure analysis
- Determine preferred sentence types based on paragraph organization patterns
- Analyze active vs passive voice ratio from voice characteristics
- Extract go-to words and phrases from vocabulary patterns and style analysis
- List words and phrases to avoid based on brand alignment guidelines
- Determine contraction usage patterns from formality level
- Assess vocabulary complexity level from readability scores
3. RHETORICAL ANALYSIS (From Style Patterns):
- Identify metaphor patterns and themes from rhetorical devices
- Analyze analogy usage from content strategy insights
- Assess rhetorical question frequency from engagement tips
- Determine storytelling approach from content flow analysis
4. TONAL RANGE (From Comprehensive Style Analysis):
- Define the default tone from tone analysis and brand personality
- List permissible tones based on emotional appeal and audience considerations
- Identify forbidden tones from avoid elements and brand alignment
- Describe emotional range from psychographic profile and engagement level
5. STYLISTIC CONSTRAINTS (From Technical Writing Metrics):
- Define punctuation preferences from paragraph structure analysis
- Set formatting guidelines from content structure insights
- Establish paragraph structure preferences from organization patterns
- Include transition phrase preferences from style patterns
6. PLATFORM-SPECIFIC ADAPTATIONS (From Content Strategy):
- Incorporate SEO optimization strategies
- Include conversion optimization techniques
- Apply engagement tips for different platforms
- Use competitive advantages for differentiation
7. CONTENT STRATEGY INTEGRATION:
- Incorporate best practices from content strategy insights
- Include AI generation tips for consistent output
- Apply content calendar suggestions for timing
- Use competitive advantages for positioning
=== ENHANCED ANALYSIS INSTRUCTIONS ===
- Base your analysis on ALL the comprehensive data provided above
- Use the detailed technical metrics for precise linguistic analysis
- Incorporate brand voice analysis for authentic personality
- Apply audience intelligence for targeted communication
- Include competitive analysis for market positioning
- Use content strategy insights for practical application
- Ensure the persona reflects the brand's unique elements and competitive advantages
- Provide a confidence score (0-100) based on data richness and quality
- Include detailed analysis notes explaining your reasoning and data sources
Generate a comprehensive, data-driven persona profile that can be used to replicate this writing style across different platforms while maintaining brand authenticity and competitive positioning.
"""
        return prompt

    def build_platform_adaptation_prompt(self, core_persona: Dict[str, Any], platform: str, onboarding_data: Dict[str, Any], platform_constraints: Dict[str, Any]) -> str:
        """Build prompt for platform-specific persona adaptation.

        Args:
            core_persona: Core persona, embedded as indented JSON.
            platform: Platform name; rendered upper-cased in the prompt.
            onboarding_data: Not used by the template; kept so the signature
                stays compatible with existing callers.
            platform_constraints: Platform limits, embedded as indented JSON.

        Returns:
            The platform adaptation prompt text.
        """
        # Flush-left template lines: leading whitespace would be sent as-is.
        prompt = f"""
PLATFORM ADAPTATION TASK: Adapt the core writing persona for {platform.upper()}.
CORE PERSONA:
{json.dumps(core_persona, indent=2)}
PLATFORM: {platform.upper()}
PLATFORM CONSTRAINTS:
{json.dumps(platform_constraints, indent=2)}
ADAPTATION REQUIREMENTS:
1. SENTENCE METRICS:
- Adjust sentence length for platform optimal performance
- Adapt sentence variety for platform engagement
- Consider platform reading patterns
2. LEXICAL ADAPTATIONS:
- Identify platform-specific vocabulary and slang
- Define hashtag strategy (if applicable)
- Set emoji usage guidelines
- Establish mention and tagging strategy
3. CONTENT FORMAT RULES:
- Respect character/word limits
- Optimize paragraph structure for platform
- Define call-to-action style
- Set link placement strategy
4. ENGAGEMENT PATTERNS:
- Determine optimal posting frequency
- Identify best posting times for audience
- Define engagement tactics
- Set community interaction guidelines
5. PLATFORM BEST PRACTICES:
- List platform-specific optimization techniques
- Consider algorithm preferences
- Include trending format adaptations
INSTRUCTIONS:
- Maintain the core persona identity while optimizing for platform performance
- Ensure all adaptations align with the original brand voice
- Consider platform-specific audience behavior
- Provide actionable, specific guidelines
Generate a platform-optimized persona adaptation that maintains brand consistency while maximizing platform performance.
"""
        return prompt

    def get_persona_schema(self) -> Dict[str, Any]:
        """Get the schema for core persona generation.

        Returns:
            A JSON-schema-style dictionary. ``identity``,
            ``linguistic_fingerprint``, ``tonal_range`` and
            ``confidence_score`` are required at the top level.
        """
        return {
            "type": "object",
            "properties": {
                "identity": {
                    "type": "object",
                    "properties": {
                        "persona_name": {"type": "string"},
                        "archetype": {"type": "string"},
                        "core_belief": {"type": "string"},
                        "brand_voice_description": {"type": "string"}
                    },
                    "required": ["persona_name", "archetype", "core_belief"]
                },
                "linguistic_fingerprint": {
                    "type": "object",
                    "properties": {
                        "sentence_metrics": {
                            "type": "object",
                            "properties": {
                                "average_sentence_length_words": {"type": "number"},
                                "preferred_sentence_type": {"type": "string"},
                                "active_to_passive_ratio": {"type": "string"},
                                "complexity_level": {"type": "string"}
                            }
                        },
                        "lexical_features": {
                            "type": "object",
                            "properties": {
                                "go_to_words": {"type": "array", "items": {"type": "string"}},
                                "go_to_phrases": {"type": "array", "items": {"type": "string"}},
                                "avoid_words": {"type": "array", "items": {"type": "string"}},
                                "contractions": {"type": "string"},
                                "filler_words": {"type": "string"},
                                "vocabulary_level": {"type": "string"}
                            }
                        },
                        "rhetorical_devices": {
                            "type": "object",
                            "properties": {
                                "metaphors": {"type": "string"},
                                "analogies": {"type": "string"},
                                "rhetorical_questions": {"type": "string"},
                                "storytelling_style": {"type": "string"}
                            }
                        }
                    }
                },
                "tonal_range": {
                    "type": "object",
                    "properties": {
                        "default_tone": {"type": "string"},
                        "permissible_tones": {"type": "array", "items": {"type": "string"}},
                        "forbidden_tones": {"type": "array", "items": {"type": "string"}},
                        "emotional_range": {"type": "string"}
                    }
                },
                "stylistic_constraints": {
                    "type": "object",
                    "properties": {
                        "punctuation": {
                            "type": "object",
                            "properties": {
                                "ellipses": {"type": "string"},
                                "em_dash": {"type": "string"},
                                "exclamation_points": {"type": "string"}
                            }
                        },
                        "formatting": {
                            "type": "object",
                            "properties": {
                                "paragraphs": {"type": "string"},
                                "lists": {"type": "string"},
                                "markdown": {"type": "string"}
                            }
                        }
                    }
                },
                "confidence_score": {"type": "number"},
                "analysis_notes": {"type": "string"}
            },
            "required": ["identity", "linguistic_fingerprint", "tonal_range", "confidence_score"]
        }

    def get_platform_schema(self) -> Dict[str, Any]:
        """Get the schema for platform-specific persona adaptation.

        Returns:
            A JSON-schema-style dictionary. ``platform_type``,
            ``sentence_metrics``, ``content_format_rules`` and
            ``engagement_patterns`` are required at the top level.
        """
        return {
            "type": "object",
            "properties": {
                "platform_type": {"type": "string"},
                "sentence_metrics": {
                    "type": "object",
                    "properties": {
                        "max_sentence_length": {"type": "number"},
                        "optimal_sentence_length": {"type": "number"},
                        "sentence_variety": {"type": "string"}
                    }
                },
                "lexical_adaptations": {
                    "type": "object",
                    "properties": {
                        "platform_specific_words": {"type": "array", "items": {"type": "string"}},
                        "hashtag_strategy": {"type": "string"},
                        "emoji_usage": {"type": "string"},
                        "mention_strategy": {"type": "string"}
                    }
                },
                "content_format_rules": {
                    "type": "object",
                    "properties": {
                        "character_limit": {"type": "number"},
                        "paragraph_structure": {"type": "string"},
                        "call_to_action_style": {"type": "string"},
                        "link_placement": {"type": "string"}
                    }
                },
                "engagement_patterns": {
                    "type": "object",
                    "properties": {
                        "posting_frequency": {"type": "string"},
                        "optimal_posting_times": {"type": "array", "items": {"type": "string"}},
                        "engagement_tactics": {"type": "array", "items": {"type": "string"}},
                        "community_interaction": {"type": "string"}
                    }
                },
                "platform_best_practices": {
                    "type": "array",
                    "items": {"type": "string"}
                }
            },
            "required": ["platform_type", "sentence_metrics", "content_format_rules", "engagement_patterns"]
        }