Files
moreminimore-marketing/backend/services/persona_replication_engine.py
Kunthawat Greethong c35fa52117 Base code
2026-01-08 22:39:53 +07:00

506 lines
25 KiB
Python

"""
Persona Replication Engine
Implements the hardened persona replication system for high-fidelity content generation.
Based on quantitative analysis and structured constraints.
"""
import json
import re
from datetime import datetime, timezone
from typing import Dict, Any, List, Optional

from loguru import logger

from services.llm_providers.gemini_provider import gemini_structured_json_response
from services.persona_analysis_service import PersonaAnalysisService
class PersonaReplicationEngine:
    """
    High-fidelity persona replication engine that generates content
    indistinguishable from the original author's work.

    The engine looks up a stored persona for a user/platform pair, turns it
    into a strict system prompt, asks the LLM for structured output, and then
    scores the result against the persona's measurable constraints.
    """

    # Fallback used when the persona carries no usable average sentence length.
    _DEFAULT_SENTENCE_LENGTH = 15
    # Allowed deviation (in words) between generated and target sentence length.
    _SENTENCE_LENGTH_TOLERANCE = 5
    # Sentences end on '.', '!' or '?' (runs such as '...' or '?!' count once).
    _SENTENCE_BOUNDARY = re.compile(r"[.!?]+")

    _PLATFORM_OPTIMIZATION_RULES = {
        "twitter": "Use hashtags strategically (max 3), engage with questions, optimize for retweets",
        "linkedin": "Professional tone, thought leadership focus, encourage professional discussion",
        "instagram": "Visual-first approach, emoji usage, story-friendly format",
        "facebook": "Community engagement, shareable content, algorithm-friendly",
        "blog": "SEO-optimized, scannable format, internal linking",
        "medium": "Storytelling focus, publication-ready, clap optimization",
        "substack": "Newsletter format, subscriber value, email-friendly",
    }

    _PLATFORM_EXTRA_CONSTRAINTS = {
        "twitter": ("Max 3 hashtags", "Thread-friendly format", "Engagement-optimized"),
        "linkedin": ("Professional networking focus", "Thought leadership tone", "Business value emphasis"),
        "blog": ("SEO-optimized structure", "Scannable format", "Clear headings"),
    }

    def __init__(self):
        """Initialize the persona replication engine."""
        self.persona_service = PersonaAnalysisService()
        logger.info("PersonaReplicationEngine initialized")

    def generate_content_with_persona(self,
                                      user_id: int,
                                      platform: str,
                                      content_request: str,
                                      content_type: str = "post") -> Dict[str, Any]:
        """
        Generate content using the hardened persona replication system.

        Args:
            user_id: User ID for persona lookup
            platform: Target platform (twitter, linkedin, blog, etc.)
            content_request: What content to generate
            content_type: Type of content (post, article, thread, etc.)

        Returns:
            Generated content with persona fidelity metrics, or a dict with a
            single "error" key when the persona is missing or generation fails.
        """
        try:
            logger.info(f"Generating {content_type} for {platform} using persona replication")

            # Get platform-specific persona
            persona_data = self.persona_service.get_persona_for_platform(user_id, platform)
            if not persona_data:
                return {"error": "No persona found for user and platform"}

            system_prompt = self._build_hardened_system_prompt(persona_data, platform)
            content_prompt = self._build_content_prompt(content_request, content_type, platform, persona_data)

            # Generate content with strict persona constraints
            content_result = self._generate_constrained_content(
                system_prompt, content_prompt, platform, persona_data
            )
            if "error" in content_result:
                return content_result

            content = content_result.get("content")
            if not isinstance(content, str):
                # Report a clear error instead of a bare KeyError message.
                return {"error": "Content generation failed: response contained no content"}

            validation_result = self._validate_content_fidelity(content, persona_data, platform)

            return {
                "content": content,
                "persona_fidelity_score": validation_result["fidelity_score"],
                "platform_optimization_score": validation_result["platform_score"],
                "persona_compliance": validation_result["compliance_check"],
                "generation_metadata": {
                    # .get: a persona without an id must not discard content
                    # that has already been generated.
                    "persona_id": persona_data["core_persona"].get("id"),
                    "platform": platform,
                    "content_type": content_type,
                    "generated_at": content_result.get("generated_at"),
                    "constraints_applied": validation_result["constraints_checked"]
                }
            }
        except Exception as e:
            logger.error(f"Error in persona replication engine: {str(e)}")
            return {"error": f"Content generation failed: {str(e)}"}

    @staticmethod
    def _clean_terms(values: Any) -> List[str]:
        """Return the non-empty entries of *values* as stripped strings."""
        if isinstance(values, str):
            values = [values]
        cleaned = []
        for value in values or []:
            if value is None:
                continue
            text = str(value).strip()
            if text:
                cleaned.append(text)
        return cleaned

    @classmethod
    def _join_terms(cls, values: Any, limit: Optional[int] = None, fallback: str = "") -> str:
        """Comma-join up to *limit* terms, or return *fallback* when there are none."""
        terms = cls._clean_terms(values)
        if limit is not None:
            terms = terms[:limit]
        return ", ".join(terms) if terms else fallback

    def _build_hardened_system_prompt(self, persona_data: Dict[str, Any], platform: str) -> str:
        """
        Build the hardened system prompt for persona replication.

        Args:
            persona_data: Persona record; must contain "core_persona" and may
                contain "platform_adaptation".
            platform: Target platform name, interpolated into the prompt.

        Returns:
            The system prompt text passed to the LLM.

        Raises:
            KeyError: If persona_data has no "core_persona" entry.
        """
        core_persona = persona_data["core_persona"]
        platform_adaptation = persona_data.get("platform_adaptation") or {}

        # Extract key persona elements
        identity = core_persona.get("linguistic_fingerprint") or {}
        sentence_metrics = identity.get("sentence_metrics") or {}
        lexical_features = identity.get("lexical_features") or {}
        rhetorical_devices = identity.get("rhetorical_devices") or {}
        tonal_range = core_persona.get("tonal_range") or {}

        # Platform-specific constraints
        platform_constraints = platform_adaptation.get("content_format_rules") or {}
        engagement_patterns = platform_adaptation.get("engagement_patterns") or {}

        # Only mention permissible tones when the persona actually lists some,
        # so the prompt never contains a dangling "Permissible tones: .".
        tone_line = f"{tonal_range.get('default_tone', 'professional')}."
        permissible_tones = self._join_terms(tonal_range.get("permissible_tones"))
        if permissible_tones:
            tone_line += f" Permissible tones: {permissible_tones}."

        # Same fallbacks as create_hardened_persona_prompt, so an empty persona
        # list never yields an empty instruction line.
        use_words = self._join_terms(lexical_features.get("go_to_words"), 5, "professional vocabulary")
        use_phrases = self._join_terms(lexical_features.get("go_to_phrases"), 3, "natural transitions")
        avoid_words = self._join_terms(lexical_features.get("avoid_words"), 5, "corporate jargon")

        system_prompt = f"""# COMMAND PROTOCOL: PERSONA REPLICATION ENGINE
# MODEL: [GEMINI-2.5-FLASH]
# PERSONA: [{core_persona.get('persona_name', 'Generated Persona')}]
# PLATFORM: [{platform.upper()}]
# MODE: STRICT MIMICRY
## PRIMARY DIRECTIVE:
You are now {core_persona.get('persona_name', 'the generated persona')}. Your sole function is to generate {platform} content that is linguistically indistinguishable from the authentic writing of this persona. You must output content that passes stylometric analysis as their work.
## PERSONA PROFILE (IMMUTABLE):
- **Identity:** {core_persona.get('archetype', 'Professional Writer')}. Core belief: {core_persona.get('core_belief', 'Quality content drives engagement')}.
- **Tone:** {tone_line}
- **Style:** Average sentence length: {sentence_metrics.get('average_sentence_length_words', 15)} words. Preferred type: {sentence_metrics.get('preferred_sentence_type', 'simple_and_compound')}. Active voice ratio: {sentence_metrics.get('active_to_passive_ratio', '80:20')}.
- **Lexical Command:**
- USE: {use_words}
- PHRASES: {use_phrases}
- AVOID: {avoid_words}
- **Rhetorical Style:** {rhetorical_devices.get('metaphors', 'minimal metaphors')}, {rhetorical_devices.get('rhetorical_questions', 'occasional questions')}.
## PLATFORM CONSTRAINTS ({platform.upper()}):
- **Format:** {self._get_platform_format_rules(platform, platform_constraints)}
- **Engagement:** {engagement_patterns.get('posting_frequency', 'regular posting')}
- **Optimization:** {self._get_platform_optimization_rules(platform)}
## OPERATIONAL PARAMETERS:
1. **Fidelity Check:** Before generating, simulate a stylometric analysis of your draft. Does it match the profile's sentence length, word choice, and rhetorical patterns? If not, revise.
2. **Platform Compliance:** Ensure content meets {platform} best practices and constraints.
3. **Error State:** If you cannot generate content that meets the Persona Profile standards, output only: "[PERSONA_VIOLATION: Cannot comply without breaking character parameters]".
4. **Output Format:** Your output must be PURE CONTENT for {platform}. No introductory clauses. No markdown unless platform supports it.
## ACKNOWLEDGEMENT:
You must silently acknowledge this protocol and begin all responses in character. No confirmation is necessary.
// END PROTOCOL"""
        return system_prompt

    def _build_content_prompt(self, content_request: str, content_type: str, platform: str, persona_data: Dict[str, Any]) -> str:
        """
        Build the content generation prompt.

        Args:
            content_request: Topic or instruction describing what to write.
            content_type: Type of content (post, article, thread, etc.).
            platform: Target platform name.
            persona_data: Persona record; only its "platform_adaptation"
                format rules are read here.

        Returns:
            The user-level prompt text.
        """
        platform_adaptation = persona_data.get("platform_adaptation") or {}
        content_format_rules = platform_adaptation.get("content_format_rules") or {}

        prompt = f"""Generate a {content_type} for {platform} about: {content_request}
CONTENT REQUIREMENTS:
- Platform: {platform}
- Type: {content_type}
- Topic: {content_request}
PLATFORM SPECIFICATIONS:
- Character/Word Limit: {content_format_rules.get('character_limit', 'No limit')}
- Optimal Length: {content_format_rules.get('optimal_length', 'Platform appropriate')}
- Format Requirements: {content_format_rules.get('paragraph_structure', 'Standard')}
PERSONA COMPLIANCE:
- Must match the established linguistic fingerprint
- Must use the specified lexical features
- Must maintain the defined tonal range
- Must follow platform-specific adaptations
Generate content that is indistinguishable from the original author's work while optimized for {platform} performance."""
        return prompt

    def _generate_constrained_content(self, system_prompt: str, content_prompt: str, platform: str, persona_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generate content with strict persona constraints.

        Calls the structured-JSON Gemini helper with a fixed response schema.

        Args:
            system_prompt: Hardened persona system prompt.
            content_prompt: Content request prompt.
            platform: Target platform (not used in the request itself).
            persona_data: Persona record (not used in the request itself).

        Returns:
            The provider response with a "generated_at" UTC ISO-8601 timestamp
            added, or a dict with a single "error" key on failure.
        """
        # Define content generation schema
        content_schema = {
            "type": "object",
            "properties": {
                "content": {"type": "string"},
                "persona_compliance_check": {
                    "type": "object",
                    "properties": {
                        "sentence_length_check": {"type": "boolean"},
                        "lexical_compliance": {"type": "boolean"},
                        "tonal_compliance": {"type": "boolean"},
                        "platform_optimization": {"type": "boolean"}
                    }
                },
                "platform_specific_elements": {
                    "type": "object",
                    "properties": {
                        "hashtags": {"type": "array", "items": {"type": "string"}},
                        "mentions": {"type": "array", "items": {"type": "string"}},
                        "call_to_action": {"type": "string"},
                        "engagement_hooks": {"type": "array", "items": {"type": "string"}}
                    }
                },
                "confidence_score": {"type": "number"}
            },
            "required": ["content", "persona_compliance_check", "confidence_score"]
        }

        try:
            response = gemini_structured_json_response(
                prompt=content_prompt,
                schema=content_schema,
                temperature=0.1,  # Very low temperature for consistent persona replication
                max_tokens=4096,
                system_prompt=system_prompt
            )

            if not isinstance(response, dict):
                # Guard: `"error" in response` on a str would be a substring test.
                return {"error": f"Content generation failed: unexpected response type {type(response).__name__}"}
            if "error" in response:
                return {"error": f"Content generation failed: {response['error']}"}

            # Record a real timestamp; the logging call returns None and must
            # not be used as the value.
            response["generated_at"] = datetime.now(timezone.utc).isoformat()
            logger.info("Content generated with persona constraints")
            return response
        except Exception as e:
            logger.error(f"Error generating constrained content: {str(e)}")
            return {"error": f"Content generation error: {str(e)}"}

    def _check_sentence_length(self, content: str, target_length: Any) -> bool:
        """Return True when the average sentence length is within tolerance of the target."""
        try:
            target = float(target_length)
        except (TypeError, ValueError):
            # Persona values may be missing or non-numeric; use the default.
            target = float(self._DEFAULT_SENTENCE_LENGTH)

        word_counts = [
            len(sentence.split())
            for sentence in self._SENTENCE_BOUNDARY.split(content)
            if sentence.strip()
        ]
        average = sum(word_counts) / max(len(word_counts), 1)
        return abs(average - target) <= self._SENTENCE_LENGTH_TOLERANCE

    @staticmethod
    def _contains_term(text_lower: str, term: str) -> bool:
        """
        Return True when *term* occurs in *text_lower* as a standalone token.

        Only ASCII letters, digits and underscore count as word characters for
        the boundary test, so "just" does not match inside "adjust", while
        terms in scripts written without spaces still match as substrings.
        """
        pattern = r"(?<![a-z0-9_])" + re.escape(term.lower()) + r"(?![a-z0-9_])"
        return re.search(pattern, text_lower) is not None

    def _check_lexical_compliance(self, content: str, go_to_words: Any, avoid_words: Any) -> bool:
        """
        Return True when content uses a go-to word and no avoided word.

        Only the first three go-to words are considered. A persona with no
        go-to words imposes no usage requirement.
        """
        text_lower = content.lower()
        preferred = self._clean_terms(go_to_words)[:3]
        banned = self._clean_terms(avoid_words)

        uses_preferred = not preferred or any(self._contains_term(text_lower, word) for word in preferred)
        avoids_banned = not any(self._contains_term(text_lower, word) for word in banned)
        return uses_preferred and avoids_banned

    @staticmethod
    def _check_character_limit(content: str, char_limit: Any) -> bool:
        """
        Return True when content fits within *char_limit*.

        The limit may be a number or text such as "280", "2,200 characters"
        or "No limit". Missing, zero or unparseable limits mean "no limit".
        """
        if char_limit is None or isinstance(char_limit, bool):
            return True
        if isinstance(char_limit, str):
            match = re.search(r"\d[\d,]*", char_limit)
            if match is None:
                return True
            char_limit = int(match.group().replace(",", ""))
        if not isinstance(char_limit, (int, float)) or char_limit <= 0:
            return True
        return len(content) <= char_limit

    def _validate_content_fidelity(self, content: str, persona_data: Dict[str, Any], platform: str) -> Dict[str, Any]:
        """
        Validate generated content against persona constraints.

        Checks average sentence length, lexical usage and the platform
        character limit.

        Args:
            content: Generated text to score.
            persona_data: Persona record; must contain "core_persona".
            platform: Target platform (not used by the current checks).

        Returns:
            Dict with "fidelity_score" (percentage of checks passed),
            "platform_score" (100 when within the character limit, else 50),
            "compliance_check" (per-check booleans) and "constraints_checked".
            On an internal error, both scores are 0.0 and "compliance_check"
            carries the error text.
        """
        try:
            core_persona = persona_data["core_persona"]
            platform_adaptation = persona_data.get("platform_adaptation") or {}
            fingerprint = core_persona.get("linguistic_fingerprint") or {}
            sentence_metrics = fingerprint.get("sentence_metrics") or {}
            lexical_features = fingerprint.get("lexical_features") or {}
            format_rules = platform_adaptation.get("content_format_rules") or {}

            platform_compliance = self._check_character_limit(content, format_rules.get("character_limit"))
            compliance_check = {
                "sentence_length": self._check_sentence_length(
                    content,
                    sentence_metrics.get("average_sentence_length_words", self._DEFAULT_SENTENCE_LENGTH),
                ),
                "lexical_features": self._check_lexical_compliance(
                    content,
                    lexical_features.get("go_to_words"),
                    lexical_features.get("avoid_words"),
                ),
                "platform_constraints": platform_compliance,
            }

            # Calculate overall scores
            fidelity_score = sum(compliance_check.values()) / len(compliance_check) * 100
            platform_score = 100 if platform_compliance else 50  # Heavy penalty for platform violations

            logger.info(f"Content validation: Fidelity={fidelity_score}%, Platform={platform_score}%")
            return {
                "fidelity_score": fidelity_score,
                "platform_score": platform_score,
                "compliance_check": compliance_check,
                "constraints_checked": list(compliance_check),
            }
        except Exception as e:
            logger.error(f"Error validating content fidelity: {str(e)}")
            return {
                "fidelity_score": 0.0,
                "platform_score": 0.0,
                "compliance_check": {"error": str(e)},
                "constraints_checked": []
            }

    def _get_platform_format_rules(self, platform: str, constraints: Dict[str, Any]) -> str:
        """Get formatted platform rules (character limit and optimal length) for the system prompt."""
        char_limit = constraints.get("character_limit", "No limit")
        optimal_length = constraints.get("optimal_length", "Platform appropriate")
        return f"Character limit: {char_limit}, Optimal length: {optimal_length}"

    def _get_platform_optimization_rules(self, platform: str) -> str:
        """Get platform optimization rules; the platform name is matched case-insensitively."""
        platform_key = (platform or "").strip().lower()
        return self._PLATFORM_OPTIMIZATION_RULES.get(platform_key, "Platform-appropriate optimization")

    def create_hardened_persona_prompt(self, persona_data: Dict[str, Any], platform: str) -> str:
        """
        Create the hardened persona prompt for direct use in AI interfaces.

        This is the fire-and-forget prompt that can be copied into any AI system.

        Args:
            persona_data: Persona record; must contain "core_persona".
            platform: Target platform name.

        Returns:
            The prompt text, followed by usage and quality-assurance notes.

        Raises:
            KeyError: If persona_data has no "core_persona" entry.
        """
        core_persona = persona_data["core_persona"]
        platform_adaptation = persona_data.get("platform_adaptation") or {}

        # Extract quantitative data
        linguistic = core_persona.get("linguistic_fingerprint") or {}
        sentence_metrics = linguistic.get("sentence_metrics") or {}
        lexical_features = linguistic.get("lexical_features") or {}
        rhetorical_devices = linguistic.get("rhetorical_devices") or {}
        tonal_range = core_persona.get("tonal_range") or {}

        # Assemble the tone sentence from the parts that exist, so missing
        # tone lists do not leave stray ". ." fragments in the prompt.
        tone_parts = [str(tonal_range.get("default_tone", "professional"))]
        permissible_tones = self._join_terms(tonal_range.get("permissible_tones"))
        if permissible_tones:
            tone_parts.append(f"Permissible: {permissible_tones}")
        forbidden_tones = self._join_terms(tonal_range.get("forbidden_tones"))
        if forbidden_tones:
            tone_parts.append(f"Forbidden: {forbidden_tones}")
        tone_line = ". ".join(tone_parts) + "."

        use_words = self._join_terms(lexical_features.get("go_to_words"), 5, "professional vocabulary")
        use_phrases = self._join_terms(lexical_features.get("go_to_phrases"), 3, "natural transitions")
        avoid_words = self._join_terms(lexical_features.get("avoid_words"), 5, "corporate jargon")

        hardened_prompt = f"""# COMMAND PROTOCOL: PERSONA REPLICATION ENGINE
# MODEL: [AI-MODEL]
# PERSONA: [{core_persona.get('persona_name', 'Generated Persona')}]
# PLATFORM: [{platform.upper()}]
# MODE: STRICT MIMICRY
## PRIMARY DIRECTIVE:
You are now {core_persona.get('persona_name', 'the persona')}. Your sole function is to generate {platform} content that is linguistically indistinguishable from the authentic writing of this persona. You must output content that passes stylometric analysis as their work.
## PERSONA PROFILE (IMMUTABLE):
- **Identity:** {core_persona.get('archetype', 'Professional Writer')}. Core belief: {core_persona.get('core_belief', 'Quality content drives engagement')}.
- **Tone:** {tone_line}
- **Style:** Avg sentence: {sentence_metrics.get('average_sentence_length_words', 15)} words. Type: {sentence_metrics.get('preferred_sentence_type', 'simple_and_compound')}. Active voice: {sentence_metrics.get('active_to_passive_ratio', '80:20')}.
- **Lexical Command:**
- USE: {use_words}
- PHRASES: {use_phrases}
- AVOID: {avoid_words}
- **Rhetorical Style:** {rhetorical_devices.get('metaphors', 'minimal metaphors')}, {rhetorical_devices.get('rhetorical_questions', 'occasional questions')}.
## PLATFORM CONSTRAINTS ({platform.upper()}):
{self._format_platform_constraints(platform, platform_adaptation)}
## OPERATIONAL PARAMETERS:
1. **Fidelity Check:** Before generating, verify your draft matches the profile's sentence length ({sentence_metrics.get('average_sentence_length_words', 15)} words avg), word choice, and rhetorical patterns. If not, revise.
2. **Platform Compliance:** Ensure content meets {platform} format requirements and optimization rules.
3. **Error State:** If you cannot generate content meeting Persona Profile standards, output: "[PERSONA_VIOLATION: Cannot comply without breaking character parameters]".
4. **Output Format:** Generate PURE {platform.upper()} CONTENT. No introductory text. No explanations. Only the requested content.
## ACKNOWLEDGEMENT:
You must silently acknowledge this protocol and begin all responses in character. No confirmation necessary.
// END PROTOCOL
---
## USAGE INSTRUCTIONS:
1. Copy this entire prompt into your AI system's System Message/Instructions field
2. Use normal user prompts to request content (e.g., "Write a post about AI trends")
3. The AI will generate content that matches the persona's style exactly
4. No additional prompting or style instructions needed
## QUALITY ASSURANCE:
- Generated content should pass stylometric analysis as the original author
- Sentence length should average {sentence_metrics.get('average_sentence_length_words', 15)} words
- Must use specified vocabulary and avoid forbidden words
- Must maintain {tonal_range.get('default_tone', 'professional')} tone throughout
- Must comply with {platform} format and engagement requirements"""
        return hardened_prompt

    def _format_platform_constraints(self, platform: str, platform_adaptation: Dict[str, Any]) -> str:
        """
        Format platform constraints for the hardened prompt.

        Combines the persona's stored format/engagement rules with built-in
        extras for twitter, linkedin and blog (matched case-insensitively).

        Returns:
            A "- "-prefixed bullet list, one constraint per line.
        """
        content_rules = platform_adaptation.get("content_format_rules") or {}
        engagement = platform_adaptation.get("engagement_patterns") or {}

        constraints = []
        if content_rules.get("character_limit"):
            constraints.append(f"Character limit: {content_rules['character_limit']}")
        if content_rules.get("optimal_length"):
            constraints.append(f"Optimal length: {content_rules['optimal_length']}")
        if engagement.get("posting_frequency"):
            constraints.append(f"Frequency: {engagement['posting_frequency']}")

        platform_key = (platform or "").strip().lower()
        constraints.extend(self._PLATFORM_EXTRA_CONSTRAINTS.get(platform_key, ()))

        return "- " + "\n- ".join(constraints) if constraints else "- Standard platform optimization"

    def export_persona_for_external_use(self, user_id: int, platform: str) -> Dict[str, Any]:
        """
        Export a complete persona package for use in external AI systems.

        This creates a self-contained persona replication system.

        Args:
            user_id: User ID for persona lookup.
            platform: Target platform name.

        Returns:
            Dict with persona metadata, the hardened system prompt, usage
            examples, a validation checklist and a quick reference; or a dict
            with a single "error" key when no persona exists or export fails.
        """
        try:
            # Get persona data
            persona_data = self.persona_service.get_persona_for_platform(user_id, platform)
            if not persona_data:
                return {"error": "No persona found"}

            hardened_prompt = self.create_hardened_persona_prompt(persona_data, platform)
            examples = self._generate_usage_examples(persona_data, platform)
            validation_checklist = self._create_validation_checklist(persona_data, platform)

            core_persona = persona_data["core_persona"]
            linguistic = core_persona.get("linguistic_fingerprint") or {}
            sentence_metrics = linguistic.get("sentence_metrics") or {}
            lexical_features = linguistic.get("lexical_features") or {}
            tonal_range = core_persona.get("tonal_range") or {}
            format_rules = (persona_data.get("platform_adaptation") or {}).get("content_format_rules") or {}

            export_package = {
                "persona_metadata": {
                    "persona_id": core_persona.get("id"),
                    "persona_name": core_persona.get("persona_name", "Generated Persona"),
                    "platform": platform,
                    "generated_at": datetime.now(timezone.utc).isoformat(),
                    "confidence_score": core_persona.get("confidence_score", 0.0)
                },
                "hardened_system_prompt": hardened_prompt,
                "usage_examples": examples,
                "validation_checklist": validation_checklist,
                "quick_reference": {
                    "avg_sentence_length": sentence_metrics.get("average_sentence_length_words", 15),
                    "go_to_words": (lexical_features.get("go_to_words") or [])[:5],
                    "default_tone": tonal_range.get("default_tone", "professional"),
                    "platform_limit": format_rules.get("character_limit", "No limit")
                }
            }

            logger.info(f"✅ Persona export package created for {platform}")
            return export_package
        except Exception as e:
            logger.error(f"Error exporting persona: {str(e)}")
            return {"error": f"Export failed: {str(e)}"}

    def _generate_usage_examples(self, persona_data: Dict[str, Any], platform: str) -> List[Dict[str, Any]]:
        """Generate two static usage examples for the exported persona (persona_data is not read)."""
        examples = [
            {
                "request": f"Write a {platform} post about AI trends",
                "expected_style": "Should match persona's sentence length and lexical features",
                "validation_points": [
                    "Check average sentence length",
                    "Verify use of go-to words",
                    "Confirm tonal compliance",
                    f"Ensure {platform} optimization"
                ]
            },
            {
                "request": f"Create {platform} content about productivity tips",
                "expected_style": "Should maintain consistent voice and rhetorical patterns",
                "validation_points": [
                    "Verify rhetorical device usage",
                    "Check for forbidden words",
                    "Confirm platform constraints",
                    "Validate engagement elements"
                ]
            }
        ]
        return examples

    def _create_validation_checklist(self, persona_data: Dict[str, Any], platform: str) -> List[str]:
        """
        Create a validation checklist for generated content.

        Raises:
            KeyError: If persona_data has no "core_persona" entry.
        """
        core_persona = persona_data["core_persona"]
        linguistic = core_persona.get("linguistic_fingerprint") or {}
        sentence_metrics = linguistic.get("sentence_metrics") or {}
        lexical_features = linguistic.get("lexical_features") or {}
        tonal_range = core_persona.get("tonal_range") or {}

        checklist = [
            f"✓ Average sentence length ~{sentence_metrics.get('average_sentence_length_words', 15)} words",
            f"✓ Uses go-to words: {self._join_terms(lexical_features.get('go_to_words'), 3)}",
            f"✓ Avoids forbidden words: {self._join_terms(lexical_features.get('avoid_words'), 3)}",
            f"✓ Maintains {tonal_range.get('default_tone', 'professional')} tone",
            f"✓ Follows {platform} format requirements",
            f"✓ Includes appropriate {platform} engagement elements"
        ]
        return checklist