Base code

This commit is contained in:
Kunthawat Greethong
2026-01-08 22:39:53 +07:00
parent 697115c61a
commit c35fa52117
2169 changed files with 626670 additions and 0 deletions


@@ -0,0 +1,706 @@
# ALwrity Onboarding System - API Reference
## Overview
This document provides a comprehensive API reference for the ALwrity Onboarding System. All endpoints require authentication and return JSON responses.
## 🔐 Authentication
All endpoints require a valid Clerk JWT token in the Authorization header:
```
Authorization: Bearer <clerk_jwt_token>
```
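For example, a minimal Python client can attach the token to any endpoint in this reference (the base URL and token value below are placeholders for your deployment and your Clerk session):

```python
import requests

BASE_URL = "https://your-alwrity-host"   # replace with your deployment URL
CLERK_JWT = "<clerk_jwt_token>"          # obtained from Clerk on the frontend

headers = {"Authorization": f"Bearer {CLERK_JWT}"}

# Fetch the current onboarding status for the authenticated user
response = requests.get(f"{BASE_URL}/api/onboarding/status", headers=headers, timeout=10)
response.raise_for_status()
print(response.json())  # e.g. {'is_completed': False, 'current_step': 2, ...}
```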
## 📋 Core Endpoints
### Onboarding Status
#### GET `/api/onboarding/status`
Get the current onboarding status for the authenticated user.
**Response:**
```json
{
"is_completed": false,
"current_step": 2,
"completion_percentage": 33.33,
"next_step": 3,
"started_at": "2024-01-15T10:30:00Z",
"completed_at": null,
"can_proceed_to_final": false
}
```
#### GET `/api/onboarding/progress`
Get the full onboarding progress data.
**Response:**
```json
{
"steps": [
{
"step_number": 1,
"title": "AI LLM Providers Setup",
"description": "Configure your AI services",
"status": "completed",
"completed_at": "2024-01-15T10:35:00Z",
"data": {...},
"validation_errors": []
}
],
"current_step": 2,
"started_at": "2024-01-15T10:30:00Z",
"last_updated": "2024-01-15T10:35:00Z",
"is_completed": false,
"completed_at": null
}
```
### Step Management
#### GET `/api/onboarding/step/{step_number}`
Get data for a specific step.
**Parameters:**
- `step_number` (int): The step number (1-6)
**Response:**
```json
{
"step_number": 1,
"title": "AI LLM Providers Setup",
"description": "Configure your AI services",
"status": "in_progress",
"completed_at": null,
"data": {...},
"validation_errors": []
}
```
#### POST `/api/onboarding/step/{step_number}/complete`
Mark a step as completed.
**Parameters:**
- `step_number` (int): The step number (1-6)
**Request Body:**
```json
{
"data": {
"api_keys": {
"gemini": "your_gemini_key",
"exa": "your_exa_key",
"copilotkit": "your_copilotkit_key"
}
},
"validation_errors": []
}
```
**Response:**
```json
{
"message": "Step 1 completed successfully",
"step_number": 1,
"data": {...}
}
```
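As an illustration, the step-1 completion call above can be made from Python as follows (host, token, and key values are placeholders):

```python
import requests

BASE_URL = "https://your-alwrity-host"   # replace with your deployment URL
headers = {"Authorization": "Bearer <clerk_jwt_token>"}

payload = {
    "data": {
        "api_keys": {
            "gemini": "your_gemini_key",
            "exa": "your_exa_key",
            "copilotkit": "your_copilotkit_key",
        }
    },
    "validation_errors": [],
}

# Mark step 1 (AI LLM Providers Setup) as completed
response = requests.post(
    f"{BASE_URL}/api/onboarding/step/1/complete",
    json=payload,
    headers=headers,
    timeout=30,
)
response.raise_for_status()
print(response.json()["message"])  # "Step 1 completed successfully"
```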
#### POST `/api/onboarding/step/{step_number}/skip`
Skip a step (for optional steps).
**Parameters:**
- `step_number` (int): The step number (1-6)
**Response:**
```json
{
"message": "Step 2 skipped successfully",
"step_number": 2
}
```
#### GET `/api/onboarding/step/{step_number}/validate`
Validate if user can access a specific step.
**Parameters:**
- `step_number` (int): The step number (1-6)
**Response:**
```json
{
"can_proceed": true,
"validation_errors": [],
"step_status": "available"
}
```
### Onboarding Control
#### POST `/api/onboarding/start`
Start a new onboarding session.
**Response:**
```json
{
"message": "Onboarding started successfully",
"current_step": 1,
"started_at": "2024-01-15T10:30:00Z"
}
```
#### POST `/api/onboarding/reset`
Reset the onboarding progress.
**Response:**
```json
{
"message": "Onboarding progress reset successfully",
"current_step": 1,
"started_at": "2024-01-15T10:30:00Z"
}
```
#### GET `/api/onboarding/resume`
Get information for resuming onboarding.
**Response:**
```json
{
"can_resume": true,
"resume_step": 2,
"current_step": 2,
"completion_percentage": 33.33,
"started_at": "2024-01-15T10:30:00Z",
"last_updated": "2024-01-15T10:35:00Z"
}
```
#### POST `/api/onboarding/complete`
Complete the onboarding process.
**Response:**
```json
{
"message": "Onboarding completed successfully",
"completion_data": {...},
"persona_generated": true,
"environment_setup": true
}
```
## 🔑 API Key Management
### GET `/api/onboarding/api-keys`
Get all configured API keys (masked for security).
**Response:**
```json
{
"api_keys": {
"gemini": "********************abcd",
"exa": "********************efgh",
"copilotkit": "********************ijkl"
},
"total_providers": 3,
"configured_providers": ["gemini", "exa", "copilotkit"]
}
```
### POST `/api/onboarding/api-keys`
Save an API key for a provider.
**Request Body:**
```json
{
"provider": "gemini",
"api_key": "your_api_key_here",
"description": "Gemini API key for content generation"
}
```
**Response:**
```json
{
"message": "API key for gemini saved successfully",
"provider": "gemini",
"status": "saved"
}
```
### GET `/api/onboarding/api-keys/validate`
Validate all configured API keys.
**Response:**
```json
{
"validation_results": {
"gemini": {
"valid": true,
"status": "active",
"quota_remaining": 1000
},
"exa": {
"valid": true,
"status": "active",
"quota_remaining": 500
}
},
"all_valid": true,
"total_providers": 2
}
```
## ⚙️ Configuration
### GET `/api/onboarding/config`
Get onboarding configuration and requirements.
**Response:**
```json
{
"total_steps": 6,
"required_steps": [1, 2, 3, 4, 6],
"optional_steps": [5],
"step_requirements": {
"1": ["gemini", "exa", "copilotkit"],
"2": ["website_url"],
"3": ["research_preferences"],
"4": ["personalization_settings"],
"5": ["integrations"],
"6": ["persona_generation"]
}
}
```
### GET `/api/onboarding/providers`
Get setup information for all providers.
**Response:**
```json
{
"providers": {
"gemini": {
"name": "Gemini AI",
"description": "Advanced content generation",
"setup_url": "https://ai.google.dev/",
"required": true,
"validation_endpoint": "https://generativelanguage.googleapis.com/v1beta/models"
},
"exa": {
"name": "Exa AI",
"description": "Intelligent web research",
"setup_url": "https://exa.ai/",
"required": true,
"validation_endpoint": "https://api.exa.ai/v1/search"
}
}
}
```
### GET `/api/onboarding/providers/{provider}`
Get setup information for a specific provider.
**Parameters:**
- `provider` (string): Provider name (gemini, exa, copilotkit)
**Response:**
```json
{
"name": "Gemini AI",
"description": "Advanced content generation",
"setup_url": "https://ai.google.dev/",
"required": true,
"validation_endpoint": "https://generativelanguage.googleapis.com/v1beta/models",
"setup_instructions": [
"Visit Google AI Studio",
"Create a new API key",
"Copy the API key",
"Paste it in the form above"
]
}
```
### POST `/api/onboarding/providers/{provider}/validate`
Validate a specific provider's API key.
**Parameters:**
- `provider` (string): Provider name (gemini, exa, copilotkit)
**Request Body:**
```json
{
"api_key": "your_api_key_here"
}
```
**Response:**
```json
{
"valid": true,
"status": "active",
"quota_remaining": 1000,
"provider": "gemini"
}
```
## 📊 Summary & Analytics
### GET `/api/onboarding/summary`
Get comprehensive onboarding summary for the final step.
**Response:**
```json
{
"user_info": {
"user_id": "user_123",
"onboarding_started": "2024-01-15T10:30:00Z",
"current_step": 6
},
"api_keys": {
"gemini": "configured",
"exa": "configured",
"copilotkit": "configured"
},
"website_analysis": {
"url": "https://example.com",
"status": "completed",
"style_analysis": "professional",
"content_count": 25
},
"research_preferences": {
"depth": "comprehensive",
"auto_research": true,
"fact_checking": true
},
"personalization": {
"brand_voice": "professional",
"target_audience": "B2B professionals",
"content_types": ["blog_posts", "social_media"]
}
}
```
### GET `/api/onboarding/website-analysis`
Get website analysis data.
**Response:**
```json
{
"url": "https://example.com",
"analysis_status": "completed",
"content_analyzed": 25,
"style_characteristics": {
"tone": "professional",
"voice": "authoritative",
"complexity": "intermediate"
},
"target_audience": "B2B professionals",
"content_themes": ["technology", "business", "innovation"]
}
```
### GET `/api/onboarding/research-preferences`
Get research preferences data.
**Response:**
```json
{
"research_depth": "comprehensive",
"auto_research_enabled": true,
"fact_checking_enabled": true,
"content_types": ["blog_posts", "articles", "social_media"],
"research_sources": ["web", "academic", "news"]
}
```
## 👤 Business Information
### POST `/api/onboarding/business-info`
Save business information for users without websites.
**Request Body:**
```json
{
"business_name": "Acme Corp",
"industry": "Technology",
"description": "AI-powered solutions",
"target_audience": "B2B professionals",
"brand_voice": "professional",
"content_goals": ["lead_generation", "brand_awareness"]
}
```
**Response:**
```json
{
"id": 1,
"business_name": "Acme Corp",
"industry": "Technology",
"description": "AI-powered solutions",
"target_audience": "B2B professionals",
"brand_voice": "professional",
"content_goals": ["lead_generation", "brand_awareness"],
"created_at": "2024-01-15T10:30:00Z"
}
```
### GET `/api/onboarding/business-info/{id}`
Get business information by ID.
**Parameters:**
- `id` (int): Business information ID
**Response:**
```json
{
"id": 1,
"business_name": "Acme Corp",
"industry": "Technology",
"description": "AI-powered solutions",
"target_audience": "B2B professionals",
"brand_voice": "professional",
"content_goals": ["lead_generation", "brand_awareness"],
"created_at": "2024-01-15T10:30:00Z",
"updated_at": "2024-01-15T10:30:00Z"
}
```
### GET `/api/onboarding/business-info/user/{user_id}`
Get business information by user ID.
**Parameters:**
- `user_id` (int): User ID
**Response:**
```json
{
"id": 1,
"business_name": "Acme Corp",
"industry": "Technology",
"description": "AI-powered solutions",
"target_audience": "B2B professionals",
"brand_voice": "professional",
"content_goals": ["lead_generation", "brand_awareness"],
"created_at": "2024-01-15T10:30:00Z",
"updated_at": "2024-01-15T10:30:00Z"
}
```
### PUT `/api/onboarding/business-info/{id}`
Update business information.
**Parameters:**
- `id` (int): Business information ID
**Request Body:**
```json
{
"business_name": "Acme Corp Updated",
"industry": "Technology",
"description": "Updated AI-powered solutions",
"target_audience": "B2B professionals",
"brand_voice": "professional",
"content_goals": ["lead_generation", "brand_awareness", "thought_leadership"]
}
```
**Response:**
```json
{
"id": 1,
"business_name": "Acme Corp Updated",
"industry": "Technology",
"description": "Updated AI-powered solutions",
"target_audience": "B2B professionals",
"brand_voice": "professional",
"content_goals": ["lead_generation", "brand_awareness", "thought_leadership"],
"created_at": "2024-01-15T10:30:00Z",
"updated_at": "2024-01-15T11:00:00Z"
}
```
## 🎭 Persona Management
### GET `/api/onboarding/persona/readiness/{user_id}`
Check if user has sufficient data for persona generation.
**Parameters:**
- `user_id` (int): User ID
**Response:**
```json
{
"ready": true,
"missing_data": [],
"completion_percentage": 100,
"recommendations": []
}
```
### GET `/api/onboarding/persona/preview/{user_id}`
Generate a preview of the writing persona without saving.
**Parameters:**
- `user_id` (int): User ID
**Response:**
```json
{
"persona_preview": {
"name": "Professional Content Creator",
"voice": "authoritative",
"tone": "professional",
"style_characteristics": {
"formality": "high",
"complexity": "intermediate",
"engagement": "informative"
},
"content_preferences": {
"length": "medium",
"format": "structured",
"research_depth": "comprehensive"
}
},
"generation_time": "2.5s",
"confidence_score": 0.95
}
```
### POST `/api/onboarding/persona/generate/{user_id}`
Generate and save a writing persona from onboarding data.
**Parameters:**
- `user_id` (int): User ID
**Response:**
```json
{
"persona_id": 1,
"name": "Professional Content Creator",
"voice": "authoritative",
"tone": "professional",
"style_characteristics": {...},
"content_preferences": {...},
"created_at": "2024-01-15T10:30:00Z",
"status": "active"
}
```
### GET `/api/onboarding/persona/user/{user_id}`
Get all writing personas for the user.
**Parameters:**
- `user_id` (int): User ID
**Response:**
```json
{
"personas": [
{
"id": 1,
"name": "Professional Content Creator",
"voice": "authoritative",
"tone": "professional",
"status": "active",
"created_at": "2024-01-15T10:30:00Z"
}
],
"total_count": 1,
"active_persona": 1
}
```
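A typical client flow checks readiness first and only then triggers generation, using the endpoints documented above (host and token are placeholders):

```python
import requests

BASE_URL = "https://your-alwrity-host"   # replace with your deployment URL
headers = {"Authorization": "Bearer <clerk_jwt_token>"}
user_id = 1

# 1. Check whether enough onboarding data exists for persona generation
readiness = requests.get(
    f"{BASE_URL}/api/onboarding/persona/readiness/{user_id}", headers=headers, timeout=10
).json()

if readiness.get("ready"):
    # 2. Generate and persist the writing persona
    persona = requests.post(
        f"{BASE_URL}/api/onboarding/persona/generate/{user_id}", headers=headers, timeout=60
    ).json()
    print(persona["persona_id"], persona["status"])
else:
    print("Missing data:", readiness.get("missing_data"))
```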
## 🚨 Error Responses
### 400 Bad Request
```json
{
"detail": "Invalid request data",
"error_code": "INVALID_REQUEST",
"validation_errors": [
"Field 'api_key' is required",
"Field 'provider' must be one of: gemini, exa, copilotkit"
]
}
```
### 401 Unauthorized
```json
{
"detail": "Authentication required",
"error_code": "UNAUTHORIZED"
}
```
### 404 Not Found
```json
{
"detail": "Step 7 not found",
"error_code": "STEP_NOT_FOUND"
}
```
### 500 Internal Server Error
```json
{
"detail": "Internal server error",
"error_code": "INTERNAL_ERROR"
}
```
## 📝 Request/Response Models
### StepCompletionRequest
```json
{
"data": {
"api_keys": {
"gemini": "string",
"exa": "string",
"copilotkit": "string"
}
},
"validation_errors": ["string"]
}
```
### APIKeyRequest
```json
{
"provider": "string",
"api_key": "string",
"description": "string"
}
```
### BusinessInfoRequest
```json
{
"business_name": "string",
"industry": "string",
"description": "string",
"target_audience": "string",
"brand_voice": "string",
"content_goals": ["string"]
}
```
## 🔄 Rate Limiting
- **Standard endpoints**: 100 requests per minute
- **API key validation**: 10 requests per minute
- **Persona generation**: 5 requests per minute
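Clients should expect HTTP 429 once a limit is exceeded. A simple retry sketch with exponential backoff is shown below; the exact 429 response body and any `Retry-After` header are not specified here, so the delays are plain assumptions:

```python
import time
import requests

def get_with_backoff(url: str, headers: dict, max_retries: int = 3) -> requests.Response:
    """Retry a GET on HTTP 429 with simple exponential backoff."""
    delay = 1.0
    resp = requests.get(url, headers=headers, timeout=10)
    for _ in range(max_retries):
        if resp.status_code != 429:
            return resp
        time.sleep(delay)   # wait before retrying the rate-limited request
        delay *= 2          # back off further on each attempt
        resp = requests.get(url, headers=headers, timeout=10)
    return resp
```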
## 📊 Response Times
- **Status checks**: < 100ms
- **Step completion**: < 500ms
- **API key validation**: < 2s
- **Persona generation**: < 10s
- **Website analysis**: < 30s
---
*This API reference provides comprehensive documentation for all onboarding endpoints. For additional support, please refer to the main project documentation or contact the development team.*


@@ -0,0 +1,330 @@
# ALwrity Onboarding System - Developer Guide
## Architecture Overview
The ALwrity Onboarding System is built with a modular, service-based architecture that separates concerns and promotes maintainability. The system is designed to handle user isolation, progressive setup, and comprehensive onboarding workflows.
## 🏗️ System Architecture
### Core Components
```
backend/api/onboarding_utils/
├── __init__.py # Package initialization
├── onboarding_completion_service.py # Final onboarding completion logic
├── onboarding_summary_service.py # Comprehensive summary generation
├── onboarding_config_service.py # Configuration and provider management
├── business_info_service.py # Business information CRUD operations
├── api_key_management_service.py # API key operations and validation
├── step_management_service.py # Step progression and validation
├── onboarding_control_service.py # Onboarding session management
├── persona_management_service.py # Persona generation and management
├── README.md # End-user documentation
└── DEVELOPER_GUIDE.md # This file
```
### Service Responsibilities
#### 1. OnboardingCompletionService
**Purpose**: Handles the complex logic for completing the onboarding process
**Key Methods**:
- `complete_onboarding()` - Main completion logic with validation
- `_validate_required_steps()` - Ensures all required steps are completed
- `_validate_api_keys()` - Validates API key configuration
- `_generate_persona_from_onboarding()` - Generates writing persona
#### 2. OnboardingSummaryService
**Purpose**: Generates comprehensive onboarding summaries for the final step
**Key Methods**:
- `get_onboarding_summary()` - Main summary generation
- `_get_api_keys()` - Retrieves configured API keys
- `_get_website_analysis()` - Gets website analysis data
- `_get_research_preferences()` - Retrieves research preferences
- `_check_persona_readiness()` - Validates persona generation readiness
#### 3. OnboardingConfigService
**Purpose**: Manages onboarding configuration and provider setup information
**Key Methods**:
- `get_onboarding_config()` - Returns complete onboarding configuration
- `get_provider_setup_info()` - Provider-specific setup information
- `get_all_providers_info()` - All available providers
- `validate_provider_key()` - API key validation
- `get_enhanced_validation_status()` - Comprehensive validation status
#### 4. BusinessInfoService
**Purpose**: Handles business information management for users without websites
**Key Methods**:
- `save_business_info()` - Create new business information
- `get_business_info()` - Retrieve by ID
- `get_business_info_by_user()` - Retrieve by user ID
- `update_business_info()` - Update existing information
#### 5. APIKeyManagementService
**Purpose**: Manages API key operations with caching and security
**Key Methods**:
- `get_api_keys()` - Retrieves masked API keys with caching
- `save_api_key()` - Saves new API keys securely
- `validate_api_keys()` - Validates all configured keys
#### 6. StepManagementService
**Purpose**: Controls step progression and validation
**Key Methods**:
- `get_onboarding_status()` - Current onboarding status
- `get_onboarding_progress_full()` - Complete progress data
- `get_step_data()` - Specific step information
- `complete_step()` - Mark step as completed with environment setup
- `skip_step()` - Skip optional steps
- `validate_step_access()` - Validate step accessibility
#### 7. OnboardingControlService
**Purpose**: Manages onboarding session control
**Key Methods**:
- `start_onboarding()` - Initialize new onboarding session
- `reset_onboarding()` - Reset onboarding progress
- `get_resume_info()` - Resume information for incomplete sessions
#### 8. PersonaManagementService
**Purpose**: Handles persona generation and management
**Key Methods**:
- `check_persona_generation_readiness()` - Validate persona readiness
- `generate_persona_preview()` - Generate preview without saving
- `generate_writing_persona()` - Generate and save persona
- `get_user_writing_personas()` - Retrieve user personas
## 🔧 Integration Points
### Progressive Setup Integration
The onboarding system integrates with the progressive setup service:
```python
# In step_management_service.py
from services.progressive_setup_service import ProgressiveSetupService
# Initialize/upgrade user environment based on new step
if step_number == 1:
    setup_service.initialize_user_environment(user_id)
else:
    setup_service.upgrade_user_environment(user_id, step_number)
```
### User Isolation
Each user gets their own:
- **Workspace**: `lib/workspace/users/user_<id>/`
- **Database Tables**: `user_<id>_*` tables
- **Configuration**: User-specific settings
- **Progress**: Individual onboarding progress
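As a rough sketch of this convention (the helper names below are hypothetical; the real logic lives in the workspace manager and progressive setup services, and only the `user_<id>` naming pattern is taken from the layout above):

```python
from pathlib import Path

def user_workspace_path(user_id: str) -> Path:
    """Illustrative helper: resolve a user's isolated workspace directory."""
    return Path("lib/workspace/users") / f"user_{user_id}"

def user_table_name(user_id: str, base_table: str) -> str:
    """Illustrative helper: derive a user-scoped table name (user_<id>_<table>)."""
    return f"user_{user_id}_{base_table}"

workspace = user_workspace_path("42")
workspace.mkdir(parents=True, exist_ok=True)   # created on demand during progressive setup
print(user_table_name("42", "onboarding_progress"))  # user_42_onboarding_progress
```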
### Authentication Integration
All services require authentication:
```python
from typing import Any, Dict
from fastapi import Depends
from middleware.auth_middleware import get_current_user

async def endpoint_function(current_user: Dict[str, Any] = Depends(get_current_user)):
    user_id = str(current_user.get('id'))
    # Service logic here
```
## 📊 Data Flow
### 1. Onboarding Initialization
```
User Login → Authentication → Check Onboarding Status → Redirect to Appropriate Step
```
### 2. Step Completion
```
User Completes Step → Validate Step → Save Progress → Setup User Environment → Return Success
```
### 3. Environment Setup
```
Step Completed → Progressive Setup Service → User Workspace Creation → Feature Activation
```
### 4. Final Completion
```
All Steps Complete → Validation → Persona Generation → Environment Finalization → Onboarding Complete
```
## 🛠️ Development Guidelines
### Adding New Services
1. **Create Service Class**:
```python
class NewService:
    def __init__(self):
        # Initialize dependencies
        pass

    async def main_method(self, params):
        # Main functionality
        pass
```
2. **Update __init__.py**:
```python
from .new_service import NewService

__all__ = [
    # ... existing services
    'NewService',
]
```
3. **Update Main Onboarding File**:
```python
async def new_endpoint():
    try:
        from onboarding_utils.new_service import NewService
        service = NewService()
        return await service.main_method()
    except Exception as e:
        logger.error(f"Error: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")
```
### Error Handling Pattern
All services follow a consistent error handling pattern:
```python
try:
    # Service logic
    return result
except HTTPException:
    raise  # Re-raise HTTP exceptions
except Exception as e:
    logger.error(f"Error in service: {str(e)}")
    raise HTTPException(status_code=500, detail="Internal server error")
```
### Logging Guidelines
Use structured logging with context:
```python
logger.info(f"[service_name] Action for user {user_id}")
logger.success(f"✅ Operation completed for user {user_id}")
logger.warning(f"⚠️ Non-critical issue: {issue}")
logger.error(f"❌ Error in operation: {str(e)}")
```
## 🧪 Testing
### Unit Testing
Each service should have comprehensive unit tests:
```python
import pytest
from onboarding_utils.step_management_service import StepManagementService
class TestStepManagementService:
    def setup_method(self):
        self.service = StepManagementService()

    async def test_get_onboarding_status(self):
        # Test implementation
        pass
```
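A slightly more concrete sketch, assuming `OnboardingConfigService.get_onboarding_config()` returns the same dictionary shape as the documented `/api/onboarding/config` response, verifies the step configuration without touching the database:

```python
from onboarding_utils.onboarding_config_service import OnboardingConfigService

class TestOnboardingConfigServiceSketch:
    def setup_method(self):
        self.service = OnboardingConfigService()

    def test_config_reports_six_steps_with_step_five_optional(self):
        # Shape mirrors the documented /api/onboarding/config response;
        # adjust the assertions if the service wraps the payload differently.
        config = self.service.get_onboarding_config()
        assert config["total_steps"] == 6
        assert 5 in config["optional_steps"]
        assert set(config["required_steps"]) == {1, 2, 3, 4, 6}
```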
### Integration Testing
Test service interactions:
```python
async def test_complete_onboarding_flow():
    # Test complete onboarding workflow
    pass
```
## 🔒 Security Considerations
### API Key Security
- Keys are masked in responses
- Encryption before storage
- Secure transmission only
### User Data Isolation
- User-specific workspaces
- Isolated database tables
- No cross-user data access
### Input Validation
- Validate all user inputs
- Sanitize data before processing
- Use Pydantic models for validation
## 📈 Performance Optimization
### Caching Strategy
- API key responses cached for 30 seconds
- User progress cached in memory
- Database queries optimized
### Database Optimization
- User-specific table indexing
- Efficient query patterns
- Connection pooling
### Resource Management
- Proper database session handling
- Memory-efficient data processing
- Background task optimization
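For example, session handling can be centralized in a small context manager around the `SessionLocal` factory used elsewhere in the backend; this is a sketch of the pattern, not the current implementation:

```python
from contextlib import contextmanager
from services.database import SessionLocal  # session factory used by the onboarding services

@contextmanager
def db_session():
    """Yield a database session and guarantee it is closed afterwards."""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()

# Usage sketch:
# with db_session() as db:
#     keys = db_service.get_api_keys(user_id, db)
```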
## 🚀 Deployment Considerations
### Environment Variables
```bash
# Required for onboarding
CLERK_PUBLISHABLE_KEY=your_key
CLERK_SECRET_KEY=your_secret
GEMINI_API_KEY=your_gemini_key
EXA_API_KEY=your_exa_key
COPILOTKIT_API_KEY=your_copilotkit_key
```
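A minimal startup check along these lines (not part of the current codebase) fails fast when any of the required variables is missing:

```python
import os

REQUIRED_ENV_VARS = [
    "CLERK_PUBLISHABLE_KEY",
    "CLERK_SECRET_KEY",
    "GEMINI_API_KEY",
    "EXA_API_KEY",
    "COPILOTKIT_API_KEY",
]

def check_required_env() -> list[str]:
    """Return the names of any required onboarding variables that are unset."""
    return [name for name in REQUIRED_ENV_VARS if not os.getenv(name)]

missing = check_required_env()
if missing:
    raise RuntimeError(f"Missing required environment variables: {', '.join(missing)}")
```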
### Database Setup
- User-specific tables created on demand
- Progressive table creation based on onboarding progress
- Automatic cleanup on user deletion
### Monitoring
- Track onboarding completion rates
- Monitor step abandonment points
- Performance metrics for each service
## 🔄 Maintenance
### Regular Tasks
- Review and update API key validation
- Monitor service performance
- Update documentation
- Clean up abandoned onboarding sessions
### Version Updates
- Maintain backward compatibility
- Gradual feature rollouts
- User migration strategies
## 📚 Additional Resources
### Related Documentation
- [User Environment Setup](../services/user_workspace_manager.py)
- [Progressive Setup Service](../services/progressive_setup_service.py)
- [Authentication Middleware](../middleware/auth_middleware.py)
### External Dependencies
- FastAPI for API framework
- SQLAlchemy for database operations
- Pydantic for data validation
- Loguru for logging
---
*This developer guide provides comprehensive information for maintaining and extending the ALwrity Onboarding System. For questions or contributions, please refer to the main project documentation.*


@@ -0,0 +1,184 @@
# 🚀 Persona Generation Optimization Summary
## 📊 **Issues Identified & Fixed**
### **1. spaCy Dependency Issue**
**Problem**: `ModuleNotFoundError: No module named 'spacy'`
**Solution**: Made spaCy an optional dependency with graceful fallback
- ✅ spaCy is now optional - system works with NLTK only
- ✅ Graceful degradation when spaCy is not available
- ✅ Enhanced linguistic analysis when spaCy is present
### **2. API Call Optimization**
**Problem**: Too many sequential API calls
**Previous**: 1 (core) + N (platforms) + 1 (quality) = N + 2 API calls
**Optimized**: 1 (comprehensive) = 1 API call total
### **3. Parallel Execution**
**Problem**: Sequential platform persona generation
**Solution**: Parallel execution for all platform adaptations
## 🎯 **Optimization Strategies**
### **Strategy 1: Single Comprehensive API Call**
```python
# OLD APPROACH (N + 2 API calls)
core_persona = generate_core_persona() # 1 API call
for platform in platforms:
    platform_persona = generate_platform_persona()  # N API calls
quality_metrics = assess_quality()  # 1 API call

# NEW APPROACH (1 API call)
comprehensive_response = generate_all_personas() # 1 API call
```
### **Strategy 2: Rule-Based Quality Assessment**
```python
# OLD: API-based quality assessment
quality_metrics = await llm_assess_quality() # 1 API call
# NEW: Rule-based assessment
quality_metrics = assess_persona_quality_rule_based() # 0 API calls
```
### **Strategy 3: Parallel Execution**
```python
# OLD: Sequential execution
for platform in platforms:
    await generate_platform_persona(platform)

# NEW: Parallel execution
tasks = [generate_platform_persona_async(platform) for platform in platforms]
results = await asyncio.gather(*tasks)
```
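A slightly fuller sketch of the parallel path (the function bodies are placeholders) also shows how `return_exceptions=True` keeps one failing platform adaptation from aborting the others, which is the error-isolation behavior described later in this document:

```python
import asyncio

async def generate_platform_persona_async(platform: str) -> dict:
    # Placeholder for the real per-platform generation call
    await asyncio.sleep(0.1)
    return {"platform": platform, "persona": "..."}

async def generate_all_platform_personas(platforms: list[str]) -> dict[str, dict]:
    tasks = [generate_platform_persona_async(p) for p in platforms]
    # return_exceptions=True keeps one failing platform from cancelling the rest
    results = await asyncio.gather(*tasks, return_exceptions=True)
    personas = {}
    for platform, result in zip(platforms, results):
        if isinstance(result, Exception):
            print(f"{platform} adaptation failed: {result}")  # logged, not fatal
        else:
            personas[platform] = result
    return personas

# asyncio.run(generate_all_platform_personas(["linkedin", "twitter", "blog"]))
```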
## 📈 **Performance Improvements**
| Metric | Before | After | Improvement |
|--------|--------|-------|-------------|
| **API Calls** | N + 2 | 1 | ~70% reduction |
| **Execution Time** | Sequential | Parallel | ~60% faster |
| **Dependencies** | Required spaCy | Optional spaCy | More reliable |
| **Quality Assessment** | LLM-based | Rule-based | No LLM call required |
### **Real-World Examples:**
- **3 Platforms**: 5 API calls → 1 API call (80% reduction)
- **5 Platforms**: 7 API calls → 1 API call (85% reduction)
- **Execution Time**: ~15 seconds → ~5 seconds (67% faster)
## 🔧 **Technical Implementation**
### **1. spaCy Dependency Fix**
```python
class EnhancedLinguisticAnalyzer:
    def __init__(self):
        self.spacy_available = False
        try:
            import spacy
            self.nlp = spacy.load("en_core_web_sm")
            self.spacy_available = True
        except (ImportError, OSError) as e:
            logger.warning(f"spaCy not available: {e}. Using NLTK-only analysis.")
            self.spacy_available = False
```
### **2. Comprehensive Prompt Strategy**
```python
def build_comprehensive_persona_prompt(onboarding_data, platforms):
    return f"""
    Generate a comprehensive AI writing persona system:
    1. CORE PERSONA: {onboarding_data}
    2. PLATFORM ADAPTATIONS: {platforms}
    3. Single response with all personas
    """
```
### **3. Rule-Based Quality Assessment**
```python
def assess_persona_quality_rule_based(core_persona, platform_personas):
    core_completeness = calculate_completeness_score(core_persona)
    platform_consistency = calculate_consistency_score(core_persona, platform_personas)
    platform_optimization = calculate_platform_optimization_score(platform_personas)
    return {
        "overall_score": (core_completeness + platform_consistency + platform_optimization) / 3,
        "recommendations": generate_recommendations(...)
    }
```
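The helper functions above are only named in outline. As one illustration, a completeness score can be computed directly from persona fields; the field list below is an assumption based on the persona responses documented elsewhere in this system, not the actual rubric:

```python
# Required fields are illustrative; the real rubric lives in the persona service.
REQUIRED_PERSONA_FIELDS = ["name", "voice", "tone", "style_characteristics", "content_preferences"]

def calculate_completeness_score(core_persona: dict) -> float:
    """Fraction of expected persona fields that are present and non-empty."""
    filled = sum(1 for field in REQUIRED_PERSONA_FIELDS if core_persona.get(field))
    return filled / len(REQUIRED_PERSONA_FIELDS)

# Example: a persona missing content_preferences scores 0.8
print(calculate_completeness_score({
    "name": "Professional Content Creator",
    "voice": "authoritative",
    "tone": "professional",
    "style_characteristics": {"formality": "high"},
}))
```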
## 🎯 **API Call Analysis**
### **Previous Implementation:**
```
Step 1: Core Persona Generation → 1 API call
Step 2: Platform Adaptations → N API calls (sequential)
Step 3: Quality Assessment → 1 API call
Total: 1 + N + 1 = N + 2 API calls
```
### **Optimized Implementation:**
```
Step 1: Comprehensive Generation → 1 API call (core + all platforms)
Step 2: Rule-Based Quality Assessment → 0 API calls
Total: 1 API call
```
### **Parallel Execution (Alternative):**
```
Step 1: Core Persona Generation → 1 API call
Step 2: Platform Adaptations → N API calls (parallel)
Step 3: Rule-Based Quality Assessment → 0 API calls
Total: 1 + N API calls (but parallel execution)
```
## 🚀 **Benefits**
### **1. Performance**
- **70% fewer API calls** for 3+ platforms
- **60% faster execution** through parallelization
- **Near-instant quality assessment** (rule-based scoring replaces the extra LLM call)
### **2. Reliability**
- **No spaCy dependency issues** - graceful fallback
- **Better error handling** - individual platform failures don't break entire process
- **More predictable execution time**
### **3. Cost Efficiency**
- **Significant cost reduction** from fewer API calls
- **Better resource utilization** through parallel execution
- **Scalable** - performance improvement increases with more platforms
### **4. User Experience**
- **Faster persona generation** - users get results quicker
- **More reliable** - fewer dependency issues
- **Better quality metrics** - rule-based assessment is consistent
## 📋 **Implementation Options**
### **Option 1: Ultra-Optimized (Recommended)**
- **File**: `step4_persona_routes_optimized.py`
- **API Calls**: 1 total
- **Best for**: Production environments, cost optimization
- **Trade-off**: Single large prompt vs multiple focused prompts
### **Option 2: Parallel Optimized**
- **File**: `step4_persona_routes.py` (updated)
- **API Calls**: 1 + N (parallel)
- **Best for**: When platform-specific optimization is critical
- **Trade-off**: More API calls but better platform specialization
### **Option 3: Hybrid Approach**
- **Core persona**: Single API call
- **Platform adaptations**: Parallel API calls
- **Quality assessment**: Rule-based
- **Best for**: Balanced approach
## 🎯 **Recommendation**
**Use Option 1 (Ultra-Optimized)** for the best performance and cost efficiency:
- 1 API call total
- 70% cost reduction
- 60% faster execution
- Reliable and scalable
The optimized approach maintains quality while dramatically improving performance and reducing costs.


@@ -0,0 +1,269 @@
# ALwrity Onboarding System
## Overview
The ALwrity Onboarding System is a comprehensive, user-friendly process designed to get new users up and running with AI-powered content creation capabilities. This system guides users through a structured 6-step process to configure their AI services, analyze their content style, and set up personalized content creation workflows.
## 🎯 What is Onboarding?
Onboarding is your first-time setup experience with ALwrity. It's designed to:
- **Configure your AI services** (Gemini, Exa, CopilotKit)
- **Analyze your existing content** to understand your writing style
- **Set up research preferences** for intelligent content creation
- **Personalize your experience** based on your brand and audience
- **Connect integrations** for seamless content publishing
- **Generate your writing persona** for consistent, on-brand content
## 📋 The 6-Step Onboarding Process
### Step 1: AI LLM Providers Setup
**Purpose**: Connect your AI services to enable intelligent content creation
**What you'll do**:
- Configure **Gemini API** for advanced content generation
- Set up **Exa AI** for intelligent web research
- Connect **CopilotKit** for AI-powered assistance
**Why it's important**: These services work together to provide comprehensive AI functionality for content creation, research, and assistance.
**Requirements**: All three services are mandatory to proceed.
### Step 2: Website Analysis
**Purpose**: Analyze your existing content to understand your writing style and brand voice
**What you'll do**:
- Provide your website URL
- Let ALwrity analyze your existing content
- Review style analysis results
**What ALwrity does**:
- Crawls your website content
- Analyzes writing patterns, tone, and voice
- Identifies your target audience
- Generates style guidelines for consistent content
**Benefits**: Ensures all AI-generated content matches your existing brand voice and style.
### Step 3: AI Research Configuration
**Purpose**: Set up intelligent research capabilities for fact-based content creation
**What you'll do**:
- Choose research depth (Basic, Standard, Comprehensive, Expert)
- Select content types you create
- Configure auto-research preferences
- Enable factual content verification
**Benefits**: Ensures your content is well-researched, accurate, and up-to-date.
### Step 4: Personalization Setup
**Purpose**: Customize ALwrity to match your specific needs and preferences
**What you'll do**:
- Set posting preferences (frequency, timing)
- Configure content types and formats
- Define your target audience
- Set brand voice parameters
**Benefits**: Creates a personalized experience that matches your content strategy.
### Step 5: Integrations (Optional)
**Purpose**: Connect external platforms for seamless content publishing
**Available integrations**:
- **Wix** - Direct publishing to your Wix website
- **LinkedIn** - Automated LinkedIn content posting
- **WordPress** - WordPress site integration
- **Other platforms** - Additional integrations as available
**Benefits**: Streamlines your content workflow from creation to publication.
### Step 6: Complete Setup
**Purpose**: Finalize your onboarding and generate your writing persona
**What happens**:
- Validates all required configurations
- Generates your personalized writing persona
- Sets up your user workspace
- Activates all configured features
**Result**: You're ready to start creating AI-powered content that matches your brand!
## 🔧 Technical Architecture
### Service-Based Design
The onboarding system is built with a modular, service-based architecture:
```
onboarding_utils/
├── onboarding_completion_service.py # Handles final onboarding completion
├── onboarding_summary_service.py # Generates comprehensive summaries
├── onboarding_config_service.py # Manages configuration and providers
├── business_info_service.py # Handles business information
├── api_key_management_service.py # Manages API key operations
├── step_management_service.py # Controls step progression
├── onboarding_control_service.py # Manages onboarding sessions
└── persona_management_service.py # Handles persona generation
```
### Key Features
- **User Isolation**: Each user gets their own workspace and configuration
- **Progressive Setup**: Features are enabled incrementally based on progress
- **Persistent Storage**: All settings are saved and persist across sessions
- **Validation**: Comprehensive validation at each step
- **Error Handling**: Graceful error handling with helpful messages
- **Security**: API keys are encrypted and stored securely
## 🚀 Getting Started
### For New Users
1. **Sign up** with your preferred authentication method
2. **Start onboarding** - You'll be automatically redirected
3. **Follow the 6-step process** - Each step builds on the previous
4. **Complete setup** - Generate your writing persona
5. **Start creating** - Begin using ALwrity's AI-powered features
### For Returning Users
- **Resume onboarding** - Continue where you left off
- **Skip optional steps** - Focus on what you need
- **Update configurations** - Modify settings anytime
- **Add integrations** - Connect new platforms as needed
## 📊 Progress Tracking
The system tracks your progress through:
- **Step completion status** - See which steps are done
- **Progress percentage** - Visual progress indicator
- **Validation status** - Know what needs attention
- **Resume information** - Pick up where you left off
## 🔒 Security & Privacy
- **API Key Encryption**: All API keys are encrypted before storage
- **User Isolation**: Your data is completely separate from other users
- **Secure Storage**: Data is stored securely on your device
- **No Data Sharing**: Your content and preferences are never shared
## 🛠️ Troubleshooting
### Common Issues
**"Cannot proceed to next step"**
- Complete all required fields in the current step
- Ensure API keys are valid and working
- Check for any validation errors
**"API key validation failed"**
- Verify your API key is correct
- Check if the service is available
- Ensure you have sufficient credits/quota
**"Website analysis failed"**
- Ensure your website is publicly accessible
- Check if the URL is correct
- Try again after a few minutes
### Getting Help
- **In-app help** - Use the "Get Help" button in each step
- **Documentation** - Check the detailed setup guides
- **Support** - Contact support for technical issues
## 🎨 Customization Options
### Writing Style
- **Tone**: Professional, Casual, Friendly, Authoritative
- **Voice**: First-person, Third-person, Brand voice
- **Complexity**: Simple, Intermediate, Advanced, Expert
### Content Preferences
- **Length**: Short, Medium, Long, Variable
- **Format**: Blog posts, Social media, Emails, Articles
- **Frequency**: Daily, Weekly, Monthly, Custom
### Research Settings
- **Depth**: Basic, Standard, Comprehensive, Expert
- **Sources**: Web, Academic, News, Social media
- **Verification**: Auto-fact-check, Manual review, AI-assisted
## 📈 Benefits of Completing Onboarding
### Immediate Benefits
- **AI-Powered Content Creation** - Generate high-quality content instantly
- **Style Consistency** - All content matches your brand voice
- **Research Integration** - Fact-based, well-researched content
- **Time Savings** - Reduce content creation time by 80%
### Long-term Benefits
- **Brand Consistency** - Maintain consistent voice across all content
- **Scalability** - Create more content without sacrificing quality
- **Efficiency** - Streamlined workflow from idea to publication
- **Growth** - Focus on strategy while AI handles execution
## 🔄 Updating Your Configuration
You can update your onboarding settings anytime:
- **API Keys** - Update or add new service keys
- **Website Analysis** - Re-analyze your content for style updates
- **Research Preferences** - Adjust research depth and sources
- **Personalization** - Update your brand voice and preferences
- **Integrations** - Add or remove platform connections
## 📞 Support & Resources
### Documentation
- **Setup Guides** - Step-by-step configuration instructions
- **API Documentation** - Technical reference for developers
- **Best Practices** - Tips for optimal onboarding experience
### Community
- **User Forum** - Connect with other ALwrity users
- **Feature Requests** - Suggest improvements
- **Success Stories** - Learn from other users' experiences
### Support Channels
- **In-app Support** - Get help directly within ALwrity
- **Email Support** - support@alwrity.com
- **Live Chat** - Available during business hours
- **Video Tutorials** - Visual guides for complex setups
## 🎯 Success Metrics
Track your onboarding success with these metrics:
- **Completion Rate** - Percentage of users who complete onboarding
- **Time to Value** - How quickly users see benefits
- **Feature Adoption** - Which features users engage with
- **Satisfaction Score** - User feedback on the experience
## 🔮 Future Enhancements
We're constantly improving the onboarding experience:
- **Smart Recommendations** - AI-suggested configurations
- **Template Library** - Pre-built setups for different industries
- **Advanced Analytics** - Detailed insights into your content performance
- **Mobile Experience** - Optimized mobile onboarding flow
- **Voice Setup** - Voice-based configuration for accessibility
---
## Quick Start Checklist
- [ ] **Step 1**: Configure Gemini, Exa, and CopilotKit API keys
- [ ] **Step 2**: Provide website URL for style analysis
- [ ] **Step 3**: Set research preferences and content types
- [ ] **Step 4**: Configure personalization settings
- [ ] **Step 5**: Connect desired integrations (optional)
- [ ] **Step 6**: Complete setup and generate writing persona
**🎉 You're ready to create amazing AI-powered content!**
---
*This onboarding system is designed to get you up and running quickly while ensuring your content maintains your unique brand voice and style. Take your time with each step - the more accurate your configuration, the better your AI-generated content will be.*


@@ -0,0 +1,23 @@
"""
Onboarding utilities package.
"""
from .onboarding_completion_service import OnboardingCompletionService
from .onboarding_summary_service import OnboardingSummaryService
from .onboarding_config_service import OnboardingConfigService
from .business_info_service import BusinessInfoService
from .api_key_management_service import APIKeyManagementService
from .step_management_service import StepManagementService
from .onboarding_control_service import OnboardingControlService
from .persona_management_service import PersonaManagementService
__all__ = [
    'OnboardingCompletionService',
    'OnboardingSummaryService',
    'OnboardingConfigService',
    'BusinessInfoService',
    'APIKeyManagementService',
    'StepManagementService',
    'OnboardingControlService',
    'PersonaManagementService',
]


@@ -0,0 +1,147 @@
"""
API Key Management Service
Handles API key operations for onboarding.
"""
import time
from typing import Dict, Any
from fastapi import HTTPException
from loguru import logger
from services.onboarding.api_key_manager import APIKeyManager
from services.validation import check_all_api_keys
class APIKeyManagementService:
    """Service for handling API key management operations."""

    def __init__(self):
        # Initialize APIKeyManager with database support
        self.api_key_manager = APIKeyManager()
        # Ensure database service is available
        if not hasattr(self.api_key_manager, 'use_database'):
            self.api_key_manager.use_database = True
            try:
                from services.onboarding.database_service import OnboardingDatabaseService
                self.api_key_manager.db_service = OnboardingDatabaseService()
                logger.info("Database service initialized for APIKeyManager")
            except Exception as e:
                logger.warning(f"Database service not available: {e}")
                self.api_key_manager.use_database = False
                self.api_key_manager.db_service = None
        # Simple cache for API keys
        self._api_keys_cache = None
        self._cache_timestamp = 0
        self.CACHE_DURATION = 30  # Cache for 30 seconds

    async def get_api_keys(self) -> Dict[str, Any]:
        """Get all configured API keys (masked)."""
        current_time = time.time()
        # Return cached result if still valid
        if self._api_keys_cache and (current_time - self._cache_timestamp) < self.CACHE_DURATION:
            logger.debug("Returning cached API keys")
            return self._api_keys_cache
        try:
            self.api_key_manager.load_api_keys()  # Load keys from environment
            api_keys = self.api_key_manager.api_keys  # Get the loaded keys
            # Mask the API keys for security
            masked_keys = {}
            for provider, key in api_keys.items():
                if key:
                    masked_keys[provider] = "*" * (len(key) - 4) + key[-4:] if len(key) > 4 else "*" * len(key)
                else:
                    masked_keys[provider] = None
            result = {
                "api_keys": masked_keys,
                "total_providers": len(api_keys),
                "configured_providers": [k for k, v in api_keys.items() if v]
            }
            # Cache the result
            self._api_keys_cache = result
            self._cache_timestamp = current_time
            return result
        except Exception as e:
            logger.error(f"Error getting API keys: {str(e)}")
            raise HTTPException(status_code=500, detail="Internal server error")

    async def get_api_keys_for_onboarding(self, user_id: str | None = None) -> Dict[str, Any]:
        """Get all configured API keys for onboarding (unmasked), user-aware.

        In production, keys are per-user and stored in DB; in local, we use env.
        """
        try:
            # Prefer DB per-user keys when user_id is provided and DB is available
            if user_id and getattr(self.api_key_manager, 'use_database', False) and getattr(self.api_key_manager, 'db_service', None):
                try:
                    from services.database import SessionLocal
                    db = SessionLocal()
                    try:
                        api_keys = self.api_key_manager.db_service.get_api_keys(user_id, db) or {}
                        logger.info(f"Loaded {len(api_keys)} API keys from database for user {user_id}")
                        return {
                            "api_keys": api_keys,
                            "total_providers": len(api_keys),
                            "configured_providers": [k for k, v in api_keys.items() if v]
                        }
                    finally:
                        db.close()
                except Exception as db_err:
                    logger.warning(f"DB lookup for API keys failed, falling back to env: {db_err}")
            # Fallback: load from environment/in-memory
            self.api_key_manager.load_api_keys()
            api_keys = self.api_key_manager.api_keys
            return {
                "api_keys": api_keys,
                "total_providers": len(api_keys),
                "configured_providers": [k for k, v in api_keys.items() if v]
            }
        except Exception as e:
            logger.error(f"Error getting API keys for onboarding: {str(e)}")
            raise HTTPException(status_code=500, detail="Internal server error")

    async def save_api_key(self, provider: str, api_key: str, description: str = None, current_user: dict = None) -> Dict[str, Any]:
        """Save an API key for a provider."""
        try:
            logger.info(f"📝 save_api_key called for provider: {provider}")
            # Set user_id on the API key manager if available
            if current_user and current_user.get('id'):
                self.api_key_manager.user_id = current_user['id']
                logger.info(f"Set user_id on APIKeyManager: {current_user['id']}")
            success = self.api_key_manager.save_api_key(provider, api_key)
            if success:
                return {
                    "message": f"API key for {provider} saved successfully",
                    "provider": provider,
                    "status": "saved"
                }
            else:
                raise HTTPException(status_code=400, detail=f"Failed to save API key for {provider}")
        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"Error saving API key: {str(e)}")
            raise HTTPException(status_code=500, detail="Internal server error")

    async def validate_api_keys(self) -> Dict[str, Any]:
        """Validate all configured API keys."""
        try:
            validation_results = check_all_api_keys(self.api_key_manager)
            return {
                "validation_results": validation_results.get('results', {}),
                "all_valid": validation_results.get('all_valid', False),
                "total_providers": len(validation_results.get('results', {}))
            }
        except Exception as e:
            logger.error(f"Error validating API keys: {str(e)}")
            raise HTTPException(status_code=500, detail="Internal server error")


@@ -0,0 +1,86 @@
"""
Business Information Service
Handles business information management for users without websites.
"""
from typing import Dict, Any, Optional
from fastapi import HTTPException
from loguru import logger
class BusinessInfoService:
    """Service for handling business information operations."""

    def __init__(self):
        pass

    async def save_business_info(self, business_info: dict) -> Dict[str, Any]:
        """Save business information for users without websites."""
        try:
            from models.business_info_request import BusinessInfoRequest
            from services.business_info_service import business_info_service
            logger.info(f"🔄 Saving business info for user_id: {business_info.user_id}")
            result = business_info_service.save_business_info(business_info)
            logger.success(f"✅ Business info saved successfully for user_id: {business_info.user_id}")
            return result
        except Exception as e:
            logger.error(f"❌ Error saving business info: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to save business info: {str(e)}")

    async def get_business_info(self, business_info_id: int) -> Dict[str, Any]:
        """Get business information by ID."""
        try:
            from services.business_info_service import business_info_service
            logger.info(f"🔄 Getting business info for ID: {business_info_id}")
            result = business_info_service.get_business_info(business_info_id)
            if result:
                logger.success(f"✅ Business info retrieved for ID: {business_info_id}")
                return result
            else:
                logger.warning(f"⚠️ No business info found for ID: {business_info_id}")
                raise HTTPException(status_code=404, detail="Business info not found")
        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"❌ Error getting business info: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to get business info: {str(e)}")

    async def get_business_info_by_user(self, user_id: int) -> Dict[str, Any]:
        """Get business information by user ID."""
        try:
            from services.business_info_service import business_info_service
            logger.info(f"🔄 Getting business info for user ID: {user_id}")
            result = business_info_service.get_business_info_by_user(user_id)
            if result:
                logger.success(f"✅ Business info retrieved for user ID: {user_id}")
                return result
            else:
                logger.warning(f"⚠️ No business info found for user ID: {user_id}")
                raise HTTPException(status_code=404, detail="Business info not found")
        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"❌ Error getting business info: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to get business info: {str(e)}")

    async def update_business_info(self, business_info_id: int, business_info: dict) -> Dict[str, Any]:
        """Update business information."""
        try:
            from models.business_info_request import BusinessInfoRequest
            from services.business_info_service import business_info_service
            logger.info(f"🔄 Updating business info for ID: {business_info_id}")
            result = business_info_service.update_business_info(business_info_id, business_info)
            if result:
                logger.success(f"✅ Business info updated for ID: {business_info_id}")
                return result
            else:
                logger.warning(f"⚠️ No business info found to update for ID: {business_info_id}")
                raise HTTPException(status_code=404, detail="Business info not found")
        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"❌ Error updating business info: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to update business info: {str(e)}")


@@ -0,0 +1,66 @@
from typing import Dict, Any, List, Optional
from pydantic import BaseModel, Field
from services.onboarding.api_key_manager import (
    OnboardingProgress,
    get_onboarding_progress,
    get_onboarding_progress_for_user,
    StepStatus,
    StepData,
    APIKeyManager,
)

class StepDataModel(BaseModel):
    step_number: int
    title: str
    description: str
    status: str
    completed_at: Optional[str] = None
    data: Optional[Dict[str, Any]] = None
    validation_errors: List[str] = []

class OnboardingProgressModel(BaseModel):
    steps: List[StepDataModel]
    current_step: int
    started_at: str
    last_updated: str
    is_completed: bool
    completed_at: Optional[str] = None

class StepCompletionRequest(BaseModel):
    data: Optional[Dict[str, Any]] = None
    validation_errors: List[str] = []

class APIKeyRequest(BaseModel):
    provider: str = Field(..., description="API provider name (e.g., 'openai', 'gemini')")
    api_key: str = Field(..., description="API key value")
    description: Optional[str] = Field(None, description="Optional description")

class OnboardingStatusResponse(BaseModel):
    is_completed: bool
    current_step: int
    completion_percentage: float
    next_step: Optional[int]
    started_at: str
    completed_at: Optional[str] = None
    can_proceed_to_final: bool

class StepValidationResponse(BaseModel):
    can_proceed: bool
    validation_errors: List[str]
    step_status: str

def get_progress() -> OnboardingProgress:
    return get_onboarding_progress()

def get_api_key_manager() -> APIKeyManager:
    return APIKeyManager()


@@ -0,0 +1,227 @@
from typing import Dict, Any
from loguru import logger
from fastapi import HTTPException
from .endpoint_models import APIKeyRequest
# Import persona generation functions
from .step4_persona_routes import (
    generate_writing_personas,
    generate_writing_personas_async,
    get_persona_task_status,
    assess_persona_quality,
    regenerate_persona,
    get_persona_generation_options
)

async def get_api_keys():
    try:
        from api.onboarding_utils.api_key_management_service import APIKeyManagementService
        api_service = APIKeyManagementService()
        return await api_service.get_api_keys()
    except Exception as e:
        logger.error(f"Error getting API keys: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")

async def get_api_keys_for_onboarding(current_user: dict = None):
    try:
        from api.onboarding_utils.api_key_management_service import APIKeyManagementService
        api_service = APIKeyManagementService()
        user_id = str(current_user.get('id')) if current_user and current_user.get('id') else None
        return await api_service.get_api_keys_for_onboarding(user_id)
    except Exception as e:
        logger.error(f"Error getting API keys for onboarding: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")

async def save_api_key(request: APIKeyRequest, current_user: dict = None):
    try:
        from api.onboarding_utils.api_key_management_service import APIKeyManagementService
        api_service = APIKeyManagementService()
        return await api_service.save_api_key(request.provider, request.api_key, request.description, current_user)
    except Exception as e:
        logger.error(f"Error saving API key: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")

async def validate_api_keys():
    try:
        from api.onboarding_utils.api_key_management_service import APIKeyManagementService
        api_service = APIKeyManagementService()
        return await api_service.validate_api_keys()
    except Exception as e:
        logger.error(f"Error validating API keys: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")

def get_onboarding_config():
    try:
        from api.onboarding_utils.onboarding_config_service import OnboardingConfigService
        config_service = OnboardingConfigService()
        return config_service.get_onboarding_config()
    except Exception as e:
        logger.error(f"Error getting onboarding config: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")

async def get_provider_setup_info(provider: str):
    try:
        from api.onboarding_utils.onboarding_config_service import OnboardingConfigService
        config_service = OnboardingConfigService()
        return await config_service.get_provider_setup_info(provider)
    except Exception as e:
        logger.error(f"Error getting provider setup info: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")

async def get_all_providers_info():
    try:
        from api.onboarding_utils.onboarding_config_service import OnboardingConfigService
        config_service = OnboardingConfigService()
        return config_service.get_all_providers_info()
    except Exception as e:
        logger.error(f"Error getting all providers info: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")

async def validate_provider_key(provider: str, request: APIKeyRequest):
    try:
        from api.onboarding_utils.onboarding_config_service import OnboardingConfigService
        config_service = OnboardingConfigService()
        return await config_service.validate_provider_key(provider, request.api_key)
    except Exception as e:
        logger.error(f"Error validating provider key: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")

async def get_enhanced_validation_status():
    try:
        from api.onboarding_utils.onboarding_config_service import OnboardingConfigService
        config_service = OnboardingConfigService()
        return await config_service.get_enhanced_validation_status()
    except Exception as e:
        logger.error(f"Error getting enhanced validation status: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")

async def get_onboarding_summary(current_user: Dict[str, Any]):
    try:
        from api.onboarding_utils.onboarding_summary_service import OnboardingSummaryService
        user_id = str(current_user.get('id'))
        summary_service = OnboardingSummaryService(user_id)
        logger.info(f"Getting onboarding summary for user {user_id}")
        return await summary_service.get_onboarding_summary()
    except Exception as e:
        logger.error(f"Error getting onboarding summary: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")

async def get_website_analysis_data(current_user: Dict[str, Any]):
    try:
        from api.onboarding_utils.onboarding_summary_service import OnboardingSummaryService
        user_id = str(current_user.get('id'))
        summary_service = OnboardingSummaryService(user_id)
        logger.info(f"Getting website analysis data for user {user_id}")
        return await summary_service.get_website_analysis_data()
    except Exception as e:
        logger.error(f"Error getting website analysis data: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")

async def get_research_preferences_data(current_user: Dict[str, Any]):
    try:
        from api.onboarding_utils.onboarding_summary_service import OnboardingSummaryService
        user_id = str(current_user.get('id'))
        summary_service = OnboardingSummaryService(user_id)
        logger.info(f"Getting research preferences data for user {user_id}")
        return await summary_service.get_research_preferences_data()
    except Exception as e:
        logger.error(f"Error getting research preferences data: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")

async def check_persona_generation_readiness(user_id: int = 1):
    try:
        from api.onboarding_utils.persona_management_service import PersonaManagementService
        persona_service = PersonaManagementService()
        return await persona_service.check_persona_generation_readiness(user_id)
    except Exception as e:
        logger.error(f"Error checking persona readiness: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")

async def generate_persona_preview(user_id: int = 1):
    try:
        from api.onboarding_utils.persona_management_service import PersonaManagementService
        persona_service = PersonaManagementService()
        return await persona_service.generate_persona_preview(user_id)
    except Exception as e:
        logger.error(f"Error generating persona preview: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")

async def generate_writing_persona(user_id: int = 1):
    try:
        from api.onboarding_utils.persona_management_service import PersonaManagementService
        persona_service = PersonaManagementService()
        return await persona_service.generate_writing_persona(user_id)
    except Exception as e:
        logger.error(f"Error generating writing persona: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")

async def get_user_writing_personas(user_id: int = 1):
    try:
        from api.onboarding_utils.persona_management_service import PersonaManagementService
        persona_service = PersonaManagementService()
        return await persona_service.get_user_writing_personas(user_id)
    except Exception as e:
        logger.error(f"Error getting user personas: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")

async def save_business_info(business_info: dict):
    try:
        from api.onboarding_utils.business_info_service import BusinessInfoService
        business_service = BusinessInfoService()
        return await business_service.save_business_info(business_info)
    except Exception as e:
        logger.error(f"❌ Error saving business info: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to save business info: {str(e)}")

async def get_business_info(business_info_id: int):
    try:
        from api.onboarding_utils.business_info_service import BusinessInfoService
        business_service = BusinessInfoService()
        return await business_service.get_business_info(business_info_id)
    except Exception as e:
        logger.error(f"❌ Error getting business info: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to get business info: {str(e)}")

async def get_business_info_by_user(user_id: int):
    try:
        from api.onboarding_utils.business_info_service import BusinessInfoService
        business_service = BusinessInfoService()
        return await business_service.get_business_info_by_user(user_id)
    except Exception as e:
        logger.error(f"❌ Error getting business info: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to get business info: {str(e)}")

async def update_business_info(business_info_id: int, business_info: dict):
    try:
        from api.onboarding_utils.business_info_service import BusinessInfoService
        business_service = BusinessInfoService()
        return await business_service.update_business_info(business_info_id, business_info)
    except Exception as e:
        logger.error(f"❌ Error updating business info: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to update business info: {str(e)}")
__all__ = [name for name in globals().keys() if not name.startswith('_')]
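# Illustrative wiring sketch (assumption, not part of the original module): these
# handlers are plain async functions, so a FastAPI router elsewhere is expected to
# expose them and inject the authenticated Clerk user. The router name and path
# below are placeholders for illustration only.
from fastapi import APIRouter, Depends
from middleware.auth_middleware import get_current_user

_example_router = APIRouter(prefix="/api/onboarding")

@_example_router.get("/summary")
async def _example_onboarding_summary(current_user: Dict[str, Any] = Depends(get_current_user)):
    # Delegate to the aggregator handler defined above and return its payload as-is.
    return await get_onboarding_summary(current_user)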

View File

@@ -0,0 +1,163 @@
from typing import Dict, Any
from datetime import datetime
from loguru import logger
from fastapi import HTTPException, Depends
from middleware.auth_middleware import get_current_user
from services.onboarding.progress_service import get_onboarding_progress_service
def health_check():
return {"status": "healthy", "timestamp": datetime.now().isoformat()}
async def initialize_onboarding(current_user: Dict[str, Any] = Depends(get_current_user)):
try:
user_id = str(current_user.get('id'))
progress_service = get_onboarding_progress_service()
status = progress_service.get_onboarding_status(user_id)
# Get completion data for step validation
completion_data = progress_service.get_completion_data(user_id)
# Build steps data based on database state
steps_data = []
for step_num in range(1, 7): # Steps 1-6
step_completed = False
step_data = None
# Check if step is completed based on database data
if step_num == 1: # API Keys
api_keys = completion_data.get('api_keys', {})
step_completed = any(v for v in api_keys.values() if v)
elif step_num == 2: # Website Analysis
website = completion_data.get('website_analysis', {})
step_completed = bool(website.get('website_url') or website.get('writing_style'))
if step_completed:
step_data = website
elif step_num == 3: # Research Preferences
research = completion_data.get('research_preferences', {})
step_completed = bool(research.get('research_depth') or research.get('content_types'))
if step_completed:
step_data = research
elif step_num == 4: # Persona Generation
persona = completion_data.get('persona_data', {})
step_completed = bool(persona.get('corePersona') or persona.get('platformPersonas'))
if step_completed:
step_data = persona
elif step_num == 5: # Integrations (treated as completed once the user has reached step 5)
step_completed = status['current_step'] >= 5
elif step_num == 6: # Final Step
step_completed = status['is_completed']
steps_data.append({
"step_number": step_num,
"title": f"Step {step_num}",
"description": f"Step {step_num} description",
"status": "completed" if step_completed else "pending",
"completed_at": datetime.now().isoformat() if step_completed else None,
"has_data": step_data is not None,
"data": step_data
})
# Reconciliation: if not completed but all artifacts exist, mark complete once
try:
if not status['is_completed']:
all_have = (
any(v for v in completion_data.get('api_keys', {}).values() if v) and
bool((completion_data.get('website_analysis') or {}).get('website_url') or (completion_data.get('website_analysis') or {}).get('writing_style')) and
bool((completion_data.get('research_preferences') or {}).get('research_depth') or (completion_data.get('research_preferences') or {}).get('content_types')) and
bool((completion_data.get('persona_data') or {}).get('corePersona') or (completion_data.get('persona_data') or {}).get('platformPersonas'))
)
if all_have:
svc = progress_service
svc.complete_onboarding(user_id)
# refresh status after reconciliation
status = svc.get_onboarding_status(user_id)
except Exception:
pass
# Determine next step robustly
next_step = 6 if status['is_completed'] else None
if not status['is_completed']:
for step in steps_data:
if step['status'] != 'completed':
next_step = step['step_number']
break
response_data = {
"user": {
"id": user_id,
"email": current_user.get('email'),
"first_name": current_user.get('first_name'),
"last_name": current_user.get('last_name'),
"clerk_user_id": user_id,
},
"onboarding": {
"is_completed": status['is_completed'],
"current_step": 6 if status['is_completed'] else status['current_step'],
"completion_percentage": status['completion_percentage'],
"next_step": next_step,
"started_at": status['started_at'],
"last_updated": status['last_updated'],
"completed_at": status['completed_at'],
"can_proceed_to_final": True if status['is_completed'] else status['current_step'] >= 5,
"steps": steps_data,
},
"session": {
"session_id": user_id,
"initialized_at": status['started_at'],
"last_activity": status['last_updated'],
},
}
logger.info(
f"Batch init successful for user {user_id}: step {status['current_step']}/6"
)
return response_data
except Exception as e:
logger.error(f"Error in initialize_onboarding: {str(e)}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Failed to initialize onboarding: {str(e)}")
async def get_onboarding_status(current_user: Dict[str, Any]):
try:
from api.onboarding_utils.step_management_service import StepManagementService
step_service = StepManagementService()
return await step_service.get_onboarding_status(current_user)
except Exception as e:
logger.error(f"Error getting onboarding status: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
async def get_onboarding_progress_full(current_user: Dict[str, Any]):
try:
from api.onboarding_utils.step_management_service import StepManagementService
step_service = StepManagementService()
return await step_service.get_onboarding_progress_full(current_user)
except Exception as e:
logger.error(f"Error getting onboarding progress: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
async def get_step_data(step_number: int, current_user: Dict[str, Any]):
try:
from api.onboarding_utils.step_management_service import StepManagementService
step_service = StepManagementService()
return await step_service.get_step_data(step_number, current_user)
except Exception as e:
logger.error(f"Error getting step data: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
__all__ = [name for name in globals().keys() if not name.startswith('_')]

View File

@@ -0,0 +1,82 @@
from typing import Dict, Any
from loguru import logger
from fastapi import HTTPException
async def complete_step(step_number: int, request_data: Dict[str, Any], current_user: Dict[str, Any]):
try:
from api.onboarding_utils.step_management_service import StepManagementService
step_service = StepManagementService()
return await step_service.complete_step(step_number, request_data, current_user)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error completing step: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
async def skip_step(step_number: int, current_user: Dict[str, Any]):
try:
from api.onboarding_utils.step_management_service import StepManagementService
step_service = StepManagementService()
return await step_service.skip_step(step_number, current_user)
except Exception as e:
logger.error(f"Error skipping step: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
async def validate_step_access(step_number: int, current_user: Dict[str, Any]):
try:
from api.onboarding_utils.step_management_service import StepManagementService
step_service = StepManagementService()
return await step_service.validate_step_access(step_number, current_user)
except Exception as e:
logger.error(f"Error validating step access: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
async def start_onboarding(current_user: Dict[str, Any]):
try:
from api.onboarding_utils.onboarding_control_service import OnboardingControlService
control_service = OnboardingControlService()
return await control_service.start_onboarding(current_user)
except Exception as e:
logger.error(f"Error starting onboarding: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
async def complete_onboarding(current_user: Dict[str, Any]):
try:
from api.onboarding_utils.onboarding_completion_service import OnboardingCompletionService
completion_service = OnboardingCompletionService()
return await completion_service.complete_onboarding(current_user)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error completing onboarding: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
async def reset_onboarding():
try:
from api.onboarding_utils.onboarding_control_service import OnboardingControlService
control_service = OnboardingControlService()
return await control_service.reset_onboarding()
except Exception as e:
logger.error(f"Error resetting onboarding: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
async def get_resume_info():
try:
from api.onboarding_utils.onboarding_control_service import OnboardingControlService
control_service = OnboardingControlService()
return await control_service.get_resume_info()
except Exception as e:
logger.error(f"Error getting resume info: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
__all__ = [name for name in globals().keys() if not name.startswith('_')]

View File

@@ -0,0 +1,327 @@
"""
Onboarding Completion Service
Handles the complex logic for completing the onboarding process.
"""
from typing import Dict, Any, List
from datetime import datetime
from fastapi import HTTPException
from loguru import logger
from services.onboarding.progress_service import get_onboarding_progress_service
from services.onboarding.database_service import OnboardingDatabaseService
from services.database import get_db
from services.persona_analysis_service import PersonaAnalysisService
from services.research.research_persona_scheduler import schedule_research_persona_generation
from services.persona.facebook.facebook_persona_scheduler import schedule_facebook_persona_generation
from services.oauth_token_monitoring_service import create_oauth_monitoring_tasks
class OnboardingCompletionService:
"""Service for handling onboarding completion logic."""
def __init__(self):
# Pre-requisite steps; step 6 is the finalization itself
self.required_steps = [1, 2, 3, 4, 5]
async def complete_onboarding(self, current_user: Dict[str, Any]) -> Dict[str, Any]:
"""Complete the onboarding process with full validation."""
try:
user_id = str(current_user.get('id'))
progress_service = get_onboarding_progress_service()
# Strict DB-only validation now that step persistence is solid
missing_steps = self._validate_required_steps_database(user_id)
if missing_steps:
missing_steps_str = ", ".join(missing_steps)
raise HTTPException(
status_code=400,
detail=f"Cannot complete onboarding. The following steps must be completed first: {missing_steps_str}"
)
# Require API keys in DB for completion
self._validate_api_keys(user_id)
# Generate writing persona from onboarding data only if not already present
persona_generated = await self._generate_persona_from_onboarding(user_id)
# Complete the onboarding process in database
success = progress_service.complete_onboarding(user_id)
if not success:
raise HTTPException(status_code=500, detail="Failed to mark onboarding as complete")
# Schedule research persona generation 20 minutes after onboarding completion
try:
schedule_research_persona_generation(user_id, delay_minutes=20)
logger.info(f"Scheduled research persona generation for user {user_id} (20 minutes after onboarding)")
except Exception as e:
# Non-critical: log but don't fail onboarding completion
logger.warning(f"Failed to schedule research persona generation for user {user_id}: {e}")
# Schedule Facebook persona generation 20 minutes after onboarding completion
try:
schedule_facebook_persona_generation(user_id, delay_minutes=20)
logger.info(f"Scheduled Facebook persona generation for user {user_id} (20 minutes after onboarding)")
except Exception as e:
# Non-critical: log but don't fail onboarding completion
logger.warning(f"Failed to schedule Facebook persona generation for user {user_id}: {e}")
# Create OAuth token monitoring tasks for connected platforms
try:
from services.database import SessionLocal
db = SessionLocal()
try:
monitoring_tasks = create_oauth_monitoring_tasks(user_id, db)
logger.info(
f"Created {len(monitoring_tasks)} OAuth token monitoring tasks for user {user_id} "
f"on onboarding completion"
)
finally:
db.close()
except Exception as e:
# Non-critical: log but don't fail onboarding completion
logger.warning(f"Failed to create OAuth token monitoring tasks for user {user_id}: {e}")
# Create website analysis tasks for user's website and competitors
try:
from services.database import SessionLocal
from services.website_analysis_monitoring_service import create_website_analysis_tasks
db = SessionLocal()
try:
result = create_website_analysis_tasks(user_id=user_id, db=db)
if result.get('success'):
tasks_count = result.get('tasks_created', 0)
logger.info(
f"Created {tasks_count} website analysis tasks for user {user_id} "
f"on onboarding completion"
)
else:
error = result.get('error', 'Unknown error')
logger.warning(
f"Failed to create website analysis tasks for user {user_id}: {error}"
)
finally:
db.close()
except Exception as e:
# Non-critical: log but don't fail onboarding completion
logger.warning(f"Failed to create website analysis tasks for user {user_id}: {e}")
return {
"message": "Onboarding completed successfully",
"completed_at": datetime.now().isoformat(),
"completion_percentage": 100.0,
"persona_generated": persona_generated
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error completing onboarding: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
def _validate_required_steps_database(self, user_id: str) -> List[str]:
"""Validate that all required steps are completed using database only."""
missing_steps = []
try:
db = next(get_db())
db_service = OnboardingDatabaseService()
# Debug logging
logger.info(f"Validating steps for user {user_id}")
# Check each required step
for step_num in self.required_steps:
step_completed = False
if step_num == 1: # API Keys
api_keys = db_service.get_api_keys(user_id, db)
logger.info(f"Step 1 - API Keys: {api_keys}")
step_completed = any(v for v in api_keys.values() if v)
logger.info(f"Step 1 completed: {step_completed}")
elif step_num == 2: # Website Analysis
website = db_service.get_website_analysis(user_id, db)
logger.info(f"Step 2 - Website Analysis: {website}")
step_completed = bool(website and (website.get('website_url') or website.get('writing_style')))
logger.info(f"Step 2 completed: {step_completed}")
elif step_num == 3: # Research Preferences
research = db_service.get_research_preferences(user_id, db)
logger.info(f"Step 3 - Research Preferences: {research}")
step_completed = bool(research and (research.get('research_depth') or research.get('content_types')))
logger.info(f"Step 3 completed: {step_completed}")
elif step_num == 4: # Persona Generation
persona = db_service.get_persona_data(user_id, db)
logger.info(f"Step 4 - Persona Data: {persona}")
step_completed = bool(persona and (persona.get('corePersona') or persona.get('platformPersonas')))
logger.info(f"Step 4 completed: {step_completed}")
elif step_num == 5: # Integrations
# For now, consider this always completed if we reach this point
step_completed = True
logger.info(f"Step 5 completed: {step_completed}")
if not step_completed:
missing_steps.append(f"Step {step_num}")
logger.info(f"Missing steps: {missing_steps}")
return missing_steps
except Exception as e:
logger.error(f"Error validating required steps: {e}")
return ["Validation error"]
def _validate_required_steps(self, user_id: str, progress) -> List[str]:
"""Validate that all required steps are completed.
This method trusts the progress tracker, but also falls back to
database presence for Steps 2 and 3 so migration from file→DB
does not block completion.
"""
# StepStatus is referenced below but not imported at module level; import it lazily
# here. NOTE: the module path is an assumption — adjust if the enum lives elsewhere.
from services.onboarding.api_key_manager import StepStatus
missing_steps = []
db = None
db_service = None
try:
db = next(get_db())
db_service = OnboardingDatabaseService(db)
except Exception:
db = None
db_service = None
logger.info(f"OnboardingCompletionService: Validating steps for user {user_id}")
logger.info(f"OnboardingCompletionService: Current step: {progress.current_step}")
logger.info(f"OnboardingCompletionService: Required steps: {self.required_steps}")
for step_num in self.required_steps:
step = progress.get_step_data(step_num)
logger.info(f"OnboardingCompletionService: Step {step_num} - status: {step.status if step else 'None'}")
if step and step.status in [StepStatus.COMPLETED, StepStatus.SKIPPED]:
logger.info(f"OnboardingCompletionService: Step {step_num} already completed/skipped")
continue
# DB-aware fallbacks for migration period
try:
if db_service:
if step_num == 1:
# Treat as completed if user has any API key in DB
keys = db_service.get_api_keys(user_id, db)
if keys and any(v for v in keys.values()):
try:
progress.mark_step_completed(1, {'source': 'db-fallback'})
except Exception:
pass
continue
if step_num == 2:
# Treat as completed if website analysis exists in DB
website = db_service.get_website_analysis(user_id, db)
if website and (website.get('website_url') or website.get('writing_style')):
# Optionally mark as completed in progress to keep state consistent
try:
progress.mark_step_completed(2, {'source': 'db-fallback'})
except Exception:
pass
continue
# Secondary fallback: research preferences captured style data
prefs = db_service.get_research_preferences(user_id, db)
if prefs and (prefs.get('writing_style') or prefs.get('content_characteristics')):
try:
progress.mark_step_completed(2, {'source': 'research-prefs-fallback'})
except Exception:
pass
continue
# Tertiary fallback: persona data created implies earlier steps done
persona = None
try:
persona = db_service.get_persona_data(user_id, db)
except Exception:
persona = None
if persona and persona.get('corePersona'):
try:
progress.mark_step_completed(2, {'source': 'persona-fallback'})
except Exception:
pass
continue
if step_num == 3:
# Treat as completed if research preferences exist in DB
prefs = db_service.get_research_preferences(user_id, db)
if prefs and prefs.get('research_depth'):
try:
progress.mark_step_completed(3, {'source': 'db-fallback'})
except Exception:
pass
continue
if step_num == 4:
# Treat as completed if persona data exists in DB
persona = None
try:
persona = db_service.get_persona_data(user_id, db)
except Exception:
persona = None
if persona and persona.get('corePersona'):
try:
progress.mark_step_completed(4, {'source': 'db-fallback'})
except Exception:
pass
continue
if step_num == 5:
# Treat as completed if integrations data exists in DB
# For now, we'll consider step 5 completed if the user has reached the final step
# This is a simplified approach - in the future, we could check for specific integration data
try:
# Check if user has completed previous steps and is on final step
if progress.current_step >= 6: # FinalStep is step 6
progress.mark_step_completed(5, {'source': 'final-step-fallback'})
continue
except Exception:
pass
except Exception:
# If DB check fails, fall back to progress status only
pass
if step:
missing_steps.append(step.title)
return missing_steps
def _validate_api_keys(self, user_id: str):
"""Validate that API keys are configured for the current user (DB-only)."""
try:
db = next(get_db())
db_service = OnboardingDatabaseService()
user_keys = db_service.get_api_keys(user_id, db)
if not user_keys or not any(v for v in user_keys.values()):
raise HTTPException(
status_code=400,
detail="Cannot complete onboarding. At least one AI provider API key must be configured in your account."
)
except HTTPException:
raise
except Exception:
raise HTTPException(
status_code=400,
detail="Cannot complete onboarding. API key validation failed."
)
async def _generate_persona_from_onboarding(self, user_id: str) -> bool:
"""Generate writing persona from onboarding data."""
try:
persona_service = PersonaAnalysisService()
# If a persona already exists for this user, skip regeneration
try:
existing = persona_service.get_user_personas(int(user_id))
if existing and len(existing) > 0:
logger.info("Persona already exists for user %s; skipping regeneration during completion", user_id)
return False
except Exception:
# Non-fatal; proceed to attempt generation
pass
# Generate persona for this user
persona_result = persona_service.generate_persona_from_onboarding(int(user_id))
if "error" not in persona_result:
logger.info(f"✅ Writing persona generated during onboarding completion: {persona_result.get('persona_id')}")
return True
else:
logger.warning(f"⚠️ Persona generation failed during onboarding: {persona_result['error']}")
return False
except Exception as e:
logger.warning(f"⚠️ Non-critical error generating persona during onboarding: {str(e)}")
return False

View File

@@ -0,0 +1,127 @@
"""
Onboarding Configuration Service
Handles onboarding configuration and provider setup information.
"""
from typing import Dict, Any
from fastapi import HTTPException
from loguru import logger
from services.onboarding.api_key_manager import get_api_key_manager
from services.validation import check_all_api_keys
class OnboardingConfigService:
"""Service for handling onboarding configuration and provider setup."""
def __init__(self):
self.api_key_manager = get_api_key_manager()
def get_onboarding_config(self) -> Dict[str, Any]:
"""Get onboarding configuration and requirements."""
return {
"total_steps": 6,
"steps": [
{
"number": 1,
"title": "AI LLM Providers",
"description": "Configure AI language model providers",
"required": True,
"providers": ["openai", "gemini", "anthropic"]
},
{
"number": 2,
"title": "Website Analysis",
"description": "Set up website analysis and crawling",
"required": True
},
{
"number": 3,
"title": "AI Research",
"description": "Configure AI research capabilities",
"required": True
},
{
"number": 4,
"title": "Personalization",
"description": "Set up personalization features",
"required": False
},
{
"number": 5,
"title": "Integrations",
"description": "Configure ALwrity integrations",
"required": False
},
{
"number": 6,
"title": "Complete Setup",
"description": "Finalize and complete onboarding",
"required": True
}
],
"requirements": {
"min_api_keys": 1,
"required_providers": ["openai"],
"optional_providers": ["gemini", "anthropic"]
}
}
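# Example (illustrative): callers typically derive the mandatory step numbers from
# this payload, e.g. [s["number"] for s in config["steps"] if s["required"]] -> [1, 2, 3, 6]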
async def get_provider_setup_info(self, provider: str) -> Dict[str, Any]:
"""Get setup information for a specific provider."""
try:
providers_info = self.get_all_providers_info()
if provider in providers_info:
return providers_info[provider]
else:
raise HTTPException(status_code=404, detail=f"Provider {provider} not found")
except Exception as e:
logger.error(f"Error getting provider setup info: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
def get_all_providers_info(self) -> Dict[str, Any]:
"""Get setup information for all providers."""
return {
"openai": {
"name": "OpenAI",
"description": "GPT-4 and GPT-3.5 models for content generation",
"setup_url": "https://platform.openai.com/api-keys",
"required_fields": ["api_key"],
"optional_fields": ["organization_id"]
},
"gemini": {
"name": "Google Gemini",
"description": "Google's advanced AI models for content creation",
"setup_url": "https://makersuite.google.com/app/apikey",
"required_fields": ["api_key"],
"optional_fields": []
},
"anthropic": {
"name": "Anthropic",
"description": "Claude models for sophisticated content generation",
"setup_url": "https://console.anthropic.com/",
"required_fields": ["api_key"],
"optional_fields": []
}
}
async def validate_provider_key(self, provider: str, api_key: str) -> Dict[str, Any]:
"""Validate a specific provider's API key."""
try:
# This would need to be implemented based on the actual validation logic
# For now, return a basic validation result
return {
"provider": provider,
"valid": True,
"message": f"API key for {provider} is valid"
}
except Exception as e:
logger.error(f"Error validating provider key: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
async def get_enhanced_validation_status(self) -> Dict[str, Any]:
"""Get enhanced validation status for all configured services."""
try:
return await check_all_api_keys(self.api_key_manager)
except Exception as e:
logger.error(f"Error getting enhanced validation status: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
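# Hedged sketch: one way validate_provider_key above could perform a real check for
# the OpenAI provider, assuming the httpx dependency is available. The endpoint and
# status-code semantics are OpenAI's documented behaviour; other providers would need
# their own probes.
async def _example_probe_openai_key(api_key: str) -> bool:
    import httpx
    async with httpx.AsyncClient(timeout=10.0) as client:
        resp = await client.get(
            "https://api.openai.com/v1/models",
            headers={"Authorization": f"Bearer {api_key}"},
        )
        # 200 means the key is accepted; 401 means it is missing, invalid, or revoked.
        return resp.status_code == 200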

View File

@@ -0,0 +1,73 @@
"""
Onboarding Control Service
Handles onboarding session control and management.
"""
from typing import Dict, Any
from fastapi import HTTPException
from loguru import logger
from services.onboarding.api_key_manager import get_onboarding_progress, get_onboarding_progress_for_user
class OnboardingControlService:
"""Service for handling onboarding control operations."""
def __init__(self):
pass
async def start_onboarding(self, current_user: Dict[str, Any]) -> Dict[str, Any]:
"""Start a new onboarding session."""
try:
user_id = str(current_user.get('id'))
progress = get_onboarding_progress_for_user(user_id)
progress.reset_progress()
return {
"message": "Onboarding started successfully",
"current_step": progress.current_step,
"started_at": progress.started_at
}
except Exception as e:
logger.error(f"Error starting onboarding: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
async def reset_onboarding(self) -> Dict[str, Any]:
"""Reset the onboarding progress."""
try:
progress = get_onboarding_progress()
progress.reset_progress()
return {
"message": "Onboarding progress reset successfully",
"current_step": progress.current_step,
"started_at": progress.started_at
}
except Exception as e:
logger.error(f"Error resetting onboarding: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
async def get_resume_info(self) -> Dict[str, Any]:
"""Get information for resuming onboarding."""
try:
progress = get_onboarding_progress()
if progress.is_completed:
return {
"can_resume": False,
"message": "Onboarding is already completed",
"completion_percentage": 100.0
}
resume_step = progress.get_resume_step()
return {
"can_resume": True,
"resume_step": resume_step,
"current_step": progress.current_step,
"completion_percentage": progress.get_completion_percentage(),
"started_at": progress.started_at,
"last_updated": progress.last_updated
}
except Exception as e:
logger.error(f"Error getting resume info: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")

View File

@@ -0,0 +1,197 @@
"""
Onboarding Summary Service
Handles the complex logic for generating comprehensive onboarding summaries.
"""
from typing import Dict, Any, Optional
from fastapi import HTTPException
from loguru import logger
from services.onboarding.api_key_manager import get_api_key_manager
from services.database import get_db
from services.onboarding.database_service import OnboardingDatabaseService
from services.website_analysis_service import WebsiteAnalysisService
from services.research_preferences_service import ResearchPreferencesService
from services.persona_analysis_service import PersonaAnalysisService
class OnboardingSummaryService:
"""Service for handling onboarding summary generation with user isolation."""
def __init__(self, user_id: str):
"""
Initialize service with user-specific context.
Args:
user_id: Clerk user ID from authenticated request
"""
self.user_id = user_id # Store Clerk user ID (string)
self.db_service = OnboardingDatabaseService()
logger.info(f"OnboardingSummaryService initialized for user {user_id} (database mode)")
async def get_onboarding_summary(self) -> Dict[str, Any]:
"""Get comprehensive onboarding summary for FinalStep."""
try:
# Get API keys
api_keys = self._get_api_keys()
# Get website analysis data
website_analysis = self._get_website_analysis()
# Get research preferences
research_preferences = self._get_research_preferences()
# Get personalization settings
personalization_settings = self._get_personalization_settings(research_preferences)
# Check persona generation readiness
persona_readiness = self._check_persona_readiness(website_analysis)
# Determine capabilities
capabilities = self._determine_capabilities(api_keys, website_analysis, research_preferences, personalization_settings, persona_readiness)
return {
"api_keys": api_keys,
"website_url": website_analysis.get('website_url') if website_analysis else None,
"style_analysis": website_analysis.get('style_analysis') if website_analysis else None,
"research_preferences": research_preferences,
"personalization_settings": personalization_settings,
"persona_readiness": persona_readiness,
"integrations": {}, # TODO: Implement integrations data
"capabilities": capabilities
}
except Exception as e:
logger.error(f"Error getting onboarding summary: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
def _get_api_keys(self) -> Dict[str, Any]:
"""Get configured API keys from database."""
try:
db = next(get_db())
api_keys = self.db_service.get_api_keys(self.user_id, db)
db.close()
if not api_keys:
return {
"openai": {"configured": False, "value": None},
"anthropic": {"configured": False, "value": None},
"google": {"configured": False, "value": None}
}
return {
"openai": {
"configured": bool(api_keys.get('openai_api_key')),
"value": api_keys.get('openai_api_key')[:8] + "..." if api_keys.get('openai_api_key') else None
},
"anthropic": {
"configured": bool(api_keys.get('anthropic_api_key')),
"value": api_keys.get('anthropic_api_key')[:8] + "..." if api_keys.get('anthropic_api_key') else None
},
"google": {
"configured": bool(api_keys.get('google_api_key')),
"value": api_keys.get('google_api_key')[:8] + "..." if api_keys.get('google_api_key') else None
}
}
except Exception as e:
logger.error(f"Error getting API keys: {str(e)}")
return {
"openai": {"configured": False, "value": None},
"anthropic": {"configured": False, "value": None},
"google": {"configured": False, "value": None}
}
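# Hedged refactor sketch: a small helper the service could adopt so the three masking
# expressions above share one code path; behaviour is unchanged.
def _example_mask_key(value: Optional[str]) -> Optional[str]:
    # Show only the first eight characters of a configured key, e.g. "sk-proj-...".
    return f"{value[:8]}..." if value else None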
def _get_website_analysis(self) -> Optional[Dict[str, Any]]:
"""Get website analysis data from database."""
try:
db = next(get_db())
website_data = self.db_service.get_website_analysis(self.user_id, db)
db.close()
return website_data
except Exception as e:
logger.error(f"Error getting website analysis: {str(e)}")
return None
async def get_website_analysis_data(self) -> Dict[str, Any]:
"""Get website analysis data for API endpoint."""
try:
website_analysis = self._get_website_analysis()
return {
"website_analysis": website_analysis,
"status": "success" if website_analysis else "no_data"
}
except Exception as e:
logger.error(f"Error in get_website_analysis_data: {str(e)}")
raise
def _get_research_preferences(self) -> Optional[Dict[str, Any]]:
"""Get research preferences from database."""
try:
db = next(get_db())
preferences = self.db_service.get_research_preferences(self.user_id, db)
db.close()
return preferences
except Exception as e:
logger.error(f"Error getting research preferences: {str(e)}")
return None
def _get_personalization_settings(self, research_preferences: Optional[Dict[str, Any]]) -> Dict[str, Any]:
"""Get personalization settings based on research preferences."""
if not research_preferences:
return {
"writing_style": "professional",
"target_audience": "general",
"content_focus": "informative"
}
return {
"writing_style": research_preferences.get('writing_style', 'professional'),
"target_audience": research_preferences.get('target_audience', 'general'),
"content_focus": research_preferences.get('content_focus', 'informative')
}
def _check_persona_readiness(self, website_analysis: Optional[Dict[str, Any]]) -> Dict[str, Any]:
"""Check if persona generation is ready based on available data."""
if not website_analysis:
return {
"ready": False,
"reason": "Website analysis not completed",
"missing_data": ["website_url", "style_analysis"]
}
required_fields = ['website_url', 'writing_style', 'target_audience']
missing_fields = [field for field in required_fields if not website_analysis.get(field)]
return {
"ready": len(missing_fields) == 0,
"reason": "All required data available" if len(missing_fields) == 0 else f"Missing: {', '.join(missing_fields)}",
"missing_data": missing_fields
}
def _determine_capabilities(self, api_keys: Dict[str, Any], website_analysis: Optional[Dict[str, Any]],
research_preferences: Optional[Dict[str, Any]],
personalization_settings: Dict[str, Any],
persona_readiness: Dict[str, Any]) -> Dict[str, Any]:
"""Determine available capabilities based on configured data."""
capabilities = {
"ai_content_generation": any(key.get("configured") for key in api_keys.values()),
"website_analysis": website_analysis is not None,
"research_capabilities": research_preferences is not None,
"persona_generation": persona_readiness.get("ready", False),
"content_optimization": website_analysis is not None and research_preferences is not None
}
return capabilities
async def get_research_preferences_data(self) -> Dict[str, Any]:
"""Get research preferences data for the user."""
try:
db = next(get_db())
research_prefs_service = ResearchPreferencesService(db)
# Use the new method that accepts user_id directly
result = research_prefs_service.get_research_preferences_by_user_id(self.user_id)
db.close()
return result
except Exception as e:
logger.error(f"Error getting research preferences data: {e}")
raise
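# Hedged refactor sketch: the _get_* helpers above close their session only on the
# happy path; a context manager like this (name assumed) would guarantee cleanup even
# when a query raises, without changing the helpers' return values.
from contextlib import contextmanager

@contextmanager
def _example_db_session():
    db = next(get_db())
    try:
        yield db
    finally:
        db.close()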

View File

@@ -0,0 +1,51 @@
"""
Persona Management Service
Handles persona generation and management for onboarding.
"""
from typing import Dict, Any
from fastapi import HTTPException
from loguru import logger
class PersonaManagementService:
"""Service for handling persona management operations."""
def __init__(self):
pass
async def check_persona_generation_readiness(self, user_id: int = 1) -> Dict[str, Any]:
"""Check if user has sufficient data for persona generation."""
try:
from api.persona import validate_persona_generation_readiness
return await validate_persona_generation_readiness(user_id)
except Exception as e:
logger.error(f"Error checking persona readiness: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
async def generate_persona_preview(self, user_id: int = 1) -> Dict[str, Any]:
"""Generate a preview of the writing persona without saving."""
try:
from api.persona import generate_persona_preview
return await generate_persona_preview(user_id)
except Exception as e:
logger.error(f"Error generating persona preview: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
async def generate_writing_persona(self, user_id: int = 1) -> Dict[str, Any]:
"""Generate and save a writing persona from onboarding data."""
try:
from api.persona import generate_persona, PersonaGenerationRequest
request = PersonaGenerationRequest(force_regenerate=False)
return await generate_persona(user_id, request)
except Exception as e:
logger.error(f"Error generating writing persona: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
async def get_user_writing_personas(self, user_id: int = 1) -> Dict[str, Any]:
"""Get all writing personas for the user."""
try:
from api.persona import get_user_personas
return await get_user_personas(user_id)
except Exception as e:
logger.error(f"Error getting user personas: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
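# Illustrative usage sketch: how an onboarding Step 4 handler might drive this service.
# The boolean "ready" flag is an assumption about the readiness payload; adjust to
# whatever api.persona.validate_persona_generation_readiness actually returns.
async def _example_step4_persona_flow(user_id: int) -> Dict[str, Any]:
    service = PersonaManagementService()
    readiness = await service.check_persona_generation_readiness(user_id)
    if readiness.get("ready"):  # assumed flag; see note above
        return await service.generate_writing_persona(user_id)
    return readiness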

View File

@@ -0,0 +1,610 @@
"""
Step 3 Research Service for Onboarding
This service handles the research phase of onboarding (Step 3), including
competitor discovery using Exa API and research data management.
Key Features:
- Competitor discovery using Exa API
- Research progress tracking
- Data storage and retrieval
- Integration with onboarding workflow
Author: ALwrity Team
Version: 1.0
Last Updated: January 2025
"""
from typing import Dict, List, Optional, Any
from datetime import datetime
from loguru import logger
from services.research.exa_service import ExaService
from services.database import get_db_session
from models.onboarding import OnboardingSession
from sqlalchemy.orm import Session
class Step3ResearchService:
"""
Service for managing Step 3 research phase of onboarding.
This service handles competitor discovery, research data storage,
and integration with the onboarding workflow.
"""
def __init__(self):
"""Initialize the Step 3 Research Service."""
self.exa_service = ExaService()
self.service_name = "step3_research"
logger.info(f"Initialized {self.service_name}")
async def discover_competitors_for_onboarding(
self,
user_url: str,
user_id: str,
industry_context: Optional[str] = None,
num_results: int = 25,
website_analysis_data: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""
Discover competitors for onboarding Step 3.
Args:
user_url: The user's website URL
user_id: Clerk user ID for finding the correct session
industry_context: Industry context for better discovery
num_results: Number of competitors to discover
website_analysis_data: Optional website analysis data from Step 2 used to sharpen targeting
Returns:
Dictionary containing competitor discovery results
"""
try:
logger.info(f"Starting research analysis for user {user_id}, URL: {user_url}")
# Find the correct onboarding session for this user
with get_db_session() as db:
session = db.query(OnboardingSession).filter(
OnboardingSession.user_id == user_id
).first()
if not session:
logger.error(f"No onboarding session found for user {user_id}")
return {
"success": False,
"error": f"No onboarding session found for user {user_id}"
}
actual_session_id = str(session.id) # Convert to string for consistency
logger.info(f"Found onboarding session {actual_session_id} for user {user_id}")
# Step 1: Discover social media accounts
logger.info("Step 1: Discovering social media accounts...")
social_media_results = await self.exa_service.discover_social_media_accounts(user_url)
if not social_media_results["success"]:
logger.warning(f"Social media discovery failed: {social_media_results.get('error')}")
# Continue with competitor discovery even if social media fails
social_media_results = {"success": False, "social_media_accounts": {}, "citations": []}
# Step 2: Discover competitors using Exa API
logger.info("Step 2: Discovering competitors...")
competitor_results = await self.exa_service.discover_competitors(
user_url=user_url,
num_results=num_results,
exclude_domains=None, # Let ExaService handle domain exclusion
industry_context=industry_context,
website_analysis_data=website_analysis_data
)
if not competitor_results["success"]:
logger.error(f"Competitor discovery failed: {competitor_results.get('error')}")
return competitor_results
# Process and enhance competitor data
enhanced_competitors = await self._enhance_competitor_data(
competitor_results["competitors"],
user_url,
industry_context
)
# Store research data in database
await self._store_research_data(
session_id=actual_session_id,
user_url=user_url,
competitors=enhanced_competitors,
industry_context=industry_context,
analysis_metadata={
**competitor_results,
"social_media_data": social_media_results
}
)
# Generate research summary
research_summary = self._generate_research_summary(
enhanced_competitors,
industry_context
)
logger.info(f"Successfully discovered {len(enhanced_competitors)} competitors for user {user_id}")
return {
"success": True,
"session_id": actual_session_id,
"user_url": user_url,
"competitors": enhanced_competitors,
"social_media_accounts": social_media_results.get("social_media_accounts", {}),
"social_media_citations": social_media_results.get("citations", []),
"research_summary": research_summary,
"total_competitors": len(enhanced_competitors),
"industry_context": industry_context,
"analysis_timestamp": datetime.utcnow().isoformat(),
"api_cost": competitor_results.get("api_cost", 0) + social_media_results.get("api_cost", 0)
}
except Exception as e:
logger.error(f"Error in competitor discovery for onboarding: {str(e)}")
return {
"success": False,
"error": str(e),
"session_id": actual_session_id if 'actual_session_id' in locals() else session_id,
"user_url": user_url
}
async def _enhance_competitor_data(
self,
competitors: List[Dict[str, Any]],
user_url: str,
industry_context: Optional[str]
) -> List[Dict[str, Any]]:
"""
Enhance competitor data with additional analysis.
Args:
competitors: Raw competitor data from Exa API
user_url: User's website URL for comparison
industry_context: Industry context
Returns:
List of enhanced competitor data
"""
enhanced_competitors = []
for competitor in competitors:
try:
# Add competitive analysis
competitive_analysis = self._analyze_competitor_competitiveness(
competitor,
user_url,
industry_context
)
# Add content strategy insights
content_insights = self._analyze_content_strategy(competitor)
# Add market positioning
market_positioning = self._analyze_market_positioning(competitor)
enhanced_competitor = {
**competitor,
"competitive_analysis": competitive_analysis,
"content_insights": content_insights,
"market_positioning": market_positioning,
"enhanced_timestamp": datetime.utcnow().isoformat()
}
enhanced_competitors.append(enhanced_competitor)
except Exception as e:
logger.warning(f"Error enhancing competitor data: {str(e)}")
enhanced_competitors.append(competitor)
return enhanced_competitors
def _analyze_competitor_competitiveness(
self,
competitor: Dict[str, Any],
user_url: str,
industry_context: Optional[str]
) -> Dict[str, Any]:
"""
Analyze competitor competitiveness.
Args:
competitor: Competitor data
user_url: User's website URL
industry_context: Industry context
Returns:
Dictionary of competitive analysis
"""
analysis = {
"threat_level": "medium",
"competitive_strengths": [],
"competitive_weaknesses": [],
"market_share_estimate": "unknown",
"differentiation_opportunities": []
}
# Analyze threat level based on relevance score
relevance_score = competitor.get("relevance_score", 0)
if relevance_score > 0.8:
analysis["threat_level"] = "high"
elif relevance_score < 0.4:
analysis["threat_level"] = "low"
# Analyze competitive strengths from content
summary = competitor.get("summary", "").lower()
highlights = competitor.get("highlights", [])
# Extract strengths from content analysis
if "innovative" in summary or "cutting-edge" in summary:
analysis["competitive_strengths"].append("Innovation leadership")
if "comprehensive" in summary or "complete" in summary:
analysis["competitive_strengths"].append("Comprehensive solution")
if any("enterprise" in highlight.lower() for highlight in highlights):
analysis["competitive_strengths"].append("Enterprise focus")
# Generate differentiation opportunities
if not any("saas" in summary for summary in [summary]):
analysis["differentiation_opportunities"].append("SaaS platform differentiation")
return analysis
def _analyze_content_strategy(self, competitor: Dict[str, Any]) -> Dict[str, Any]:
"""
Analyze competitor's content strategy.
Args:
competitor: Competitor data
Returns:
Dictionary of content strategy analysis
"""
strategy = {
"content_focus": "general",
"target_audience": "unknown",
"content_types": [],
"publishing_frequency": "unknown",
"content_quality": "medium"
}
summary = competitor.get("summary", "").lower()
title = competitor.get("title", "").lower()
# Analyze content focus
if "technical" in summary or "developer" in summary:
strategy["content_focus"] = "technical"
elif "business" in summary or "enterprise" in summary:
strategy["content_focus"] = "business"
elif "marketing" in summary or "seo" in summary:
strategy["content_focus"] = "marketing"
# Analyze target audience
if "startup" in summary or "small business" in summary:
strategy["target_audience"] = "startups_small_business"
elif "enterprise" in summary or "large" in summary:
strategy["target_audience"] = "enterprise"
elif "developer" in summary or "technical" in summary:
strategy["target_audience"] = "developers"
# Analyze content quality
if len(summary) > 300:
strategy["content_quality"] = "high"
elif len(summary) < 100:
strategy["content_quality"] = "low"
return strategy
def _analyze_market_positioning(self, competitor: Dict[str, Any]) -> Dict[str, Any]:
"""
Analyze competitor's market positioning.
Args:
competitor: Competitor data
Returns:
Dictionary of market positioning analysis
"""
positioning = {
"market_tier": "unknown",
"pricing_position": "unknown",
"brand_positioning": "unknown",
"competitive_advantage": "unknown"
}
summary = competitor.get("summary", "").lower()
title = competitor.get("title", "").lower()
# Analyze market tier
if "enterprise" in summary or "enterprise" in title:
positioning["market_tier"] = "enterprise"
elif "startup" in summary or "small" in summary:
positioning["market_tier"] = "startup_small_business"
elif "premium" in summary or "professional" in summary:
positioning["market_tier"] = "premium"
# Analyze brand positioning
if "innovative" in summary or "cutting-edge" in summary:
positioning["brand_positioning"] = "innovator"
elif "reliable" in summary or "trusted" in summary:
positioning["brand_positioning"] = "trusted_leader"
elif "affordable" in summary or "cost-effective" in summary:
positioning["brand_positioning"] = "value_leader"
return positioning
def _generate_research_summary(
self,
competitors: List[Dict[str, Any]],
industry_context: Optional[str]
) -> Dict[str, Any]:
"""
Generate a summary of the research findings.
Args:
competitors: List of enhanced competitor data
industry_context: Industry context
Returns:
Dictionary containing research summary
"""
if not competitors:
return {
"total_competitors": 0,
"market_insights": "No competitors found",
"key_findings": [],
"recommendations": []
}
# Analyze market landscape
threat_levels = [comp.get("competitive_analysis", {}).get("threat_level", "medium") for comp in competitors]
high_threat_count = threat_levels.count("high")
# Extract common themes
content_focuses = [comp.get("content_insights", {}).get("content_focus", "general") for comp in competitors]
content_focus_distribution = {focus: content_focuses.count(focus) for focus in set(content_focuses)}
# Generate key findings
key_findings = []
if high_threat_count > len(competitors) * 0.3:
key_findings.append("Highly competitive market with multiple strong players")
if "technical" in content_focus_distribution:
key_findings.append("Technical content is a key differentiator in this market")
# Generate recommendations
recommendations = []
if high_threat_count > 0:
recommendations.append("Focus on unique value proposition to differentiate from strong competitors")
if "technical" in content_focus_distribution and content_focus_distribution["technical"] > 2:
recommendations.append("Consider developing technical content strategy")
return {
"total_competitors": len(competitors),
"high_threat_competitors": high_threat_count,
"content_focus_distribution": content_focus_distribution,
"market_insights": f"Found {len(competitors)} competitors in {industry_context or 'the market'}",
"key_findings": key_findings,
"recommendations": recommendations,
"competitive_landscape": "moderate" if high_threat_count < len(competitors) * 0.5 else "high"
}
async def _store_research_data(
self,
session_id: str,
user_url: str,
competitors: List[Dict[str, Any]],
industry_context: Optional[str],
analysis_metadata: Dict[str, Any]
) -> bool:
"""
Store research data in the database.
Args:
session_id: Onboarding session ID
user_url: User's website URL
competitors: Competitor data
industry_context: Industry context
analysis_metadata: Analysis metadata
Returns:
Boolean indicating success
"""
try:
with get_db_session() as db:
# Get onboarding session
session = db.query(OnboardingSession).filter(
OnboardingSession.id == int(session_id)
).first()
if not session:
logger.error(f"Onboarding session {session_id} not found")
return False
# Store each competitor in CompetitorAnalysis table
from models.onboarding import CompetitorAnalysis
for competitor in competitors:
# Create competitor analysis record
competitor_record = CompetitorAnalysis(
session_id=session.id,
competitor_url=competitor.get("url", ""),
competitor_domain=competitor.get("domain", ""),
analysis_data={
"title": competitor.get("title", ""),
"summary": competitor.get("summary", ""),
"relevance_score": competitor.get("relevance_score", 0.5),
"highlights": competitor.get("highlights", []),
"favicon": competitor.get("favicon"),
"image": competitor.get("image"),
"published_date": competitor.get("published_date"),
"author": competitor.get("author"),
"competitive_analysis": competitor.get("competitive_insights", {}),
"content_insights": competitor.get("content_insights", {}),
"industry_context": industry_context,
"analysis_metadata": analysis_metadata,
"completed_at": datetime.utcnow().isoformat()
}
)
db.add(competitor_record)
# Store summary in session for quick access (backward compatibility)
research_summary = {
"user_url": user_url,
"total_competitors": len(competitors),
"industry_context": industry_context,
"completed_at": datetime.utcnow().isoformat(),
"analysis_metadata": analysis_metadata
}
# Store summary in session (this requires step_data field to exist)
# For now, we'll skip this since the model doesn't have step_data
# TODO: Add step_data JSON column to OnboardingSession model if needed
db.commit()
logger.info(f"Stored {len(competitors)} competitors in CompetitorAnalysis table for session {session_id}")
return True
except Exception as e:
logger.error(f"Error storing research data: {str(e)}", exc_info=True)
return False
async def get_research_data(self, session_id: str) -> Dict[str, Any]:
"""
Retrieve research data for a session.
Args:
session_id: Onboarding session ID
Returns:
Dictionary containing research data
"""
try:
with get_db_session() as db:
session = db.query(OnboardingSession).filter(
OnboardingSession.id == int(session_id)
).first()
if not session:
return {
"success": False,
"error": "Session not found"
}
# Check if step_data attribute exists (it may not be in the model)
# If it doesn't exist, try to get data from CompetitorAnalysis table
research_data = None
if hasattr(session, 'step_data') and session.step_data:
research_data = session.step_data.get("step3_research_data") if isinstance(session.step_data, dict) else None
# If not found in step_data, try CompetitorAnalysis table
if not research_data:
try:
from models.onboarding import CompetitorAnalysis
competitor_records = db.query(CompetitorAnalysis).filter(
CompetitorAnalysis.session_id == session.id
).all()
if competitor_records:
competitors = []
for record in competitor_records:
analysis_data = record.analysis_data or {}
competitor_info = {
"url": record.competitor_url,
"domain": record.competitor_domain or record.competitor_url,
"title": analysis_data.get("title", record.competitor_domain or ""),
"summary": analysis_data.get("summary", ""),
"relevance_score": analysis_data.get("relevance_score", 0.5),
"highlights": analysis_data.get("highlights", []),
"favicon": analysis_data.get("favicon"),
"image": analysis_data.get("image"),
"published_date": analysis_data.get("published_date"),
"author": analysis_data.get("author"),
"competitive_insights": analysis_data.get("competitive_analysis", {}),
"content_insights": analysis_data.get("content_insights", {})
}
competitors.append(competitor_info)
if competitors:
# Map competitor fields to match frontend expectations
mapped_competitors = []
for comp in competitors:
mapped_comp = {
**comp, # Keep all original fields
"name": comp.get("title") or comp.get("name") or comp.get("domain", ""),
"description": comp.get("summary") or comp.get("description", ""),
"similarity_score": comp.get("relevance_score") or comp.get("similarity_score", 0.5)
}
mapped_competitors.append(mapped_comp)
research_data = {
"competitors": mapped_competitors,
"completed_at": competitor_records[0].created_at.isoformat() if competitor_records[0].created_at else None
}
except Exception as e:
logger.warning(f"Could not retrieve competitors from CompetitorAnalysis table: {e}")
if not research_data:
return {
"success": False,
"error": "No research data found for this session"
}
return {
"success": True,
"step3_research_data": research_data,
"research_data": research_data # Keep for backward compatibility
}
except Exception as e:
logger.error(f"Error retrieving research data: {str(e)}")
return {
"success": False,
"error": str(e)
}
def _extract_domain(self, url: str) -> str:
"""
Extract domain from URL.
Args:
url: Website URL
Returns:
Domain name
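Example:
    _extract_domain("https://blog.example.com/post") -> "blog.example.com"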
"""
try:
from urllib.parse import urlparse
parsed = urlparse(url)
return parsed.netloc
except Exception:
return url
async def health_check(self) -> Dict[str, Any]:
"""
Check the health of the Step 3 Research Service.
Returns:
Dictionary containing service health status
"""
try:
exa_health = await self.exa_service.health_check()
return {
"status": "healthy" if exa_health["status"] == "healthy" else "degraded",
"service": self.service_name,
"exa_service_status": exa_health["status"],
"timestamp": datetime.utcnow().isoformat()
}
except Exception as e:
return {
"status": "error",
"service": self.service_name,
"error": str(e),
"timestamp": datetime.utcnow().isoformat()
}

View File

@@ -0,0 +1,495 @@
"""
Step 3 Research Routes for Onboarding
FastAPI routes for Step 3 research phase of onboarding,
including competitor discovery and research data management.
Author: ALwrity Team
Version: 1.0
Last Updated: January 2025
"""
from fastapi import APIRouter, HTTPException, BackgroundTasks, Depends
from pydantic import BaseModel, HttpUrl, Field
from typing import Dict, List, Optional, Any
from datetime import datetime
import traceback
from loguru import logger
from middleware.auth_middleware import get_current_user
from .step3_research_service import Step3ResearchService
from services.seo_tools.sitemap_service import SitemapService
router = APIRouter(prefix="/api/onboarding/step3", tags=["Onboarding Step 3 - Research"])
# Request/Response Models
class CompetitorDiscoveryRequest(BaseModel):
"""Request model for competitor discovery."""
session_id: Optional[str] = Field(None, description="Deprecated - user identification comes from auth token")
user_url: str = Field(..., description="User's website URL")
industry_context: Optional[str] = Field(None, description="Industry context for better discovery")
num_results: int = Field(25, ge=1, le=100, description="Number of competitors to discover")
website_analysis_data: Optional[Dict[str, Any]] = Field(None, description="Website analysis data from Step 2 for better targeting")
class CompetitorDiscoveryResponse(BaseModel):
"""Response model for competitor discovery."""
success: bool
message: str
session_id: str
user_url: str
competitors: Optional[List[Dict[str, Any]]] = None
social_media_accounts: Optional[Dict[str, str]] = None
social_media_citations: Optional[List[Dict[str, Any]]] = None
research_summary: Optional[Dict[str, Any]] = None
total_competitors: Optional[int] = None
industry_context: Optional[str] = None
analysis_timestamp: Optional[str] = None
api_cost: Optional[float] = None
error: Optional[str] = None
class ResearchDataRequest(BaseModel):
"""Request model for retrieving research data."""
session_id: str = Field(..., description="Onboarding session ID")
class ResearchDataResponse(BaseModel):
"""Response model for research data retrieval."""
success: bool
message: str
session_id: Optional[str] = None
research_data: Optional[Dict[str, Any]] = None
error: Optional[str] = None
class ResearchHealthResponse(BaseModel):
"""Response model for research service health check."""
success: bool
message: str
service_status: Optional[Dict[str, Any]] = None
timestamp: Optional[str] = None
class SitemapAnalysisRequest(BaseModel):
"""Request model for sitemap analysis in onboarding context."""
user_url: str = Field(..., description="User's website URL")
sitemap_url: Optional[str] = Field(None, description="Custom sitemap URL (defaults to user_url/sitemap.xml)")
competitors: Optional[List[str]] = Field(None, description="List of competitor URLs for benchmarking")
industry_context: Optional[str] = Field(None, description="Industry context for analysis")
analyze_content_trends: bool = Field(True, description="Whether to analyze content trends")
analyze_publishing_patterns: bool = Field(True, description="Whether to analyze publishing patterns")
class SitemapAnalysisResponse(BaseModel):
"""Response model for sitemap analysis."""
success: bool
message: str
user_url: str
sitemap_url: str
analysis_data: Optional[Dict[str, Any]] = None
onboarding_insights: Optional[Dict[str, Any]] = None
analysis_timestamp: Optional[str] = None
discovery_method: Optional[str] = None
error: Optional[str] = None
# Initialize services
step3_research_service = Step3ResearchService()
sitemap_service = SitemapService()
@router.post("/discover-competitors", response_model=CompetitorDiscoveryResponse)
async def discover_competitors(
request: CompetitorDiscoveryRequest,
background_tasks: BackgroundTasks,
current_user: dict = Depends(get_current_user)
) -> CompetitorDiscoveryResponse:
"""
Discover competitors for the user's website using Exa API with user isolation.
This endpoint performs neural search to find semantically similar websites
and analyzes their content for competitive intelligence.
"""
try:
# Get Clerk user ID for user isolation
clerk_user_id = str(current_user.get('id'))
logger.info(f"Starting competitor discovery for authenticated user {clerk_user_id}, URL: {request.user_url}")
logger.info(f"Request data - user_url: '{request.user_url}', industry_context: '{request.industry_context}', num_results: {request.num_results}")
# Validate URL format
if not request.user_url.startswith(('http://', 'https://')):
request.user_url = f"https://{request.user_url}"
# Perform competitor discovery with Clerk user ID
result = await step3_research_service.discover_competitors_for_onboarding(
user_url=request.user_url,
user_id=clerk_user_id, # Use Clerk user ID to find correct session
industry_context=request.industry_context,
num_results=request.num_results,
website_analysis_data=request.website_analysis_data
)
if result["success"]:
logger.info(f"✅ Successfully discovered {result['total_competitors']} competitors for user {clerk_user_id}")
return CompetitorDiscoveryResponse(
success=True,
message=f"Successfully discovered {result['total_competitors']} competitors and social media accounts",
session_id=result["session_id"],
user_url=result["user_url"],
competitors=result["competitors"],
social_media_accounts=result.get("social_media_accounts"),
social_media_citations=result.get("social_media_citations"),
research_summary=result["research_summary"],
total_competitors=result["total_competitors"],
industry_context=result["industry_context"],
analysis_timestamp=result["analysis_timestamp"],
api_cost=result["api_cost"]
)
else:
logger.error(f"❌ Competitor discovery failed for user {clerk_user_id}: {result.get('error')}")
return CompetitorDiscoveryResponse(
success=False,
message="Competitor discovery failed",
session_id=clerk_user_id,
user_url=result.get("user_url", request.user_url),
error=result.get("error", "Unknown error occurred")
)
except HTTPException:
raise
except Exception as e:
logger.error(f"❌ Error in competitor discovery endpoint: {str(e)}")
logger.error(traceback.format_exc())
# Return error response with Clerk user ID
clerk_user_id = str(current_user.get('id', 'unknown'))
return CompetitorDiscoveryResponse(
success=False,
message="Internal server error during competitor discovery",
session_id=clerk_user_id,
user_url=request.user_url,
error=str(e)
)
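# Illustrative sketch (not used at runtime): a minimal request body for the endpoint above,
# built from the CompetitorDiscoveryRequest fields. The URL and industry values are placeholders,
# and the Authorization header is assumed to carry a Clerk JWT as elsewhere in the onboarding API.
#
#   POST /api/onboarding/step3/discover-competitors
#   {
#       "user_url": "https://example.com",
#       "industry_context": "B2B SaaS",
#       "num_results": 25,
#       "website_analysis_data": null
#   }
#
# A successful response mirrors CompetitorDiscoveryResponse: success, message, session_id,
# user_url, competitors, social_media_accounts, research_summary, total_competitors, api_cost.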
@router.post("/research-data", response_model=ResearchDataResponse)
async def get_research_data(request: ResearchDataRequest) -> ResearchDataResponse:
"""
Retrieve research data for a specific onboarding session.
This endpoint returns the stored research data including competitor analysis
and research summary for the given session.
"""
try:
logger.info(f"Retrieving research data for session {request.session_id}")
# Validate session ID
if not request.session_id or len(request.session_id) < 10:
raise HTTPException(
status_code=400,
detail="Invalid session ID"
)
# Retrieve research data
result = await step3_research_service.get_research_data(request.session_id)
if result["success"]:
logger.info(f"Successfully retrieved research data for session {request.session_id}")
return ResearchDataResponse(
success=True,
message="Research data retrieved successfully",
session_id=result["session_id"],
research_data=result["research_data"]
)
else:
logger.warning(f"No research data found for session {request.session_id}")
return ResearchDataResponse(
success=False,
message="No research data found for this session",
session_id=request.session_id,
error=result.get("error", "Research data not found")
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error retrieving research data: {str(e)}")
logger.error(traceback.format_exc())
return ResearchDataResponse(
success=False,
message="Internal server error while retrieving research data",
session_id=request.session_id,
error=str(e)
)
@router.get("/health", response_model=ResearchHealthResponse)
async def health_check() -> ResearchHealthResponse:
"""
Check the health of the Step 3 research service.
This endpoint provides health status information for the research service
including Exa API connectivity and service status.
"""
try:
logger.info("Performing Step 3 research service health check")
health_status = await step3_research_service.health_check()
if health_status["status"] == "healthy":
return ResearchHealthResponse(
success=True,
message="Step 3 research service is healthy",
service_status=health_status,
timestamp=health_status["timestamp"]
)
else:
return ResearchHealthResponse(
success=False,
message=f"Step 3 research service is {health_status['status']}",
service_status=health_status,
timestamp=health_status["timestamp"]
)
except Exception as e:
logger.error(f"Error in health check: {str(e)}")
logger.error(traceback.format_exc())
return ResearchHealthResponse(
success=False,
message="Health check failed",
error=str(e),
timestamp=datetime.utcnow().isoformat()
)
@router.post("/validate-session")
async def validate_session(session_id: str) -> Dict[str, Any]:
"""
Validate that a session exists and is ready for Step 3.
This endpoint checks if the session exists and has completed previous steps.
"""
try:
logger.info(f"Validating session {session_id} for Step 3")
# Basic validation
if not session_id or len(session_id) < 10:
raise HTTPException(
status_code=400,
detail="Invalid session ID format"
)
# Check if session has completed Step 2 (website analysis)
# This would integrate with the existing session validation logic
return {
"success": True,
"message": "Session is valid for Step 3",
"session_id": session_id,
"ready_for_step3": True
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error validating session: {str(e)}")
return {
"success": False,
"message": "Session validation failed",
"error": str(e)
}
@router.get("/cost-estimate")
async def get_cost_estimate(
num_results: int = 25,
include_content: bool = True
) -> Dict[str, Any]:
"""
Get cost estimate for competitor discovery.
This endpoint provides cost estimates for Exa API usage
to help users understand the cost of competitor discovery.
"""
try:
logger.info(f"Getting cost estimate for {num_results} results, content: {include_content}")
cost_estimate = step3_research_service.exa_service.get_cost_estimate(
num_results=num_results,
include_content=include_content
)
return {
"success": True,
"cost_estimate": cost_estimate,
"message": "Cost estimate calculated successfully"
}
except Exception as e:
logger.error(f"Error calculating cost estimate: {str(e)}")
return {
"success": False,
"message": "Failed to calculate cost estimate",
"error": str(e)
}
@router.post("/discover-sitemap")
async def discover_sitemap(
request: SitemapAnalysisRequest,
current_user: Dict[str, Any] = Depends(get_current_user)
) -> Dict[str, Any]:
"""
Discover the sitemap URL for a given website using intelligent search.
This endpoint attempts to find the sitemap URL by checking robots.txt
and common sitemap locations.
"""
try:
logger.info(f"Discovering sitemap for user: {current_user.get('user_id', 'unknown')}")
logger.info(f"Sitemap discovery request: {request.user_url}")
# Use intelligent sitemap discovery
discovered_sitemap = await sitemap_service.discover_sitemap_url(request.user_url)
if discovered_sitemap:
return {
"success": True,
"message": "Sitemap discovered successfully",
"user_url": request.user_url,
"sitemap_url": discovered_sitemap,
"discovery_method": "intelligent_search"
}
else:
# Provide fallback URL
base_url = request.user_url.rstrip('/')
fallback_url = f"{base_url}/sitemap.xml"
return {
"success": False,
"message": "No sitemap found using intelligent discovery",
"user_url": request.user_url,
"fallback_url": fallback_url,
"discovery_method": "fallback"
}
except Exception as e:
logger.error(f"Error in sitemap discovery: {str(e)}")
logger.error(f"Traceback: {traceback.format_exc()}")
return {
"success": False,
"message": "An unexpected error occurred during sitemap discovery",
"user_url": request.user_url,
"error": str(e)
}
@router.post("/analyze-sitemap", response_model=SitemapAnalysisResponse)
async def analyze_sitemap_for_onboarding(
request: SitemapAnalysisRequest,
background_tasks: BackgroundTasks,
current_user: Dict[str, Any] = Depends(get_current_user)
) -> SitemapAnalysisResponse:
"""
Analyze user's sitemap for competitive positioning and content strategy insights.
This endpoint provides enhanced sitemap analysis specifically designed for
onboarding Step 3 competitive analysis, including competitive positioning
insights and content strategy recommendations.
"""
try:
logger.info(f"Starting sitemap analysis for user: {current_user.get('user_id', 'unknown')}")
logger.info(f"Sitemap analysis request: {request.user_url}")
# Determine sitemap URL using intelligent discovery
sitemap_url = request.sitemap_url
if not sitemap_url:
# Use intelligent sitemap discovery
discovered_sitemap = await sitemap_service.discover_sitemap_url(request.user_url)
if discovered_sitemap:
sitemap_url = discovered_sitemap
logger.info(f"Discovered sitemap via intelligent search: {sitemap_url}")
else:
# Fallback to standard location if discovery fails
base_url = request.user_url.rstrip('/')
sitemap_url = f"{base_url}/sitemap.xml"
logger.info(f"Using fallback sitemap URL: {sitemap_url}")
logger.info(f"Analyzing sitemap: {sitemap_url}")
# Run onboarding-specific sitemap analysis
analysis_result = await sitemap_service.analyze_sitemap_for_onboarding(
sitemap_url=sitemap_url,
user_url=request.user_url,
competitors=request.competitors,
industry_context=request.industry_context,
analyze_content_trends=request.analyze_content_trends,
analyze_publishing_patterns=request.analyze_publishing_patterns
)
# Check if analysis was successful
if analysis_result.get("error"):
logger.error(f"Sitemap analysis failed: {analysis_result['error']}")
return SitemapAnalysisResponse(
success=False,
message="Sitemap analysis failed",
user_url=request.user_url,
sitemap_url=sitemap_url,
error=analysis_result["error"]
)
# Extract onboarding insights
onboarding_insights = analysis_result.get("onboarding_insights", {})
# Log successful analysis
logger.info(f"Sitemap analysis completed successfully for {request.user_url}")
logger.info(f"Found {analysis_result.get('structure_analysis', {}).get('total_urls', 0)} URLs")
# Background task to store analysis results (if needed)
background_tasks.add_task(
_log_sitemap_analysis_result,
current_user.get('user_id'),
request.user_url,
analysis_result
)
# Determine discovery method
discovery_method = "fallback"
if request.sitemap_url:
discovery_method = "user_provided"
elif discovered_sitemap:
discovery_method = "intelligent_search"
return SitemapAnalysisResponse(
success=True,
message="Sitemap analysis completed successfully",
user_url=request.user_url,
sitemap_url=sitemap_url,
analysis_data=analysis_result,
onboarding_insights=onboarding_insights,
analysis_timestamp=datetime.utcnow().isoformat(),
discovery_method=discovery_method
)
except Exception as e:
logger.error(f"Error in sitemap analysis: {str(e)}")
logger.error(f"Traceback: {traceback.format_exc()}")
return SitemapAnalysisResponse(
success=False,
message="An unexpected error occurred during sitemap analysis",
user_url=request.user_url,
sitemap_url=sitemap_url or f"{request.user_url.rstrip('/')}/sitemap.xml",
error=str(e)
)
async def _log_sitemap_analysis_result(
user_id: str,
user_url: str,
analysis_result: Dict[str, Any]
) -> None:
"""Background task to log sitemap analysis results."""
try:
logger.info(f"Logging sitemap analysis result for user {user_id}")
# Add any logging or storage logic here if needed
# For now, just log the completion
logger.info(f"Sitemap analysis logged for {user_url}")
except Exception as e:
logger.error(f"Error logging sitemap analysis result: {e}")

View File

@@ -0,0 +1,747 @@
"""
Step 4 Persona Generation Routes
Handles AI writing persona generation using the sophisticated persona system.
"""
import asyncio
from typing import Dict, Any, List, Optional, Union
from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks
from pydantic import BaseModel
from loguru import logger
import os
# Rate limiting configuration
RATE_LIMIT_DELAY_SECONDS = 2.0 # Delay between API calls to prevent quota exhaustion
# Task management for long-running persona generation
import uuid
from datetime import datetime, timedelta
from services.persona.core_persona.core_persona_service import CorePersonaService
from services.persona.enhanced_linguistic_analyzer import EnhancedLinguisticAnalyzer
from services.persona.persona_quality_improver import PersonaQualityImprover
from middleware.auth_middleware import get_current_user
from services.user_api_key_context import user_api_keys
# In-memory task storage (in production, use Redis or database)
persona_tasks: Dict[str, Dict[str, Any]] = {}
# In-memory latest persona cache per user (24h TTL)
persona_latest_cache: Dict[str, Dict[str, Any]] = {}
PERSONA_CACHE_TTL_HOURS = 24
router = APIRouter()
# Initialize services
core_persona_service = CorePersonaService()
linguistic_analyzer = EnhancedLinguisticAnalyzer()
quality_improver = PersonaQualityImprover()
def _extract_user_id(user: Dict[str, Any]) -> str:
"""Extract a stable user ID from Clerk-authenticated user payloads.
Prefers 'clerk_user_id' or 'id', falls back to 'user_id', else 'unknown'.
"""
if not isinstance(user, dict):
return 'unknown'
return (
user.get('clerk_user_id')
or user.get('id')
or user.get('user_id')
or 'unknown'
)
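# Illustrative examples of the fallback order above (not executed):
#   _extract_user_id({"clerk_user_id": "user_abc"})        -> "user_abc"
#   _extract_user_id({"id": "user_abc", "user_id": "x"})   -> "user_abc"
#   _extract_user_id({"user_id": "legacy_123"})            -> "legacy_123"
#   _extract_user_id("not a dict")                          -> "unknown"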
class PersonaGenerationRequest(BaseModel):
"""Request model for persona generation."""
onboarding_data: Dict[str, Any]
selected_platforms: List[str] = ["linkedin", "blog"]
user_preferences: Optional[Dict[str, Any]] = None
class PersonaGenerationResponse(BaseModel):
"""Response model for persona generation."""
success: bool
core_persona: Optional[Dict[str, Any]] = None
platform_personas: Optional[Dict[str, Any]] = None
quality_metrics: Optional[Dict[str, Any]] = None
error: Optional[str] = None
class PersonaQualityRequest(BaseModel):
"""Request model for persona quality assessment."""
core_persona: Dict[str, Any]
platform_personas: Dict[str, Any]
user_feedback: Optional[Dict[str, Any]] = None
class PersonaQualityResponse(BaseModel):
"""Response model for persona quality assessment."""
success: bool
quality_metrics: Optional[Dict[str, Any]] = None
recommendations: Optional[List[str]] = None
error: Optional[str] = None
class PersonaTaskStatus(BaseModel):
"""Response model for persona generation task status."""
task_id: str
status: str # 'pending', 'running', 'completed', 'failed'
progress: int # 0-100
current_step: str
progress_messages: List[Dict[str, Any]] = []
result: Optional[Dict[str, Any]] = None
error: Optional[str] = None
created_at: str
updated_at: str
@router.post("/step4/generate-personas-async", response_model=Dict[str, str])
async def generate_writing_personas_async(
request: Union[PersonaGenerationRequest, Dict[str, Any]],
current_user: Dict[str, Any] = Depends(get_current_user),
background_tasks: BackgroundTasks = BackgroundTasks()
):
"""
Start persona generation as an async task and return task ID for polling.
"""
try:
# Handle both PersonaGenerationRequest and dict inputs
if isinstance(request, dict):
persona_request = PersonaGenerationRequest(**request)
else:
persona_request = request
# If fresh cache exists for this user, short-circuit and return a completed task
user_id = _extract_user_id(current_user)
cached = persona_latest_cache.get(user_id)
if cached:
cached_ts = cached.get("timestamp")
ts = datetime.fromisoformat(cached_ts) if isinstance(cached_ts, str) else None
if ts and (datetime.now() - ts) <= timedelta(hours=PERSONA_CACHE_TTL_HOURS):
task_id = str(uuid.uuid4())
persona_tasks[task_id] = {
"task_id": task_id,
"status": "completed",
"progress": 100,
"current_step": "Persona loaded from cache",
"progress_messages": [
{"timestamp": datetime.now().isoformat(), "message": "Loaded cached persona", "progress": 100}
],
"result": {
"success": True,
"core_persona": cached.get("core_persona"),
"platform_personas": cached.get("platform_personas", {}),
"quality_metrics": cached.get("quality_metrics", {}),
},
"error": None,
"created_at": datetime.now().isoformat(),
"updated_at": datetime.now().isoformat(),
"user_id": user_id,
"request_data": (PersonaGenerationRequest(**(request if isinstance(request, dict) else request.dict())).dict()) if request else {}
}
logger.info(f"Cache hit for user {user_id} - returning completed task without regeneration: {task_id}")
return {
"task_id": task_id,
"status": "completed",
"message": "Persona loaded from cache"
}
# Generate unique task ID
task_id = str(uuid.uuid4())
# Initialize task status
persona_tasks[task_id] = {
"task_id": task_id,
"status": "pending",
"progress": 0,
"current_step": "Initializing persona generation...",
"progress_messages": [],
"result": None,
"error": None,
"created_at": datetime.now().isoformat(),
"updated_at": datetime.now().isoformat(),
"user_id": user_id,
"request_data": persona_request.dict()
}
# Start background task
background_tasks.add_task(
execute_persona_generation_task,
task_id,
persona_request,
current_user
)
logger.info(f"Started async persona generation task: {task_id}")
logger.info(f"Background task added successfully for task: {task_id}")
# Test: Add a simple background task to verify background task execution
def test_simple_task():
logger.info(f"TEST: Simple background task executed for {task_id}")
background_tasks.add_task(test_simple_task)
logger.info(f"TEST: Simple background task added for {task_id}")
return {
"task_id": task_id,
"status": "pending",
"message": "Persona generation started. Use task_id to poll for progress."
}
except Exception as e:
logger.error(f"Failed to start persona generation task: {str(e)}")
raise HTTPException(status_code=500, detail=f"Failed to start task: {str(e)}")
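# Illustrative polling sketch (assumptions noted): the mount prefix for this router is not shown
# in this module, so the paths below are relative to wherever it is included; the httpx client is
# an assumption for demonstration. Flow implied by the routes here: start the async task, then
# poll the task endpoint until it completes or fails.
#
#   import asyncio
#   import httpx
#
#   async def poll_persona_generation(client: httpx.AsyncClient, payload: dict) -> dict:
#       start = (await client.post("/step4/generate-personas-async", json=payload)).json()
#       task_id = start["task_id"]
#       while True:
#           task = (await client.get(f"/step4/persona-task/{task_id}")).json()
#           if task["status"] in ("completed", "failed"):
#               return task
#           await asyncio.sleep(2)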
@router.get("/step4/persona-latest", response_model=Dict[str, Any])
async def get_latest_persona(current_user: Dict[str, Any] = Depends(get_current_user)):
"""Return latest cached persona for the current user if available and fresh."""
try:
user_id = _extract_user_id(current_user)
cached = persona_latest_cache.get(user_id)
if not cached:
raise HTTPException(status_code=404, detail="No cached persona found")
ts = datetime.fromisoformat(cached["timestamp"]) if isinstance(cached.get("timestamp"), str) else None
if not ts or (datetime.now() - ts) > timedelta(hours=PERSONA_CACHE_TTL_HOURS):
# Expired
persona_latest_cache.pop(user_id, None)
raise HTTPException(status_code=404, detail="Cached persona expired")
return {"success": True, "persona": cached}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error getting latest persona: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/step4/persona-save", response_model=Dict[str, Any])
async def save_persona_update(
request: Dict[str, Any],
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""Save/overwrite latest persona cache for current user (from edited UI)."""
try:
user_id = _extract_user_id(current_user)
payload = {
"success": True,
"core_persona": request.get("core_persona"),
"platform_personas": request.get("platform_personas", {}),
"quality_metrics": request.get("quality_metrics", {}),
"selected_platforms": request.get("selected_platforms", []),
"timestamp": datetime.now().isoformat()
}
persona_latest_cache[user_id] = payload
logger.info(f"Saved latest persona to cache for user {user_id}")
return {"success": True}
except Exception as e:
logger.error(f"Error saving latest persona: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/step4/persona-task/{task_id}", response_model=PersonaTaskStatus)
async def get_persona_task_status(task_id: str):
"""
Get the status of a persona generation task.
"""
if task_id not in persona_tasks:
raise HTTPException(status_code=404, detail="Task not found")
task = persona_tasks[task_id]
# Clean up old tasks (older than 1 hour)
if datetime.now() - datetime.fromisoformat(task["created_at"]) > timedelta(hours=1):
del persona_tasks[task_id]
raise HTTPException(status_code=404, detail="Task expired")
return PersonaTaskStatus(**task)
@router.post("/step4/generate-personas", response_model=PersonaGenerationResponse)
async def generate_writing_personas(
request: Union[PersonaGenerationRequest, Dict[str, Any]],
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""
Generate AI writing personas using the sophisticated persona system with rate-limited sequential execution.
OPTIMIZED APPROACH:
1. Generate core persona (1 API call)
2. Sequential platform adaptations with rate limiting (1 API call per platform)
3. Quality assessment (no additional API calls - uses existing data)
Total API calls: 1 + N platforms (vs previous: 1 + N + 1 = N + 2)
"""
try:
logger.info(f"Starting OPTIMIZED persona generation for user: {current_user.get('user_id', 'unknown')}")
# Handle both PersonaGenerationRequest and dict inputs
if isinstance(request, dict):
# Convert dict to PersonaGenerationRequest
persona_request = PersonaGenerationRequest(**request)
else:
persona_request = request
logger.info(f"Selected platforms: {persona_request.selected_platforms}")
# Step 1: Generate core persona (1 API call)
logger.info("Step 1: Generating core persona...")
core_persona = await asyncio.get_event_loop().run_in_executor(
None,
core_persona_service.generate_core_persona,
persona_request.onboarding_data
)
# Add small delay after core persona generation
await asyncio.sleep(1.0)
if "error" in core_persona:
logger.error(f"Core persona generation failed: {core_persona['error']}")
return PersonaGenerationResponse(
success=False,
error=f"Core persona generation failed: {core_persona['error']}"
)
# Step 2: Generate platform adaptations with rate limiting (N API calls with delays)
logger.info(f"Step 2: Generating platform adaptations with rate limiting for: {persona_request.selected_platforms}")
platform_personas = {}
# Process platforms sequentially with small delays to avoid rate limits
for i, platform in enumerate(persona_request.selected_platforms):
try:
logger.info(f"Generating {platform} persona ({i+1}/{len(persona_request.selected_platforms)})")
# Add delay between API calls to prevent rate limiting
if i > 0: # Skip delay for first platform
logger.info(f"Rate limiting: Waiting {RATE_LIMIT_DELAY_SECONDS}s before next API call...")
await asyncio.sleep(RATE_LIMIT_DELAY_SECONDS)
# Generate platform persona
result = await generate_single_platform_persona_async(
core_persona,
platform,
persona_request.onboarding_data
)
if isinstance(result, Exception):
error_msg = str(result)
logger.error(f"Platform {platform} generation failed: {error_msg}")
platform_personas[platform] = {"error": error_msg}
elif "error" in result:
error_msg = result['error']
logger.error(f"Platform {platform} generation failed: {error_msg}")
platform_personas[platform] = result
# Check for rate limit errors and suggest retry
if "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower():
logger.warning(f"⚠️ Rate limit detected for {platform}. Consider increasing RATE_LIMIT_DELAY_SECONDS")
else:
platform_personas[platform] = result
logger.info(f"{platform} persona generated successfully")
except Exception as e:
logger.error(f"Platform {platform} generation error: {str(e)}")
platform_personas[platform] = {"error": str(e)}
# Step 3: Assess quality (no additional API calls - uses existing data)
logger.info("Step 3: Assessing persona quality...")
quality_metrics = await assess_persona_quality_internal(
core_persona,
platform_personas,
persona_request.user_preferences
)
# Log performance metrics
total_platforms = len(persona_request.selected_platforms)
successful_platforms = len([p for p in platform_personas.values() if "error" not in p])
logger.info(f"✅ Persona generation completed: {successful_platforms}/{total_platforms} platforms successful")
logger.info(f"📊 API calls made: 1 (core) + {total_platforms} (platforms) = {1 + total_platforms} total")
logger.info(f"⏱️ Rate limiting: Sequential processing with 2s delays to prevent quota exhaustion")
return PersonaGenerationResponse(
success=True,
core_persona=core_persona,
platform_personas=platform_personas,
quality_metrics=quality_metrics
)
except Exception as e:
logger.error(f"Persona generation error: {str(e)}")
return PersonaGenerationResponse(
success=False,
error=f"Persona generation failed: {str(e)}"
)
@router.post("/step4/assess-quality", response_model=PersonaQualityResponse)
async def assess_persona_quality(
request: Union[PersonaQualityRequest, Dict[str, Any]],
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""
Assess the quality of generated personas and provide improvement recommendations.
"""
try:
logger.info(f"Assessing persona quality for user: {current_user.get('user_id', 'unknown')}")
# Handle both PersonaQualityRequest and dict inputs
if isinstance(request, dict):
# Convert dict to PersonaQualityRequest
quality_request = PersonaQualityRequest(**request)
else:
quality_request = request
quality_metrics = await assess_persona_quality_internal(
quality_request.core_persona,
quality_request.platform_personas,
quality_request.user_feedback
)
return PersonaQualityResponse(
success=True,
quality_metrics=quality_metrics,
recommendations=quality_metrics.get('recommendations', [])
)
except Exception as e:
logger.error(f"Quality assessment error: {str(e)}")
return PersonaQualityResponse(
success=False,
error=f"Quality assessment failed: {str(e)}"
)
@router.post("/step4/regenerate-persona")
async def regenerate_persona(
request: Union[PersonaGenerationRequest, Dict[str, Any]],
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""
Regenerate persona with different parameters or improved analysis.
"""
try:
logger.info(f"Regenerating persona for user: {current_user.get('user_id', 'unknown')}")
# Use the same generation logic but with potentially different parameters
return await generate_writing_personas(request, current_user)
except Exception as e:
logger.error(f"Persona regeneration error: {str(e)}")
return PersonaGenerationResponse(
success=False,
error=f"Persona regeneration failed: {str(e)}"
)
@router.post("/step4/test-background-task")
async def test_background_task(
background_tasks: BackgroundTasks = BackgroundTasks()
):
"""Test endpoint to verify background task execution."""
def simple_background_task():
logger.info("BACKGROUND TASK EXECUTED SUCCESSFULLY!")
return "Task completed"
background_tasks.add_task(simple_background_task)
logger.info("Background task added to queue")
return {"message": "Background task added", "status": "success"}
@router.get("/step4/persona-options")
async def get_persona_generation_options(
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""
Get available options for persona generation (platforms, preferences, etc.).
"""
try:
return {
"success": True,
"available_platforms": [
{"id": "linkedin", "name": "LinkedIn", "description": "Professional networking and thought leadership"},
{"id": "facebook", "name": "Facebook", "description": "Social media and community building"},
{"id": "twitter", "name": "Twitter", "description": "Micro-blogging and real-time updates"},
{"id": "blog", "name": "Blog", "description": "Long-form content and SEO optimization"},
{"id": "instagram", "name": "Instagram", "description": "Visual storytelling and engagement"},
{"id": "medium", "name": "Medium", "description": "Publishing platform and audience building"},
{"id": "substack", "name": "Substack", "description": "Newsletter and subscription content"}
],
"persona_types": [
"Thought Leader",
"Industry Expert",
"Content Creator",
"Brand Ambassador",
"Community Builder"
],
"quality_metrics": [
"Style Consistency",
"Brand Alignment",
"Platform Optimization",
"Engagement Potential",
"Content Quality"
]
}
except Exception as e:
logger.error(f"Error getting persona options: {str(e)}")
raise HTTPException(status_code=500, detail=f"Failed to get persona options: {str(e)}")
async def execute_persona_generation_task(task_id: str, persona_request: PersonaGenerationRequest, current_user: Dict[str, Any]):
"""
Execute persona generation task in background with progress updates.
"""
try:
logger.info(f"BACKGROUND TASK STARTED: {task_id}")
logger.info(f"Task {task_id}: Background task execution initiated")
# Log onboarding data summary for debugging
onboarding_data_summary = {
"has_websiteAnalysis": bool(persona_request.onboarding_data.get("websiteAnalysis")),
"has_competitorResearch": bool(persona_request.onboarding_data.get("competitorResearch")),
"has_sitemapAnalysis": bool(persona_request.onboarding_data.get("sitemapAnalysis")),
"has_businessData": bool(persona_request.onboarding_data.get("businessData")),
"data_keys": list(persona_request.onboarding_data.keys()) if persona_request.onboarding_data else []
}
logger.info(f"Task {task_id}: Onboarding data summary: {onboarding_data_summary}")
# Update task status to running
update_task_status(task_id, "running", 10, "Starting persona generation...")
logger.info(f"Task {task_id}: Status updated to running")
# Inject user-specific API keys into environment for the duration of this background task
user_id = _extract_user_id(current_user)
env_mapping = {
'gemini': 'GEMINI_API_KEY',
'exa': 'EXA_API_KEY',
'openai': 'OPENAI_API_KEY',
'anthropic': 'ANTHROPIC_API_KEY',
'mistral': 'MISTRAL_API_KEY',
'copilotkit': 'COPILOTKIT_API_KEY',
'tavily': 'TAVILY_API_KEY',
'serper': 'SERPER_API_KEY',
'firecrawl': 'FIRECRAWL_API_KEY',
}
original_env: Dict[str, Optional[str]] = {}
with user_api_keys(user_id) as keys:
try:
for provider, env_var in env_mapping.items():
value = keys.get(provider)
if value:
original_env[env_var] = os.environ.get(env_var)
os.environ[env_var] = value
logger.debug(f"[BG TASK] Injected {env_var} for user {user_id}")
# Step 1: Generate core persona (1 API call)
update_task_status(task_id, "running", 20, "Generating core persona...")
logger.info(f"Task {task_id}: Step 1 - Generating core persona...")
core_persona = await asyncio.get_event_loop().run_in_executor(
None,
core_persona_service.generate_core_persona,
persona_request.onboarding_data
)
if "error" in core_persona:
error_msg = core_persona['error']
# Check if this is a quota/rate limit error
if "RESOURCE_EXHAUSTED" in str(error_msg) or "429" in str(error_msg) or "quota" in str(error_msg).lower():
update_task_status(task_id, "failed", 0, f"Quota exhausted: {error_msg}", error=str(error_msg))
logger.error(f"Task {task_id}: Quota exhausted, marking as failed immediately")
else:
update_task_status(task_id, "failed", 0, f"Core persona generation failed: {error_msg}", error=str(error_msg))
return
update_task_status(task_id, "running", 40, "Core persona generated successfully")
# Add small delay after core persona generation
await asyncio.sleep(1.0)
# Step 2: Generate platform adaptations with rate limiting (N API calls with delays)
update_task_status(task_id, "running", 50, f"Generating platform adaptations for: {persona_request.selected_platforms}")
platform_personas = {}
total_platforms = len(persona_request.selected_platforms)
# Process platforms sequentially with small delays to avoid rate limits
for i, platform in enumerate(persona_request.selected_platforms):
try:
progress = 50 + (i * 40 // total_platforms)
update_task_status(task_id, "running", progress, f"Generating {platform} persona ({i+1}/{total_platforms})")
# Add delay between API calls to prevent rate limiting
if i > 0: # Skip delay for first platform
update_task_status(task_id, "running", progress, f"Rate limiting: Waiting {RATE_LIMIT_DELAY_SECONDS}s before next API call...")
await asyncio.sleep(RATE_LIMIT_DELAY_SECONDS)
# Generate platform persona
result = await generate_single_platform_persona_async(
core_persona,
platform,
persona_request.onboarding_data
)
if isinstance(result, Exception):
error_msg = str(result)
logger.error(f"Platform {platform} generation failed: {error_msg}")
platform_personas[platform] = {"error": error_msg}
elif "error" in result:
error_msg = result['error']
logger.error(f"Platform {platform} generation failed: {error_msg}")
platform_personas[platform] = result
# Check for rate limit errors and suggest retry
if "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower():
logger.warning(f"⚠️ Rate limit detected for {platform}. Consider increasing RATE_LIMIT_DELAY_SECONDS")
else:
platform_personas[platform] = result
logger.info(f"{platform} persona generated successfully")
except Exception as e:
logger.error(f"Platform {platform} generation error: {str(e)}")
platform_personas[platform] = {"error": str(e)}
# Step 3: Assess quality (no additional API calls - uses existing data)
update_task_status(task_id, "running", 90, "Assessing persona quality...")
quality_metrics = await assess_persona_quality_internal(
core_persona,
platform_personas,
persona_request.user_preferences
)
finally:
# Restore environment
for env_var, original_value in original_env.items():
if original_value is None:
os.environ.pop(env_var, None)
else:
os.environ[env_var] = original_value
logger.debug(f"[BG TASK] Restored environment for user {user_id}")
# Log performance metrics
successful_platforms = len([p for p in platform_personas.values() if "error" not in p])
logger.info(f"✅ Persona generation completed: {successful_platforms}/{total_platforms} platforms successful")
logger.info(f"📊 API calls made: 1 (core) + {total_platforms} (platforms) = {1 + total_platforms} total")
logger.info(f"⏱️ Rate limiting: Sequential processing with 2s delays to prevent quota exhaustion")
# Create final result
final_result = {
"success": True,
"core_persona": core_persona,
"platform_personas": platform_personas,
"quality_metrics": quality_metrics
}
# Update task status to completed
update_task_status(task_id, "completed", 100, "Persona generation completed successfully", final_result)
# Populate server-side cache for quick reloads
try:
user_id = _extract_user_id(current_user)
persona_latest_cache[user_id] = {
**final_result,
"selected_platforms": persona_request.selected_platforms,
"timestamp": datetime.now().isoformat()
}
logger.info(f"Latest persona cached for user {user_id}")
except Exception as e:
logger.warning(f"Could not cache latest persona: {e}")
except Exception as e:
logger.error(f"Persona generation task {task_id} failed: {str(e)}")
logger.error(f"Task {task_id}: Exception details: {type(e).__name__}: {str(e)}")
import traceback
logger.error(f"Task {task_id}: Full traceback: {traceback.format_exc()}")
update_task_status(task_id, "failed", 0, f"Persona generation failed: {str(e)}")
def update_task_status(task_id: str, status: str, progress: int, current_step: str, result: Optional[Dict[str, Any]] = None, error: Optional[str] = None):
"""Update task status in memory storage."""
if task_id in persona_tasks:
persona_tasks[task_id].update({
"status": status,
"progress": progress,
"current_step": current_step,
"updated_at": datetime.now().isoformat(),
"result": result,
"error": error
})
# Add progress message
persona_tasks[task_id]["progress_messages"].append({
"timestamp": datetime.now().isoformat(),
"message": current_step,
"progress": progress
})
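# Illustrative shape of a task record as maintained above and surfaced via PersonaTaskStatus
# (values are placeholders):
#   {
#       "task_id": "6f1c...",
#       "status": "running",            # pending | running | completed | failed
#       "progress": 50,                  # 0-100
#       "current_step": "Generating linkedin persona (1/2)",
#       "progress_messages": [{"timestamp": "...", "message": "...", "progress": 50}],
#       "result": None,                  # populated when status == "completed"
#       "error": None,
#       "created_at": "...",
#       "updated_at": "..."
#   }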
async def generate_single_platform_persona_async(
core_persona: Dict[str, Any],
platform: str,
onboarding_data: Dict[str, Any]
) -> Dict[str, Any]:
"""
Async wrapper for single platform persona generation.
"""
try:
return await asyncio.get_event_loop().run_in_executor(
None,
core_persona_service._generate_single_platform_persona,
core_persona,
platform,
onboarding_data
)
except Exception as e:
logger.error(f"Error generating {platform} persona: {str(e)}")
return {"error": f"Failed to generate {platform} persona: {str(e)}"}
async def assess_persona_quality_internal(
core_persona: Dict[str, Any],
platform_personas: Dict[str, Any],
user_preferences: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""
Internal function to assess persona quality using comprehensive metrics.
"""
try:
from services.persona.persona_quality_improver import PersonaQualityImprover
# Initialize quality improver
quality_improver = PersonaQualityImprover()
# Placeholder linguistic analysis scores (a full linguistic analysis is not run in this path)
linguistic_analysis = {
"analysis_completeness": 0.85,
"style_consistency": 0.88,
"vocabulary_sophistication": 0.82,
"content_coherence": 0.87
}
# Get comprehensive quality metrics
quality_metrics = quality_improver.assess_persona_quality_comprehensive(
core_persona,
platform_personas,
linguistic_analysis,
user_preferences
)
return quality_metrics
except Exception as e:
logger.error(f"Quality assessment internal error: {str(e)}")
# Return fallback quality metrics compatible with PersonaQualityImprover schema
return {
"overall_score": 75,
"core_completeness": 75,
"platform_consistency": 75,
"platform_optimization": 75,
"linguistic_quality": 75,
"recommendations": ["Quality assessment completed with default metrics"],
"weights": {
"core_completeness": 0.30,
"platform_consistency": 0.25,
"platform_optimization": 0.25,
"linguistic_quality": 0.20
},
"error": str(e)
}
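# Illustrative check of the fallback weights above, assuming they are applied as a simple
# weighted average (an assumption about PersonaQualityImprover's scoring, not confirmed here):
#   0.30 * 75 + 0.25 * 75 + 0.25 * 75 + 0.20 * 75 = 75.0, matching the fallback overall_score.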
async def _log_persona_generation_result(
user_id: str,
core_persona: Dict[str, Any],
platform_personas: Dict[str, Any],
quality_metrics: Dict[str, Any]
):
"""Background task to log persona generation results."""
try:
logger.info(f"Logging persona generation result for user {user_id}")
logger.info(f"Core persona generated with {len(core_persona)} characteristics")
logger.info(f"Platform personas generated for {len(platform_personas)} platforms")
logger.info(f"Quality metrics: {quality_metrics.get('overall_score', 'N/A')}% overall score")
except Exception as e:
logger.error(f"Error logging persona generation result: {str(e)}")

View File

@@ -0,0 +1,395 @@
"""
OPTIMIZED Step 4 Persona Generation Routes
Ultra-efficient persona generation with minimal API calls and maximum parallelization.
"""
import asyncio
from typing import Dict, Any, List, Optional
from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks
from pydantic import BaseModel
from loguru import logger
from services.persona.core_persona.core_persona_service import CorePersonaService
from services.persona.enhanced_linguistic_analyzer import EnhancedLinguisticAnalyzer
from services.persona.persona_quality_improver import PersonaQualityImprover
from middleware.auth_middleware import get_current_user
from services.llm_providers.gemini_provider import gemini_structured_json_response
router = APIRouter()
# Initialize services
core_persona_service = CorePersonaService()
linguistic_analyzer = EnhancedLinguisticAnalyzer()
quality_improver = PersonaQualityImprover()
class OptimizedPersonaGenerationRequest(BaseModel):
"""Optimized request model for persona generation."""
onboarding_data: Dict[str, Any]
selected_platforms: List[str] = ["linkedin", "blog"]
user_preferences: Optional[Dict[str, Any]] = None
class OptimizedPersonaGenerationResponse(BaseModel):
"""Optimized response model for persona generation."""
success: bool
core_persona: Optional[Dict[str, Any]] = None
platform_personas: Optional[Dict[str, Any]] = None
quality_metrics: Optional[Dict[str, Any]] = None
api_call_count: Optional[int] = None
execution_time_ms: Optional[int] = None
error: Optional[str] = None
@router.post("/step4/generate-personas-optimized", response_model=OptimizedPersonaGenerationResponse)
async def generate_writing_personas_optimized(
request: OptimizedPersonaGenerationRequest,
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""
ULTRA-OPTIMIZED persona generation with minimal API calls.
OPTIMIZATION STRATEGY:
1. Single API call generates both core persona AND all platform adaptations
2. Quality assessment uses rule-based analysis (no additional API calls)
3. Parallel execution where possible
Total API calls: 1 (vs previous: 1 + N platforms = N + 1)
Performance improvement: ~70% faster for 3+ platforms
"""
import time
start_time = time.time()
api_call_count = 0
try:
logger.info(f"Starting ULTRA-OPTIMIZED persona generation for user: {current_user.get('user_id', 'unknown')}")
logger.info(f"Selected platforms: {request.selected_platforms}")
# Step 1: Generate core persona + platform adaptations in ONE API call
logger.info("Step 1: Generating core persona + platform adaptations in single API call...")
# Build comprehensive prompt for all personas at once
comprehensive_prompt = build_comprehensive_persona_prompt(
request.onboarding_data,
request.selected_platforms
)
# Single API call for everything
comprehensive_response = await asyncio.get_event_loop().run_in_executor(
None,
gemini_structured_json_response,
comprehensive_prompt,
get_comprehensive_persona_schema(request.selected_platforms),
0.2, # temperature
8192, # max_tokens
"You are an expert AI writing persona developer. Generate comprehensive, platform-optimized writing personas in a single response."
)
api_call_count += 1
if "error" in comprehensive_response:
raise Exception(f"Comprehensive persona generation failed: {comprehensive_response['error']}")
# Extract core persona and platform personas from single response
core_persona = comprehensive_response.get("core_persona", {})
platform_personas = comprehensive_response.get("platform_personas", {})
# Step 2: Parallel quality assessment (no API calls - rule-based)
logger.info("Step 2: Assessing quality using rule-based analysis...")
quality_metrics_task = asyncio.create_task(
assess_persona_quality_rule_based(core_persona, platform_personas)
)
# Step 3: Enhanced linguistic analysis (if spaCy available, otherwise skip)
linguistic_analysis_task = asyncio.create_task(
analyze_linguistic_patterns_async(request.onboarding_data)
)
# Wait for parallel tasks
quality_metrics, linguistic_analysis = await asyncio.gather(
quality_metrics_task,
linguistic_analysis_task,
return_exceptions=True
)
# Enhance quality metrics with linguistic analysis if available
if not isinstance(linguistic_analysis, Exception):
quality_metrics = enhance_quality_metrics(quality_metrics, linguistic_analysis)
execution_time_ms = int((time.time() - start_time) * 1000)
# Log performance metrics
total_platforms = len(request.selected_platforms)
successful_platforms = len([p for p in platform_personas.values() if "error" not in p])
logger.info(f"✅ ULTRA-OPTIMIZED persona generation completed in {execution_time_ms}ms")
logger.info(f"📊 API calls made: {api_call_count} (vs {1 + total_platforms} in previous version)")
logger.info(f"📈 Performance improvement: ~{int((1 + total_platforms - api_call_count) / (1 + total_platforms) * 100)}% fewer API calls")
logger.info(f"🎯 Success rate: {successful_platforms}/{total_platforms} platforms successful")
return OptimizedPersonaGenerationResponse(
success=True,
core_persona=core_persona,
platform_personas=platform_personas,
quality_metrics=quality_metrics,
api_call_count=api_call_count,
execution_time_ms=execution_time_ms
)
except Exception as e:
execution_time_ms = int((time.time() - start_time) * 1000)
logger.error(f"Optimized persona generation error: {str(e)}")
return OptimizedPersonaGenerationResponse(
success=False,
api_call_count=api_call_count,
execution_time_ms=execution_time_ms,
error=f"Optimized persona generation failed: {str(e)}"
)
def build_comprehensive_persona_prompt(onboarding_data: Dict[str, Any], platforms: List[str]) -> str:
"""Build a single comprehensive prompt for all persona generation."""
prompt = f"""
Generate a comprehensive AI writing persona system based on the following data:
ONBOARDING DATA:
- Website Analysis: {onboarding_data.get('websiteAnalysis', {})}
- Competitor Research: {onboarding_data.get('competitorResearch', {})}
- Sitemap Analysis: {onboarding_data.get('sitemapAnalysis', {})}
- Business Data: {onboarding_data.get('businessData', {})}
TARGET PLATFORMS: {', '.join(platforms)}
REQUIREMENTS:
1. Generate a CORE PERSONA that captures the user's unique writing style, brand voice, and content characteristics
2. Generate PLATFORM-SPECIFIC ADAPTATIONS for each target platform
3. Ensure consistency across all personas while optimizing for each platform's unique characteristics
4. Include specific recommendations for content structure, tone, and engagement strategies
PLATFORM OPTIMIZATIONS:
- LinkedIn: Professional networking, thought leadership, industry insights
- Facebook: Community building, social engagement, visual storytelling
- Twitter: Micro-blogging, real-time updates, hashtag optimization
- Blog: Long-form content, SEO optimization, storytelling
- Instagram: Visual storytelling, aesthetic focus, engagement
- Medium: Publishing platform, audience building, thought leadership
- Substack: Newsletter content, subscription-based, personal connection
Generate personas that are:
- Highly personalized based on the user's actual content and business
- Platform-optimized for maximum engagement
- Consistent in brand voice across platforms
- Actionable with specific writing guidelines
- Scalable for content production
"""
return prompt
def get_comprehensive_persona_schema(platforms: List[str]) -> Dict[str, Any]:
"""Get comprehensive JSON schema for all personas."""
platform_schemas = {}
for platform in platforms:
platform_schemas[platform] = {
"type": "object",
"properties": {
"platform_optimizations": {"type": "object"},
"content_guidelines": {"type": "object"},
"engagement_strategies": {"type": "object"},
"call_to_action_style": {"type": "string"},
"optimal_content_length": {"type": "string"},
"key_phrases": {"type": "array", "items": {"type": "string"}}
}
}
return {
"type": "object",
"properties": {
"core_persona": {
"type": "object",
"properties": {
"writing_style": {
"type": "object",
"properties": {
"tone": {"type": "string"},
"voice": {"type": "string"},
"personality": {"type": "array", "items": {"type": "string"}},
"sentence_structure": {"type": "string"},
"vocabulary_level": {"type": "string"}
}
},
"content_characteristics": {
"type": "object",
"properties": {
"length_preference": {"type": "string"},
"structure": {"type": "string"},
"engagement_style": {"type": "string"},
"storytelling_approach": {"type": "string"}
}
},
"brand_voice": {
"type": "object",
"properties": {
"description": {"type": "string"},
"keywords": {"type": "array", "items": {"type": "string"}},
"unique_phrases": {"type": "array", "items": {"type": "string"}},
"emotional_triggers": {"type": "array", "items": {"type": "string"}}
}
},
"target_audience": {
"type": "object",
"properties": {
"primary": {"type": "string"},
"demographics": {"type": "string"},
"psychographics": {"type": "string"},
"pain_points": {"type": "array", "items": {"type": "string"}},
"motivations": {"type": "array", "items": {"type": "string"}}
}
}
}
},
"platform_personas": {
"type": "object",
"properties": platform_schemas
}
}
}
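# Illustrative shape of the single structured response expected from the schema above
# (values are placeholders; keys follow the schema definition):
#   {
#       "core_persona": {
#           "writing_style": {"tone": "confident", "voice": "first-person", ...},
#           "content_characteristics": {...},
#           "brand_voice": {"keywords": ["..."], ...},
#           "target_audience": {"primary": "...", ...}
#       },
#       "platform_personas": {
#           "linkedin": {"platform_optimizations": {...}, "optimal_content_length": "..."},
#           "blog": {...}
#       }
#   }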
async def assess_persona_quality_rule_based(
core_persona: Dict[str, Any],
platform_personas: Dict[str, Any]
) -> Dict[str, Any]:
"""Rule-based quality assessment without API calls."""
try:
# Calculate quality scores based on data completeness and consistency
core_completeness = calculate_completeness_score(core_persona)
platform_consistency = calculate_consistency_score(core_persona, platform_personas)
platform_optimization = calculate_platform_optimization_score(platform_personas)
# Overall score
overall_score = int((core_completeness + platform_consistency + platform_optimization) / 3)
# Generate recommendations
recommendations = generate_quality_recommendations(
core_completeness, platform_consistency, platform_optimization
)
return {
"overall_score": overall_score,
"core_completeness": core_completeness,
"platform_consistency": platform_consistency,
"platform_optimization": platform_optimization,
"recommendations": recommendations,
"assessment_method": "rule_based"
}
except Exception as e:
logger.error(f"Rule-based quality assessment error: {str(e)}")
return {
"overall_score": 75,
"core_completeness": 75,
"platform_consistency": 75,
"platform_optimization": 75,
"recommendations": ["Quality assessment completed with default metrics"],
"error": str(e)
}
def calculate_completeness_score(core_persona: Dict[str, Any]) -> int:
"""Calculate completeness score for core persona."""
required_fields = ['writing_style', 'content_characteristics', 'brand_voice', 'target_audience']
present_fields = sum(1 for field in required_fields if field in core_persona and core_persona[field])
return int((present_fields / len(required_fields)) * 100)
def calculate_consistency_score(core_persona: Dict[str, Any], platform_personas: Dict[str, Any]) -> int:
"""Calculate consistency score across platforms."""
if not platform_personas:
return 50
# Check if brand voice elements are consistent across platforms
core_voice = core_persona.get('brand_voice', {}).get('keywords', [])
consistency_scores = []
for platform, persona in platform_personas.items():
if 'error' not in persona:
platform_voice = persona.get('brand_voice', {}).get('keywords', [])
# Simple consistency check
overlap = len(set(core_voice) & set(platform_voice))
consistency_scores.append(min(overlap * 10, 100))
return int(sum(consistency_scores) / len(consistency_scores)) if consistency_scores else 75
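# Illustrative example of the overlap scoring above (not executed): if the core brand-voice
# keywords and a platform's keywords share 3 terms, that platform contributes min(3 * 10, 100) = 30.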
def calculate_platform_optimization_score(platform_personas: Dict[str, Any]) -> int:
"""Calculate platform optimization score."""
if not platform_personas:
return 50
optimization_scores = []
for platform, persona in platform_personas.items():
if 'error' not in persona:
# Check for platform-specific optimizations
has_optimizations = any(key in persona for key in [
'platform_optimizations', 'content_guidelines', 'engagement_strategies'
])
optimization_scores.append(90 if has_optimizations else 60)
return int(sum(optimization_scores) / len(optimization_scores)) if optimization_scores else 75
def generate_quality_recommendations(
core_completeness: int,
platform_consistency: int,
platform_optimization: int
) -> List[str]:
"""Generate quality recommendations based on scores."""
recommendations = []
if core_completeness < 85:
recommendations.append("Enhance core persona completeness with more detailed writing style characteristics")
if platform_consistency < 80:
recommendations.append("Improve brand voice consistency across platform adaptations")
if platform_optimization < 85:
recommendations.append("Strengthen platform-specific optimizations for better engagement")
if not recommendations:
recommendations.append("Your personas show excellent quality across all metrics!")
return recommendations
async def analyze_linguistic_patterns_async(onboarding_data: Dict[str, Any]) -> Dict[str, Any]:
"""Async linguistic analysis if spaCy is available."""
try:
if linguistic_analyzer.spacy_available:
# Extract text samples from onboarding data
text_samples = extract_text_samples(onboarding_data)
if text_samples:
return await asyncio.get_event_loop().run_in_executor(
None,
linguistic_analyzer.analyze_writing_style,
text_samples
)
return {}
except Exception as e:
logger.warning(f"Linguistic analysis skipped: {str(e)}")
return {}
def extract_text_samples(onboarding_data: Dict[str, Any]) -> List[str]:
"""Extract text samples for linguistic analysis."""
text_samples = []
# Extract from website analysis
website_analysis = onboarding_data.get('websiteAnalysis', {})
if isinstance(website_analysis, dict):
for key, value in website_analysis.items():
if isinstance(value, str) and len(value) > 50:
text_samples.append(value)
return text_samples
def enhance_quality_metrics(quality_metrics: Dict[str, Any], linguistic_analysis: Dict[str, Any]) -> Dict[str, Any]:
"""Enhance quality metrics with linguistic analysis."""
if linguistic_analysis:
quality_metrics['linguistic_analysis'] = linguistic_analysis
# Adjust scores based on linguistic insights
if 'style_consistency' in linguistic_analysis:
quality_metrics['style_consistency'] = linguistic_analysis['style_consistency']
return quality_metrics
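# Illustrative client sketch (assumptions noted): the mount prefix for this router is not shown
# in this module, so the path is relative to wherever it is included; the httpx client, base URL,
# and auth header are placeholders. The payload fields come from OptimizedPersonaGenerationRequest above.
#
#   import httpx
#
#   async def generate_personas_optimized(client: httpx.AsyncClient, onboarding_data: dict) -> dict:
#       response = await client.post(
#           "/step4/generate-personas-optimized",
#           json={
#               "onboarding_data": onboarding_data,
#               "selected_platforms": ["linkedin", "blog"],
#           },
#       )
#       response.raise_for_status()
#       data = response.json()
#       # api_call_count and execution_time_ms report the optimization metrics described above.
#       return data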

View File

@@ -0,0 +1,506 @@
"""
QUALITY-FIRST Step 4 Persona Generation Routes
Prioritizes persona quality over cost optimization.
Uses multiple specialized API calls for maximum quality and accuracy.
"""
import asyncio
from typing import Dict, Any, List, Optional
from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks
from pydantic import BaseModel
from loguru import logger
from services.persona.core_persona.core_persona_service import CorePersonaService
from services.persona.enhanced_linguistic_analyzer import EnhancedLinguisticAnalyzer
from services.persona.persona_quality_improver import PersonaQualityImprover
from middleware.auth_middleware import get_current_user
router = APIRouter()
# Initialize services
core_persona_service = CorePersonaService()
linguistic_analyzer = EnhancedLinguisticAnalyzer() # Will fail if spaCy not available
quality_improver = PersonaQualityImprover()
class QualityFirstPersonaRequest(BaseModel):
"""Quality-first request model for persona generation."""
onboarding_data: Dict[str, Any]
selected_platforms: List[str] = ["linkedin", "blog"]
user_preferences: Optional[Dict[str, Any]] = None
quality_threshold: float = 85.0 # Minimum quality score required
class QualityFirstPersonaResponse(BaseModel):
"""Quality-first response model for persona generation."""
success: bool
core_persona: Optional[Dict[str, Any]] = None
platform_personas: Optional[Dict[str, Any]] = None
quality_metrics: Optional[Dict[str, Any]] = None
linguistic_analysis: Optional[Dict[str, Any]] = None
api_call_count: Optional[int] = None
execution_time_ms: Optional[int] = None
quality_validation_passed: Optional[bool] = None
error: Optional[str] = None
@router.post("/step4/generate-personas-quality-first", response_model=QualityFirstPersonaResponse)
async def generate_writing_personas_quality_first(
request: QualityFirstPersonaRequest,
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""
QUALITY-FIRST persona generation with multiple specialized API calls for maximum quality.
QUALITY-FIRST APPROACH:
1. Enhanced linguistic analysis (spaCy required)
2. Core persona generation with detailed prompts
3. Individual platform adaptations (specialized for each platform)
4. Comprehensive quality assessment using AI
5. Quality validation and improvement if needed
Total API calls: 1 (core) + N (platforms) + 1 (quality) = N + 2 calls
Quality priority: MAXIMUM (no compromises)
"""
import time
start_time = time.time()
api_call_count = 0
quality_validation_passed = False
try:
logger.info(f"🎯 Starting QUALITY-FIRST persona generation for user: {current_user.get('user_id', 'unknown')}")
logger.info(f"📋 Selected platforms: {request.selected_platforms}")
logger.info(f"🎖️ Quality threshold: {request.quality_threshold}%")
# Step 1: Enhanced linguistic analysis (REQUIRED for quality)
logger.info("Step 1: Enhanced linguistic analysis...")
text_samples = extract_text_samples_for_analysis(request.onboarding_data)
if text_samples:
linguistic_analysis = await asyncio.get_event_loop().run_in_executor(
None,
linguistic_analyzer.analyze_writing_style,
text_samples
)
logger.info("✅ Enhanced linguistic analysis completed")
else:
logger.warning("⚠️ No text samples found for linguistic analysis")
linguistic_analysis = {}
# Step 2: Generate core persona with enhanced analysis
logger.info("Step 2: Generating core persona with enhanced linguistic insights...")
enhanced_onboarding_data = request.onboarding_data.copy()
enhanced_onboarding_data['linguistic_analysis'] = linguistic_analysis
core_persona = await asyncio.get_event_loop().run_in_executor(
None,
core_persona_service.generate_core_persona,
enhanced_onboarding_data
)
api_call_count += 1
if "error" in core_persona:
raise Exception(f"Core persona generation failed: {core_persona['error']}")
logger.info("✅ Core persona generated successfully")
# Step 3: Generate individual platform adaptations (specialized for each platform)
logger.info(f"Step 3: Generating specialized platform adaptations for: {request.selected_platforms}")
platform_tasks = []
for platform in request.selected_platforms:
task = asyncio.create_task(
generate_specialized_platform_persona_async(
core_persona,
platform,
enhanced_onboarding_data,
linguistic_analysis
)
)
platform_tasks.append((platform, task))
# Wait for all platform personas to complete
platform_results = await asyncio.gather(
*[task for _, task in platform_tasks],
return_exceptions=True
)
# Process platform results
platform_personas = {}
for i, (platform, task) in enumerate(platform_tasks):
result = platform_results[i]
if isinstance(result, Exception):
logger.error(f"❌ Platform {platform} generation failed: {str(result)}")
raise Exception(f"Platform {platform} generation failed: {str(result)}")
elif "error" in result:
logger.error(f"❌ Platform {platform} generation failed: {result['error']}")
raise Exception(f"Platform {platform} generation failed: {result['error']}")
else:
platform_personas[platform] = result
api_call_count += 1
logger.info(f"✅ Platform adaptations generated for {len(platform_personas)} platforms")
# Step 4: Comprehensive AI-based quality assessment
logger.info("Step 4: Comprehensive AI-based quality assessment...")
quality_metrics = await assess_persona_quality_ai_based(
core_persona,
platform_personas,
linguistic_analysis,
request.user_preferences
)
api_call_count += 1
# Step 5: Quality validation
logger.info("Step 5: Quality validation...")
overall_quality = quality_metrics.get('overall_score', 0)
if overall_quality >= request.quality_threshold:
quality_validation_passed = True
logger.info(f"✅ Quality validation PASSED: {overall_quality}% >= {request.quality_threshold}%")
else:
logger.warning(f"⚠️ Quality validation FAILED: {overall_quality}% < {request.quality_threshold}%")
# Attempt quality improvement
logger.info("🔄 Attempting quality improvement...")
improved_personas = await attempt_quality_improvement(
core_persona,
platform_personas,
quality_metrics,
request.quality_threshold
)
if improved_personas:
core_persona = improved_personas.get('core_persona', core_persona)
platform_personas = improved_personas.get('platform_personas', platform_personas)
# Re-assess quality after improvement
quality_metrics = await assess_persona_quality_ai_based(
core_persona,
platform_personas,
linguistic_analysis,
request.user_preferences
)
api_call_count += 1
final_quality = quality_metrics.get('overall_score', 0)
if final_quality >= request.quality_threshold:
quality_validation_passed = True
logger.info(f"✅ Quality improvement SUCCESSFUL: {final_quality}% >= {request.quality_threshold}%")
else:
logger.warning(f"⚠️ Quality improvement INSUFFICIENT: {final_quality}% < {request.quality_threshold}%")
else:
logger.error("❌ Quality improvement failed")
execution_time_ms = int((time.time() - start_time) * 1000)
# Log quality-first performance metrics
total_platforms = len(request.selected_platforms)
successful_platforms = len([p for p in platform_personas.values() if "error" not in p])
logger.info(f"🎯 QUALITY-FIRST persona generation completed in {execution_time_ms}ms")
logger.info(f"📊 API calls made: {api_call_count} (quality-focused approach)")
logger.info(f"🎖️ Final quality score: {quality_metrics.get('overall_score', 0)}%")
logger.info(f"✅ Quality validation: {'PASSED' if quality_validation_passed else 'FAILED'}")
logger.info(f"🎯 Success rate: {successful_platforms}/{total_platforms} platforms successful")
return QualityFirstPersonaResponse(
success=True,
core_persona=core_persona,
platform_personas=platform_personas,
quality_metrics=quality_metrics,
linguistic_analysis=linguistic_analysis,
api_call_count=api_call_count,
execution_time_ms=execution_time_ms,
quality_validation_passed=quality_validation_passed
)
except Exception as e:
execution_time_ms = int((time.time() - start_time) * 1000)
logger.error(f"❌ Quality-first persona generation error: {str(e)}")
return QualityFirstPersonaResponse(
success=False,
api_call_count=api_call_count,
execution_time_ms=execution_time_ms,
quality_validation_passed=False,
error=f"Quality-first persona generation failed: {str(e)}"
)
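# Example client call (illustrative sketch — assumes the router is mounted under /api/onboarding,
# the API is served on localhost:8000, and CLERK_TOKEN holds a valid Clerk JWT; adjust for your deployment):
#
#   import httpx
#   payload = {
#       "onboarding_data": {"websiteAnalysis": {"summary": "..."}},
#       "selected_platforms": ["linkedin", "blog"],
#       "quality_threshold": 85.0,
#   }
#   resp = httpx.post(
#       "http://localhost:8000/api/onboarding/step4/generate-personas-quality-first",
#       json=payload,
#       headers={"Authorization": f"Bearer {CLERK_TOKEN}"},
#       timeout=300.0,
#   )
#   resp.raise_for_status()
#   print(resp.json()["quality_validation_passed"])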
async def generate_specialized_platform_persona_async(
core_persona: Dict[str, Any],
platform: str,
onboarding_data: Dict[str, Any],
linguistic_analysis: Dict[str, Any]
) -> Dict[str, Any]:
"""
Generate specialized platform persona with enhanced context.
"""
try:
# Add linguistic analysis to onboarding data for platform-specific generation
enhanced_data = onboarding_data.copy()
enhanced_data['linguistic_analysis'] = linguistic_analysis
return await asyncio.get_event_loop().run_in_executor(
None,
core_persona_service._generate_single_platform_persona,
core_persona,
platform,
enhanced_data
)
except Exception as e:
logger.error(f"Error generating specialized {platform} persona: {str(e)}")
return {"error": f"Failed to generate specialized {platform} persona: {str(e)}"}
async def assess_persona_quality_ai_based(
core_persona: Dict[str, Any],
platform_personas: Dict[str, Any],
linguistic_analysis: Dict[str, Any],
user_preferences: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""
AI-based quality assessment using the persona quality improver.
"""
try:
# Use the actual PersonaQualityImprover for AI-based assessment
assessment_result = await asyncio.get_event_loop().run_in_executor(
None,
quality_improver.assess_persona_quality_comprehensive,
core_persona,
platform_personas,
linguistic_analysis,
user_preferences
)
return assessment_result
except Exception as e:
logger.error(f"AI-based quality assessment error: {str(e)}")
# Fallback to enhanced rule-based assessment
return await assess_persona_quality_enhanced_rule_based(
core_persona, platform_personas, linguistic_analysis
)
async def assess_persona_quality_enhanced_rule_based(
core_persona: Dict[str, Any],
platform_personas: Dict[str, Any],
linguistic_analysis: Dict[str, Any]
) -> Dict[str, Any]:
"""
Enhanced rule-based quality assessment with linguistic analysis.
"""
try:
# Calculate quality scores with linguistic insights
core_completeness = calculate_enhanced_completeness_score(core_persona, linguistic_analysis)
platform_consistency = calculate_enhanced_consistency_score(core_persona, platform_personas, linguistic_analysis)
platform_optimization = calculate_enhanced_platform_optimization_score(platform_personas, linguistic_analysis)
linguistic_quality = calculate_linguistic_quality_score(linguistic_analysis)
# Weighted overall score (linguistic quality is important)
overall_score = int((
core_completeness * 0.25 +
platform_consistency * 0.25 +
platform_optimization * 0.25 +
linguistic_quality * 0.25
))
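        # e.g. completeness 90, consistency 80, optimization 85, linguistic 75
        # → int(90*0.25 + 80*0.25 + 85*0.25 + 75*0.25) = 82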
# Generate enhanced recommendations
recommendations = generate_enhanced_quality_recommendations(
core_completeness, platform_consistency, platform_optimization, linguistic_quality, linguistic_analysis
)
return {
"overall_score": overall_score,
"core_completeness": core_completeness,
"platform_consistency": platform_consistency,
"platform_optimization": platform_optimization,
"linguistic_quality": linguistic_quality,
"recommendations": recommendations,
"assessment_method": "enhanced_rule_based",
"linguistic_insights": linguistic_analysis
}
except Exception as e:
logger.error(f"Enhanced rule-based quality assessment error: {str(e)}")
return {
"overall_score": 70,
"core_completeness": 70,
"platform_consistency": 70,
"platform_optimization": 70,
"linguistic_quality": 70,
"recommendations": ["Quality assessment completed with default metrics"],
"error": str(e)
}
def calculate_enhanced_completeness_score(core_persona: Dict[str, Any], linguistic_analysis: Dict[str, Any]) -> int:
"""Calculate enhanced completeness score with linguistic insights."""
required_fields = ['writing_style', 'content_characteristics', 'brand_voice', 'target_audience']
present_fields = sum(1 for field in required_fields if field in core_persona and core_persona[field])
base_score = int((present_fields / len(required_fields)) * 100)
# Boost score if linguistic analysis is available and comprehensive
if linguistic_analysis and linguistic_analysis.get('analysis_completeness', 0) > 0.8:
base_score = min(base_score + 10, 100)
return base_score
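# Example for calculate_enhanced_completeness_score (illustrative): 3 of the 4 required fields
# present → int((3/4) * 100) = 75; with analysis_completeness > 0.8 the +10 boost gives 85.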
def calculate_enhanced_consistency_score(
core_persona: Dict[str, Any],
platform_personas: Dict[str, Any],
linguistic_analysis: Dict[str, Any]
) -> int:
"""Calculate enhanced consistency score with linguistic insights."""
if not platform_personas:
return 50
# Check if brand voice elements are consistent across platforms
core_voice = core_persona.get('brand_voice', {}).get('keywords', [])
consistency_scores = []
for platform, persona in platform_personas.items():
if 'error' not in persona:
platform_voice = persona.get('brand_voice', {}).get('keywords', [])
# Enhanced consistency check with linguistic analysis
overlap = len(set(core_voice) & set(platform_voice))
consistency_score = min(overlap * 10, 100)
# Boost if linguistic analysis shows good style consistency
if linguistic_analysis and linguistic_analysis.get('style_consistency', 0) > 0.8:
consistency_score = min(consistency_score + 5, 100)
consistency_scores.append(consistency_score)
return int(sum(consistency_scores) / len(consistency_scores)) if consistency_scores else 75
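# Example for calculate_enhanced_consistency_score (illustrative): 3 shared brand-voice keywords
# → min(3 * 10, 100) = 30; a style_consistency above 0.8 adds 5, giving 35 for that platform
# before averaging across platforms.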
def calculate_enhanced_platform_optimization_score(
platform_personas: Dict[str, Any],
linguistic_analysis: Dict[str, Any]
) -> int:
"""Calculate enhanced platform optimization score."""
if not platform_personas:
return 50
optimization_scores = []
for platform, persona in platform_personas.items():
if 'error' not in persona:
# Check for platform-specific optimizations
has_optimizations = any(key in persona for key in [
'platform_optimizations', 'content_guidelines', 'engagement_strategies'
])
base_score = 90 if has_optimizations else 60
# Boost if linguistic analysis shows good adaptation potential
if linguistic_analysis and linguistic_analysis.get('adaptation_potential', 0) > 0.8:
base_score = min(base_score + 10, 100)
optimization_scores.append(base_score)
return int(sum(optimization_scores) / len(optimization_scores)) if optimization_scores else 75
def calculate_linguistic_quality_score(linguistic_analysis: Dict[str, Any]) -> int:
"""Calculate linguistic quality score from enhanced analysis."""
if not linguistic_analysis:
return 50
# Score based on linguistic analysis completeness and quality indicators
completeness = linguistic_analysis.get('analysis_completeness', 0.5)
style_consistency = linguistic_analysis.get('style_consistency', 0.5)
vocabulary_sophistication = linguistic_analysis.get('vocabulary_sophistication', 0.5)
return int((completeness + style_consistency + vocabulary_sophistication) / 3 * 100)
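# Example for calculate_linguistic_quality_score (illustrative): completeness 0.9,
# style_consistency 0.8, vocabulary_sophistication 0.7 → int((0.9 + 0.8 + 0.7) / 3 * 100) = 80.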
def generate_enhanced_quality_recommendations(
core_completeness: int,
platform_consistency: int,
platform_optimization: int,
linguistic_quality: int,
linguistic_analysis: Dict[str, Any]
) -> List[str]:
"""Generate enhanced quality recommendations with linguistic insights."""
recommendations = []
if core_completeness < 85:
recommendations.append("Enhance core persona completeness with more detailed writing style characteristics")
if platform_consistency < 80:
recommendations.append("Improve brand voice consistency across platform adaptations")
if platform_optimization < 85:
recommendations.append("Strengthen platform-specific optimizations for better engagement")
if linguistic_quality < 80:
recommendations.append("Improve linguistic quality and writing style sophistication")
# Add linguistic-specific recommendations
if linguistic_analysis:
if linguistic_analysis.get('style_consistency', 0) < 0.7:
recommendations.append("Enhance writing style consistency across content samples")
if linguistic_analysis.get('vocabulary_sophistication', 0) < 0.7:
recommendations.append("Increase vocabulary sophistication for better engagement")
if not recommendations:
recommendations.append("Your personas show excellent quality across all metrics!")
return recommendations
async def attempt_quality_improvement(
core_persona: Dict[str, Any],
platform_personas: Dict[str, Any],
quality_metrics: Dict[str, Any],
quality_threshold: float
) -> Optional[Dict[str, Any]]:
"""
Attempt to improve persona quality if it doesn't meet the threshold.
"""
try:
logger.info("🔄 Attempting persona quality improvement...")
# Use PersonaQualityImprover for actual improvement
improvement_result = await asyncio.get_event_loop().run_in_executor(
None,
quality_improver.improve_persona_quality,
core_persona,
platform_personas,
quality_metrics
)
if improvement_result and "error" not in improvement_result:
logger.info("✅ Persona quality improvement successful")
return improvement_result
else:
logger.warning("⚠️ Persona quality improvement failed or no improvement needed")
return None
except Exception as e:
logger.error(f"❌ Error during quality improvement: {str(e)}")
return None
def extract_text_samples_for_analysis(onboarding_data: Dict[str, Any]) -> List[str]:
"""Extract comprehensive text samples for linguistic analysis."""
text_samples = []
# Extract from website analysis
website_analysis = onboarding_data.get('websiteAnalysis', {})
if isinstance(website_analysis, dict):
for key, value in website_analysis.items():
if isinstance(value, str) and len(value) > 50:
text_samples.append(value)
elif isinstance(value, list):
for item in value:
if isinstance(item, str) and len(item) > 50:
text_samples.append(item)
# Extract from competitor research
competitor_research = onboarding_data.get('competitorResearch', {})
if isinstance(competitor_research, dict):
competitors = competitor_research.get('competitors', [])
for competitor in competitors:
if isinstance(competitor, dict):
summary = competitor.get('summary', '')
if isinstance(summary, str) and len(summary) > 50:
text_samples.append(summary)
# Extract from sitemap analysis
sitemap_analysis = onboarding_data.get('sitemapAnalysis', {})
if isinstance(sitemap_analysis, dict):
for key, value in sitemap_analysis.items():
if isinstance(value, str) and len(value) > 50:
text_samples.append(value)
logger.info(f"📝 Extracted {len(text_samples)} text samples for linguistic analysis")
return text_samples
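# Example for extract_text_samples_for_analysis (illustrative): onboarding_data with
# {"websiteAnalysis": {"summary": "<60-char description>"}} and
# {"competitorResearch": {"competitors": [{"summary": "<60-char summary>"}]}} yields two samples;
# strings of 50 characters or fewer are ignored.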

View File

@@ -0,0 +1,277 @@
"""
Step Management Service
Handles onboarding step operations and progress tracking.
"""
from typing import Dict, Any, List, Optional
from fastapi import HTTPException
from loguru import logger
from services.onboarding.progress_service import get_onboarding_progress_service
from services.onboarding.database_service import OnboardingDatabaseService
from services.database import get_db
# NOTE: skip_step and validate_step_access below call get_onboarding_progress_for_user;
# it is assumed to be exposed by the progress service module — adjust this import if the
# helper is defined elsewhere in the codebase.
from services.onboarding.progress_service import get_onboarding_progress_for_user
class StepManagementService:
"""Service for handling onboarding step management."""
def __init__(self):
pass
async def get_onboarding_status(self, current_user: Dict[str, Any]) -> Dict[str, Any]:
"""Get the current onboarding status (per user)."""
try:
user_id = str(current_user.get('id'))
status = get_onboarding_progress_service().get_onboarding_status(user_id)
return {
"is_completed": status["is_completed"],
"current_step": status["current_step"],
"completion_percentage": status["completion_percentage"],
"next_step": 6 if status["is_completed"] else max(1, status["current_step"]),
"started_at": status["started_at"],
"completed_at": status["completed_at"],
"can_proceed_to_final": True if status["is_completed"] else status["current_step"] >= 5,
}
except Exception as e:
logger.error(f"Error getting onboarding status: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
async def get_onboarding_progress_full(self, current_user: Dict[str, Any]) -> Dict[str, Any]:
"""Get the full onboarding progress data."""
try:
user_id = str(current_user.get('id'))
progress_service = get_onboarding_progress_service()
status = progress_service.get_onboarding_status(user_id)
data = progress_service.get_completion_data(user_id)
def completed(b: bool) -> str:
return 'completed' if b else 'pending'
api_keys = data.get('api_keys') or {}
website = data.get('website_analysis') or {}
research = data.get('research_preferences') or {}
persona = data.get('persona_data') or {}
steps = [
{
"step_number": 1,
"title": "API Keys",
"description": "Connect your AI services",
"status": completed(any(v for v in api_keys.values() if v)),
"completed_at": None,
"data": None,
"validation_errors": []
},
{
"step_number": 2,
"title": "Website",
"description": "Set up your website",
"status": completed(bool(website.get('website_url') or website.get('writing_style'))),
"completed_at": None,
"data": website or None,
"validation_errors": []
},
{
"step_number": 3,
"title": "Research",
"description": "Discover competitors",
"status": completed(bool(research.get('research_depth') or research.get('content_types'))),
"completed_at": None,
"data": research or None,
"validation_errors": []
},
{
"step_number": 4,
"title": "Personalization",
"description": "Customize your experience",
"status": completed(bool(persona.get('corePersona') or persona.get('platformPersonas'))),
"completed_at": None,
"data": persona or None,
"validation_errors": []
},
{
"step_number": 5,
"title": "Integrations",
"description": "Connect additional services",
"status": completed(status['current_step'] >= 5),
"completed_at": None,
"data": None,
"validation_errors": []
},
{
"step_number": 6,
"title": "Finish",
"description": "Complete setup",
"status": completed(status['is_completed']),
"completed_at": status['completed_at'],
"data": None,
"validation_errors": []
}
]
return {
"steps": steps,
"current_step": 6 if status['is_completed'] else status['current_step'],
"started_at": status['started_at'],
"last_updated": status['last_updated'],
"is_completed": status['is_completed'],
"completed_at": status['completed_at'],
"completion_percentage": status['completion_percentage']
}
except Exception as e:
logger.error(f"Error getting onboarding progress: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
async def get_step_data(self, step_number: int, current_user: Dict[str, Any]) -> Dict[str, Any]:
"""Get data for a specific step."""
try:
user_id = str(current_user.get('id'))
db = next(get_db())
db_service = OnboardingDatabaseService()
if step_number == 2:
website = db_service.get_website_analysis(user_id, db) or {}
return {
"step_number": 2,
"title": "Website",
"description": "Set up your website",
"status": 'completed' if (website.get('website_url') or website.get('writing_style')) else 'pending',
"completed_at": None,
"data": website,
"validation_errors": []
}
if step_number == 3:
research = db_service.get_research_preferences(user_id, db) or {}
return {
"step_number": 3,
"title": "Research",
"description": "Discover competitors",
"status": 'completed' if (research.get('research_depth') or research.get('content_types')) else 'pending',
"completed_at": None,
"data": research,
"validation_errors": []
}
if step_number == 4:
persona = db_service.get_persona_data(user_id, db) or {}
return {
"step_number": 4,
"title": "Personalization",
"description": "Customize your experience",
"status": 'completed' if (persona.get('corePersona') or persona.get('platformPersonas')) else 'pending',
"completed_at": None,
"data": persona,
"validation_errors": []
}
status = get_onboarding_progress_service().get_onboarding_status(user_id)
mapping = {
1: ('API Keys', 'Connect your AI services', status['current_step'] >= 1),
5: ('Integrations', 'Connect additional services', status['current_step'] >= 5),
6: ('Finish', 'Complete setup', status['is_completed'])
}
title, description, done = mapping.get(step_number, (f'Step {step_number}', 'Onboarding step', False))
return {
"step_number": step_number,
"title": title,
"description": description,
"status": 'completed' if done else 'pending',
"completed_at": status['completed_at'] if step_number == 6 and done else None,
"data": None,
"validation_errors": []
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error getting step data: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
async def complete_step(self, step_number: int, request_data: Dict[str, Any], current_user: Dict[str, Any]) -> Dict[str, Any]:
"""Mark a step as completed."""
try:
logger.info(f"[complete_step] Completing step {step_number}")
user_id = str(current_user.get('id'))
# Optional validation
try:
from services.validation import validate_step_data
logger.info(f"[complete_step] Validating step {step_number} with data: {request_data}")
validation_errors = validate_step_data(step_number, request_data)
if validation_errors:
logger.warning(f"[complete_step] Step {step_number} validation failed: {validation_errors}")
raise HTTPException(status_code=400, detail=f"Step validation failed: {'; '.join(validation_errors)}")
except ImportError:
pass
db = next(get_db())
db_service = OnboardingDatabaseService()
# Step-specific side effects: save API keys to DB
if step_number == 1 and request_data and 'api_keys' in request_data:
api_keys = request_data['api_keys'] or {}
for provider, key in api_keys.items():
if key:
db_service.save_api_key(user_id, provider, key, db)
# Persist current step and progress in DB
db_service.update_step(user_id, step_number, db)
try:
progress_pct = min(100.0, round((step_number / 6) * 100))
db_service.update_progress(user_id, float(progress_pct), db)
except Exception:
pass
logger.info(f"[complete_step] Step {step_number} persisted to DB for user {user_id}")
return {
"message": "Step completed successfully",
"step_number": step_number,
"data": request_data or {}
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error completing step: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
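    # Example for complete_step (illustrative): complete_step(1, {"api_keys": {"gemini": "<key>"}}, current_user)
    # saves each non-empty key via save_api_key, then records step 1 and ~17% progress (round((1/6) * 100)) for the user.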
async def skip_step(self, step_number: int, current_user: Dict[str, Any]) -> Dict[str, Any]:
"""Skip a step (for optional steps)."""
try:
user_id = str(current_user.get('id'))
progress = get_onboarding_progress_for_user(user_id)
step = progress.get_step_data(step_number)
if not step:
raise HTTPException(status_code=404, detail=f"Step {step_number} not found")
# Mark step as skipped
progress.mark_step_skipped(step_number)
return {
"message": f"Step {step_number} skipped successfully",
"step_number": step_number
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error skipping step: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
async def validate_step_access(self, step_number: int, current_user: Dict[str, Any]) -> Dict[str, Any]:
"""Validate if user can access a specific step."""
try:
user_id = str(current_user.get('id'))
progress = get_onboarding_progress_for_user(user_id)
if not progress.can_proceed_to_step(step_number):
return {
"can_proceed": False,
"validation_errors": [f"Cannot proceed to step {step_number}. Complete previous steps first."],
"step_status": "locked"
}
return {
"can_proceed": True,
"validation_errors": [],
"step_status": "available"
}
except Exception as e:
logger.error(f"Error validating step access: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")