Added documentation for the auto-population feature and the analytics integration.

This commit is contained in:
ajaysi
2026-01-17 11:01:10 +05:30
parent 8193cdba67
commit 1db10ccd0f
61 changed files with 6773 additions and 579 deletions

View File

@@ -1,706 +0,0 @@
# ALwrity Onboarding System - API Reference
## Overview
This document provides a comprehensive API reference for the ALwrity Onboarding System. All endpoints require authentication and return JSON responses.
## 🔐 Authentication
All endpoints require a valid Clerk JWT token in the Authorization header:
```
Authorization: Bearer <clerk_jwt_token>
```
## 📋 Core Endpoints
### Onboarding Status
#### GET `/api/onboarding/status`
Get the current onboarding status for the authenticated user.
**Response:**
```json
{
"is_completed": false,
"current_step": 2,
"completion_percentage": 33.33,
"next_step": 3,
"started_at": "2024-01-15T10:30:00Z",
"completed_at": null,
"can_proceed_to_final": false
}
```
#### GET `/api/onboarding/progress`
Get the full onboarding progress data.
**Response:**
```json
{
"steps": [
{
"step_number": 1,
"title": "AI LLM Providers Setup",
"description": "Configure your AI services",
"status": "completed",
"completed_at": "2024-01-15T10:35:00Z",
"data": {...},
"validation_errors": []
}
],
"current_step": 2,
"started_at": "2024-01-15T10:30:00Z",
"last_updated": "2024-01-15T10:35:00Z",
"is_completed": false,
"completed_at": null
}
```
### Step Management
#### GET `/api/onboarding/step/{step_number}`
Get data for a specific step.
**Parameters:**
- `step_number` (int): The step number (1-6)
**Response:**
```json
{
"step_number": 1,
"title": "AI LLM Providers Setup",
"description": "Configure your AI services",
"status": "in_progress",
"completed_at": null,
"data": {...},
"validation_errors": []
}
```
#### POST `/api/onboarding/step/{step_number}/complete`
Mark a step as completed.
**Parameters:**
- `step_number` (int): The step number (1-6)
**Request Body:**
```json
{
"data": {
"api_keys": {
"gemini": "your_gemini_key",
"exa": "your_exa_key",
"copilotkit": "your_copilotkit_key"
}
},
"validation_errors": []
}
```
**Response:**
```json
{
"message": "Step 1 completed successfully",
"step_number": 1,
"data": {...}
}
```
#### POST `/api/onboarding/step/{step_number}/skip`
Skip a step (for optional steps).
**Parameters:**
- `step_number` (int): The step number (1-6)
**Response:**
```json
{
"message": "Step 5 skipped successfully",
"step_number": 5
}
```
#### GET `/api/onboarding/step/{step_number}/validate`
Validate if user can access a specific step.
**Parameters:**
- `step_number` (int): The step number (1-6)
**Response:**
```json
{
"can_proceed": true,
"validation_errors": [],
"step_status": "available"
}
```
### Onboarding Control
#### POST `/api/onboarding/start`
Start a new onboarding session.
**Response:**
```json
{
"message": "Onboarding started successfully",
"current_step": 1,
"started_at": "2024-01-15T10:30:00Z"
}
```
#### POST `/api/onboarding/reset`
Reset the onboarding progress.
**Response:**
```json
{
"message": "Onboarding progress reset successfully",
"current_step": 1,
"started_at": "2024-01-15T10:30:00Z"
}
```
#### GET `/api/onboarding/resume`
Get information for resuming onboarding.
**Response:**
```json
{
"can_resume": true,
"resume_step": 2,
"current_step": 2,
"completion_percentage": 33.33,
"started_at": "2024-01-15T10:30:00Z",
"last_updated": "2024-01-15T10:35:00Z"
}
```
#### POST `/api/onboarding/complete`
Complete the onboarding process.
**Response:**
```json
{
"message": "Onboarding completed successfully",
"completion_data": {...},
"persona_generated": true,
"environment_setup": true
}
```
## 🔑 API Key Management
### GET `/api/onboarding/api-keys`
Get all configured API keys (masked for security).
**Response:**
```json
{
"api_keys": {
"gemini": "********************abcd",
"exa": "********************efgh",
"copilotkit": "********************ijkl"
},
"total_providers": 3,
"configured_providers": ["gemini", "exa", "copilotkit"]
}
```
### POST `/api/onboarding/api-keys`
Save an API key for a provider.
**Request Body:**
```json
{
"provider": "gemini",
"api_key": "your_api_key_here",
"description": "Gemini API key for content generation"
}
```
**Response:**
```json
{
"message": "API key for gemini saved successfully",
"provider": "gemini",
"status": "saved"
}
```
### GET `/api/onboarding/api-keys/validate`
Validate all configured API keys.
**Response:**
```json
{
"validation_results": {
"gemini": {
"valid": true,
"status": "active",
"quota_remaining": 1000
},
"exa": {
"valid": true,
"status": "active",
"quota_remaining": 500
}
},
"all_valid": true,
"total_providers": 2
}
```
## ⚙️ Configuration
### GET `/api/onboarding/config`
Get onboarding configuration and requirements.
**Response:**
```json
{
"total_steps": 6,
"required_steps": [1, 2, 3, 4, 6],
"optional_steps": [5],
"step_requirements": {
"1": ["gemini", "exa", "copilotkit"],
"2": ["website_url"],
"3": ["research_preferences"],
"4": ["personalization_settings"],
"5": ["integrations"],
"6": ["persona_generation"]
}
}
```
### GET `/api/onboarding/providers`
Get setup information for all providers.
**Response:**
```json
{
"providers": {
"gemini": {
"name": "Gemini AI",
"description": "Advanced content generation",
"setup_url": "https://ai.google.dev/",
"required": true,
"validation_endpoint": "https://generativelanguage.googleapis.com/v1beta/models"
},
"exa": {
"name": "Exa AI",
"description": "Intelligent web research",
"setup_url": "https://exa.ai/",
"required": true,
"validation_endpoint": "https://api.exa.ai/v1/search"
}
}
}
```
### GET `/api/onboarding/providers/{provider}`
Get setup information for a specific provider.
**Parameters:**
- `provider` (string): Provider name (gemini, exa, copilotkit)
**Response:**
```json
{
"name": "Gemini AI",
"description": "Advanced content generation",
"setup_url": "https://ai.google.dev/",
"required": true,
"validation_endpoint": "https://generativelanguage.googleapis.com/v1beta/models",
"setup_instructions": [
"Visit Google AI Studio",
"Create a new API key",
"Copy the API key",
"Paste it in the form above"
]
}
```
### POST `/api/onboarding/providers/{provider}/validate`
Validate a specific provider's API key.
**Parameters:**
- `provider` (string): Provider name (gemini, exa, copilotkit)
**Request Body:**
```json
{
"api_key": "your_api_key_here"
}
```
**Response:**
```json
{
"valid": true,
"status": "active",
"quota_remaining": 1000,
"provider": "gemini"
}
```
## 📊 Summary & Analytics
### GET `/api/onboarding/summary`
Get comprehensive onboarding summary for the final step.
**Response:**
```json
{
"user_info": {
"user_id": "user_123",
"onboarding_started": "2024-01-15T10:30:00Z",
"current_step": 6
},
"api_keys": {
"gemini": "configured",
"exa": "configured",
"copilotkit": "configured"
},
"website_analysis": {
"url": "https://example.com",
"status": "completed",
"style_analysis": "professional",
"content_count": 25
},
"research_preferences": {
"depth": "comprehensive",
"auto_research": true,
"fact_checking": true
},
"personalization": {
"brand_voice": "professional",
"target_audience": "B2B professionals",
"content_types": ["blog_posts", "social_media"]
}
}
```
### GET `/api/onboarding/website-analysis`
Get website analysis data.
**Response:**
```json
{
"url": "https://example.com",
"analysis_status": "completed",
"content_analyzed": 25,
"style_characteristics": {
"tone": "professional",
"voice": "authoritative",
"complexity": "intermediate"
},
"target_audience": "B2B professionals",
"content_themes": ["technology", "business", "innovation"]
}
```
### GET `/api/onboarding/research-preferences`
Get research preferences data.
**Response:**
```json
{
"research_depth": "comprehensive",
"auto_research_enabled": true,
"fact_checking_enabled": true,
"content_types": ["blog_posts", "articles", "social_media"],
"research_sources": ["web", "academic", "news"]
}
```
## 👤 Business Information
### POST `/api/onboarding/business-info`
Save business information for users without websites.
**Request Body:**
```json
{
"business_name": "Acme Corp",
"industry": "Technology",
"description": "AI-powered solutions",
"target_audience": "B2B professionals",
"brand_voice": "professional",
"content_goals": ["lead_generation", "brand_awareness"]
}
```
**Response:**
```json
{
"id": 1,
"business_name": "Acme Corp",
"industry": "Technology",
"description": "AI-powered solutions",
"target_audience": "B2B professionals",
"brand_voice": "professional",
"content_goals": ["lead_generation", "brand_awareness"],
"created_at": "2024-01-15T10:30:00Z"
}
```
### GET `/api/onboarding/business-info/{id}`
Get business information by ID.
**Parameters:**
- `id` (int): Business information ID
**Response:**
```json
{
"id": 1,
"business_name": "Acme Corp",
"industry": "Technology",
"description": "AI-powered solutions",
"target_audience": "B2B professionals",
"brand_voice": "professional",
"content_goals": ["lead_generation", "brand_awareness"],
"created_at": "2024-01-15T10:30:00Z",
"updated_at": "2024-01-15T10:30:00Z"
}
```
### GET `/api/onboarding/business-info/user/{user_id}`
Get business information by user ID.
**Parameters:**
- `user_id` (int): User ID
**Response:**
```json
{
"id": 1,
"business_name": "Acme Corp",
"industry": "Technology",
"description": "AI-powered solutions",
"target_audience": "B2B professionals",
"brand_voice": "professional",
"content_goals": ["lead_generation", "brand_awareness"],
"created_at": "2024-01-15T10:30:00Z",
"updated_at": "2024-01-15T10:30:00Z"
}
```
### PUT `/api/onboarding/business-info/{id}`
Update business information.
**Parameters:**
- `id` (int): Business information ID
**Request Body:**
```json
{
"business_name": "Acme Corp Updated",
"industry": "Technology",
"description": "Updated AI-powered solutions",
"target_audience": "B2B professionals",
"brand_voice": "professional",
"content_goals": ["lead_generation", "brand_awareness", "thought_leadership"]
}
```
**Response:**
```json
{
"id": 1,
"business_name": "Acme Corp Updated",
"industry": "Technology",
"description": "Updated AI-powered solutions",
"target_audience": "B2B professionals",
"brand_voice": "professional",
"content_goals": ["lead_generation", "brand_awareness", "thought_leadership"],
"created_at": "2024-01-15T10:30:00Z",
"updated_at": "2024-01-15T11:00:00Z"
}
```
## 🎭 Persona Management
### GET `/api/onboarding/persona/readiness/{user_id}`
Check if user has sufficient data for persona generation.
**Parameters:**
- `user_id` (int): User ID
**Response:**
```json
{
"ready": true,
"missing_data": [],
"completion_percentage": 100,
"recommendations": []
}
```
### GET `/api/onboarding/persona/preview/{user_id}`
Generate a preview of the writing persona without saving.
**Parameters:**
- `user_id` (int): User ID
**Response:**
```json
{
"persona_preview": {
"name": "Professional Content Creator",
"voice": "authoritative",
"tone": "professional",
"style_characteristics": {
"formality": "high",
"complexity": "intermediate",
"engagement": "informative"
},
"content_preferences": {
"length": "medium",
"format": "structured",
"research_depth": "comprehensive"
}
},
"generation_time": "2.5s",
"confidence_score": 0.95
}
```
### POST `/api/onboarding/persona/generate/{user_id}`
Generate and save a writing persona from onboarding data.
**Parameters:**
- `user_id` (int): User ID
**Response:**
```json
{
"persona_id": 1,
"name": "Professional Content Creator",
"voice": "authoritative",
"tone": "professional",
"style_characteristics": {...},
"content_preferences": {...},
"created_at": "2024-01-15T10:30:00Z",
"status": "active"
}
```
### GET `/api/onboarding/persona/user/{user_id}`
Get all writing personas for the user.
**Parameters:**
- `user_id` (int): User ID
**Response:**
```json
{
"personas": [
{
"id": 1,
"name": "Professional Content Creator",
"voice": "authoritative",
"tone": "professional",
"status": "active",
"created_at": "2024-01-15T10:30:00Z"
}
],
"total_count": 1,
"active_persona": 1
}
```
## 🚨 Error Responses
### 400 Bad Request
```json
{
"detail": "Invalid request data",
"error_code": "INVALID_REQUEST",
"validation_errors": [
"Field 'api_key' is required",
"Field 'provider' must be one of: gemini, exa, copilotkit"
]
}
```
### 401 Unauthorized
```json
{
"detail": "Authentication required",
"error_code": "UNAUTHORIZED"
}
```
### 404 Not Found
```json
{
"detail": "Step 7 not found",
"error_code": "STEP_NOT_FOUND"
}
```
### 500 Internal Server Error
```json
{
"detail": "Internal server error",
"error_code": "INTERNAL_ERROR"
}
```
## 📝 Request/Response Models
### StepCompletionRequest
```json
{
"data": {
"api_keys": {
"gemini": "string",
"exa": "string",
"copilotkit": "string"
}
},
"validation_errors": ["string"]
}
```
### APIKeyRequest
```json
{
"provider": "string",
"api_key": "string",
"description": "string"
}
```
### BusinessInfoRequest
```json
{
"business_name": "string",
"industry": "string",
"description": "string",
"target_audience": "string",
"brand_voice": "string",
"content_goals": ["string"]
}
```
## 🔄 Rate Limiting
- **Standard endpoints**: 100 requests per minute
- **API key validation**: 10 requests per minute
- **Persona generation**: 5 requests per minute
## 📊 Response Times
- **Status checks**: < 100ms
- **Step completion**: < 500ms
- **API key validation**: < 2s
- **Persona generation**: < 10s
- **Website analysis**: < 30s
---
*This API reference provides comprehensive documentation for all onboarding endpoints. For additional support, please refer to the main project documentation or contact the development team.*

View File

@@ -1,330 +0,0 @@
# ALwrity Onboarding System - Developer Guide
## Architecture Overview
The ALwrity Onboarding System is built with a modular, service-based architecture that separates concerns and promotes maintainability. The system is designed to handle user isolation, progressive setup, and comprehensive onboarding workflows.
## 🏗️ System Architecture
### Core Components
```
backend/api/onboarding_utils/
├── __init__.py # Package initialization
├── onboarding_completion_service.py # Final onboarding completion logic
├── onboarding_summary_service.py # Comprehensive summary generation
├── onboarding_config_service.py # Configuration and provider management
├── business_info_service.py # Business information CRUD operations
├── api_key_management_service.py # API key operations and validation
├── step_management_service.py # Step progression and validation
├── onboarding_control_service.py # Onboarding session management
├── persona_management_service.py # Persona generation and management
├── README.md # End-user documentation
└── DEVELOPER_GUIDE.md # This file
```
### Service Responsibilities
#### 1. OnboardingCompletionService
**Purpose**: Handles the complex logic for completing the onboarding process
**Key Methods**:
- `complete_onboarding()` - Main completion logic with validation
- `_validate_required_steps()` - Ensures all required steps are completed
- `_validate_api_keys()` - Validates API key configuration
- `_generate_persona_from_onboarding()` - Generates writing persona
#### 2. OnboardingSummaryService
**Purpose**: Generates comprehensive onboarding summaries for the final step
**Key Methods**:
- `get_onboarding_summary()` - Main summary generation
- `_get_api_keys()` - Retrieves configured API keys
- `_get_website_analysis()` - Gets website analysis data
- `_get_research_preferences()` - Retrieves research preferences
- `_check_persona_readiness()` - Validates persona generation readiness
#### 3. OnboardingConfigService
**Purpose**: Manages onboarding configuration and provider setup information
**Key Methods**:
- `get_onboarding_config()` - Returns complete onboarding configuration
- `get_provider_setup_info()` - Provider-specific setup information
- `get_all_providers_info()` - All available providers
- `validate_provider_key()` - API key validation
- `get_enhanced_validation_status()` - Comprehensive validation status
#### 4. BusinessInfoService
**Purpose**: Handles business information management for users without websites
**Key Methods**:
- `save_business_info()` - Create new business information
- `get_business_info()` - Retrieve by ID
- `get_business_info_by_user()` - Retrieve by user ID
- `update_business_info()` - Update existing information
#### 5. APIKeyManagementService
**Purpose**: Manages API key operations with caching and security
**Key Methods**:
- `get_api_keys()` - Retrieves masked API keys with caching
- `save_api_key()` - Saves new API keys securely
- `validate_api_keys()` - Validates all configured keys
#### 6. StepManagementService
**Purpose**: Controls step progression and validation
**Key Methods**:
- `get_onboarding_status()` - Current onboarding status
- `get_onboarding_progress_full()` - Complete progress data
- `get_step_data()` - Specific step information
- `complete_step()` - Mark step as completed with environment setup
- `skip_step()` - Skip optional steps
- `validate_step_access()` - Validate step accessibility
#### 7. OnboardingControlService
**Purpose**: Manages onboarding session control
**Key Methods**:
- `start_onboarding()` - Initialize new onboarding session
- `reset_onboarding()` - Reset onboarding progress
- `get_resume_info()` - Resume information for incomplete sessions
#### 8. PersonaManagementService
**Purpose**: Handles persona generation and management
**Key Methods**:
- `check_persona_generation_readiness()` - Validate persona readiness
- `generate_persona_preview()` - Generate preview without saving
- `generate_writing_persona()` - Generate and save persona
- `get_user_writing_personas()` - Retrieve user personas
## 🔧 Integration Points
### Progressive Setup Integration
The onboarding system integrates with the progressive setup service:
```python
# In step_management_service.py
from services.progressive_setup_service import ProgressiveSetupService

# Initialize/upgrade user environment based on new step
if step_number == 1:
    setup_service.initialize_user_environment(user_id)
else:
    setup_service.upgrade_user_environment(user_id, step_number)
```
### User Isolation
Each user gets their own:
- **Workspace**: `lib/workspace/users/user_<id>/`
- **Database Tables**: `user_<id>_*` tables
- **Configuration**: User-specific settings
- **Progress**: Individual onboarding progress
### Authentication Integration
All services require authentication:
```python
from middleware.auth_middleware import get_current_user

async def endpoint_function(current_user: Dict[str, Any] = Depends(get_current_user)):
    user_id = str(current_user.get('id'))
    # Service logic here
```
## 📊 Data Flow
### 1. Onboarding Initialization
```
User Login → Authentication → Check Onboarding Status → Redirect to Appropriate Step
```
### 2. Step Completion
```
User Completes Step → Validate Step → Save Progress → Setup User Environment → Return Success
```
### 3. Environment Setup
```
Step Completed → Progressive Setup Service → User Workspace Creation → Feature Activation
```
### 4. Final Completion
```
All Steps Complete → Validation → Persona Generation → Environment Finalization → Onboarding Complete
```
## 🛠️ Development Guidelines
### Adding New Services
1. **Create Service Class**:
```python
class NewService:
    def __init__(self):
        # Initialize dependencies
        pass

    async def main_method(self, params):
        # Main functionality
        pass
```
2. **Update __init__.py**:
```python
from .new_service import NewService
__all__ = [
# ... existing services
'NewService'
]
```
3. **Update Main Onboarding File**:
```python
async def new_endpoint():
    try:
        from onboarding_utils.new_service import NewService
        service = NewService()
        return await service.main_method()
    except Exception as e:
        logger.error(f"Error: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")
```
### Error Handling Pattern
All services follow a consistent error handling pattern:
```python
try:
    # Service logic
    return result
except HTTPException:
    raise  # Re-raise HTTP exceptions
except Exception as e:
    logger.error(f"Error in service: {str(e)}")
    raise HTTPException(status_code=500, detail="Internal server error")
```
### Logging Guidelines
Use structured logging with context:
```python
logger.info(f"[service_name] Action for user {user_id}")
logger.success(f"✅ Operation completed for user {user_id}")
logger.warning(f"⚠️ Non-critical issue: {issue}")
logger.error(f"❌ Error in operation: {str(e)}")
```
## 🧪 Testing
### Unit Testing
Each service should have comprehensive unit tests:
```python
import pytest
from onboarding_utils.step_management_service import StepManagementService


class TestStepManagementService:
    def setup_method(self):
        self.service = StepManagementService()

    async def test_get_onboarding_status(self):
        # Test implementation
        pass
```
### Integration Testing
Test service interactions:
```python
async def test_complete_onboarding_flow():
    # Test complete onboarding workflow
    pass
```
## 🔒 Security Considerations
### API Key Security
- Keys are masked in responses
- Keys are encrypted before storage
- Keys are transmitted only over secure channels
### User Data Isolation
- User-specific workspaces
- Isolated database tables
- No cross-user data access
### Input Validation
- Validate all user inputs
- Sanitize data before processing
- Use Pydantic models for validation
## 📈 Performance Optimization
### Caching Strategy
- API key responses cached for 30 seconds
- User progress cached in memory
- Database queries optimized
### Database Optimization
- User-specific table indexing
- Efficient query patterns
- Connection pooling
### Resource Management
- Proper database session handling
- Memory-efficient data processing
- Background task optimization
## 🚀 Deployment Considerations
### Environment Variables
```bash
# Required for onboarding
CLERK_PUBLISHABLE_KEY=your_key
CLERK_SECRET_KEY=your_secret
GEMINI_API_KEY=your_gemini_key
EXA_API_KEY=your_exa_key
COPILOTKIT_API_KEY=your_copilotkit_key
```
### Database Setup
- User-specific tables created on demand
- Progressive table creation based on onboarding progress
- Automatic cleanup on user deletion
### Monitoring
- Track onboarding completion rates
- Monitor step abandonment points
- Performance metrics for each service
## 🔄 Maintenance
### Regular Tasks
- Review and update API key validation
- Monitor service performance
- Update documentation
- Clean up abandoned onboarding sessions
### Version Updates
- Maintain backward compatibility
- Gradual feature rollouts
- User migration strategies
## 📚 Additional Resources
### Related Documentation
- [User Environment Setup](../services/user_workspace_manager.py)
- [Progressive Setup Service](../services/progressive_setup_service.py)
- [Authentication Middleware](../middleware/auth_middleware.py)
### External Dependencies
- FastAPI for API framework
- SQLAlchemy for database operations
- Pydantic for data validation
- Loguru for logging
---
*This developer guide provides comprehensive information for maintaining and extending the ALwrity Onboarding System. For questions or contributions, please refer to the main project documentation.*

View File

@@ -1,184 +0,0 @@
# 🚀 Persona Generation Optimization Summary
## 📊 **Issues Identified & Fixed**
### **1. spaCy Dependency Issue**
**Problem**: `ModuleNotFoundError: No module named 'spacy'`
**Solution**: Made spaCy an optional dependency with graceful fallback
- ✅ spaCy is now optional - system works with NLTK only
- ✅ Graceful degradation when spaCy is not available
- ✅ Enhanced linguistic analysis when spaCy is present
### **2. API Call Optimization**
**Problem**: Too many sequential API calls
**Previous**: 1 (core) + N (platforms) + 1 (quality) = N + 2 API calls
**Optimized**: 1 (comprehensive) = 1 API call total
### **3. Parallel Execution**
**Problem**: Sequential platform persona generation
**Solution**: Parallel execution for all platform adaptations
## 🎯 **Optimization Strategies**
### **Strategy 1: Single Comprehensive API Call**
```python
# OLD APPROACH (N + 2 API calls)
core_persona = generate_core_persona() # 1 API call
for platform in platforms:
    platform_persona = generate_platform_persona() # N API calls
quality_metrics = assess_quality() # 1 API call

# NEW APPROACH (1 API call)
comprehensive_response = generate_all_personas() # 1 API call
```
### **Strategy 2: Rule-Based Quality Assessment**
```python
# OLD: API-based quality assessment
quality_metrics = await llm_assess_quality() # 1 API call
# NEW: Rule-based assessment
quality_metrics = assess_persona_quality_rule_based() # 0 API calls
```
### **Strategy 3: Parallel Execution**
```python
# OLD: Sequential execution
for platform in platforms:
    await generate_platform_persona(platform)

# NEW: Parallel execution
tasks = [generate_platform_persona_async(platform) for platform in platforms]
results = await asyncio.gather(*tasks)
```
## 📈 **Performance Improvements**
| Metric | Before | After | Improvement |
|--------|--------|-------|-------------|
| **API Calls** | N + 2 | 1 | 67–86% reduction (N = 1–5 platforms) |
| **Execution Time** | Sequential | Parallel | ~60% faster |
| **Dependencies** | Required spaCy | Optional spaCy | More reliable |
| **Quality Assessment** | LLM-based | Rule-based | No API call required |
### **Real-World Examples:**
- **3 Platforms**: 5 API calls → 1 API call (80% reduction)
- **5 Platforms**: 7 API calls → 1 API call (~86% reduction)
- **Execution Time**: ~15 seconds → ~5 seconds (67% faster)
## 🔧 **Technical Implementation**
### **1. spaCy Dependency Fix**
```python
class EnhancedLinguisticAnalyzer:
    def __init__(self):
        self.spacy_available = False
        try:
            import spacy
            self.nlp = spacy.load("en_core_web_sm")
            self.spacy_available = True
        except (ImportError, OSError) as e:
            logger.warning(f"spaCy not available: {e}. Using NLTK-only analysis.")
            self.spacy_available = False
```
### **2. Comprehensive Prompt Strategy**
```python
def build_comprehensive_persona_prompt(onboarding_data, platforms):
    return f"""
    Generate a comprehensive AI writing persona system:
    1. CORE PERSONA: {onboarding_data}
    2. PLATFORM ADAPTATIONS: {platforms}
    3. Single response with all personas
    """
```
### **3. Rule-Based Quality Assessment**
```python
def assess_persona_quality_rule_based(core_persona, platform_personas):
    core_completeness = calculate_completeness_score(core_persona)
    platform_consistency = calculate_consistency_score(core_persona, platform_personas)
    platform_optimization = calculate_platform_optimization_score(platform_personas)
    return {
        "overall_score": (core_completeness + platform_consistency + platform_optimization) / 3,
        "recommendations": generate_recommendations(...)
    }
```
## 🎯 **API Call Analysis**
### **Previous Implementation:**
```
Step 1: Core Persona Generation → 1 API call
Step 2: Platform Adaptations → N API calls (sequential)
Step 3: Quality Assessment → 1 API call
Total: 1 + N + 1 = N + 2 API calls
```
### **Optimized Implementation:**
```
Step 1: Comprehensive Generation → 1 API call (core + all platforms)
Step 2: Rule-Based Quality Assessment → 0 API calls
Total: 1 API call
```
### **Parallel Execution (Alternative):**
```
Step 1: Core Persona Generation → 1 API call
Step 2: Platform Adaptations → N API calls (parallel)
Step 3: Rule-Based Quality Assessment → 0 API calls
Total: 1 + N API calls (but parallel execution)
```
## 🚀 **Benefits**
### **1. Performance**
- **80%+ fewer API calls** for 3+ platforms (N + 2 calls reduced to 1)
- **60% faster execution** through parallelization
- **Quality assessment with zero API calls** (rule-based instead of LLM-based)
### **2. Reliability**
- **No spaCy dependency issues** - graceful fallback
- **Better error handling** - individual platform failures don't break entire process
- **More predictable execution time**
### **3. Cost Efficiency**
- **Significant cost reduction** from fewer API calls
- **Better resource utilization** through parallel execution
- **Scalable** - performance improvement increases with more platforms
### **4. User Experience**
- **Faster persona generation** - users get results quicker
- **More reliable** - fewer dependency issues
- **Better quality metrics** - rule-based assessment is consistent
## 📋 **Implementation Options**
### **Option 1: Ultra-Optimized (Recommended)**
- **File**: `step4_persona_routes_optimized.py`
- **API Calls**: 1 total
- **Best for**: Production environments, cost optimization
- **Trade-off**: Single large prompt vs multiple focused prompts
### **Option 2: Parallel Optimized**
- **File**: `step4_persona_routes.py` (updated)
- **API Calls**: 1 + N (parallel)
- **Best for**: When platform-specific optimization is critical
- **Trade-off**: More API calls but better platform specialization
### **Option 3: Hybrid Approach**
- **Core persona**: Single API call
- **Platform adaptations**: Parallel API calls
- **Quality assessment**: Rule-based
- **Best for**: Balanced approach
## 🎯 **Recommendation**
**Use Option 1 (Ultra-Optimized)** for the best performance and cost efficiency:
- 1 API call total
- 70% cost reduction
- 60% faster execution
- Reliable and scalable
The optimized approach maintains quality while dramatically improving performance and reducing costs.

View File

@@ -1,269 +0,0 @@
# ALwrity Onboarding System
## Overview
The ALwrity Onboarding System is a comprehensive, user-friendly process designed to get new users up and running with AI-powered content creation capabilities. This system guides users through a structured 6-step process to configure their AI services, analyze their content style, and set up personalized content creation workflows.
## 🎯 What is Onboarding?
Onboarding is your first-time setup experience with ALwrity. It's designed to:
- **Configure your AI services** (Gemini, Exa, CopilotKit)
- **Analyze your existing content** to understand your writing style
- **Set up research preferences** for intelligent content creation
- **Personalize your experience** based on your brand and audience
- **Connect integrations** for seamless content publishing
- **Generate your writing persona** for consistent, on-brand content
## 📋 The 6-Step Onboarding Process
### Step 1: AI LLM Providers Setup
**Purpose**: Connect your AI services to enable intelligent content creation
**What you'll do**:
- Configure **Gemini API** for advanced content generation
- Set up **Exa AI** for intelligent web research
- Connect **CopilotKit** for AI-powered assistance
**Why it's important**: These services work together to provide comprehensive AI functionality for content creation, research, and assistance.
**Requirements**: All three services are mandatory to proceed.
### Step 2: Website Analysis
**Purpose**: Analyze your existing content to understand your writing style and brand voice
**What you'll do**:
- Provide your website URL
- Let ALwrity analyze your existing content
- Review style analysis results
**What ALwrity does**:
- Crawls your website content
- Analyzes writing patterns, tone, and voice
- Identifies your target audience
- Generates style guidelines for consistent content
**Benefits**: Ensures all AI-generated content matches your existing brand voice and style.
### Step 3: AI Research Configuration
**Purpose**: Set up intelligent research capabilities for fact-based content creation
**What you'll do**:
- Choose research depth (Basic, Standard, Comprehensive, Expert)
- Select content types you create
- Configure auto-research preferences
- Enable factual content verification
**Benefits**: Ensures your content is well-researched, accurate, and up-to-date.
### Step 4: Personalization Setup
**Purpose**: Customize ALwrity to match your specific needs and preferences
**What you'll do**:
- Set posting preferences (frequency, timing)
- Configure content types and formats
- Define your target audience
- Set brand voice parameters
**Benefits**: Creates a personalized experience that matches your content strategy.
### Step 5: Integrations (Optional)
**Purpose**: Connect external platforms for seamless content publishing
**Available integrations**:
- **Wix** - Direct publishing to your Wix website
- **LinkedIn** - Automated LinkedIn content posting
- **WordPress** - WordPress site integration
- **Other platforms** - Additional integrations as available
**Benefits**: Streamlines your content workflow from creation to publication.
### Step 6: Complete Setup
**Purpose**: Finalize your onboarding and generate your writing persona
**What happens**:
- Validates all required configurations
- Generates your personalized writing persona
- Sets up your user workspace
- Activates all configured features
**Result**: You're ready to start creating AI-powered content that matches your brand!
## 🔧 Technical Architecture
### Service-Based Design
The onboarding system is built with a modular, service-based architecture:
```
onboarding_utils/
├── onboarding_completion_service.py # Handles final onboarding completion
├── onboarding_summary_service.py # Generates comprehensive summaries
├── onboarding_config_service.py # Manages configuration and providers
├── business_info_service.py # Handles business information
├── api_key_management_service.py # Manages API key operations
├── step_management_service.py # Controls step progression
├── onboarding_control_service.py # Manages onboarding sessions
└── persona_management_service.py # Handles persona generation
```
### Key Features
- **User Isolation**: Each user gets their own workspace and configuration
- **Progressive Setup**: Features are enabled incrementally based on progress
- **Persistent Storage**: All settings are saved and persist across sessions
- **Validation**: Comprehensive validation at each step
- **Error Handling**: Graceful error handling with helpful messages
- **Security**: API keys are encrypted and stored securely
## 🚀 Getting Started
### For New Users
1. **Sign up** with your preferred authentication method
2. **Start onboarding** - You'll be automatically redirected
3. **Follow the 6-step process** - Each step builds on the previous
4. **Complete setup** - Generate your writing persona
5. **Start creating** - Begin using ALwrity's AI-powered features
### For Returning Users
- **Resume onboarding** - Continue where you left off
- **Skip optional steps** - Focus on what you need
- **Update configurations** - Modify settings anytime
- **Add integrations** - Connect new platforms as needed
## 📊 Progress Tracking
The system tracks your progress through:
- **Step completion status** - See which steps are done
- **Progress percentage** - Visual progress indicator
- **Validation status** - Know what needs attention
- **Resume information** - Pick up where you left off
## 🔒 Security & Privacy
- **API Key Encryption**: All API keys are encrypted before storage
- **User Isolation**: Your data is completely separate from other users
- **Secure Storage**: Data is stored securely on your device
- **No Data Sharing**: Your content and preferences are never shared
## 🛠️ Troubleshooting
### Common Issues
**"Cannot proceed to next step"**
- Complete all required fields in the current step
- Ensure API keys are valid and working
- Check for any validation errors
**"API key validation failed"**
- Verify your API key is correct
- Check if the service is available
- Ensure you have sufficient credits/quota
**"Website analysis failed"**
- Ensure your website is publicly accessible
- Check if the URL is correct
- Try again after a few minutes
### Getting Help
- **In-app help** - Use the "Get Help" button in each step
- **Documentation** - Check the detailed setup guides
- **Support** - Contact support for technical issues
## 🎨 Customization Options
### Writing Style
- **Tone**: Professional, Casual, Friendly, Authoritative
- **Voice**: First-person, Third-person, Brand voice
- **Complexity**: Simple, Intermediate, Advanced, Expert
### Content Preferences
- **Length**: Short, Medium, Long, Variable
- **Format**: Blog posts, Social media, Emails, Articles
- **Frequency**: Daily, Weekly, Monthly, Custom
### Research Settings
- **Depth**: Basic, Standard, Comprehensive, Expert
- **Sources**: Web, Academic, News, Social media
- **Verification**: Auto-fact-check, Manual review, AI-assisted
## 📈 Benefits of Completing Onboarding
### Immediate Benefits
- **AI-Powered Content Creation** - Generate high-quality content instantly
- **Style Consistency** - All content matches your brand voice
- **Research Integration** - Fact-based, well-researched content
- **Time Savings** - Reduce content creation time by 80%
### Long-term Benefits
- **Brand Consistency** - Maintain consistent voice across all content
- **Scalability** - Create more content without sacrificing quality
- **Efficiency** - Streamlined workflow from idea to publication
- **Growth** - Focus on strategy while AI handles execution
## 🔄 Updating Your Configuration
You can update your onboarding settings anytime:
- **API Keys** - Update or add new service keys
- **Website Analysis** - Re-analyze your content for style updates
- **Research Preferences** - Adjust research depth and sources
- **Personalization** - Update your brand voice and preferences
- **Integrations** - Add or remove platform connections
## 📞 Support & Resources
### Documentation
- **Setup Guides** - Step-by-step configuration instructions
- **API Documentation** - Technical reference for developers
- **Best Practices** - Tips for optimal onboarding experience
### Community
- **User Forum** - Connect with other ALwrity users
- **Feature Requests** - Suggest improvements
- **Success Stories** - Learn from other users' experiences
### Support Channels
- **In-app Support** - Get help directly within ALwrity
- **Email Support** - support@alwrity.com
- **Live Chat** - Available during business hours
- **Video Tutorials** - Visual guides for complex setups
## 🎯 Success Metrics
We measure the success of the onboarding experience with these metrics:
- **Completion Rate** - Percentage of users who complete onboarding
- **Time to Value** - How quickly users see benefits
- **Feature Adoption** - Which features users engage with
- **Satisfaction Score** - User feedback on the experience
## 🔮 Future Enhancements
We're constantly improving the onboarding experience:
- **Smart Recommendations** - AI-suggested configurations
- **Template Library** - Pre-built setups for different industries
- **Advanced Analytics** - Detailed insights into your content performance
- **Mobile Experience** - Optimized mobile onboarding flow
- **Voice Setup** - Voice-based configuration for accessibility
---
## Quick Start Checklist
- [ ] **Step 1**: Configure Gemini, Exa, and CopilotKit API keys
- [ ] **Step 2**: Provide website URL for style analysis
- [ ] **Step 3**: Set research preferences and content types
- [ ] **Step 4**: Configure personalization settings
- [ ] **Step 5**: Connect desired integrations (optional)
- [ ] **Step 6**: Complete setup and generate writing persona
**🎉 You're ready to create amazing AI-powered content!**
---
*This onboarding system is designed to get you up and running quickly while ensuring your content maintains your unique brand voice and style. Take your time with each step - the more accurate your configuration, the better your AI-generated content will be.*

View File

@@ -1,6 +1,7 @@
from typing import Dict, Any
from loguru import logger
from fastapi import HTTPException
from fastapi import HTTPException, Depends
from middleware.auth_middleware import get_current_user
async def complete_step(step_number: int, request_data: Dict[str, Any], current_user: Dict[str, Any]):
@@ -57,11 +58,11 @@ async def complete_onboarding(current_user: Dict[str, Any]):
raise HTTPException(status_code=500, detail="Internal server error")
async def reset_onboarding():
async def reset_onboarding(current_user: dict = Depends(get_current_user)):
try:
from api.onboarding_utils.onboarding_control_service import OnboardingControlService
control_service = OnboardingControlService()
return await control_service.reset_onboarding()
return await control_service.reset_onboarding(current_user)
except Exception as e:
logger.error(f"Error resetting onboarding: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")

View File

@@ -31,17 +31,23 @@ class OnboardingControlService:
logger.error(f"Error starting onboarding: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
async def reset_onboarding(self) -> Dict[str, Any]:
"""Reset the onboarding progress."""
async def reset_onboarding(self, current_user: Dict[str, Any]) -> Dict[str, Any]:
"""Reset the onboarding progress for a specific user."""
try:
progress = get_onboarding_progress()
progress.reset_progress()
return {
"message": "Onboarding progress reset successfully",
"current_step": progress.current_step,
"started_at": progress.started_at
}
from services.onboarding.progress_service import get_onboarding_progress_service
user_id = str(current_user.get('id'))
progress_service = get_onboarding_progress_service()
success = progress_service.reset_onboarding(user_id)
if success:
return {
"message": "Onboarding progress reset successfully",
"current_step": 1,
"started_at": None,
"user_id": user_id
}
else:
raise HTTPException(status_code=500, detail="Failed to reset onboarding progress")
except Exception as e:
logger.error(f"Error resetting onboarding: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")

View File

@@ -17,6 +17,7 @@ Last Updated: January 2025
from typing import Dict, List, Optional, Any
from datetime import datetime
import traceback
from loguru import logger
from services.research.exa_service import ExaService
from services.database import get_db_session
@@ -427,13 +428,25 @@ class Step3ResearchService:
# Store each competitor in CompetitorAnalysis table
from models.onboarding import CompetitorAnalysis
for competitor in competitors:
# Create competitor analysis record
competitor_record = CompetitorAnalysis(
session_id=session.id,
competitor_url=competitor.get("url", ""),
competitor_domain=competitor.get("domain", ""),
analysis_data={
logger.warning(f"🔍 COMPETITOR SAVE: Starting to save {len(competitors)} competitors for session {session_id}")
logger.warning(f" Session ID: {session.id}")
logger.warning(f" Session user_id: {session.user_id}")
saved_count = 0
failed_count = 0
for idx, competitor in enumerate(competitors):
try:
logger.warning(f"🔍 COMPETITOR SAVE: Saving competitor {idx + 1}/{len(competitors)}")
logger.warning(f" Competitor URL: {competitor.get('url', 'N/A')}")
logger.warning(f" Competitor Domain: {competitor.get('domain', 'N/A')}")
logger.warning(f" Has title: {bool(competitor.get('title'))}")
logger.warning(f" Has summary: {bool(competitor.get('summary'))}")
logger.warning(f" Has competitive_insights: {bool(competitor.get('competitive_insights'))}")
logger.warning(f" Has content_insights: {bool(competitor.get('content_insights'))}")
# Create competitor analysis record
analysis_data = {
"title": competitor.get("title", ""),
"summary": competitor.get("summary", ""),
"relevance_score": competitor.get("relevance_score", 0.5),
@@ -448,9 +461,27 @@ class Step3ResearchService:
"analysis_metadata": analysis_metadata,
"completed_at": datetime.utcnow().isoformat()
}
)
logger.warning(f" analysis_data keys: {list(analysis_data.keys())}")
logger.warning(f" competitive_analysis type: {type(analysis_data.get('competitive_analysis'))}")
logger.warning(f" content_insights type: {type(analysis_data.get('content_insights'))}")
competitor_record = CompetitorAnalysis(
session_id=session.id,
competitor_url=competitor.get("url", ""),
competitor_domain=competitor.get("domain", ""),
analysis_data=analysis_data,
status="completed"
)
db.add(competitor_record)
db.add(competitor_record)
saved_count += 1
logger.warning(f" ✅ Added competitor record {idx + 1} to session")
except Exception as e:
failed_count += 1
logger.error(f" ❌ Failed to save competitor {idx + 1}: {str(e)}")
logger.error(f" Traceback: {traceback.format_exc()}")
# Store summary in session for quick access (backward compatibility)
research_summary = {
@@ -465,9 +496,25 @@ class Step3ResearchService:
# For now, we'll skip this since the model doesn't have step_data
# TODO: Add step_data JSON column to OnboardingSession model if needed
db.commit()
logger.info(f"Stored {len(competitors)} competitors in CompetitorAnalysis table for session {session_id}")
return True
try:
db.commit()
logger.warning(f"🔍 COMPETITOR SAVE: ✅ Committed {saved_count} competitors to database")
logger.warning(f" Failed: {failed_count}")
# Verify the save by querying back
from models.onboarding import CompetitorAnalysis
verify_count = db.query(CompetitorAnalysis).filter(
CompetitorAnalysis.session_id == session.id
).count()
logger.warning(f"🔍 COMPETITOR SAVE: Verification - {verify_count} competitors found in DB for session {session.id}")
logger.info(f"Stored {len(competitors)} competitors in CompetitorAnalysis table for session {session_id}")
return True
except Exception as e:
db.rollback()
logger.error(f"❌ COMPETITOR SAVE: Failed to commit competitors: {str(e)}")
logger.error(f" Traceback: {traceback.format_exc()}")
return False
except Exception as e:
logger.error(f"Error storing research data: {str(e)}", exc_info=True)

View File

@@ -203,32 +203,125 @@ class StepManagementService:
db = next(get_db())
db_service = OnboardingDatabaseService()
save_errors = [] # Track save failures
# Step-specific side effects: save API keys to DB
if step_number == 1 and request_data and 'api_keys' in request_data:
api_keys = request_data['api_keys'] or {}
for provider, key in api_keys.items():
if key:
db_service.save_api_key(user_id, provider, key, db)
# Step-specific side effects: save data to DB
if step_number == 1 and request_data:
# Step 1: Save API keys
step_data = request_data.get('data') or request_data
logger.info(f"🔍 Step 1: Raw request_data keys: {list(request_data.keys()) if request_data else 'None'}")
logger.info(f"🔍 Step 1: Extracted step_data keys: {list(step_data.keys()) if step_data else 'None'}")
api_keys = step_data.get('api_keys', {})
logger.info(f"🔍 Step 1: API keys found: {list(api_keys.keys()) if api_keys else 'None'}")
if api_keys:
for provider, key in api_keys.items():
if key:
try:
saved = db_service.save_api_key(user_id, provider, key, db)
if saved:
logger.info(f"✅ Saved API key for provider {provider}")
else:
# This should not happen anymore since save_api_key now raises exceptions
raise Exception(f"API key save returned False for provider {provider}")
except Exception as e:
logger.error(f"❌ BLOCKING ERROR: Failed to save API key for provider {provider}: {str(e)}")
raise HTTPException(
status_code=500,
detail=f"Failed to save API key for {provider}. Onboarding cannot proceed until this is resolved."
) from e
# Step 2: Save website analysis data
elif step_number == 2 and request_data:
website_data = request_data.get('data') or request_data
logger.info(f"🔍 Step 2: Raw request_data keys: {list(request_data.keys()) if request_data else 'None'}")
logger.info(f"🔍 Step 2: Extracted website_data keys: {list(website_data.keys()) if website_data else 'None'}")
logger.info(f"🔍 Step 2: website_data.website: {website_data.get('website') if website_data else 'None'}")
logger.info(f"🔍 Step 2: website_data.analysis: {bool(website_data.get('analysis')) if website_data else 'None'}")
if website_data.get('analysis'):
logger.info(f"🔍 Step 2: analysis keys: {list(website_data['analysis'].keys()) if isinstance(website_data.get('analysis'), dict) else 'Not dict'}")
if website_data:
try:
saved = db_service.save_website_analysis(user_id, website_data, db)
if saved:
logger.info(f"✅ Saved website analysis for user {user_id}")
else:
# This should not happen anymore since save_website_analysis now raises exceptions
raise Exception("Website analysis save returned False")
except Exception as e:
logger.error(f"❌ BLOCKING ERROR: Failed to save website analysis: {str(e)}")
raise HTTPException(
status_code=500,
detail="Failed to save website analysis data. Onboarding cannot proceed until this is resolved."
) from e
# Step 3: Save research preferences data
elif step_number == 3 and request_data:
research_data = request_data.get('data') or request_data
logger.info(f"🔍 Step 3: Raw request_data keys: {list(request_data.keys()) if request_data else 'None'}")
logger.info(f"🔍 Step 3: Extracted research_data keys: {list(research_data.keys()) if research_data else 'None'}")
if research_data:
# Note: Competitor data is saved separately via discover-competitors endpoint
# This saves research preferences (content_types, target_audience, etc.)
try:
saved = db_service.save_research_preferences(user_id, research_data, db)
if saved:
logger.info(f"✅ Saved research preferences for user {user_id}")
else:
# This should not happen anymore since save_research_preferences now raises exceptions
raise Exception("Research preferences save returned False")
except Exception as e:
logger.error(f"❌ BLOCKING ERROR: Failed to save research preferences: {str(e)}")
raise HTTPException(
status_code=500,
detail="Failed to save research preferences. Onboarding cannot proceed until this is resolved."
) from e
# Step 4: Save persona data
elif step_number == 4 and request_data:
persona_data = request_data.get('data') or request_data
logger.info(f"🔍 Step 4: Raw request_data keys: {list(request_data.keys()) if request_data else 'None'}")
logger.info(f"🔍 Step 4: Extracted persona_data keys: {list(persona_data.keys()) if persona_data else 'None'}")
if persona_data:
try:
saved = db_service.save_persona_data(user_id, persona_data, db)
if saved:
logger.info(f"✅ Saved persona data for user {user_id}")
else:
# This should not happen anymore since save_persona_data now raises exceptions
raise Exception("Persona data save returned False")
except Exception as e:
logger.error(f"❌ BLOCKING ERROR: Failed to save persona data: {str(e)}")
raise HTTPException(
status_code=500,
detail="Failed to save persona data. Onboarding cannot proceed until this is resolved."
) from e
# Persist current step and progress in DB
db_service.update_step(user_id, step_number, db)
try:
progress_pct = min(100.0, round((step_number / 6) * 100))
db_service.update_progress(user_id, float(progress_pct), db)
except Exception:
pass
except Exception as e:
logger.warning(f"Failed to update progress: {e}")
# Log save errors but don't block step completion (non-blocking)
if save_errors:
logger.warning(f"⚠️ Step {step_number} completed but some data save operations failed: {save_errors}")
logger.info(f"[complete_step] Step {step_number} persisted to DB for user {user_id}")
return {
"message": "Step completed successfully",
"step_number": step_number,
"data": request_data or {}
"data": request_data or {},
"warnings": save_errors if save_errors else None # Include warnings in response
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error completing step: {str(e)}")
import traceback
traceback.print_exc()
raise HTTPException(status_code=500, detail="Internal server error")
async def skip_step(self, step_number: int, current_user: Dict[str, Any]) -> Dict[str, Any]: