ALwrity onboarding fixes

This commit is contained in:
ajaysi
2025-10-04 13:24:41 +05:30
parent 510b79bbf8
commit 14dfb2e5c0
14 changed files with 1182 additions and 446 deletions

View File

@@ -5,7 +5,7 @@
"title": "AI LLM Providers",
"description": "Configure AI language model providers",
"status": "completed",
"completed_at": "2025-09-30T11:54:21.688932",
"completed_at": "2025-10-03T17:29:12.878656",
"data": {
"api_keys": {
"gemini": "<REDACTED_GEMINI_API_KEY>",
@@ -19,9 +19,175 @@
"step_number": 2,
"title": "Website Analysis",
"description": "Set up website analysis and crawling",
"status": "pending",
"completed_at": null,
"data": null,
"status": "completed",
"completed_at": "2025-10-03T17:42:17.953305",
"data": {
"website": "https://alwrity.com",
"domainName": "Alwrity.com",
"analysis": {
"writing_style": {
"tone": "Informative, enthusiastic",
"voice": "Helpful, direct",
"complexity": "Moderate",
"engagement_level": "High",
"brand_personality": "Innovative, friendly",
"formality_level": "Semi-formal",
"emotional_appeal": "Benefit-driven"
},
"content_characteristics": {
"sentence_structure": "Varied",
"vocabulary_level": "Accessible",
"paragraph_organization": "Clear, concise",
"content_flow": "Logical, user-centric",
"readability_score": "Good",
"content_density": "Moderate",
"visual_elements_usage": "High"
},
"target_audience": {
"demographics": [
"Marketers",
"Bloggers",
"Content creators"
],
"expertise_level": "Beginner to intermediate",
"industry_focus": "General, tech",
"geographic_focus": "",
"psychographic_profile": [
"Tech-savvy",
"Value-conscious",
"Efficiency-seeking"
],
"pain_points": [
"Time-consuming content creation",
"Lack of SEO knowledge",
"Writer's block"
],
"motivations": [
"Efficiency",
"Increased website traffic",
"Content quality"
]
},
"content_type": {
"primary_type": "AI writing tool promotion",
"secondary_types": [
"Blog posts",
"Marketing materials"
],
"purpose": "Promote AI tools, generate leads",
"call_to_action": "Try Now!",
"conversion_focus": "Tool usage",
"educational_value": "Moderate"
},
"recommended_settings": {
"writing_tone": "Enthusiastic, informative",
"target_audience": "Content creators, marketers",
"content_type": "AI tool promotion, blog",
"creativity_level": "High",
"geographic_location": "",
"industry_context": "AI, Content Creation",
"brand_alignment": "Strong"
},
"guidelines": {
"tone_recommendations": [
"Informative & Enthusiastic: Maintain a helpful and engaging tone. Example: 'Let's dive into how AI can revolutionize your content creation!'"
],
"structure_guidelines": [
"Clear & Concise: Use headings, subheadings, and bullet points for easy readability. Example: Break down complex topics into digestible sections.",
"Logical Flow: Organize content with a user-centric approach, starting with the problem and offering solutions. Example: Start with the pain points of content creation and then introduce your AI tools."
],
"vocabulary_suggestions": [
"Accessible Language: Avoid jargon; use clear and concise language. Example: Instead of 'leverage AI,' use 'use AI.'"
],
"engagement_tips": [
"Visuals: Incorporate images, screenshots, and videos to enhance understanding. Example: Include screenshots of your AI tools in action.",
"Benefit-Driven: Focus on the benefits for the user. Example: 'Save time and create high-quality content with our AI.'"
],
"audience_considerations": [
"Targeted Content: Address the needs of marketers, bloggers, and content creators. Example: Provide specific examples relevant to their workflows.",
"Address Pain Points: Acknowledge and solve common content creation challenges. Example: Offer solutions to writer's block and SEO optimization."
],
"brand_alignment": [
"Helpful & Innovative Voice: Maintain a helpful, innovative, and friendly brand voice. Example: Offer free resources and tutorials.",
"Open Source Focus: Highlight the open-source nature of the tools. Example: Mention the benefits of open-source for users.",
"Value Proposition: Clearly communicate the value of the tools. Example: 'Create fact-based, multilingual content efficiently.'"
],
"seo_optimization": [
"Keyword Research: Identify relevant keywords and incorporate them naturally. Example: Use keywords like 'AI content creation,' 'SEO optimization,' and 'free AI tools.'",
"Optimize Headings & Meta Descriptions: Use keywords in headings and create compelling meta descriptions. Example: Write a meta description that includes a clear call to action and keyword."
],
"conversion_optimization": [
"Clear CTAs: Include clear calls to action. Example: 'Try our free AI tool today!'",
"Focus on Benefits: Emphasize the value proposition. Example: 'Sign up to get instant access to AI-powered content creation.'"
]
},
"best_practices": [
"Provide In-Depth Tutorials: Offer detailed guides and tutorials to showcase the tools' capabilities.",
"Showcase Diverse Use Cases: Demonstrate how the tools can be applied in various scenarios.",
"Build Community: Encourage user interaction and feedback.",
"Integrate with Platforms: Explore integrations with popular content platforms."
],
"avoid_elements": [
"Overly Technical Jargon: Steer clear of overly complex technical terms that may alienate the audience.",
"Misleading Claims: Avoid making exaggerated claims about AI capabilities.",
"Negative Brand Association: Do not use language that portrays the brand as anything other than helpful and accessible."
],
"content_strategy": "Focus on creating informative, user-centric content that highlights the benefits of open-source AI tools for content creation, addressing the pain points of the target audience while providing practical solutions and SEO optimization.",
"ai_generation_tips": [
"Fact-Checking: Always verify the information generated by AI tools.",
"Human Oversight: Review and refine AI-generated content for accuracy, clarity, and brand voice.",
"Experimentation: Test different prompts and inputs to optimize output."
],
"competitive_advantages": [
"Fact-Based Content: Emphasize the ability to generate fact-based content.",
"Open Source: Highlight the benefits of open-source for users (e.g., transparency, community support, customization).",
"Multilingual Support: Promote the multilingual capabilities of the tools."
],
"content_calendar_suggestions": [
"Tutorials: Create step-by-step guides on using the AI tools for different content types.",
"Use Case Studies: Showcase successful implementations of the tools.",
"SEO Optimization Guides: Provide tips and best practices for improving search engine rankings.",
"Industry News & Trends: Share insights on the latest developments in AI and content creation."
],
"style_patterns": {
"patterns": {
"sentence_length": "short to medium",
"vocabulary_patterns": [
"keywords related to AI and content generation",
"action-oriented verbs"
],
"rhetorical_devices": [
"repetition",
"call to action"
],
"paragraph_structure": "varied, often short and focused",
"transition_phrases": [
"Click to",
"and"
]
},
"style_consistency": "consistent in tone and purpose",
"unique_elements": [
"focus on AI-powered content creation tools",
"integration with platforms like WordPress"
],
"meta": {
"schema_version": "1.1",
"confidence": 0.8,
"notes": "The content is promotional and tool-focused.",
"uncertainty": {
"fields": []
}
}
},
"style_consistency": "consistent in tone and purpose",
"unique_elements": [
"focus on AI-powered content creation tools",
"integration with platforms like WordPress"
]
},
"useAnalysisForGenAI": true
},
"validation_errors": []
},
{
@@ -61,9 +227,9 @@
"validation_errors": []
}
],
"current_step": 2,
"current_step": 3,
"started_at": "2025-09-29T17:22:14.375002",
"last_updated": "2025-09-30T11:54:21.688938",
"last_updated": "2025-10-03T17:42:17.953324",
"is_completed": false,
"completed_at": null
}

View File

@@ -8,6 +8,7 @@ from sqlalchemy.orm import Session
from loguru import logger
from typing import Dict, Any
from datetime import datetime
import hashlib
from models.component_logic import (
UserInfoRequest, UserInfoResponse,
@@ -45,6 +46,23 @@ research_utilities = ResearchUtilities()
# Create router
router = APIRouter(prefix="/api/onboarding", tags=["component_logic"])
# Utility function for consistent user ID to integer conversion
def clerk_user_id_to_int(user_id: str) -> int:
    """
    Convert a Clerk user ID to a consistent integer for the database session_id.

    The ID is hashed with SHA256 so the result is deterministic across
    processes and requests (unlike Python's built-in hash(), which varies
    per process).

    Args:
        user_id: Clerk user ID (e.g., 'user_2qA6V8bFFnhPRGp8JYxP4YTJtHl')

    Returns:
        int: Deterministic integer in the range 0..2147483646 derived from
        the user ID.

    Note:
        Only the first 32 bits of the digest are used, so two different
        user IDs can map to the same integer. Callers that rely on this
        value for user isolation should be aware of that.
    """
    # Modulus that keeps the result within INT range (2**31 - 1).
    int_range_modulus = 2147483647

    digest_hex = hashlib.sha256(user_id.encode("utf-8")).hexdigest()

    # First 8 hex characters == first 32 bits of the SHA256 digest.
    return int(digest_hex[:8], 16) % int_range_modulus
# AI Research Endpoints
@router.post("/ai-research/validate-user", response_model=UserInfoResponse)
@@ -99,11 +117,8 @@ async def configure_research_preferences(
preferences_service = ResearchPreferencesService(db)
# Use authenticated Clerk user ID for proper user isolation
# Convert user_id to int if service expects it, or update service to accept string
try:
user_id_int = int(user_id.replace('user_', '').replace('-', '')[:8], 16) % 2147483647
except:
user_id_int = hash(user_id) % 2147483647
# Use consistent SHA256-based conversion
user_id_int = clerk_user_id_to_int(user_id)
# Save preferences with user ID (not session_id)
preferences_id = preferences_service.save_preferences_with_style_data(user_id_int, preferences)
@@ -504,10 +519,8 @@ async def complete_style_detection(
analysis_service = WebsiteAnalysisService(db_session)
# Use authenticated Clerk user ID for proper user isolation
try:
user_id_int = int(user_id.replace('user_', '').replace('-', '')[:8], 16) % 2147483647
except:
user_id_int = hash(user_id) % 2147483647
# Use consistent SHA256-based conversion
user_id_int = clerk_user_id_to_int(user_id)
# Check for existing analysis if URL is provided
existing_analysis = None
@@ -536,11 +549,44 @@ async def complete_style_detection(
timestamp=datetime.now().isoformat()
)
# Step 2: Analyze style
style_analysis = style_logic.analyze_content_style(crawl_result['content'])
# Steps 2-3: Run style and patterns analysis in parallel (2 calls → 1 batch); Step 4 (guidelines) depends on the style result and runs afterwards
import asyncio
from functools import partial
# Prepare parallel tasks
logger.info("[complete_style_detection] Starting parallel AI analysis...")
async def run_style_analysis():
    """Run the blocking style analysis call in the event loop's default executor.

    Returns whatever style_logic.analyze_content_style returns for the
    crawled content.
    """
    # get_running_loop() is the preferred call inside a coroutine; it always
    # returns the loop that is executing this coroutine.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(
        None,
        partial(style_logic.analyze_content_style, crawl_result['content']),
    )
async def run_patterns_analysis():
    """Run the blocking patterns analysis call in the default executor, if requested.

    Returns None without calling the analyzer when request.include_patterns
    is falsy; otherwise returns whatever style_logic.analyze_style_patterns
    returns for the crawled content.
    """
    if not request.include_patterns:
        return None
    # get_running_loop() is the preferred call inside a coroutine; it always
    # returns the loop that is executing this coroutine.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(
        None,
        partial(style_logic.analyze_style_patterns, crawl_result['content']),
    )
# Execute style and patterns analysis in parallel
style_analysis, patterns_result = await asyncio.gather(
run_style_analysis(),
run_patterns_analysis(),
return_exceptions=True
)
# Check if style_analysis failed
if isinstance(style_analysis, Exception):
error_msg = str(style_analysis)
logger.error(f"Style analysis failed with exception: {error_msg}")
analysis_service.save_error_analysis(user_id_int, request.url or "text_sample", error_msg)
return StyleDetectionResponse(
success=False,
error=f"Style analysis failed: {error_msg}",
timestamp=datetime.now().isoformat()
)
if not style_analysis or not style_analysis.get('success'):
# Check if it's an API key issue
error_msg = style_analysis.get('error', 'Unknown error') if style_analysis else 'Analysis failed'
if 'API key' in error_msg or 'configure' in error_msg:
return StyleDetectionResponse(
@@ -549,7 +595,6 @@ async def complete_style_detection(
timestamp=datetime.now().isoformat()
)
else:
# Save error analysis
analysis_service.save_error_analysis(user_id_int, request.url or "text_sample", error_msg)
return StyleDetectionResponse(
success=False,
@@ -557,17 +602,20 @@ async def complete_style_detection(
timestamp=datetime.now().isoformat()
)
# Step 3: Analyze patterns (optional)
# Process patterns result
style_patterns = None
if request.include_patterns:
patterns_result = style_logic.analyze_style_patterns(crawl_result['content'])
if patterns_result and patterns_result.get('success'):
if request.include_patterns and patterns_result and not isinstance(patterns_result, Exception):
if patterns_result.get('success'):
style_patterns = patterns_result.get('patterns')
# Step 4: Generate guidelines (optional)
# Step 4: Generate guidelines (depends on style_analysis, must run after)
style_guidelines = None
if request.include_guidelines:
guidelines_result = style_logic.generate_style_guidelines(style_analysis.get('analysis', {}))
loop = asyncio.get_event_loop()
guidelines_result = await loop.run_in_executor(
None,
partial(style_logic.generate_style_guidelines, style_analysis.get('analysis', {}))
)
if guidelines_result and guidelines_result.get('success'):
style_guidelines = guidelines_result.get('guidelines')
@@ -628,10 +676,8 @@ async def check_existing_analysis(
analysis_service = WebsiteAnalysisService(db_session)
# Use authenticated Clerk user ID for proper user isolation
try:
user_id_int = int(user_id.replace('user_', '').replace('-', '')[:8], 16) % 2147483647
except:
user_id_int = hash(user_id) % 2147483647
# Use consistent SHA256-based conversion
user_id_int = clerk_user_id_to_int(user_id)
# Check for existing analysis for THIS USER ONLY
existing_analysis = analysis_service.check_existing_analysis(user_id_int, website_url)
@@ -684,10 +730,8 @@ async def get_session_analyses(current_user: Dict[str, Any] = Depends(get_curren
analysis_service = WebsiteAnalysisService(db_session)
# Use authenticated Clerk user ID for proper user isolation
try:
user_id_int = int(user_id.replace('user_', '').replace('-', '')[:8], 16) % 2147483647
except:
user_id_int = hash(user_id) % 2147483647
# Use consistent SHA256-based conversion
user_id_int = clerk_user_id_to_int(user_id)
# Get analyses for THIS USER ONLY (not all users!)
analyses = analysis_service.get_session_analyses(user_id_int)

View File

@@ -117,26 +117,24 @@ class ClerkAuthMiddleware:
# Use cached PyJWKClient to avoid repeated JWKS fetches
if jwks_url not in self._jwks_client_cache:
logger.info(f"Creating new PyJWKClient for {jwks_url} with caching enabled")
# Create client with caching: cache_keys=True, max_cached_keys=16, cache_jwk_set_timeout=3600 (1 hour)
# Create client with caching enabled (cache_keys=True keeps keys in memory)
self._jwks_client_cache[jwks_url] = PyJWKClient(
jwks_url,
cache_keys=True,
max_cached_keys=16,
cache_jwk_set_timeout=3600, # Cache JWKS for 1 hour
timeout=10 # 10 second timeout for JWKS fetch
max_cached_keys=16
)
jwks_client = self._jwks_client_cache[jwks_url]
signing_key = jwks_client.get_signing_key_from_jwt(token)
# Verify and decode the token with clock skew tolerance
# Add 60 seconds leeway to handle clock skew between client/server
# Add 300 seconds (5 minutes) leeway to handle clock skew and token refresh delays
decoded_token = jwt.decode(
token,
signing_key.key,
algorithms=["RS256"],
options={"verify_signature": True, "verify_exp": True},
leeway=60 # Allow 60 seconds clock skew
leeway=300 # Allow 5 minutes leeway for token refresh during navigation
)
# Extract user information
@@ -171,7 +169,7 @@ class ClerkAuthMiddleware:
decoded_token = jwt.decode(
token,
options={"verify_signature": False},
leeway=60 # Allow 60 seconds clock skew
leeway=300 # Allow 5 minutes leeway for token refresh
)
# Extract user information from the token

View File

@@ -41,11 +41,17 @@ class WebsiteAnalysisService:
if existing_analysis:
# Update existing analysis
existing_analysis.writing_style = analysis_data.get('style_analysis', {}).get('writing_style')
existing_analysis.content_characteristics = analysis_data.get('style_analysis', {}).get('content_characteristics')
existing_analysis.target_audience = analysis_data.get('style_analysis', {}).get('target_audience')
existing_analysis.content_type = analysis_data.get('style_analysis', {}).get('content_type')
existing_analysis.recommended_settings = analysis_data.get('style_analysis', {}).get('recommended_settings')
style_analysis = analysis_data.get('style_analysis', {})
existing_analysis.writing_style = style_analysis.get('writing_style')
existing_analysis.content_characteristics = style_analysis.get('content_characteristics')
existing_analysis.target_audience = style_analysis.get('target_audience')
existing_analysis.content_type = style_analysis.get('content_type')
existing_analysis.recommended_settings = style_analysis.get('recommended_settings')
# Store brand_analysis and content_strategy_insights if model supports it
if hasattr(existing_analysis, 'brand_analysis'):
existing_analysis.brand_analysis = style_analysis.get('brand_analysis')
if hasattr(existing_analysis, 'content_strategy_insights'):
existing_analysis.content_strategy_insights = style_analysis.get('content_strategy_insights')
existing_analysis.crawl_result = analysis_data.get('crawl_result')
existing_analysis.style_patterns = analysis_data.get('style_patterns')
existing_analysis.style_guidelines = analysis_data.get('style_guidelines')
@@ -59,20 +65,28 @@ class WebsiteAnalysisService:
return existing_analysis.id
else:
# Create new analysis
analysis = WebsiteAnalysis(
session_id=session_id,
website_url=website_url,
writing_style=analysis_data.get('style_analysis', {}).get('writing_style'),
content_characteristics=analysis_data.get('style_analysis', {}).get('content_characteristics'),
target_audience=analysis_data.get('style_analysis', {}).get('target_audience'),
content_type=analysis_data.get('style_analysis', {}).get('content_type'),
recommended_settings=analysis_data.get('style_analysis', {}).get('recommended_settings'),
crawl_result=analysis_data.get('crawl_result'),
style_patterns=analysis_data.get('style_patterns'),
style_guidelines=analysis_data.get('style_guidelines'),
status='completed',
warning_message=analysis_data.get('warning')
)
style_analysis = analysis_data.get('style_analysis', {})
analysis_args = {
'session_id': session_id,
'website_url': website_url,
'writing_style': style_analysis.get('writing_style'),
'content_characteristics': style_analysis.get('content_characteristics'),
'target_audience': style_analysis.get('target_audience'),
'content_type': style_analysis.get('content_type'),
'recommended_settings': style_analysis.get('recommended_settings'),
'crawl_result': analysis_data.get('crawl_result'),
'style_patterns': analysis_data.get('style_patterns'),
'style_guidelines': analysis_data.get('style_guidelines'),
'status': 'completed',
'warning_message': analysis_data.get('warning')
}
# Add brand_analysis and content_strategy_insights if model supports it
if hasattr(WebsiteAnalysis, 'brand_analysis'):
analysis_args['brand_analysis'] = style_analysis.get('brand_analysis')
if hasattr(WebsiteAnalysis, 'content_strategy_insights'):
analysis_args['content_strategy_insights'] = style_analysis.get('content_strategy_insights')
analysis = WebsiteAnalysis(**analysis_args)
self.db.add(analysis)
self.db.commit()