Alwrity version 0.5.4

2025-08-11 10:54:50 +05:30
parent 13ca78f653
commit 39b96c44da
44 changed files with 10448 additions and 2119 deletions
--- a/backend/api/content_planning/services/content_strategy/autofill/ai_structured_autofill.py
+++ b/backend/api/content_planning/services/content_strategy/autofill/ai_structured_autofill.py
@@ -1,7 +1,8 @@
 import json
 import logging
 import traceback
-from typing import Any, Dict
+from typing import Any, Dict, List
+from datetime import datetime

 from services.ai_service_manager import AIServiceManager, AIServiceType

@@ -60,44 +61,104 @@ class AIStructuredAutofillService:
        research = context.get('research_preferences') or {}
        api_keys = context.get('api_keys_data') or {}
        session = context.get('onboarding_session') or {}
+        
+        # Extract detailed personalization data
+        writing_style = website.get('writing_style', {})
+        target_audience = website.get('target_audience', {})
+        content_type = website.get('content_type', {})
+        recommended_settings = website.get('recommended_settings', {})
+        content_characteristics = website.get('content_characteristics', {})
+        
        summary = {
-            'website_summary': {
+            'user_profile': {
                'website_url': website.get('website_url'),
-                'industry': website.get('industry'),
-                'content_types': website.get('content_types'),
-                'target_audience': website.get('target_audience'),
-                'performance_metrics': website.get('performance_metrics'),
-                'seo_summary': website.get('seo_analysis')
-            },
-            'research_summary': {
-                'audience_segments': research.get('audience_segments'),
-                'content_preferences': research.get('content_preferences'),
-                'consumption_patterns': research.get('consumption_patterns'),
-                'seasonality': research.get('seasonal_trends')
-            },
-            'api_summary': {
-                'providers': api_keys.get('providers'),
-                'total_keys': api_keys.get('total_keys')
-            },
-            'session_summary': {
                'business_size': session.get('business_size'),
-                'region': session.get('region')
+                'region': session.get('region'),
+                'onboarding_progress': session.get('progress', 0)
+            },
+            'content_analysis': {
+                'writing_style': {
+                    'tone': writing_style.get('tone'),
+                    'voice': writing_style.get('voice'),
+                    'complexity': writing_style.get('complexity'),
+                    'engagement_level': writing_style.get('engagement_level')
+                },
+                'content_characteristics': {
+                    'sentence_structure': content_characteristics.get('sentence_structure'),
+                    'vocabulary': content_characteristics.get('vocabulary'),
+                    'paragraph_organization': content_characteristics.get('paragraph_organization')
+                },
+                'content_type': {
+                    'primary_type': content_type.get('primary_type'),
+                    'secondary_types': content_type.get('secondary_types'),
+                    'purpose': content_type.get('purpose')
+                }
+            },
+            'audience_insights': {
+                'demographics': target_audience.get('demographics'),
+                'expertise_level': target_audience.get('expertise_level'),
+                'industry_focus': target_audience.get('industry_focus'),
+                'pain_points': target_audience.get('pain_points'),
+                'content_preferences': target_audience.get('content_preferences')
+            },
+            'ai_recommendations': {
+                'recommended_tone': recommended_settings.get('writing_tone'),
+                'recommended_audience': recommended_settings.get('target_audience'),
+                'recommended_content_type': recommended_settings.get('content_type'),
+                'style_guidelines': website.get('style_guidelines')
+            },
+            'research_config': {
+                'research_depth': research.get('research_depth'),
+                'content_types': research.get('content_types'),
+                'auto_research': research.get('auto_research'),
+                'factual_content': research.get('factual_content')
+            },
+            'api_capabilities': {
+                'providers': api_keys.get('providers', []),
+                'total_keys': api_keys.get('total_keys', 0),
+                'available_services': self._extract_available_services(api_keys)
+            },
+            'data_quality': {
+                'website_freshness': website.get('data_freshness'),
+                'confidence_level': website.get('confidence_level'),
+                'analysis_status': website.get('status')
            }
        }
+        
        try:
            logger.debug(
-                "AI Structured Autofill: context presence | website=%s research=%s api=%s session=%s",
+                "AI Structured Autofill: personalized context | website=%s research=%s api=%s session=%s",
                bool(website), bool(research), bool(api_keys), bool(session)
            )
            logger.debug(
-                "AI Structured Autofill: website keys=%s research keys=%s",
-                len(list(website.keys())) if hasattr(website, 'keys') else 0,
-                len(list(research.keys())) if hasattr(research, 'keys') else 0,
+                "AI Structured Autofill: personalization data | writing_style=%s target_audience=%s content_type=%s",
+                bool(writing_style), bool(target_audience), bool(content_type)
            )
        except Exception:
            pass
        return summary

+    def _extract_available_services(self, api_keys: Dict[str, Any]) -> List[str]:
+        """Return the de-duplicated services unlocked by the configured API providers.
+
+        Providers that are unknown (or not strings) are ignored; a missing or ``None``
+        provider list yields ``[]``. First-seen order is kept so the result is deterministic.
+        """
+        provider_service_map = {
+            'google_search_console': ['SEO Analytics', 'Search Performance'],
+            'google_analytics': ['Web Analytics', 'User Behavior'],
+            'semrush': ['Competitive Analysis', 'Keyword Research'],
+            'ahrefs': ['Backlink Analysis', 'SEO Tools'],
+            'moz': ['SEO Tools', 'Rank Tracking'],
+            'social_media': ['Social Media Analytics', 'Social Listening']
+        }
+        services: List[str] = []
+        for provider in api_keys.get('providers') or []:
+            # isinstance guard: an unhashable entry would raise TypeError on the dict lookup.
+            if isinstance(provider, str):
+                services.extend(provider_service_map.get(provider, []))
+        return list(dict.fromkeys(services))  # order-preserving de-duplication
+
    def _build_schema(self) -> Dict[str, Any]:
        # Simplified schema following Gemini best practices
        # Reduce complexity by flattening nested structures and simplifying constraints
@@ -167,47 +228,109 @@ class AIStructuredAutofillService:
        return schema

    def _build_prompt(self, context_summary: Dict[str, Any]) -> str:
-        # Ultra-simplified prompt to avoid JSON parsing issues
-        prompt = (
-            "Generate a JSON object with exactly 30 fields for content strategy. Use this exact format:\n\n"
-            
-            '{\n'
-            '"business_objectives": "Increase traffic and leads",\n'
-            '"target_metrics": "25% growth, 15% conversion",\n'
-            '"content_budget": 3000,\n'
-            '"team_size": 3,\n'
-            '"implementation_timeline": "6 months",\n'
-            '"market_share": "15%",\n'
-            '"competitive_position": "Leader",\n'
-            '"performance_metrics": "Current metrics data",\n'
-            '"content_preferences": "Blog posts, videos",\n'
-            '"consumption_patterns": "Peak hours 9-11 AM",\n'
-            '"audience_pain_points": "Time constraints, complexity",\n'
-            '"buying_journey": "Awareness to Decision",\n'
-            '"seasonal_trends": "Q1 planning, Q2 execution",\n'
-            '"engagement_metrics": "3.5% engagement rate",\n'
-            '"top_competitors": "Competitor A, B, C",\n'
-            '"competitor_content_strategies": "Educational content approach",\n'
-            '"market_gaps": "AI tools, automation guides",\n'
-            '"industry_trends": "AI integration, video content",\n'
-            '"emerging_trends": "Voice search, interactive content",\n'
-            '"preferred_formats": "Blog posts, videos, infographics",\n'
-            '"content_mix": "70% educational, 30% promotional",\n'
-            '"content_frequency": "Weekly",\n'
-            '"optimal_timing": "Tuesday/Thursday 10 AM",\n'
-            '"quality_metrics": "SEO score >90, engagement >3%",\n'
-            '"editorial_guidelines": "Professional tone, actionable insights",\n'
-            '"brand_voice": "Professional",\n'
-            '"traffic_sources": "Organic search, social media",\n'
-            '"conversion_rates": "15% conversion, $200 CPA",\n'
-            '"content_roi_targets": "15% conversion, 3:1 ROI",\n'
-            '"ab_testing_capabilities": true\n'
-            '}\n\n'
-            
-            f"Business context: {json.dumps(context_summary, indent=2)}\n\n"
-            "Generate the complete JSON with all 30 fields:"
-        )
-        logger.debug("AI Structured Autofill: ultra-simplified prompt (%d chars)", len(prompt))
+        # Build personalized prompt using actual user data
+        user_profile = context_summary.get('user_profile', {})
+        content_analysis = context_summary.get('content_analysis', {})
+        audience_insights = context_summary.get('audience_insights', {})
+        ai_recommendations = context_summary.get('ai_recommendations', {})
+        research_config = context_summary.get('research_config', {})
+        api_capabilities = context_summary.get('api_capabilities', {})
+        
+        # Extract specific personalization data
+        website_url = user_profile.get('website_url', 'your website')
+        writing_tone = content_analysis.get('writing_style', {}).get('tone', 'professional')
+        target_demographics = audience_insights.get('demographics', ['professionals'])
+        industry_focus = audience_insights.get('industry_focus', 'general')
+        expertise_level = audience_insights.get('expertise_level', 'intermediate')
+        primary_content_type = content_analysis.get('content_type', {}).get('primary_type', 'blog')
+        research_depth = research_config.get('research_depth', 'Standard')
+        available_services = api_capabilities.get('available_services', [])
+        
+        # Build personalized context description
+        personalization_context = f"""
+PERSONALIZED CONTEXT FOR {website_url.upper()}:
+
+🎯 YOUR BUSINESS PROFILE:
+- Website: {website_url}
+- Industry Focus: {industry_focus}
+- Business Size: {user_profile.get('business_size', 'SME')}
+- Region: {user_profile.get('region', 'Global')}
+
+📝 YOUR CONTENT ANALYSIS:
+- Current Writing Tone: {writing_tone}
+- Primary Content Type: {primary_content_type}
+- Target Demographics: {', '.join(target_demographics) if isinstance(target_demographics, list) else target_demographics}
+- Audience Expertise Level: {expertise_level}
+- Content Purpose: {content_analysis.get('content_type', {}).get('purpose', 'informational')}
+
+🔍 YOUR AUDIENCE INSIGHTS:
+- Pain Points: {audience_insights.get('pain_points', 'time constraints, complexity')}
+- Content Preferences: {audience_insights.get('content_preferences', 'educational, actionable')}
+- Industry Focus: {industry_focus}
+
+🤖 AI RECOMMENDATIONS FOR YOUR SITE:
+- Recommended Tone: {ai_recommendations.get('recommended_tone', writing_tone)}
+- Recommended Content Type: {ai_recommendations.get('recommended_content_type', primary_content_type)}
+- Style Guidelines: {ai_recommendations.get('style_guidelines', 'professional, engaging')}
+
+⚙️ YOUR RESEARCH CONFIGURATION:
+- Research Depth: {research_depth}
+- Content Types: {', '.join(research_config.get('content_types', ['blog', 'article'])) if isinstance(research_config.get('content_types'), list) else research_config.get('content_types', 'blog, article')}
+- Auto Research: {research_config.get('auto_research', True)}
+- Factual Content: {research_config.get('factual_content', True)}
+
+🔧 YOUR AVAILABLE TOOLS:
+- Analytics Services: {', '.join(available_services) if available_services else 'Basic analytics'}
+- API Providers: {', '.join(api_capabilities.get('providers', [])) if api_capabilities.get('providers') else 'Manual tracking'}
+"""
+
+        # Personalized prompt with specific instructions
+        prompt = f"""
+You are a content strategy expert analyzing {website_url}. Based on the detailed analysis of this website and user's onboarding data, generate a personalized content strategy with exactly 30 fields.
+
+{personalization_context}
+
+IMPORTANT: Make each field specific to {website_url} and the user's actual data. Avoid generic placeholder values. Use the real insights from their website analysis.
+
+Generate a JSON object with exactly 30 fields using this exact format:
+
+{{
+"business_objectives": "Specific goals for {website_url} based on {industry_focus} industry",
+"target_metrics": "Realistic KPIs for {user_profile.get('business_size', 'SME')} business",
+"content_budget": 3000,
+"team_size": 3,
+"implementation_timeline": "6 months",
+"market_share": "15%",
+"competitive_position": "Leader",
+"performance_metrics": "Current performance data for {website_url}",
+"content_preferences": "Content formats preferred by {', '.join(target_demographics) if isinstance(target_demographics, list) else target_demographics} audience",
+"consumption_patterns": "When {expertise_level} level audience consumes content",
+"audience_pain_points": "Specific challenges for {industry_focus} professionals",
+"buying_journey": "Customer journey for {industry_focus} industry",
+"seasonal_trends": "Seasonal patterns in {industry_focus}",
+"engagement_metrics": "Expected engagement for {writing_tone} tone content",
+"top_competitors": "Main competitors in {industry_focus} space",
+"competitor_content_strategies": "How competitors approach {primary_content_type} content",
+"market_gaps": "Opportunities in {industry_focus} content market",
+"industry_trends": "Current trends in {industry_focus} industry",
+"emerging_trends": "Upcoming trends for {industry_focus}",
+"preferred_formats": "Formats that work for {expertise_level} audience",
+"content_mix": "Optimal mix for {primary_content_type} focus",
+"content_frequency": "Frequency for {research_depth} research depth",
+"optimal_timing": "Best times for {target_demographics[0] if isinstance(target_demographics, list) and target_demographics else 'your'} audience",
+"quality_metrics": "Quality standards for {writing_tone} content",
+"editorial_guidelines": "Guidelines matching {writing_tone} tone",
+"brand_voice": "{writing_tone.title()}",
+"traffic_sources": "Primary sources for {industry_focus} content",
+"conversion_rates": "Realistic rates for {user_profile.get('business_size', 'SME')}",
+"content_roi_targets": "ROI goals for {industry_focus} content",
+"ab_testing_capabilities": true
+}}
+
+Generate the complete JSON with all 30 fields personalized for {website_url}:
+"""
+        
+        logger.debug("AI Structured Autofill: personalized prompt (%d chars)", len(prompt))
        return prompt

    def _normalize_value(self, key: str, value: Any) -> Any:
@@ -459,7 +582,15 @@ class AIStructuredAutofillService:
            raw_value = last_result.get(key)
            norm_value = self._normalize_value(key, raw_value)
            if norm_value is not None and norm_value != "" and norm_value != []:
-                fields[key] = { 'value': norm_value, 'source': 'ai_refresh', 'confidence': 0.8 }
+                # Add personalization metadata to each field
+                personalized_metadata = self._add_personalization_metadata(key, norm_value, context_summary)
+                fields[key] = { 
+                    'value': norm_value, 
+                    'source': 'ai_refresh', 
+                    'confidence': 0.8,
+                    'personalized': True,
+                    'personalization_data': personalized_metadata
+                }
                sources[key] = 'ai_refresh'
                non_null_keys.append(key)
            else:
@@ -479,31 +610,93 @@ class AIStructuredAutofillService:
            'performance_analytics': ['traffic_sources', 'conversion_rates', 'content_roi_targets', 'ab_testing_capabilities']
        }
        
+        # Log category-wise success rates
        for category, category_fields in field_categories.items():
-            generated_in_category = [f for f in category_fields if f in non_null_keys]
-            missing_in_category = [f for f in category_fields if f in missing_fields]
-            logger.info("📊 %s: %d/%d fields generated (%s missing: %s)", 
-                       category.upper(), len(generated_in_category), len(category_fields), 
-                       len(missing_in_category), missing_in_category)
+            generated_count = len([f for f in category_fields if f in non_null_keys])
+            missing_count = len([f for f in category_fields if f in missing_fields])
+            logger.info(f"📊 {category.upper()}: {generated_count}/{len(category_fields)} fields generated ({missing_count} missing: {[f for f in category_fields if f in missing_fields]})")
        
        success_rate = self._calculate_success_rate(last_result)
+        logger.info(f"AI structured autofill completed | non_null_fields={len(non_null_keys)} missing={len(missing_fields)} success_rate={success_rate:.1f}% attempts={self.max_retries + 1}")

-        payload = {
+        return {
            'fields': fields,
            'sources': sources,
            'meta': {
-                'ai_used': len(non_null_keys) > 0,
+                'ai_used': True,
                'ai_overrides_count': len(non_null_keys),
-                'ai_override_fields': non_null_keys,
-                'ai_only': True,
                'missing_fields': missing_fields,
                'success_rate': success_rate,
-                'attempts': self.max_retries + 1
+                'attempts': self.max_retries + 1,
+                'personalization_level': 'high',
+                'data_sources_used': list(set(sources.values())),
+                'website_analyzed': context_summary.get('user_profile', {}).get('website_url'),
+                'generated_at': datetime.utcnow().isoformat()
+            }
+        }
+
+    def _add_personalization_metadata(self, field_key: str, value: Any, context_summary: Dict[str, Any]) -> Dict[str, Any]:
+        """Return metadata explaining how ``field_key`` was personalized (``value`` is not inspected)."""
+        user_profile = context_summary.get('user_profile') or {}
+        content_analysis = context_summary.get('content_analysis') or {}
+        audience_insights = context_summary.get('audience_insights') or {}
+        # ``or`` instead of .get defaults: a key present with a None value must also fall back.
+        website_url = user_profile.get('website_url') or 'your website'
+        business_size = user_profile.get('business_size') or 'SME'
+        writing_tone = (content_analysis.get('writing_style') or {}).get('tone') or 'professional'
+        industry_focus = audience_insights.get('industry_focus') or 'general'
+        expertise_level = audience_insights.get('expertise_level') or 'intermediate'
+        primary_type = (content_analysis.get('content_type') or {}).get('primary_type') or 'blog'
+        demographics = audience_insights.get('demographics') or ['professionals']
+        demographics = [str(d) for d in demographics] if isinstance(demographics, list) else [str(demographics)]
+        personalization_explanations = {
+            'business_objectives': f"Based on {industry_focus} industry analysis and {business_size} business profile",
+            'target_metrics': f"Realistic KPIs for {business_size} business in {industry_focus}",
+            'content_budget': f"Budget recommendation based on {business_size} scale and {industry_focus} content needs",
+            'team_size': f"Team size optimized for {business_size} business and {primary_type} content",
+            'implementation_timeline': f"Timeline based on {business_size} resources and {industry_focus} complexity",
+            'market_share': f"Market position analysis for {industry_focus} industry",
+            'competitive_position': f"Competitive analysis for {industry_focus} market",
+            'performance_metrics': f"Current performance data from {website_url} analysis",
+            'content_preferences': f"Formats preferred by {', '.join(demographics)} audience",
+            'consumption_patterns': f"Patterns for {expertise_level} level audience in {industry_focus}",
+            'audience_pain_points': f"Specific challenges for {industry_focus} professionals",
+            'buying_journey': f"Customer journey mapped for {industry_focus} industry",
+            'seasonal_trends': f"Seasonal patterns specific to {industry_focus} content",
+            'engagement_metrics': f"Expected engagement for {writing_tone} tone content",
+            'top_competitors': f"Main competitors in {industry_focus} space",
+            'competitor_content_strategies': f"Competitor analysis for {industry_focus} content strategies",
+            'market_gaps': f"Opportunities identified in {industry_focus} content market",
+            'industry_trends': f"Current trends in {industry_focus} industry",
+            'emerging_trends': f"Upcoming trends for {industry_focus} content",
+            'preferred_formats': f"Formats optimized for {expertise_level} audience",
+            'content_mix': f"Optimal mix for {primary_type} focus",
+            'content_frequency': f"Frequency based on {(context_summary.get('research_config') or {}).get('research_depth') or 'Standard'} research depth",
+            'optimal_timing': f"Best times for {demographics[0]} audience",
+            'quality_metrics': f"Quality standards for {writing_tone} content",
+            'editorial_guidelines': f"Guidelines matching {writing_tone} tone from {website_url} analysis",
+            'brand_voice': f"Voice derived from {writing_tone} tone analysis of {website_url}",
+            'traffic_sources': f"Primary sources for {industry_focus} content",
+            'conversion_rates': f"Realistic rates for {business_size} business",
+            'content_roi_targets': f"ROI goals for {industry_focus} content",
+            'ab_testing_capabilities': f"A/B testing availability based on {business_size} capabilities"
+        }
+        return {
+            'explanation': personalization_explanations.get(field_key, f"Personalized for {website_url}"),
+            'data_sources': {
+                'website_analysis': bool(context_summary.get('content_analysis')),
+                'audience_insights': bool(context_summary.get('audience_insights')),
+                'ai_recommendations': bool(context_summary.get('ai_recommendations')),
+                'research_config': bool(context_summary.get('research_config'))
+            },
+            'personalization_factors': {
+                'website_url': website_url,
+                'industry_focus': industry_focus,
+                'writing_tone': writing_tone,
+                'expertise_level': expertise_level,
+                'business_size': business_size
             }
         }
-        logger.info("AI structured autofill completed | non_null_fields=%d missing=%d success_rate=%.1f%% attempts=%d", 
-                   len(non_null_keys), len(missing_fields), success_rate, self.max_retries + 1)
-        return payload

    def _extract_fields_from_raw_response(self, result: Dict[str, Any]) -> Dict[str, Any]:
        """Extract fields from malformed JSON response using regex patterns."""