From 13ca78f653a1c5870c4bf761a4b617bab7f9617c Mon Sep 17 00:00:00 2001 From: ajaysi Date: Sun, 10 Aug 2025 13:10:32 +0530 Subject: [PATCH] ALwrity version 0.5.4 --- .../api/enhanced_strategy_routes.py | 129 ++++- .../content_strategy/autofill/ai_refresh.py | 13 +- .../autofill/ai_structured_autofill.py | 491 +++++++++++++++--- .../services/enhanced_strategy_service.py | 32 +- backend/services/ai_service_manager.py | 5 +- .../services/llm_providers/gemini_provider.py | 178 ++++++- .../components/ContentStrategyBuilder.tsx | 62 ++- .../StrategicInputField.tsx | 4 +- frontend/src/stores/enhancedStrategyStore.ts | 4 +- 9 files changed, 801 insertions(+), 117 deletions(-) diff --git a/backend/api/content_planning/api/enhanced_strategy_routes.py b/backend/api/content_planning/api/enhanced_strategy_routes.py index 941c1dd2..6b4b551e 100644 --- a/backend/api/content_planning/api/enhanced_strategy_routes.py +++ b/backend/api/content_planning/api/enhanced_strategy_routes.py @@ -13,6 +13,7 @@ import asyncio from datetime import datetime, timedelta from collections import defaultdict import time +import re # Import database from services.database import get_db_session @@ -339,24 +340,138 @@ async def create_enhanced_strategy( try: logger.info("🚀 Creating enhanced content strategy") - # Validate required fields + # Basic required checks if not strategy_data.get('user_id'): raise HTTPException(status_code=400, detail="user_id is required") - if not strategy_data.get('name'): raise HTTPException(status_code=400, detail="strategy name is required") - - # Create enhanced strategy + + def parse_float(value: Any) -> Optional[float]: + if value is None: + return None + if isinstance(value, (int, float)): + return float(value) + if isinstance(value, str): + s = value.strip().lower().replace(",", "") + # Handle percentage + if s.endswith('%'): + try: + return float(s[:-1]) + except Exception: + pass + # Handle k/m suffix + mul = 1.0 + if s.endswith('k'): + mul = 1_000.0 + s = s[:-1] + 
elif s.endswith('m'): + mul = 1_000_000.0 + s = s[:-1] + m = re.search(r"[-+]?\d*\.?\d+", s) + if m: + try: + return float(m.group(0)) * mul + except Exception: + return None + return None + + def parse_int(value: Any) -> Optional[int]: + f = parse_float(value) + if f is None: + return None + try: + return int(round(f)) + except Exception: + return None + + def parse_json(value: Any) -> Optional[Any]: + if value is None: + return None + if isinstance(value, (dict, list)): + return value + if isinstance(value, str): + try: + return json.loads(value) + except Exception: + # Accept plain strings in JSON columns + return value + return None + + def parse_array(value: Any) -> Optional[list]: + if value is None: + return None + if isinstance(value, list): + return value + if isinstance(value, str): + # Try JSON first + try: + j = json.loads(value) + if isinstance(j, list): + return j + except Exception: + pass + parts = [p.strip() for p in value.split(',') if p.strip()] + return parts if parts else None + return None + + # Coerce and validate fields + warnings: Dict[str, str] = {} + cleaned = dict(strategy_data) + + # Numerics + content_budget = parse_float(strategy_data.get('content_budget')) + if strategy_data.get('content_budget') is not None and content_budget is None: + warnings['content_budget'] = 'Could not parse number; saved as null' + cleaned['content_budget'] = content_budget + + team_size = parse_int(strategy_data.get('team_size')) + if strategy_data.get('team_size') is not None and team_size is None: + warnings['team_size'] = 'Could not parse integer; saved as null' + cleaned['team_size'] = team_size + + # Arrays + preferred_formats = parse_array(strategy_data.get('preferred_formats')) + if strategy_data.get('preferred_formats') is not None and preferred_formats is None: + warnings['preferred_formats'] = 'Could not parse list; saved as null' + cleaned['preferred_formats'] = preferred_formats + + # JSON fields + json_fields = [ + 
'business_objectives','target_metrics','performance_metrics','content_preferences', + 'consumption_patterns','audience_pain_points','buying_journey','seasonal_trends', + 'engagement_metrics','top_competitors','competitor_content_strategies','market_gaps', + 'industry_trends','emerging_trends','content_mix','optimal_timing','quality_metrics', + 'editorial_guidelines','brand_voice','traffic_sources','conversion_rates','content_roi_targets', + 'target_audience','content_pillars','ai_recommendations' + ] + for field in json_fields: + raw = strategy_data.get(field) + parsed = parse_json(raw) + # parsed may be a plain string; accept it + cleaned[field] = parsed + + # Booleans + if 'ab_testing_capabilities' in strategy_data: + cleaned['ab_testing_capabilities'] = bool(strategy_data.get('ab_testing_capabilities')) + + # Early return on validation errors + if warnings: + logger.warning(f"â„šī¸ Strategy create warnings: {warnings}") + + # Proceed with create using cleaned data db_service = EnhancedStrategyDBService(db) enhanced_service = EnhancedStrategyService(db_service) - created_strategy = await enhanced_service.create_enhanced_strategy(strategy_data, db) + created_strategy = await enhanced_service.create_enhanced_strategy(cleaned, db) - logger.info(f"✅ Enhanced strategy created successfully: {created_strategy.get('id')}") + logger.info(f"✅ Enhanced strategy created successfully: {created_strategy.get('id') if isinstance(created_strategy, dict) else getattr(created_strategy,'id', None)}") - return ResponseBuilder.create_success_response( + resp = ResponseBuilder.create_success_response( message="Enhanced content strategy created successfully", data=created_strategy ) + if warnings: + resp['warnings'] = warnings + return resp except HTTPException: raise diff --git a/backend/api/content_planning/services/content_strategy/autofill/ai_refresh.py b/backend/api/content_planning/services/content_strategy/autofill/ai_refresh.py index 58b02902..4cca9f5b 100644 --- 
a/backend/api/content_planning/services/content_strategy/autofill/ai_refresh.py +++ b/backend/api/content_planning/services/content_strategy/autofill/ai_refresh.py @@ -54,7 +54,18 @@ class AutoFillRefreshService: except Exception as e: logger.error("AI-only structured generation failed | user=%s | err=%s", user_id, repr(e)) logger.error("Traceback:\n%s", traceback.format_exc()) - raise + # Return graceful fallback instead of raising + return { + 'fields': {}, + 'sources': {}, + 'meta': { + 'ai_used': False, + 'ai_overrides_count': 0, + 'ai_override_fields': [], + 'ai_only': True, + 'error': str(e) + } + } # Fallback to previous behavior (DB + sparse overrides) payload = await self.autofill.get_autofill(user_id) diff --git a/backend/api/content_planning/services/content_strategy/autofill/ai_structured_autofill.py b/backend/api/content_planning/services/content_strategy/autofill/ai_structured_autofill.py index 0f251272..523091ee 100644 --- a/backend/api/content_planning/services/content_strategy/autofill/ai_structured_autofill.py +++ b/backend/api/content_planning/services/content_strategy/autofill/ai_structured_autofill.py @@ -7,27 +7,53 @@ from services.ai_service_manager import AIServiceManager, AIServiceType logger = logging.getLogger(__name__) +# Complete core fields - all 30+ fields that the frontend expects CORE_FIELDS = [ - 'business_objectives','target_metrics','content_budget','team_size','implementation_timeline', - 'market_share','competitive_position','performance_metrics','content_preferences','consumption_patterns', - 'audience_pain_points','buying_journey','seasonal_trends','engagement_metrics','top_competitors', - 'competitor_content_strategies','market_gaps','industry_trends','emerging_trends','preferred_formats', - 'content_mix','content_frequency','optimal_timing','quality_metrics','editorial_guidelines','brand_voice', - 'traffic_sources','conversion_rates','content_roi_targets','ab_testing_capabilities' + # Business Context (8 fields) + 
'business_objectives', 'target_metrics', 'content_budget', 'team_size', 'implementation_timeline', + 'market_share', 'competitive_position', 'performance_metrics', + + # Audience Intelligence (6 fields) + 'content_preferences', 'consumption_patterns', 'audience_pain_points', + 'buying_journey', 'seasonal_trends', 'engagement_metrics', + + # Competitive Intelligence (5 fields) + 'top_competitors', 'competitor_content_strategies', 'market_gaps', 'industry_trends', 'emerging_trends', + + # Content Strategy (7 fields) + 'preferred_formats', 'content_mix', 'content_frequency', 'optimal_timing', + 'quality_metrics', 'editorial_guidelines', 'brand_voice', + + # Performance & Analytics (4 fields) + 'traffic_sources', 'conversion_rates', 'content_roi_targets', 'ab_testing_capabilities' ] JSON_FIELDS = { - 'business_objectives', 'target_metrics', 'content_preferences' + 'business_objectives', 'target_metrics', 'content_preferences', 'consumption_patterns', + 'audience_pain_points', 'buying_journey', 'seasonal_trends', 'engagement_metrics', + 'competitor_content_strategies', 'market_gaps', 'industry_trends', 'emerging_trends', + 'content_mix', 'optimal_timing', 'quality_metrics', 'editorial_guidelines', + 'conversion_rates', 'content_roi_targets', 'performance_metrics' } + ARRAY_FIELDS = { - 'preferred_formats' + 'preferred_formats', 'top_competitors', 'market_gaps', 'industry_trends', 'traffic_sources' +} + +# Select field options mapping for value normalization +SELECT_FIELD_OPTIONS = { + 'implementation_timeline': ['3 months', '6 months', '1 year', '2 years', 'Ongoing'], + 'competitive_position': ['Leader', 'Challenger', 'Niche', 'Emerging'], + 'content_frequency': ['Daily', 'Weekly', 'Bi-weekly', 'Monthly', 'Quarterly'], + 'brand_voice': ['Professional', 'Casual', 'Friendly', 'Authoritative', 'Innovative'] } class AIStructuredAutofillService: - """Generate the complete 30+ Strategy Builder fields strictly from AI using onboarding context only.""" + """Generate the 
complete Strategy Builder fields strictly from AI using onboarding context only.""" def __init__(self) -> None: self.ai = AIServiceManager() + self.max_retries = 2 # Maximum retry attempts for malformed JSON def _build_context_summary(self, context: Dict[str, Any]) -> Dict[str, Any]: website = context.get('website_analysis') or {} @@ -73,89 +99,353 @@ class AIStructuredAutofillService: return summary def _build_schema(self) -> Dict[str, Any]: - # Build a Gemini SDK-compatible Schema (dict equivalent), not JSON Schema. - # Avoid unsupported keys like oneOf/additionalProperties. + # Simplified schema following Gemini best practices + # Reduce complexity by flattening nested structures and simplifying constraints properties: Dict[str, Any] = {} - typed_overrides: Dict[str, Any] = { - # Use STRING for complex JSON-bearing fields to avoid OBJECT property constraints - 'business_objectives': {"type": "STRING"}, - 'target_metrics': {"type": "STRING"}, - 'content_preferences': {"type": "STRING"}, - # Known arrays - 'preferred_formats': {"type": "ARRAY", "items": {"type": "STRING"}}, - # Known selects - 'content_frequency': {"type": "STRING"}, + + # Simplified field definitions - avoid complex constraints that cause 400 errors + field_definitions = { + # Core business fields (simplified) + 'business_objectives': {"type": "STRING", "description": "Business goals and objectives"}, + 'target_metrics': {"type": "STRING", "description": "KPIs and success metrics"}, + 'content_budget': {"type": "NUMBER", "description": "Monthly content budget in dollars"}, + 'team_size': {"type": "NUMBER", "description": "Number of people in content team"}, + 'implementation_timeline': {"type": "STRING", "description": "Strategy implementation timeline"}, + 'market_share': {"type": "STRING", "description": "Current market share percentage"}, + 'competitive_position': {"type": "STRING", "description": "Market competitive position"}, + 'performance_metrics': {"type": "STRING", "description": 
"Current performance data"}, + + # Audience fields (simplified) + 'content_preferences': {"type": "STRING", "description": "Content format and topic preferences"}, + 'consumption_patterns': {"type": "STRING", "description": "When and how audience consumes content"}, + 'audience_pain_points': {"type": "STRING", "description": "Key audience challenges and pain points"}, + 'buying_journey': {"type": "STRING", "description": "Customer journey stages and touchpoints"}, + 'seasonal_trends': {"type": "STRING", "description": "Seasonal content patterns and trends"}, + 'engagement_metrics': {"type": "STRING", "description": "Current engagement data and metrics"}, + + # Competitive fields (simplified) + 'top_competitors': {"type": "STRING", "description": "Main competitors"}, + 'competitor_content_strategies': {"type": "STRING", "description": "Analysis of competitor content approaches"}, + 'market_gaps': {"type": "STRING", "description": "Market opportunities and gaps"}, + 'industry_trends': {"type": "STRING", "description": "Current industry trends"}, + 'emerging_trends': {"type": "STRING", "description": "Upcoming trends and opportunities"}, + + # Content strategy fields (simplified) + 'preferred_formats': {"type": "STRING", "description": "Preferred content formats"}, + 'content_mix': {"type": "STRING", "description": "Content mix distribution"}, + 'content_frequency': {"type": "STRING", "description": "Content publishing frequency"}, + 'optimal_timing': {"type": "STRING", "description": "Best times for publishing content"}, + 'quality_metrics': {"type": "STRING", "description": "Content quality standards and metrics"}, + 'editorial_guidelines': {"type": "STRING", "description": "Style and tone guidelines"}, + 'brand_voice': {"type": "STRING", "description": "Brand voice and tone"}, + + # Performance fields (simplified) + 'traffic_sources': {"type": "STRING", "description": "Primary traffic sources"}, + 'conversion_rates': {"type": "STRING", "description": "Target 
conversion rates and metrics"}, + 'content_roi_targets': {"type": "STRING", "description": "ROI goals and targets for content"}, + 'ab_testing_capabilities': {"type": "BOOLEAN", "description": "Whether A/B testing capabilities are available"} } - for key in CORE_FIELDS: - properties[key] = typed_overrides.get(key, {"type": "STRING"}) + + # Build properties from field definitions + for field_id in CORE_FIELDS: + if field_id in field_definitions: + properties[field_id] = field_definitions[field_id] + else: + # Fallback for any missing fields + properties[field_id] = {"type": "STRING", "description": f"Value for {field_id}"} + + # Use propertyOrdering as recommended by Gemini docs for consistent output schema = { "type": "OBJECT", "properties": properties, - # Property ordering can help response consistency per Gemini docs - "propertyOrdering": CORE_FIELDS, + "required": CORE_FIELDS, # Make all fields required + "propertyOrdering": CORE_FIELDS, # Critical for consistent JSON output + "description": "Content strategy fields with simplified constraints" } - logger.debug("AI Structured Autofill: schema built (SDK) with %d properties", len(CORE_FIELDS)) + + logger.debug("AI Structured Autofill: simplified schema built with %d properties and property ordering", len(CORE_FIELDS)) return schema def _build_prompt(self, context_summary: Dict[str, Any]) -> str: + # Ultra-simplified prompt to avoid JSON parsing issues prompt = ( - "You are a senior content strategy system. Using ONLY the provided context (do not copy raw\n" - "values), infer professional, actionable values for ALL of the following 30+ strategy fields.\n" - "Output strictly valid JSON matching the given schema. Provide concise, business-ready values.\n" - "If you are uncertain, infer the most reasonable assumption for a small business. 
Do not leave\n" - "fields empty.\n\n" - f"CONTEXT:\n{json.dumps(context_summary, indent=2)}\n\n" - "FIELDS TO PRODUCE (keys only; values inferred):\n" - f"{CORE_FIELDS}\n" + "Generate a JSON object with exactly 30 fields for content strategy. Use this exact format:\n\n" + + '{\n' + '"business_objectives": "Increase traffic and leads",\n' + '"target_metrics": "25% growth, 15% conversion",\n' + '"content_budget": 3000,\n' + '"team_size": 3,\n' + '"implementation_timeline": "6 months",\n' + '"market_share": "15%",\n' + '"competitive_position": "Leader",\n' + '"performance_metrics": "Current metrics data",\n' + '"content_preferences": "Blog posts, videos",\n' + '"consumption_patterns": "Peak hours 9-11 AM",\n' + '"audience_pain_points": "Time constraints, complexity",\n' + '"buying_journey": "Awareness to Decision",\n' + '"seasonal_trends": "Q1 planning, Q2 execution",\n' + '"engagement_metrics": "3.5% engagement rate",\n' + '"top_competitors": "Competitor A, B, C",\n' + '"competitor_content_strategies": "Educational content approach",\n' + '"market_gaps": "AI tools, automation guides",\n' + '"industry_trends": "AI integration, video content",\n' + '"emerging_trends": "Voice search, interactive content",\n' + '"preferred_formats": "Blog posts, videos, infographics",\n' + '"content_mix": "70% educational, 30% promotional",\n' + '"content_frequency": "Weekly",\n' + '"optimal_timing": "Tuesday/Thursday 10 AM",\n' + '"quality_metrics": "SEO score >90, engagement >3%",\n' + '"editorial_guidelines": "Professional tone, actionable insights",\n' + '"brand_voice": "Professional",\n' + '"traffic_sources": "Organic search, social media",\n' + '"conversion_rates": "15% conversion, $200 CPA",\n' + '"content_roi_targets": "15% conversion, 3:1 ROI",\n' + '"ab_testing_capabilities": true\n' + '}\n\n' + + f"Business context: {json.dumps(context_summary, indent=2)}\n\n" + "Generate the complete JSON with all 30 fields:" ) - logger.debug("AI Structured Autofill: prompt preview=%d chars", 
len(prompt)) + logger.debug("AI Structured Autofill: ultra-simplified prompt (%d chars)", len(prompt)) return prompt def _normalize_value(self, key: str, value: Any) -> Any: if value is None: return None - # Parse JSON-bearing fields if they arrived as JSON strings - if key in JSON_FIELDS: + + # Handle numeric fields that might come as text + if key in ['content_budget', 'team_size']: + if isinstance(value, (int, float)): + return value + elif isinstance(value, str): + # Extract numeric value from text + import re + # Remove currency symbols, commas, and common words + cleaned = re.sub(r'[$,â‚ŦÂŖÂĨ]', '', value.lower()) + cleaned = re.sub(r'\b(monthly|yearly|annual|people|person|specialist|creator|writer|editor|team|member)\b', '', cleaned) + cleaned = re.sub(r'\s+', ' ', cleaned).strip() + + # Extract first number found + numbers = re.findall(r'\d+(?:\.\d+)?', cleaned) + if numbers: + try: + num_value = float(numbers[0]) + # For team_size, convert to integer + if key == 'team_size': + return int(num_value) + return num_value + except (ValueError, TypeError): + pass + + logger.warning(f"Could not extract numeric value from '{key}' field: '{value}'") + return None + + # Handle boolean fields + if key == 'ab_testing_capabilities': + if isinstance(value, bool): + return value + elif isinstance(value, str): + normalized_value = value.lower().strip() + if normalized_value in ['true', 'yes', 'available', 'enabled', '1']: + return True + elif normalized_value in ['false', 'no', 'unavailable', 'disabled', '0']: + return False + logger.warning(f"Could not parse boolean value for '{key}': '{value}'") + return None + + # Handle select fields with predefined options + if key in SELECT_FIELD_OPTIONS: if isinstance(value, str): - try: - return json.loads(value) - except Exception: - # Keep as string if not valid JSON - return value - return value - # Coerce arrays from comma-separated strings where applicable - if key in ARRAY_FIELDS: - if isinstance(value, str): - split = 
[s.strip() for s in value.split(',') if s.strip()] - return split if split else None - if isinstance(value, list): - return [str(v) for v in value] - return None - return value + # Try exact match first (case-insensitive) + normalized_value = value.lower().strip() + for option in SELECT_FIELD_OPTIONS[key]: + if normalized_value == option.lower(): + return option + + # Try partial matching for common variations + for option in SELECT_FIELD_OPTIONS[key]: + option_lower = option.lower() + # Handle common variations + if (normalized_value.startswith(option_lower) or + option_lower in normalized_value or + normalized_value.endswith(option_lower)): + return option + + # Special handling for content_frequency + if key == 'content_frequency': + if 'daily' in normalized_value: + return 'Daily' + elif 'weekly' in normalized_value or 'week' in normalized_value: + return 'Weekly' + elif 'bi-weekly' in normalized_value or 'biweekly' in normalized_value: + return 'Bi-weekly' + elif 'monthly' in normalized_value or 'month' in normalized_value: + return 'Monthly' + elif 'quarterly' in normalized_value or 'quarter' in normalized_value: + return 'Quarterly' + + # If no match found, return the first option as fallback + logger.warning(f"Could not normalize select field '{key}' value: '{value}' to valid options: {SELECT_FIELD_OPTIONS[key]}") + return SELECT_FIELD_OPTIONS[key][0] # Return first option as fallback + + # For all other fields, ensure they're strings and not empty + if isinstance(value, str): + # Special handling for multiselect fields + if key in ['preferred_formats', 'top_competitors', 'market_gaps', 'industry_trends', 'traffic_sources']: + # Split by comma and clean up each item + items = [item.strip() for item in value.split(',') if item.strip()] + if items: + return items # Return as array for multiselect fields + return None + return value.strip() if value.strip() else None + elif isinstance(value, (int, float, bool)): + return str(value) + elif isinstance(value, 
list): + # For multiselect fields, return the list as-is + if key in ['preferred_formats', 'top_competitors', 'market_gaps', 'industry_trends', 'traffic_sources']: + return [str(item) for item in value if item] + # For other fields, convert arrays to comma-separated strings + return ', '.join(str(item) for item in value if item) + else: + return str(value) if value else None + + def _calculate_success_rate(self, result: Dict[str, Any]) -> float: + """Calculate the percentage of successfully filled fields.""" + if not isinstance(result, dict): + return 0.0 + + filled_fields = 0 + for key in CORE_FIELDS: + value = result.get(key) + if value is not None and value != "" and value != []: + # Additional checks for different data types + if isinstance(value, str) and value.strip(): + filled_fields += 1 + elif isinstance(value, (int, float)) and value != 0: + filled_fields += 1 + elif isinstance(value, bool): + filled_fields += 1 + elif isinstance(value, list) and len(value) > 0: + filled_fields += 1 + elif value is not None and value != "": + filled_fields += 1 + + return (filled_fields / len(CORE_FIELDS)) * 100 + + def _should_retry(self, result: Dict[str, Any], attempt: int) -> bool: + """Determine if we should retry based on success rate and attempt count.""" + if attempt >= self.max_retries: + return False + + # Check if result has error + if 'error' in result: + logger.info(f"Retry attempt {attempt + 1} due to error: {result.get('error')}") + return True + + # Check success rate - stop immediately if we have 100% success + success_rate = self._calculate_success_rate(result) + logger.info(f"Success rate: {success_rate:.1f}% (attempt {attempt + 1})") + + # If we have 100% success, don't retry + if success_rate >= 100.0: + logger.info(f"Perfect success rate achieved: {success_rate:.1f}% - no retry needed") + return False + + # Retry if success rate is below 80% (more aggressive than 50%) + if success_rate < 80.0: + logger.info(f"Retry attempt {attempt + 1} due to low 
success rate: {success_rate:.1f}% (need 80%+)") + return True + + # Also retry if we're missing more than 6 fields (20% of 30 fields) + missing_count = len([k for k in CORE_FIELDS if not result.get(k) or result.get(k) == "" or result.get(k) == []]) + if missing_count > 6: + logger.info(f"Retry attempt {attempt + 1} due to too many missing fields: {missing_count} missing (max 6)") + return True + + return False async def generate_autofill_fields(self, user_id: int, context: Dict[str, Any]) -> Dict[str, Any]: context_summary = self._build_context_summary(context) schema = self._build_schema() prompt = self._build_prompt(context_summary) - logger.info("AIStructuredAutofillService: generating 30+ fields | user=%s", user_id) + logger.info("AIStructuredAutofillService: generating %d fields | user=%s", len(CORE_FIELDS), user_id) logger.debug("AIStructuredAutofillService: properties=%d", len(schema.get('properties', {}))) - try: - result = await self.ai.execute_structured_json_call( - service_type=AIServiceType.STRATEGIC_INTELLIGENCE, - prompt=prompt, - schema=schema - ) - except Exception as e: - logger.error("AI structured call failed | user=%s | err=%s", user_id, repr(e)) - logger.error("Traceback:\n%s", traceback.format_exc()) - raise + + last_result = None + for attempt in range(self.max_retries + 1): + try: + logger.info(f"AI structured call attempt {attempt + 1}/{self.max_retries + 1}") + result = await self.ai.execute_structured_json_call( + service_type=AIServiceType.STRATEGIC_INTELLIGENCE, + prompt=prompt, + schema=schema + ) + last_result = result + + # Check if we should retry + if not self._should_retry(result, attempt): + break + + # Add a small delay before retry + if attempt < self.max_retries: + import asyncio + await asyncio.sleep(1) + + except Exception as e: + logger.error(f"AI structured call failed (attempt {attempt + 1}) | user=%s | err=%s", user_id, repr(e)) + logger.error("Traceback:\n%s", traceback.format_exc()) + last_result = { + 'error': str(e) 
+ } + if attempt < self.max_retries: + import asyncio + await asyncio.sleep(1) + continue + break - if not isinstance(result, dict): - raise ValueError("AI did not return a structured JSON object") + # Process the final result + if not isinstance(last_result, dict): + logger.warning("AI did not return a structured JSON object, got: %s", type(last_result)) + return { + 'fields': {}, + 'sources': {}, + 'meta': { + 'ai_used': False, + 'ai_overrides_count': 0, + 'missing_fields': CORE_FIELDS, + 'error': f"AI returned {type(last_result)} instead of dict", + 'attempts': self.max_retries + 1 + } + } + + # Check if AI returned an error + if 'error' in last_result: + logger.warning("AI returned error after all attempts: %s", last_result.get('error')) + return { + 'fields': {}, + 'sources': {}, + 'meta': { + 'ai_used': False, + 'ai_overrides_count': 0, + 'missing_fields': CORE_FIELDS, + 'error': last_result.get('error', 'Unknown AI error'), + 'attempts': self.max_retries + 1 + } + } + + # Try to extract fields from malformed JSON if needed + if len(last_result) < len(CORE_FIELDS) * 0.5: # If we got less than 50% of fields + logger.warning("AI returned incomplete result, attempting to extract from raw response") + # Try to extract key-value pairs from the raw response + extracted_result = self._extract_fields_from_raw_response(last_result) + if extracted_result and len(extracted_result) > len(last_result): + logger.info("Successfully extracted additional fields from raw response") + last_result = extracted_result try: - logger.debug("AI structured result keys=%d | sample keys=%s", len(list(result.keys())), list(result.keys())[:8]) + logger.debug("AI structured result keys=%d | sample keys=%s", len(list(last_result.keys())), list(last_result.keys())[:8]) except Exception: pass @@ -163,14 +453,40 @@ class AIStructuredAutofillService: fields: Dict[str, Any] = {} sources: Dict[str, str] = {} non_null_keys = [] + missing_fields = [] + for key in CORE_FIELDS: - raw_value = 
result.get(key) + raw_value = last_result.get(key) norm_value = self._normalize_value(key, raw_value) if norm_value is not None and norm_value != "" and norm_value != []: fields[key] = { 'value': norm_value, 'source': 'ai_refresh', 'confidence': 0.8 } sources[key] = 'ai_refresh' non_null_keys.append(key) - missing_fields = [k for k in CORE_FIELDS if k not in non_null_keys] + else: + missing_fields.append(key) + + # Log detailed field analysis + logger.info("AI structured autofill field analysis:") + logger.info("✅ Generated fields (%d): %s", len(non_null_keys), non_null_keys) + logger.info("❌ Missing fields (%d): %s", len(missing_fields), missing_fields) + + # Categorize missing fields + field_categories = { + 'business_context': ['business_objectives', 'target_metrics', 'content_budget', 'team_size', 'implementation_timeline', 'market_share', 'competitive_position', 'performance_metrics'], + 'audience_intelligence': ['content_preferences', 'consumption_patterns', 'audience_pain_points', 'buying_journey', 'seasonal_trends', 'engagement_metrics'], + 'competitive_intelligence': ['top_competitors', 'competitor_content_strategies', 'market_gaps', 'industry_trends', 'emerging_trends'], + 'content_strategy': ['preferred_formats', 'content_mix', 'content_frequency', 'optimal_timing', 'quality_metrics', 'editorial_guidelines', 'brand_voice'], + 'performance_analytics': ['traffic_sources', 'conversion_rates', 'content_roi_targets', 'ab_testing_capabilities'] + } + + for category, category_fields in field_categories.items(): + generated_in_category = [f for f in category_fields if f in non_null_keys] + missing_in_category = [f for f in category_fields if f in missing_fields] + logger.info("📊 %s: %d/%d fields generated (%s missing: %s)", + category.upper(), len(generated_in_category), len(category_fields), + len(missing_in_category), missing_in_category) + + success_rate = self._calculate_success_rate(last_result) payload = { 'fields': fields, @@ -180,8 +496,43 @@ class 
AIStructuredAutofillService: 'ai_overrides_count': len(non_null_keys), 'ai_override_fields': non_null_keys, 'ai_only': True, - 'missing_fields': missing_fields + 'missing_fields': missing_fields, + 'success_rate': success_rate, + 'attempts': self.max_retries + 1 } } - logger.info("AI structured autofill completed | non_null_fields=%d missing=%d", len(non_null_keys), len(missing_fields)) - return payload \ No newline at end of file + logger.info("AI structured autofill completed | non_null_fields=%d missing=%d success_rate=%.1f%% attempts=%d", + len(non_null_keys), len(missing_fields), success_rate, self.max_retries + 1) + return payload + + def _extract_fields_from_raw_response(self, result: Dict[str, Any]) -> Dict[str, Any]: + """Extract fields from malformed JSON response using regex patterns.""" + import re + + # Convert result to string for pattern matching + result_str = str(result) + + extracted = {} + + # Pattern to match key-value pairs in JSON-like format + patterns = [ + r'"([^"]+)":\s*"([^"]*)"', # String values + r'"([^"]+)":\s*(\d+(?:\.\d+)?)', # Numeric values + r'"([^"]+)":\s*(true|false)', # Boolean values + r'"([^"]+)":\s*\[([^\]]*)\]', # Array values + ] + + for pattern in patterns: + matches = re.findall(pattern, result_str) + for key, value in matches: + if key in CORE_FIELDS: + # Clean up the value + if value.lower() in ['true', 'false']: + extracted[key] = value.lower() == 'true' + elif value.replace('.', '').isdigit(): + extracted[key] = float(value) if '.' 
in value else int(value) + else: + extracted[key] = value.strip('"') + + logger.info("Extracted %d fields from raw response: %s", len(extracted), list(extracted.keys())) + return extracted \ No newline at end of file diff --git a/backend/api/content_planning/services/enhanced_strategy_service.py b/backend/api/content_planning/services/enhanced_strategy_service.py index ce2f1259..bd157add 100644 --- a/backend/api/content_planning/services/enhanced_strategy_service.py +++ b/backend/api/content_planning/services/enhanced_strategy_service.py @@ -802,6 +802,9 @@ class EnhancedStrategyService: payload = await service.get_autofill(user_id) logger.info(f"Retrieved comprehensive onboarding data for user {user_id}") return payload + except Exception as e: + logger.error(f"Error getting onboarding data: {str(e)}") + raise finally: temp_db.close() except Exception as e: @@ -821,15 +824,15 @@ class EnhancedStrategyService: if 'content_goals' in website_data and website_data.get('content_goals'): fields['business_objectives'] = { 'value': website_data.get('content_goals'), - 'source': 'website_analysis', + 'source': 'website_analysis', 'confidence': website_data.get('confidence_level') - } + } # Prefer explicit target_metrics; otherwise derive from performance_metrics if website_data.get('target_metrics'): fields['target_metrics'] = { 'value': website_data.get('target_metrics'), - 'source': 'website_analysis', + 'source': 'website_analysis', 'confidence': website_data.get('confidence_level') } elif website_data.get('performance_metrics'): @@ -843,35 +846,35 @@ class EnhancedStrategyService: if website_data.get('content_budget') is not None: fields['content_budget'] = { 'value': website_data.get('content_budget'), - 'source': 'website_analysis', + 'source': 'website_analysis', 'confidence': website_data.get('confidence_level') } elif isinstance(session_data, dict) and session_data.get('budget') is not None: fields['content_budget'] = { 'value': session_data.get('budget'), 
'source': 'onboarding_session', - 'confidence': 0.7 - } + 'confidence': 0.7 + } # Team size: website data preferred, else onboarding session team_size if website_data.get('team_size') is not None: fields['team_size'] = { 'value': website_data.get('team_size'), - 'source': 'website_analysis', + 'source': 'website_analysis', 'confidence': website_data.get('confidence_level') } elif isinstance(session_data, dict) and session_data.get('team_size') is not None: fields['team_size'] = { 'value': session_data.get('team_size'), 'source': 'onboarding_session', - 'confidence': 0.7 - } + 'confidence': 0.7 + } # Implementation timeline: website data preferred, else onboarding session timeline if website_data.get('implementation_timeline'): fields['implementation_timeline'] = { 'value': website_data.get('implementation_timeline'), - 'source': 'website_analysis', + 'source': 'website_analysis', 'confidence': website_data.get('confidence_level') } elif isinstance(session_data, dict) and session_data.get('timeline'): @@ -885,15 +888,15 @@ class EnhancedStrategyService: if website_data.get('market_share'): fields['market_share'] = { 'value': website_data.get('market_share'), - 'source': 'website_analysis', + 'source': 'website_analysis', 'confidence': website_data.get('confidence_level') } elif website_data.get('performance_metrics'): fields['market_share'] = { 'value': website_data.get('performance_metrics').get('estimated_market_share', None), - 'source': 'website_analysis', + 'source': 'website_analysis', 'confidence': website_data.get('confidence_level') - } + } fields['performance_metrics'] = { 'value': website_data.get('performance_metrics', {}), @@ -1179,5 +1182,4 @@ class EnhancedStrategyService: 'error_rates': {}, 'throughput_metrics': {} } - # No further action required - return \ No newline at end of file + # No further action required \ No newline at end of file diff --git a/backend/services/ai_service_manager.py b/backend/services/ai_service_manager.py index 
f1a69451..e7606c21 100644 --- a/backend/services/ai_service_manager.py +++ b/backend/services/ai_service_manager.py @@ -522,7 +522,10 @@ Format as structured JSON with detailed assessment and optimization guidance. error_message=error_message ) self.metrics.append(metrics) - raise Exception(error_message) + # Don't raise JSON decode errors as fatal - let the calling code handle them + # The Gemini provider should have already attempted to repair malformed JSON + result = {"error": error_message, "raw_response": str(e)} + success = False except Exception as e: error_message = f"AI call error for {service_type.value}: {str(e)}" logger.error(error_message) diff --git a/backend/services/llm_providers/gemini_provider.py b/backend/services/llm_providers/gemini_provider.py index c418f8aa..9105883f 100644 --- a/backend/services/llm_providers/gemini_provider.py +++ b/backend/services/llm_providers/gemini_provider.py @@ -241,6 +241,7 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9, if parsed: return parsed if isinstance(parsed, dict) else json.loads(json.dumps(parsed)) text = (response.text or '').strip() + # Strip markdown code fences if present if text.startswith('```'): # remove leading ```json or ``` and trailing ``` @@ -251,10 +252,14 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9, if text.endswith('```'): text = text[:-3] text = text.strip() + + # Try direct JSON parsing first try: return json.loads(text) - except json.JSONDecodeError: - # Fallback: extract likely JSON object substring + except json.JSONDecodeError as e: + logger.warning(f"Direct JSON parsing failed: {e}") + + # Fallback 1: Extract likely JSON object substring first = text.find('{') last = text.rfind('}') if first != -1 and last != -1 and last > first: @@ -262,17 +267,176 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9, try: return json.loads(candidate) except json.JSONDecodeError: - pass - # Final 
def _repair_json_string(text: str) -> "Optional[str]":
    """Best-effort repair of almost-JSON text returned by the model.

    The text is scanned once, character by character, tracking whether the
    scanner is inside a string and which containers are still open. That
    keeps every fix string-aware, so valid content (including empty strings
    and brackets inside strings) is never rewritten.

    Repairs applied:
      * content before the first ``{`` and after the top-level object closes
        is dropped;
      * raw newlines, carriage returns and tabs inside strings are escaped;
      * trailing commas before ``}`` / ``]`` are removed;
      * a missing ``,`` between members (or ``:`` after a key) is inserted;
      * stray closers are ignored and missing closers are inserted in the
        correct nesting order;
      * truncated input is completed: an open string is terminated, a key
        with no value gets ``null``, and open containers are closed.

    Unescaped quotes inside string values and truncated bare literals
    (e.g. ``tru``) are not repaired.

    Args:
        text: Raw model output.

    Returns:
        The repaired candidate string, or ``None`` when ``text`` is empty or
        contains no ``{``. The result is not guaranteed to parse; callers
        still pass it to ``json.loads``.
    """
    # NOTE(review): the return annotation is quoted so this helper does not
    # depend on typing names being imported at module level - confirm.
    if not text:
        return None
    start = text.find('{')
    if start == -1:
        return None

    control_escapes = {'\n': '\\n', '\r': '\\r', '\t': '\\t'}
    # Characters after which a new value/key may start without a separator.
    value_may_follow = ('', '{', '[', ',', ':')

    out = []               # repaired output fragments
    closers = []           # stack of closers for the containers still open
    in_string = False
    escaped = False
    string_is_key = False  # whether the most recent string sits in key position
    prev = ''              # last significant character seen outside a string

    def drop_trailing_comma():
        idx = len(out) - 1
        while idx >= 0 and out[idx].isspace():
            idx -= 1
        if idx >= 0 and out[idx] == ',':
            del out[idx]

    def finish_member():
        # Make the innermost container closable: give a dangling key a value
        # or remove a trailing comma.
        if prev == ':':
            out.append(' null')
        elif prev == '"' and string_is_key:
            out.append(': null')
        else:
            drop_trailing_comma()

    for ch in text[start:]:
        if in_string:
            if escaped:
                out.append(ch)
                escaped = False
            elif ch == '\\':
                out.append(ch)
                escaped = True
            elif ch == '"':
                out.append(ch)
                in_string = False
                prev = '"'
            else:
                out.append(control_escapes.get(ch, ch))
            continue

        if ch == '"' or ch in '{[':
            if prev == '"' and string_is_key:
                out.append(':')      # key directly followed by a value
                prev = ':'
            elif prev not in value_may_follow:
                out.append(',')      # two members with no separator
                prev = ','
            if ch == '"':
                in_string = True
                string_is_key = (
                    bool(closers) and closers[-1] == '}' and prev in ('{', ',')
                )
            else:
                closers.append('}' if ch == '{' else ']')
                prev = ch
            out.append(ch)
        elif ch in '}]':
            if ch not in closers:
                continue             # stray closer with no matching opener
            finish_member()
            while closers[-1] != ch:
                out.append(closers.pop())   # close inner containers first
            out.append(closers.pop())
            prev = ch
            if not closers:
                break                # top-level object complete
        else:
            out.append(ch)
            if not ch.isspace():
                prev = ch

    # Input ended early: terminate the open string and close what is open.
    if in_string:
        if escaped:
            out.pop()                # dangling backslash
        out.append('"')
        prev = '"'
    while closers:
        finish_member()
        closer = closers.pop()
        out.append(closer)
        prev = closer

    return ''.join(out)


def _extract_key_value_pairs(text: str) -> "Optional[Dict[str, Any]]":
    """Extract ``"key": value`` pairs from malformed JSON as a last resort.

    Recognised values are quoted strings, flat arrays and bare tokens
    (numbers, ``true``/``false``/``null``, unquoted words). Scalars are
    coerced: ``true``/``false`` to ``bool``, ``null`` to ``None``, integers
    to ``int``, decimals to ``float``; anything else stays a string. Nested
    objects are not captured as values; pairs inside them are picked up
    under their own keys. When a key repeats, the last occurrence wins.

    Args:
        text: Raw model output.

    Returns:
        Dict of extracted pairs, or ``None`` when ``text`` is empty or no
        pair could be extracted.
    """
    # Imported locally: the only `import re` visible in this module's diff
    # is function-scoped, so a module-level name cannot be assumed.
    import json
    import re

    if not text:
        return None

    pair_re = re.compile(
        r'"([^"]+)"\s*:\s*(?:'
        r'("(?:[^"\\]|\\.)*")'               # quoted string
        r'|(\[[^\]]*\])'                     # flat array
        r'|([^\s,}\]\[{"][^,}\]\n"]*)'       # bare token, never a bracket/quote
        r')'
    )
    int_re = re.compile(r'-?\d+')
    float_re = re.compile(r'-?(?:\d+\.\d*|\.\d+|\d+)(?:[eE][-+]?\d+)?')

    def coerce(token):
        lowered = token.lower()
        if lowered in ('true', 'false'):
            return lowered == 'true'
        if lowered == 'null':
            return None
        if int_re.fullmatch(token):
            return int(token)
        if float_re.fullmatch(token):
            return float(token)
        return token

    def unquote(token):
        # Resolve JSON escapes; fall back to the raw inner text.
        try:
            return json.loads(token)
        except ValueError:
            return token[1:-1]

    def parse_array(token):
        try:
            return json.loads(token)
        except ValueError:
            pass
        inner = token[1:-1]
        quoted_items = re.findall(r'"((?:[^"\\]|\\.)*)"', inner)
        if quoted_items:
            return [item.strip() for item in quoted_items if item.strip()]
        return [coerce(part.strip()) for part in inner.split(',') if part.strip()]

    result = {}
    for key, quoted, array, bare in pair_re.findall(text):
        if quoted:
            result[key] = coerce(unquote(quoted))
        elif array:
            result[key] = parse_array(array)
        else:
            result[key] = coerce(bare.strip())

    return result or None
a/frontend/src/components/ContentPlanningDashboard/components/ContentStrategyBuilder.tsx b/frontend/src/components/ContentPlanningDashboard/components/ContentStrategyBuilder.tsx index 1faf8b23..5da2ceea 100644 --- a/frontend/src/components/ContentPlanningDashboard/components/ContentStrategyBuilder.tsx +++ b/frontend/src/components/ContentPlanningDashboard/components/ContentStrategyBuilder.tsx @@ -419,31 +419,69 @@ const ContentStrategyBuilder: React.FC = () => { const sources = payload.sources || {}; const inputDataPoints = payload.input_data_points || {}; const meta = payload.meta || {}; + + console.log('đŸŽ¯ AI Refresh Result - Payload:', payload); + console.log('đŸŽ¯ AI Refresh Result - Fields:', fields); + console.log('đŸŽ¯ AI Refresh Result - Meta:', meta); + const fieldValues: Record = {}; Object.keys(fields).forEach((fieldId) => { const fieldData = fields[fieldId]; if (fieldData && typeof fieldData === 'object' && 'value' in fieldData) { fieldValues[fieldId] = fieldData.value; + console.log(`✅ Processed field ${fieldId}:`, fieldData.value); + } else { + console.log(`❌ Skipped field ${fieldId}:`, fieldData); } }); - useEnhancedStrategyStore.setState((state) => ({ - autoPopulatedFields: { ...state.autoPopulatedFields, ...fieldValues }, - dataSources: { ...state.dataSources, ...sources }, - inputDataPoints, - formData: { ...state.formData, ...fieldValues } - })); - if (!meta.ai_used || meta.ai_overrides_count === 0) { - const msg = 'AI did not produce new values. 
Please try again or complete onboarding data.'; + + console.log('đŸŽ¯ Final fieldValues:', fieldValues); + + useEnhancedStrategyStore.setState((state) => { + const newState = { + autoPopulatedFields: { ...state.autoPopulatedFields, ...fieldValues }, + dataSources: { ...state.dataSources, ...sources }, + inputDataPoints, + formData: { ...state.formData, ...fieldValues } + }; + console.log('đŸŽ¯ Updated store state:', newState); + return newState; + }); + + // Enhanced success/error messaging based on retry attempts and success rate + const attempts = meta.attempts || 1; + const successRate = meta.success_rate || 0; + const aiOverridesCount = meta.ai_overrides_count || 0; + + if (!meta.ai_used || aiOverridesCount === 0) { + const msg = meta.error || 'AI did not produce new values. Please try again or complete onboarding data.'; setError(msg); setRefreshError(msg); - setRefreshMessage('No new AI values available.'); + setRefreshMessage(`No new AI values available. (${attempts} attempt${attempts > 1 ? 's' : ''})`); + } else { + // Show success message with retry info if applicable + if (attempts > 1) { + setRefreshMessage(`AI refresh completed successfully! Generated ${aiOverridesCount} fields in ${attempts} attempts (${successRate.toFixed(1)}% success rate).`); + } else { + setRefreshMessage(`AI refresh completed! Generated ${aiOverridesCount} fields (${successRate.toFixed(1)}% success rate).`); + } + + // Show warning if success rate is low but we got some data + if (successRate < 70 && aiOverridesCount > 0) { + setRefreshError(`Warning: Only ${successRate.toFixed(1)}% of fields were filled. 
Some fields may need manual input.`); + } } + es.close(); setAIGenerating(false); setIsRefreshing(false); - if (!meta || meta.ai_overrides_count > 0) { - setRefreshMessage(null); - setRefreshProgress(0); + + // Clear success message after a delay + if (aiOverridesCount > 0) { + setTimeout(() => { + setRefreshMessage(null); + setRefreshProgress(0); + }, 5000); } } if (data.type === 'error') { diff --git a/frontend/src/components/ContentPlanningDashboard/components/ContentStrategyBuilder/StrategicInputField.tsx b/frontend/src/components/ContentPlanningDashboard/components/ContentStrategyBuilder/StrategicInputField.tsx index 2ef376b5..6a578d73 100644 --- a/frontend/src/components/ContentPlanningDashboard/components/ContentStrategyBuilder/StrategicInputField.tsx +++ b/frontend/src/components/ContentPlanningDashboard/components/ContentStrategyBuilder/StrategicInputField.tsx @@ -254,9 +254,9 @@ const StrategicInputField: React.FC = ({ required: false }, traffic_sources: { - type: 'json', + type: 'multiselect', label: 'Traffic Sources', - placeholder: 'Define your traffic sources', + options: ['Organic Search', 'Social Media', 'Email Marketing', 'Direct Traffic', 'Referral Traffic', 'Paid Search', 'Display Advertising', 'Content Marketing', 'Influencer Marketing', 'Video Platforms'], required: false }, conversion_rates: { diff --git a/frontend/src/stores/enhancedStrategyStore.ts b/frontend/src/stores/enhancedStrategyStore.ts index d4d519fb..ac4f0ece 100644 --- a/frontend/src/stores/enhancedStrategyStore.ts +++ b/frontend/src/stores/enhancedStrategyStore.ts @@ -496,9 +496,9 @@ export const STRATEGIC_INPUT_FIELDS: StrategicInputField[] = [ label: 'Traffic Sources', description: 'Primary traffic sources', tooltip: 'Identify your main traffic sources to understand where your audience comes from and optimize accordingly.', - type: 'json', + type: 'multiselect', required: false, - placeholder: 'Define traffic sources' + options: ['Organic Search', 'Social Media', 'Email 
Marketing', 'Direct Traffic', 'Referral Traffic', 'Paid Search', 'Display Advertising', 'Content Marketing', 'Influencer Marketing', 'Video Platforms'] }, { id: 'conversion_rates',