ALwrity version 0.5.4

This commit is contained in:
ajaysi
2025-08-10 13:10:32 +05:30
parent 5c08b6e007
commit 13ca78f653
9 changed files with 801 additions and 117 deletions

View File

@@ -13,6 +13,7 @@ import asyncio
from datetime import datetime, timedelta
from collections import defaultdict
import time
import re
# Import database
from services.database import get_db_session
@@ -339,24 +340,138 @@ async def create_enhanced_strategy(
try:
logger.info("🚀 Creating enhanced content strategy")
# Validate required fields
# Basic required checks
if not strategy_data.get('user_id'):
raise HTTPException(status_code=400, detail="user_id is required")
if not strategy_data.get('name'):
raise HTTPException(status_code=400, detail="strategy name is required")
# Create enhanced strategy
def parse_float(value: Any) -> Optional[float]:
    """Coerce a loosely formatted numeric input to a float.

    Real numbers are converted directly. Strings are lower-cased, stripped
    and have thousands separators removed; a trailing ``%`` is dropped and a
    ``k``/``m`` suffix multiplies the number by 1,000 / 1,000,000.

    The suffix is honoured only when it directly follows the number (with
    optional whitespace) and is not the start of a longer word, so
    ``"1 team"`` is 1.0 rather than 1,000,000 and ``"3 months"`` is 3.0.

    Args:
        value: A number, a string containing a number, or None.

    Returns:
        The parsed float, or None when no number can be extracted.
    """
    if value is None:
        return None
    if isinstance(value, (int, float)):
        return float(value)
    if not isinstance(value, str):
        return None

    s = value.strip().lower().replace(",", "")

    # Handle a plain percentage such as "50%"
    if s.endswith('%'):
        try:
            return float(s[:-1])
        except ValueError:
            # e.g. "about 50%": fall through to the generic number search
            pass

    # First number in the text, optionally followed by a k/m magnitude suffix.
    # The negative lookahead keeps words like "months" or "kg" from being
    # read as a suffix.
    m = re.search(r"([-+]?\d*\.?\d+)(?:\s*([km])(?![a-z]))?", s)
    if m is None:
        return None

    mul = {'k': 1_000.0, 'm': 1_000_000.0}.get(m.group(2), 1.0)
    try:
        return float(m.group(1)) * mul
    except ValueError:
        return None
def parse_int(value: Any) -> Optional[int]:
    """Coerce a loosely formatted numeric input to the nearest integer.

    Delegates the parsing to ``parse_float`` and rounds the outcome.
    Returns None when nothing numeric could be parsed or the rounded
    value cannot be represented as an int.
    """
    number = parse_float(value)
    if number is not None:
        try:
            return int(round(number))
        except Exception:
            pass
    return None
def parse_json(value: Any) -> Optional[Any]:
    """Normalise a value destined for a JSON column.

    Dicts and lists pass through untouched. Strings are decoded as JSON
    when possible and otherwise kept verbatim. Anything else (including
    None) yields None.
    """
    if isinstance(value, (dict, list)):
        return value
    if not isinstance(value, str):
        return None
    try:
        decoded = json.loads(value)
    except Exception:
        # Plain, non-JSON text is still acceptable in a JSON column
        return value
    return decoded
def parse_array(value: Any) -> Optional[list]:
    """Normalise a value into a list.

    Lists pass through untouched. A string is first decoded as JSON and
    used if that produces a list; otherwise it is split on commas, with
    blank items discarded. Returns None for any other input or when no
    items remain.
    """
    if isinstance(value, list):
        return value
    if not isinstance(value, str):
        return None

    # Prefer a JSON-encoded list when the text is one
    try:
        decoded = json.loads(value)
    except Exception:
        decoded = None
    if isinstance(decoded, list):
        return decoded

    # Otherwise treat the text as a comma-separated list
    items = [piece.strip() for piece in value.split(',')]
    items = [piece for piece in items if piece]
    return items or None
# Coerce and validate fields
warnings: Dict[str, str] = {}
cleaned = dict(strategy_data)
# Numerics
content_budget = parse_float(strategy_data.get('content_budget'))
if strategy_data.get('content_budget') is not None and content_budget is None:
warnings['content_budget'] = 'Could not parse number; saved as null'
cleaned['content_budget'] = content_budget
team_size = parse_int(strategy_data.get('team_size'))
if strategy_data.get('team_size') is not None and team_size is None:
warnings['team_size'] = 'Could not parse integer; saved as null'
cleaned['team_size'] = team_size
# Arrays
preferred_formats = parse_array(strategy_data.get('preferred_formats'))
if strategy_data.get('preferred_formats') is not None and preferred_formats is None:
warnings['preferred_formats'] = 'Could not parse list; saved as null'
cleaned['preferred_formats'] = preferred_formats
# JSON fields
json_fields = [
'business_objectives','target_metrics','performance_metrics','content_preferences',
'consumption_patterns','audience_pain_points','buying_journey','seasonal_trends',
'engagement_metrics','top_competitors','competitor_content_strategies','market_gaps',
'industry_trends','emerging_trends','content_mix','optimal_timing','quality_metrics',
'editorial_guidelines','brand_voice','traffic_sources','conversion_rates','content_roi_targets',
'target_audience','content_pillars','ai_recommendations'
]
for field in json_fields:
raw = strategy_data.get(field)
parsed = parse_json(raw)
# parsed may be a plain string; accept it
cleaned[field] = parsed
# Booleans
if 'ab_testing_capabilities' in strategy_data:
cleaned['ab_testing_capabilities'] = bool(strategy_data.get('ab_testing_capabilities'))
# Log coercion warnings but continue; they are attached to the response below
if warnings:
logger.warning(f" Strategy create warnings: {warnings}")
# Proceed with create using cleaned data
db_service = EnhancedStrategyDBService(db)
enhanced_service = EnhancedStrategyService(db_service)
created_strategy = await enhanced_service.create_enhanced_strategy(strategy_data, db)
created_strategy = await enhanced_service.create_enhanced_strategy(cleaned, db)
logger.info(f"✅ Enhanced strategy created successfully: {created_strategy.get('id')}")
logger.info(f"✅ Enhanced strategy created successfully: {created_strategy.get('id') if isinstance(created_strategy, dict) else getattr(created_strategy,'id', None)}")
return ResponseBuilder.create_success_response(
resp = ResponseBuilder.create_success_response(
message="Enhanced content strategy created successfully",
data=created_strategy
)
if warnings:
resp['warnings'] = warnings
return resp
except HTTPException:
raise

View File

@@ -54,7 +54,18 @@ class AutoFillRefreshService:
except Exception as e:
logger.error("AI-only structured generation failed | user=%s | err=%s", user_id, repr(e))
logger.error("Traceback:\n%s", traceback.format_exc())
raise
# Return graceful fallback instead of raising
return {
'fields': {},
'sources': {},
'meta': {
'ai_used': False,
'ai_overrides_count': 0,
'ai_override_fields': [],
'ai_only': True,
'error': str(e)
}
}
# Fallback to previous behavior (DB + sparse overrides)
payload = await self.autofill.get_autofill(user_id)

View File

@@ -7,27 +7,53 @@ from services.ai_service_manager import AIServiceManager, AIServiceType
logger = logging.getLogger(__name__)
# Complete core fields - all 30 fields that the frontend expects
CORE_FIELDS = [
'business_objectives','target_metrics','content_budget','team_size','implementation_timeline',
'market_share','competitive_position','performance_metrics','content_preferences','consumption_patterns',
'audience_pain_points','buying_journey','seasonal_trends','engagement_metrics','top_competitors',
'competitor_content_strategies','market_gaps','industry_trends','emerging_trends','preferred_formats',
'content_mix','content_frequency','optimal_timing','quality_metrics','editorial_guidelines','brand_voice',
'traffic_sources','conversion_rates','content_roi_targets','ab_testing_capabilities'
# Business Context (8 fields)
'business_objectives', 'target_metrics', 'content_budget', 'team_size', 'implementation_timeline',
'market_share', 'competitive_position', 'performance_metrics',
# Audience Intelligence (6 fields)
'content_preferences', 'consumption_patterns', 'audience_pain_points',
'buying_journey', 'seasonal_trends', 'engagement_metrics',
# Competitive Intelligence (5 fields)
'top_competitors', 'competitor_content_strategies', 'market_gaps', 'industry_trends', 'emerging_trends',
# Content Strategy (7 fields)
'preferred_formats', 'content_mix', 'content_frequency', 'optimal_timing',
'quality_metrics', 'editorial_guidelines', 'brand_voice',
# Performance & Analytics (4 fields)
'traffic_sources', 'conversion_rates', 'content_roi_targets', 'ab_testing_capabilities'
]
JSON_FIELDS = {
'business_objectives', 'target_metrics', 'content_preferences'
'business_objectives', 'target_metrics', 'content_preferences', 'consumption_patterns',
'audience_pain_points', 'buying_journey', 'seasonal_trends', 'engagement_metrics',
'competitor_content_strategies', 'market_gaps', 'industry_trends', 'emerging_trends',
'content_mix', 'optimal_timing', 'quality_metrics', 'editorial_guidelines',
'conversion_rates', 'content_roi_targets', 'performance_metrics'
}
ARRAY_FIELDS = {
'preferred_formats'
'preferred_formats', 'top_competitors', 'market_gaps', 'industry_trends', 'traffic_sources'
}
# Select field options mapping for value normalization.
# _normalize_value matches AI output against these options case-insensitively
# and falls back to the FIRST entry of a list when nothing matches, so the
# order of each list matters.
SELECT_FIELD_OPTIONS = {
'implementation_timeline': ['3 months', '6 months', '1 year', '2 years', 'Ongoing'],
'competitive_position': ['Leader', 'Challenger', 'Niche', 'Emerging'],
'content_frequency': ['Daily', 'Weekly', 'Bi-weekly', 'Monthly', 'Quarterly'],
'brand_voice': ['Professional', 'Casual', 'Friendly', 'Authoritative', 'Innovative']
}
class AIStructuredAutofillService:
"""Generate the complete 30+ Strategy Builder fields strictly from AI using onboarding context only."""
"""Generate the complete Strategy Builder fields strictly from AI using onboarding context only."""
def __init__(self) -> None:
self.ai = AIServiceManager()
self.max_retries = 2 # Maximum retry attempts for malformed JSON
def _build_context_summary(self, context: Dict[str, Any]) -> Dict[str, Any]:
website = context.get('website_analysis') or {}
@@ -73,89 +99,353 @@ class AIStructuredAutofillService:
return summary
def _build_schema(self) -> Dict[str, Any]:
# Build a Gemini SDK-compatible Schema (dict equivalent), not JSON Schema.
# Avoid unsupported keys like oneOf/additionalProperties.
# Simplified schema following Gemini best practices
# Reduce complexity by flattening nested structures and simplifying constraints
properties: Dict[str, Any] = {}
typed_overrides: Dict[str, Any] = {
# Use STRING for complex JSON-bearing fields to avoid OBJECT property constraints
'business_objectives': {"type": "STRING"},
'target_metrics': {"type": "STRING"},
'content_preferences': {"type": "STRING"},
# Known arrays
'preferred_formats': {"type": "ARRAY", "items": {"type": "STRING"}},
# Known selects
'content_frequency': {"type": "STRING"},
# Simplified field definitions - avoid complex constraints that cause 400 errors
field_definitions = {
# Core business fields (simplified)
'business_objectives': {"type": "STRING", "description": "Business goals and objectives"},
'target_metrics': {"type": "STRING", "description": "KPIs and success metrics"},
'content_budget': {"type": "NUMBER", "description": "Monthly content budget in dollars"},
'team_size': {"type": "NUMBER", "description": "Number of people in content team"},
'implementation_timeline': {"type": "STRING", "description": "Strategy implementation timeline"},
'market_share': {"type": "STRING", "description": "Current market share percentage"},
'competitive_position': {"type": "STRING", "description": "Market competitive position"},
'performance_metrics': {"type": "STRING", "description": "Current performance data"},
# Audience fields (simplified)
'content_preferences': {"type": "STRING", "description": "Content format and topic preferences"},
'consumption_patterns': {"type": "STRING", "description": "When and how audience consumes content"},
'audience_pain_points': {"type": "STRING", "description": "Key audience challenges and pain points"},
'buying_journey': {"type": "STRING", "description": "Customer journey stages and touchpoints"},
'seasonal_trends': {"type": "STRING", "description": "Seasonal content patterns and trends"},
'engagement_metrics': {"type": "STRING", "description": "Current engagement data and metrics"},
# Competitive fields (simplified)
'top_competitors': {"type": "STRING", "description": "Main competitors"},
'competitor_content_strategies': {"type": "STRING", "description": "Analysis of competitor content approaches"},
'market_gaps': {"type": "STRING", "description": "Market opportunities and gaps"},
'industry_trends': {"type": "STRING", "description": "Current industry trends"},
'emerging_trends': {"type": "STRING", "description": "Upcoming trends and opportunities"},
# Content strategy fields (simplified)
'preferred_formats': {"type": "STRING", "description": "Preferred content formats"},
'content_mix': {"type": "STRING", "description": "Content mix distribution"},
'content_frequency': {"type": "STRING", "description": "Content publishing frequency"},
'optimal_timing': {"type": "STRING", "description": "Best times for publishing content"},
'quality_metrics': {"type": "STRING", "description": "Content quality standards and metrics"},
'editorial_guidelines': {"type": "STRING", "description": "Style and tone guidelines"},
'brand_voice': {"type": "STRING", "description": "Brand voice and tone"},
# Performance fields (simplified)
'traffic_sources': {"type": "STRING", "description": "Primary traffic sources"},
'conversion_rates': {"type": "STRING", "description": "Target conversion rates and metrics"},
'content_roi_targets': {"type": "STRING", "description": "ROI goals and targets for content"},
'ab_testing_capabilities': {"type": "BOOLEAN", "description": "Whether A/B testing capabilities are available"}
}
for key in CORE_FIELDS:
properties[key] = typed_overrides.get(key, {"type": "STRING"})
# Build properties from field definitions
for field_id in CORE_FIELDS:
if field_id in field_definitions:
properties[field_id] = field_definitions[field_id]
else:
# Fallback for any missing fields
properties[field_id] = {"type": "STRING", "description": f"Value for {field_id}"}
# Use propertyOrdering as recommended by Gemini docs for consistent output
schema = {
"type": "OBJECT",
"properties": properties,
# Property ordering can help response consistency per Gemini docs
"propertyOrdering": CORE_FIELDS,
"required": CORE_FIELDS, # Make all fields required
"propertyOrdering": CORE_FIELDS, # Critical for consistent JSON output
"description": "Content strategy fields with simplified constraints"
}
logger.debug("AI Structured Autofill: schema built (SDK) with %d properties", len(CORE_FIELDS))
logger.debug("AI Structured Autofill: simplified schema built with %d properties and property ordering", len(CORE_FIELDS))
return schema
def _build_prompt(self, context_summary: Dict[str, Any]) -> str:
# Ultra-simplified prompt to avoid JSON parsing issues
prompt = (
"You are a senior content strategy system. Using ONLY the provided context (do not copy raw\n"
"values), infer professional, actionable values for ALL of the following 30+ strategy fields.\n"
"Output strictly valid JSON matching the given schema. Provide concise, business-ready values.\n"
"If you are uncertain, infer the most reasonable assumption for a small business. Do not leave\n"
"fields empty.\n\n"
f"CONTEXT:\n{json.dumps(context_summary, indent=2)}\n\n"
"FIELDS TO PRODUCE (keys only; values inferred):\n"
f"{CORE_FIELDS}\n"
"Generate a JSON object with exactly 30 fields for content strategy. Use this exact format:\n\n"
'{\n'
'"business_objectives": "Increase traffic and leads",\n'
'"target_metrics": "25% growth, 15% conversion",\n'
'"content_budget": 3000,\n'
'"team_size": 3,\n'
'"implementation_timeline": "6 months",\n'
'"market_share": "15%",\n'
'"competitive_position": "Leader",\n'
'"performance_metrics": "Current metrics data",\n'
'"content_preferences": "Blog posts, videos",\n'
'"consumption_patterns": "Peak hours 9-11 AM",\n'
'"audience_pain_points": "Time constraints, complexity",\n'
'"buying_journey": "Awareness to Decision",\n'
'"seasonal_trends": "Q1 planning, Q2 execution",\n'
'"engagement_metrics": "3.5% engagement rate",\n'
'"top_competitors": "Competitor A, B, C",\n'
'"competitor_content_strategies": "Educational content approach",\n'
'"market_gaps": "AI tools, automation guides",\n'
'"industry_trends": "AI integration, video content",\n'
'"emerging_trends": "Voice search, interactive content",\n'
'"preferred_formats": "Blog posts, videos, infographics",\n'
'"content_mix": "70% educational, 30% promotional",\n'
'"content_frequency": "Weekly",\n'
'"optimal_timing": "Tuesday/Thursday 10 AM",\n'
'"quality_metrics": "SEO score >90, engagement >3%",\n'
'"editorial_guidelines": "Professional tone, actionable insights",\n'
'"brand_voice": "Professional",\n'
'"traffic_sources": "Organic search, social media",\n'
'"conversion_rates": "15% conversion, $200 CPA",\n'
'"content_roi_targets": "15% conversion, 3:1 ROI",\n'
'"ab_testing_capabilities": true\n'
'}\n\n'
f"Business context: {json.dumps(context_summary, indent=2)}\n\n"
"Generate the complete JSON with all 30 fields:"
)
logger.debug("AI Structured Autofill: prompt preview=%d chars", len(prompt))
logger.debug("AI Structured Autofill: ultra-simplified prompt (%d chars)", len(prompt))
return prompt
def _normalize_value(self, key: str, value: Any) -> Any:
if value is None:
return None
# Parse JSON-bearing fields if they arrived as JSON strings
if key in JSON_FIELDS:
# Handle numeric fields that might come as text
if key in ['content_budget', 'team_size']:
if isinstance(value, (int, float)):
return value
elif isinstance(value, str):
# Extract numeric value from text
import re
# Remove currency symbols, commas, and common words
cleaned = re.sub(r'[$,€£¥]', '', value.lower())
cleaned = re.sub(r'\b(monthly|yearly|annual|people|person|specialist|creator|writer|editor|team|member)\b', '', cleaned)
cleaned = re.sub(r'\s+', ' ', cleaned).strip()
# Extract first number found
numbers = re.findall(r'\d+(?:\.\d+)?', cleaned)
if numbers:
try:
num_value = float(numbers[0])
# For team_size, convert to integer
if key == 'team_size':
return int(num_value)
return num_value
except (ValueError, TypeError):
pass
logger.warning(f"Could not extract numeric value from '{key}' field: '{value}'")
return None
# Handle boolean fields
if key == 'ab_testing_capabilities':
if isinstance(value, bool):
return value
elif isinstance(value, str):
normalized_value = value.lower().strip()
if normalized_value in ['true', 'yes', 'available', 'enabled', '1']:
return True
elif normalized_value in ['false', 'no', 'unavailable', 'disabled', '0']:
return False
logger.warning(f"Could not parse boolean value for '{key}': '{value}'")
return None
# Handle select fields with predefined options
if key in SELECT_FIELD_OPTIONS:
if isinstance(value, str):
try:
return json.loads(value)
except Exception:
# Keep as string if not valid JSON
return value
return value
# Coerce arrays from comma-separated strings where applicable
if key in ARRAY_FIELDS:
if isinstance(value, str):
split = [s.strip() for s in value.split(',') if s.strip()]
return split if split else None
if isinstance(value, list):
return [str(v) for v in value]
return None
return value
# Try exact match first (case-insensitive)
normalized_value = value.lower().strip()
for option in SELECT_FIELD_OPTIONS[key]:
if normalized_value == option.lower():
return option
# Try partial matching for common variations
for option in SELECT_FIELD_OPTIONS[key]:
option_lower = option.lower()
# Handle common variations
if (normalized_value.startswith(option_lower) or
option_lower in normalized_value or
normalized_value.endswith(option_lower)):
return option
# Special handling for content_frequency
if key == 'content_frequency':
if 'daily' in normalized_value:
return 'Daily'
elif 'weekly' in normalized_value or 'week' in normalized_value:
return 'Weekly'
elif 'bi-weekly' in normalized_value or 'biweekly' in normalized_value:
return 'Bi-weekly'
elif 'monthly' in normalized_value or 'month' in normalized_value:
return 'Monthly'
elif 'quarterly' in normalized_value or 'quarter' in normalized_value:
return 'Quarterly'
# If no match found, return the first option as fallback
logger.warning(f"Could not normalize select field '{key}' value: '{value}' to valid options: {SELECT_FIELD_OPTIONS[key]}")
return SELECT_FIELD_OPTIONS[key][0] # Return first option as fallback
# For all other fields, ensure they're strings and not empty
if isinstance(value, str):
# Special handling for multiselect fields
if key in ['preferred_formats', 'top_competitors', 'market_gaps', 'industry_trends', 'traffic_sources']:
# Split by comma and clean up each item
items = [item.strip() for item in value.split(',') if item.strip()]
if items:
return items # Return as array for multiselect fields
return None
return value.strip() if value.strip() else None
elif isinstance(value, (int, float, bool)):
return str(value)
elif isinstance(value, list):
# For multiselect fields, return the list as-is
if key in ['preferred_formats', 'top_competitors', 'market_gaps', 'industry_trends', 'traffic_sources']:
return [str(item) for item in value if item]
# For other fields, convert arrays to comma-separated strings
return ', '.join(str(item) for item in value if item)
else:
return str(value) if value else None
def _calculate_success_rate(self, result: Dict[str, Any]) -> float:
    """Calculate the percentage of CORE_FIELDS that carry a usable value.

    A field counts as filled when its value is:
      * a string with non-whitespace content,
      * a non-empty list, tuple, set or dict,
      * any number or boolean (``0`` and ``False`` are real answers), or
      * any other non-None object.

    Whitespace-only strings and empty containers are treated as missing.

    Args:
        result: Parsed AI response keyed by field name.

    Returns:
        A percentage in the range 0.0-100.0; 0.0 when ``result`` is not a
        dict.
    """
    if not isinstance(result, dict) or not CORE_FIELDS:
        return 0.0

    def is_filled(value: Any) -> bool:
        if value is None:
            return False
        if isinstance(value, str):
            return bool(value.strip())
        if isinstance(value, (list, tuple, set, dict)):
            return len(value) > 0
        # Numbers (including 0) and booleans (including False) are answers
        return True

    filled_fields = sum(1 for key in CORE_FIELDS if is_filled(result.get(key)))
    return (filled_fields / len(CORE_FIELDS)) * 100
def _should_retry(self, result: Dict[str, Any], attempt: int) -> bool:
    """Determine if we should retry based on success rate and attempt count.

    Args:
        result: Value returned by the structured AI call for this attempt.
        attempt: Zero-based index of the attempt that produced ``result``.

    Returns:
        False once ``attempt`` has reached ``self.max_retries`` or the
        result is good enough; True when the result is not a dict, carries
        an ``'error'`` key, has a success rate below 80%, or is missing
        more than 6 fields.
    """
    if attempt >= self.max_retries:
        return False

    # A non-dict payload has no fields to inspect; retry instead of letting
    # the membership test below raise TypeError (e.g. on None).
    if not isinstance(result, dict):
        logger.info(f"Retry attempt {attempt + 1} due to non-dict result: {type(result)}")
        return True

    # Check if result has error
    if 'error' in result:
        logger.info(f"Retry attempt {attempt + 1} due to error: {result.get('error')}")
        return True

    # Check success rate - stop immediately if we have 100% success
    success_rate = self._calculate_success_rate(result)
    logger.info(f"Success rate: {success_rate:.1f}% (attempt {attempt + 1})")

    # If we have 100% success, don't retry
    if success_rate >= 100.0:
        logger.info(f"Perfect success rate achieved: {success_rate:.1f}% - no retry needed")
        return False

    # Retry if success rate is below 80%
    if success_rate < 80.0:
        logger.info(f"Retry attempt {attempt + 1} due to low success rate: {success_rate:.1f}% (need 80%+)")
        return True

    # Safety net on the absolute number of missing fields. Only None, "" and
    # [] count as missing: False and 0 are legitimate values and must not
    # trigger an extra AI call.
    missing_count = 0
    for k in CORE_FIELDS:
        value = result.get(k)
        if value is None or value == "" or value == []:
            missing_count += 1
    if missing_count > 6:
        logger.info(f"Retry attempt {attempt + 1} due to too many missing fields: {missing_count} missing (max 6)")
        return True

    return False
async def generate_autofill_fields(self, user_id: int, context: Dict[str, Any]) -> Dict[str, Any]:
context_summary = self._build_context_summary(context)
schema = self._build_schema()
prompt = self._build_prompt(context_summary)
logger.info("AIStructuredAutofillService: generating 30+ fields | user=%s", user_id)
logger.info("AIStructuredAutofillService: generating %d fields | user=%s", len(CORE_FIELDS), user_id)
logger.debug("AIStructuredAutofillService: properties=%d", len(schema.get('properties', {})))
try:
result = await self.ai.execute_structured_json_call(
service_type=AIServiceType.STRATEGIC_INTELLIGENCE,
prompt=prompt,
schema=schema
)
except Exception as e:
logger.error("AI structured call failed | user=%s | err=%s", user_id, repr(e))
logger.error("Traceback:\n%s", traceback.format_exc())
raise
last_result = None
for attempt in range(self.max_retries + 1):
try:
logger.info(f"AI structured call attempt {attempt + 1}/{self.max_retries + 1}")
result = await self.ai.execute_structured_json_call(
service_type=AIServiceType.STRATEGIC_INTELLIGENCE,
prompt=prompt,
schema=schema
)
last_result = result
# Check if we should retry
if not self._should_retry(result, attempt):
break
# Add a small delay before retry
if attempt < self.max_retries:
import asyncio
await asyncio.sleep(1)
except Exception as e:
logger.error(f"AI structured call failed (attempt {attempt + 1}) | user=%s | err=%s", user_id, repr(e))
logger.error("Traceback:\n%s", traceback.format_exc())
last_result = {
'error': str(e)
}
if attempt < self.max_retries:
import asyncio
await asyncio.sleep(1)
continue
break
if not isinstance(result, dict):
raise ValueError("AI did not return a structured JSON object")
# Process the final result
if not isinstance(last_result, dict):
logger.warning("AI did not return a structured JSON object, got: %s", type(last_result))
return {
'fields': {},
'sources': {},
'meta': {
'ai_used': False,
'ai_overrides_count': 0,
'missing_fields': CORE_FIELDS,
'error': f"AI returned {type(last_result)} instead of dict",
'attempts': self.max_retries + 1
}
}
# Check if AI returned an error
if 'error' in last_result:
logger.warning("AI returned error after all attempts: %s", last_result.get('error'))
return {
'fields': {},
'sources': {},
'meta': {
'ai_used': False,
'ai_overrides_count': 0,
'missing_fields': CORE_FIELDS,
'error': last_result.get('error', 'Unknown AI error'),
'attempts': self.max_retries + 1
}
}
# Try to extract fields from malformed JSON if needed
if len(last_result) < len(CORE_FIELDS) * 0.5: # If we got less than 50% of fields
logger.warning("AI returned incomplete result, attempting to extract from raw response")
# Try to extract key-value pairs from the raw response
extracted_result = self._extract_fields_from_raw_response(last_result)
if extracted_result and len(extracted_result) > len(last_result):
logger.info("Successfully extracted additional fields from raw response")
last_result = extracted_result
try:
logger.debug("AI structured result keys=%d | sample keys=%s", len(list(result.keys())), list(result.keys())[:8])
logger.debug("AI structured result keys=%d | sample keys=%s", len(list(last_result.keys())), list(last_result.keys())[:8])
except Exception:
pass
@@ -163,14 +453,40 @@ class AIStructuredAutofillService:
fields: Dict[str, Any] = {}
sources: Dict[str, str] = {}
non_null_keys = []
missing_fields = []
for key in CORE_FIELDS:
raw_value = result.get(key)
raw_value = last_result.get(key)
norm_value = self._normalize_value(key, raw_value)
if norm_value is not None and norm_value != "" and norm_value != []:
fields[key] = { 'value': norm_value, 'source': 'ai_refresh', 'confidence': 0.8 }
sources[key] = 'ai_refresh'
non_null_keys.append(key)
missing_fields = [k for k in CORE_FIELDS if k not in non_null_keys]
else:
missing_fields.append(key)
# Log detailed field analysis
logger.info("AI structured autofill field analysis:")
logger.info("✅ Generated fields (%d): %s", len(non_null_keys), non_null_keys)
logger.info("❌ Missing fields (%d): %s", len(missing_fields), missing_fields)
# Categorize missing fields
field_categories = {
'business_context': ['business_objectives', 'target_metrics', 'content_budget', 'team_size', 'implementation_timeline', 'market_share', 'competitive_position', 'performance_metrics'],
'audience_intelligence': ['content_preferences', 'consumption_patterns', 'audience_pain_points', 'buying_journey', 'seasonal_trends', 'engagement_metrics'],
'competitive_intelligence': ['top_competitors', 'competitor_content_strategies', 'market_gaps', 'industry_trends', 'emerging_trends'],
'content_strategy': ['preferred_formats', 'content_mix', 'content_frequency', 'optimal_timing', 'quality_metrics', 'editorial_guidelines', 'brand_voice'],
'performance_analytics': ['traffic_sources', 'conversion_rates', 'content_roi_targets', 'ab_testing_capabilities']
}
for category, category_fields in field_categories.items():
generated_in_category = [f for f in category_fields if f in non_null_keys]
missing_in_category = [f for f in category_fields if f in missing_fields]
logger.info("📊 %s: %d/%d fields generated (%s missing: %s)",
category.upper(), len(generated_in_category), len(category_fields),
len(missing_in_category), missing_in_category)
success_rate = self._calculate_success_rate(last_result)
payload = {
'fields': fields,
@@ -180,8 +496,43 @@ class AIStructuredAutofillService:
'ai_overrides_count': len(non_null_keys),
'ai_override_fields': non_null_keys,
'ai_only': True,
'missing_fields': missing_fields
'missing_fields': missing_fields,
'success_rate': success_rate,
'attempts': self.max_retries + 1
}
}
logger.info("AI structured autofill completed | non_null_fields=%d missing=%d", len(non_null_keys), len(missing_fields))
return payload
logger.info("AI structured autofill completed | non_null_fields=%d missing=%d success_rate=%.1f%% attempts=%d",
len(non_null_keys), len(missing_fields), success_rate, self.max_retries + 1)
return payload
def _extract_fields_from_raw_response(self, result: Dict[str, Any]) -> Dict[str, Any]:
    """Extract fields from malformed JSON response using regex patterns.

    The result is converted to text with ``str()`` and scanned for
    ``"key": value`` pairs in JSON notation. Only keys listed in
    CORE_FIELDS are kept. Later patterns overwrite earlier matches for the
    same key.

    Args:
        result: The (possibly incomplete) AI response.

    Returns:
        A dict of recovered fields. Scalars become bool, int, float or str;
        array matches become a list of strings.
    """
    import re

    # Convert result to string for pattern matching
    result_str = str(result)
    extracted: Dict[str, Any] = {}

    scalar_patterns = [
        r'"([^"]+)":\s*"([^"]*)"',  # String values
        r'"([^"]+)":\s*(\d+(?:\.\d+)?)',  # Numeric values
        r'"([^"]+)":\s*(true|false)',  # Boolean values
    ]
    array_pattern = r'"([^"]+)":\s*\[([^\]]*)\]'  # Array values

    for pattern in scalar_patterns:
        for key, value in re.findall(pattern, result_str):
            if key not in CORE_FIELDS:
                continue
            lowered = value.lower()
            if lowered in ('true', 'false'):
                extracted[key] = lowered == 'true'
                continue
            if value.replace('.', '').isdigit():
                try:
                    extracted[key] = float(value) if '.' in value else int(value)
                    continue
                except ValueError:
                    # e.g. "1.2.3" looks numeric but is not; keep it as text
                    pass
            extracted[key] = value.strip('"')

    # Arrays: split the bracket interior into items and drop the quotes so
    # callers receive a clean list instead of one string with stray quotes.
    for key, inner in re.findall(array_pattern, result_str):
        if key not in CORE_FIELDS:
            continue
        items = [item.strip().strip('"').strip() for item in inner.split(',')]
        extracted[key] = [item for item in items if item]

    logger.info("Extracted %d fields from raw response: %s", len(extracted), list(extracted.keys()))
    return extracted

View File

@@ -802,6 +802,9 @@ class EnhancedStrategyService:
payload = await service.get_autofill(user_id)
logger.info(f"Retrieved comprehensive onboarding data for user {user_id}")
return payload
except Exception as e:
logger.error(f"Error getting onboarding data: {str(e)}")
raise
finally:
temp_db.close()
except Exception as e:
@@ -821,15 +824,15 @@ class EnhancedStrategyService:
if 'content_goals' in website_data and website_data.get('content_goals'):
fields['business_objectives'] = {
'value': website_data.get('content_goals'),
'source': 'website_analysis',
'source': 'website_analysis',
'confidence': website_data.get('confidence_level')
}
}
# Prefer explicit target_metrics; otherwise derive from performance_metrics
if website_data.get('target_metrics'):
fields['target_metrics'] = {
'value': website_data.get('target_metrics'),
'source': 'website_analysis',
'source': 'website_analysis',
'confidence': website_data.get('confidence_level')
}
elif website_data.get('performance_metrics'):
@@ -843,35 +846,35 @@ class EnhancedStrategyService:
if website_data.get('content_budget') is not None:
fields['content_budget'] = {
'value': website_data.get('content_budget'),
'source': 'website_analysis',
'source': 'website_analysis',
'confidence': website_data.get('confidence_level')
}
elif isinstance(session_data, dict) and session_data.get('budget') is not None:
fields['content_budget'] = {
'value': session_data.get('budget'),
'source': 'onboarding_session',
'confidence': 0.7
}
'confidence': 0.7
}
# Team size: website data preferred, else onboarding session team_size
if website_data.get('team_size') is not None:
fields['team_size'] = {
'value': website_data.get('team_size'),
'source': 'website_analysis',
'source': 'website_analysis',
'confidence': website_data.get('confidence_level')
}
elif isinstance(session_data, dict) and session_data.get('team_size') is not None:
fields['team_size'] = {
'value': session_data.get('team_size'),
'source': 'onboarding_session',
'confidence': 0.7
}
'confidence': 0.7
}
# Implementation timeline: website data preferred, else onboarding session timeline
if website_data.get('implementation_timeline'):
fields['implementation_timeline'] = {
'value': website_data.get('implementation_timeline'),
'source': 'website_analysis',
'source': 'website_analysis',
'confidence': website_data.get('confidence_level')
}
elif isinstance(session_data, dict) and session_data.get('timeline'):
@@ -885,15 +888,15 @@ class EnhancedStrategyService:
if website_data.get('market_share'):
fields['market_share'] = {
'value': website_data.get('market_share'),
'source': 'website_analysis',
'source': 'website_analysis',
'confidence': website_data.get('confidence_level')
}
elif website_data.get('performance_metrics'):
fields['market_share'] = {
'value': website_data.get('performance_metrics').get('estimated_market_share', None),
'source': 'website_analysis',
'source': 'website_analysis',
'confidence': website_data.get('confidence_level')
}
}
fields['performance_metrics'] = {
'value': website_data.get('performance_metrics', {}),
@@ -1179,5 +1182,4 @@ class EnhancedStrategyService:
'error_rates': {},
'throughput_metrics': {}
}
# No further action required
return
# No further action required

View File

@@ -522,7 +522,10 @@ Format as structured JSON with detailed assessment and optimization guidance.
error_message=error_message
)
self.metrics.append(metrics)
raise Exception(error_message)
# Don't raise JSON decode errors as fatal - let the calling code handle them
# The Gemini provider should have already attempted to repair malformed JSON
result = {"error": error_message, "raw_response": str(e)}
success = False
except Exception as e:
error_message = f"AI call error for {service_type.value}: {str(e)}"
logger.error(error_message)

View File

@@ -241,6 +241,7 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9,
if parsed:
return parsed if isinstance(parsed, dict) else json.loads(json.dumps(parsed))
text = (response.text or '').strip()
# Strip markdown code fences if present
if text.startswith('```'):
# remove leading ```json or ``` and trailing ```
@@ -251,10 +252,14 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9,
if text.endswith('```'):
text = text[:-3]
text = text.strip()
# Try direct JSON parsing first
try:
return json.loads(text)
except json.JSONDecodeError:
# Fallback: extract likely JSON object substring
except json.JSONDecodeError as e:
logger.warning(f"Direct JSON parsing failed: {e}")
# Fallback 1: Extract likely JSON object substring
first = text.find('{')
last = text.rfind('}')
if first != -1 and last != -1 and last > first:
@@ -262,17 +267,176 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9,
try:
return json.loads(candidate)
except json.JSONDecodeError:
pass
# Final fallback: regex any object
logger.warning("JSON object extraction failed, trying regex")
# Fallback 2: Regex any object
import re
match = re.search(r'\{[\s\S]*\}', text)
if match:
return json.loads(match.group(0))
raise
try:
return json.loads(match.group(0))
except json.JSONDecodeError:
logger.warning("Regex JSON extraction failed, trying repair")
# Fallback 3: Attempt to repair common JSON issues
repaired = _repair_json_string(text)
if repaired:
try:
return json.loads(repaired)
except json.JSONDecodeError:
logger.warning("JSON repair failed")
# Fallback 4: Extract and parse individual key-value pairs
extracted = _extract_key_value_pairs(text)
if extracted:
return extracted
# Final fallback: return error with raw response for debugging
logger.error(f"All JSON parsing attempts failed for text: {text[:200]}...")
return {"error": f"Failed to parse JSON response: {e}", "raw_response": text[:500]}
except Exception as e:
logger.error(f"Error parsing structured response: {e}")
return {"error": f"Failed to parse JSON response: {e}", "raw_response": (response.text or '')}
except Exception as e:
logger.error(f"Error in Gemini Pro structured JSON generation: {e}")
return {"error": str(e)}
return {"error": str(e)}
def _repair_json_string(text: str) -> Optional[str]:
    """
    Attempt to repair common JSON issues in AI responses.

    The repair is heuristic and best-effort: the returned string is NOT
    guaranteed to be valid JSON, so callers must still guard ``json.loads``.

    Repairs applied, in order:
      1. Drop everything before the first ``{`` and after the last ``}``.
      2. Close unterminated strings, line by line.
      3. Add missing ``]`` characters before the final ``}``.
      4. Remove trailing commas before ``}`` / ``]``.
      5. Insert missing commas between adjacent string tokens.

    Args:
        text: Raw model output expected to contain one JSON object.

    Returns:
        The repaired candidate string, or None when ``text`` is empty or does
        not contain a ``{`` that is followed somewhere later by a ``}``.
    """
    # Imported locally so this helper does not rely on a module-level
    # ``import re`` being present.
    import re

    if not text:
        return None

    start = text.find('{')
    end = text.rfind('}')
    if start == -1 or end < start:
        return None

    # Work on everything up to (but excluding) the final closing brace; the
    # brace is re-attached after the inner repairs so that any characters we
    # add end up INSIDE the object rather than after it.
    body = text[start:end]

    # 2. Close unterminated strings. JSON strings cannot span lines, so a line
    #    with an odd number of unescaped quotes has a string that was opened
    #    but never closed (or vice versa).
    fixed_lines = []
    for line in body.split('\n'):
        stripped = line.rstrip()
        # Escaped quotes (\") are string content, not delimiters.
        unescaped_quotes = len(re.findall(r'(?<!\\)"', line))
        if unescaped_quotes % 2 == 1:
            if stripped.endswith('"'):
                # e.g. `"key": "` -> `"key": ""` (value opened, never closed)
                if ':' in line:
                    line = line + '"'
            elif not stripped.endswith(','):
                # e.g. `"key": "abc` or an array item `"abc`; the extra comma
                # is removed again below if a closing bracket/brace follows.
                line = line + '",'
        fixed_lines.append(line)
    body = '\n'.join(fixed_lines)

    # 3. Close unterminated arrays. The brackets go before the final brace:
    #    appending them after it could never yield a valid object.
    missing_brackets = body.count('[') - body.count(']')
    if missing_brackets > 0:
        body = body + ']' * missing_brackets

    repaired = body + '}'

    # 4. Remove trailing commas before closing braces/brackets
    repaired = re.sub(r',(\s*[}\]])', r'\1', repaired)

    # 5. Insert missing commas between adjacent string tokens. At least one
    #    whitespace character is required between the quotes so that empty
    #    strings ("") are left untouched.
    repaired = re.sub(r'"(\s+)"', r'",\1"', repaired)

    return repaired
def _extract_key_value_pairs(text: str) -> Optional[Dict[str, Any]]:
    """
    Extract key-value pairs from malformed JSON text as a last resort.

    The text is scanned with regular expressions rather than parsed, so the
    result is a FLAT dictionary: keys of nested objects are lifted to the top
    level, and arrays that contain nested objects/arrays are skipped.

    Value handling:
      * Quoted values are kept verbatim as strings (``"00123"`` stays a
        string, ``"true"`` stays a string).
      * Bare ``true`` / ``false`` / ``null`` become ``True`` / ``False`` /
        ``None``; bare integers become ``int``; other bare numbers become
        ``float``; any other bare token is kept as a stripped string.
      * Flat arrays become lists of their scalar items (empty strings are
        dropped, quoted items are stripped).

    Args:
        text: Malformed JSON-like text.

    Returns:
        A dict of the pairs found, or None if ``text`` is empty or nothing
        could be extracted.
    """
    # Imported locally so this helper does not rely on a module-level
    # ``import re`` being present.
    import re

    if not text:
        return None

    integer_re = re.compile(r'-?\d+')
    number_re = re.compile(r'-?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?')

    def coerce(token: str) -> Any:
        """Convert a bare (unquoted) scalar token to its Python value."""
        lowered = token.lower()
        if lowered in ('true', 'false'):
            return lowered == 'true'
        if lowered == 'null':
            return None
        if integer_re.fullmatch(token):
            return int(token)
        if number_re.fullmatch(token):
            return float(token)
        return token

    result: Dict[str, Any] = {}

    # "key": "value"  or  "key": bare_value
    # A bare value may not START with whitespace, a quote, or a structural
    # character ([ { , } ]), so arrays/objects are never captured as scalars;
    # it also stops at a quote or newline so that a missing comma cannot make
    # it swallow the next key.
    pair_re = re.compile(
        r'"([^"]+)"\s*:\s*(?:"([^"]*)"|([^\s,}\]\[{"][^,}\]"\n]*))'
    )
    for match in pair_re.finditer(text):
        key, quoted, bare = match.groups()
        if quoted is not None:
            result[key] = quoted
        else:
            result[key] = coerce(bare.strip())

    # "key": [ ... ] for flat arrays of scalars
    array_re = re.compile(r'"([^"]+)"\s*:\s*\[([^\]]*)\]')
    item_re = re.compile(r'"([^"]*)"|([^,\s"][^,"]*)')
    for match in array_re.finditer(text):
        key, content = match.groups()
        if '{' in content or '[' in content:
            # Nested structures cannot be rebuilt reliably with regexes.
            continue
        items = []
        for quoted_item, bare_item in item_re.findall(content):
            if bare_item:
                items.append(coerce(bare_item.strip()))
            elif quoted_item.strip():
                items.append(quoted_item.strip())
        result[key] = items

    return result if result else None

View File

@@ -419,31 +419,69 @@ const ContentStrategyBuilder: React.FC = () => {
const sources = payload.sources || {};
const inputDataPoints = payload.input_data_points || {};
const meta = payload.meta || {};
console.log('🎯 AI Refresh Result - Payload:', payload);
console.log('🎯 AI Refresh Result - Fields:', fields);
console.log('🎯 AI Refresh Result - Meta:', meta);
const fieldValues: Record<string, any> = {};
Object.keys(fields).forEach((fieldId) => {
const fieldData = fields[fieldId];
if (fieldData && typeof fieldData === 'object' && 'value' in fieldData) {
fieldValues[fieldId] = fieldData.value;
console.log(`✅ Processed field ${fieldId}:`, fieldData.value);
} else {
console.log(`❌ Skipped field ${fieldId}:`, fieldData);
}
});
useEnhancedStrategyStore.setState((state) => ({
autoPopulatedFields: { ...state.autoPopulatedFields, ...fieldValues },
dataSources: { ...state.dataSources, ...sources },
inputDataPoints,
formData: { ...state.formData, ...fieldValues }
}));
if (!meta.ai_used || meta.ai_overrides_count === 0) {
const msg = 'AI did not produce new values. Please try again or complete onboarding data.';
console.log('🎯 Final fieldValues:', fieldValues);
useEnhancedStrategyStore.setState((state) => {
const newState = {
autoPopulatedFields: { ...state.autoPopulatedFields, ...fieldValues },
dataSources: { ...state.dataSources, ...sources },
inputDataPoints,
formData: { ...state.formData, ...fieldValues }
};
console.log('🎯 Updated store state:', newState);
return newState;
});
// Enhanced success/error messaging based on retry attempts and success rate
const attempts = meta.attempts || 1;
const successRate = meta.success_rate || 0;
const aiOverridesCount = meta.ai_overrides_count || 0;
if (!meta.ai_used || aiOverridesCount === 0) {
const msg = meta.error || 'AI did not produce new values. Please try again or complete onboarding data.';
setError(msg);
setRefreshError(msg);
setRefreshMessage('No new AI values available.');
setRefreshMessage(`No new AI values available. (${attempts} attempt${attempts > 1 ? 's' : ''})`);
} else {
// Show success message with retry info if applicable
if (attempts > 1) {
setRefreshMessage(`AI refresh completed successfully! Generated ${aiOverridesCount} fields in ${attempts} attempts (${successRate.toFixed(1)}% success rate).`);
} else {
setRefreshMessage(`AI refresh completed! Generated ${aiOverridesCount} fields (${successRate.toFixed(1)}% success rate).`);
}
// Show warning if success rate is low but we got some data
if (successRate < 70 && aiOverridesCount > 0) {
setRefreshError(`Warning: Only ${successRate.toFixed(1)}% of fields were filled. Some fields may need manual input.`);
}
}
es.close();
setAIGenerating(false);
setIsRefreshing(false);
if (!meta || meta.ai_overrides_count > 0) {
setRefreshMessage(null);
setRefreshProgress(0);
// Clear success message after a delay
if (aiOverridesCount > 0) {
setTimeout(() => {
setRefreshMessage(null);
setRefreshProgress(0);
}, 5000);
}
}
if (data.type === 'error') {

View File

@@ -254,9 +254,9 @@ const StrategicInputField: React.FC<StrategicInputFieldProps> = ({
required: false
},
traffic_sources: {
type: 'json',
type: 'multiselect',
label: 'Traffic Sources',
placeholder: 'Define your traffic sources',
options: ['Organic Search', 'Social Media', 'Email Marketing', 'Direct Traffic', 'Referral Traffic', 'Paid Search', 'Display Advertising', 'Content Marketing', 'Influencer Marketing', 'Video Platforms'],
required: false
},
conversion_rates: {

View File

@@ -496,9 +496,9 @@ export const STRATEGIC_INPUT_FIELDS: StrategicInputField[] = [
label: 'Traffic Sources',
description: 'Primary traffic sources',
tooltip: 'Identify your main traffic sources to understand where your audience comes from and optimize accordingly.',
type: 'json',
type: 'multiselect',
required: false,
placeholder: 'Define traffic sources'
options: ['Organic Search', 'Social Media', 'Email Marketing', 'Direct Traffic', 'Referral Traffic', 'Paid Search', 'Display Advertising', 'Content Marketing', 'Influencer Marketing', 'Video Platforms']
},
{
id: 'conversion_rates',