fix: resolve onboarding session not found warnings and frontend build OOM
- Use canonical Clerk user id (clerk_user_id) across all onboarding entrypoints to ensure consistent OnboardingSession.user_id lookup
- Fix API key persistence in api_key_manager.py to use the correct APIKey model columns (session_id, provider, key)
- Increase Node heap for frontend build to 8GB and add a build:nomap script to disable sourcemaps and reduce memory usage
- Update onboarding endpoints (endpoints_core.py, onboarding_control_service.py, step_management_service.py) to prefer clerk_user_id over id
- Fix frontend workflowStore.ts TypeScript error by returning a WorkflowError instance
- Add website_automation_service.py for onboarding automation
This commit is contained in:
285
backend/services/onboarding/website_intake_service.py
Normal file
285
backend/services/onboarding/website_intake_service.py
Normal file
@@ -0,0 +1,285 @@
|
||||
"""Website Intake Service for generating site briefs from business information."""
|
||||
from typing import Dict, Any, Optional
|
||||
from loguru import logger
|
||||
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
|
||||
|
||||
# --- JSON-schema fragment factories -----------------------------------------
# Each factory returns a brand-new dict so no two places in the final schema
# share (alias) the same mutable object.


def _text() -> Dict[str, Any]:
    """Return a schema fragment for a plain string."""
    return {"type": "string"}


def _nullable_text() -> Dict[str, Any]:
    """Return a schema fragment for a string that may also be null."""
    return {"type": ["string", "null"]}


def _text_list() -> Dict[str, Any]:
    """Return a schema fragment for an array of strings."""
    return {"type": "array", "items": {"type": "string"}}


def _choice(*options: str) -> Dict[str, Any]:
    """Return a schema fragment for a string restricted to ``options``."""
    return {"type": "string", "enum": list(options)}


def _record(properties: Dict[str, Any], required: Any = None) -> Dict[str, Any]:
    """Return an object schema; ``required`` defaults to every property name."""
    names = list(properties) if required is None else list(required)
    return {"type": "object", "properties": properties, "required": names}


def _exa_ref() -> Dict[str, Any]:
    """Return a reference to the shared ``exaSection`` definition."""
    return {"$ref": "#/$defs/exaSection"}


# --- Schema sections ---------------------------------------------------------

_SITE_BRIEF_SECTION: Dict[str, Any] = _record(
    {
        "business_name": _text(),
        "tagline": _text(),
        "template_type": _choice("blog", "profile", "shop", "dont_know"),
        "geo_scope": _choice("global", "local", "hyper_local", "dont_know"),
        "primary_offerings": _text_list(),
        "product_assets": _record(
            {
                "urls": _text_list(),
                "asset_ids": _text_list(),
            }
        ),
        "audience": _record(
            {
                "segment": _text(),
                "b2b_b2c": _choice("B2B", "B2C", "Both", "dont_know"),
                "persona_notes": _text(),
            }
        ),
        "brand_voice": _record(
            {
                "tone": _text(),
                "adjectives": _text_list(),
                "avoid": _text_list(),
            }
        ),
        "contact": _record(
            {
                "email": _text(),
                "phone": _nullable_text(),
                "location": _nullable_text(),
            }
        ),
        "competitor_urls": _text_list(),
    },
    # "product_assets" is declared above but is not part of the required list.
    required=[
        "business_name",
        "tagline",
        "template_type",
        "geo_scope",
        "primary_offerings",
        "audience",
        "brand_voice",
        "contact",
        "competitor_urls",
    ],
)

_CONTENT_PLAN_SECTION: Dict[str, Any] = _record(
    {
        "required_pages": {
            "type": "array",
            "items": _record(
                {
                    "page": _choice(
                        "home", "about", "services", "products", "contact", "blog", "faq"
                    ),
                    "goal": _text(),
                    "key_points": _text_list(),
                    "cta": _text(),
                }
            ),
        },
        "optional_sections": _text_list(),
        "min_content_items": {"type": "integer"},
    }
)

_EXA_QUERY_MAP_SECTION: Dict[str, Any] = _record(
    {
        "home": _exa_ref(),
        "about": _exa_ref(),
        "services_or_products": _exa_ref(),
        "contact": _exa_ref(),
        "competitor_optional": _exa_ref(),
    }
)

_QUALITY_FLAGS_SECTION: Dict[str, Any] = _record(
    {
        "confidence": {"type": "number"},
        "missing_fields": _text_list(),
        "followup_questions": _text_list(),
    }
)

_EXA_SECTION_DEF: Dict[str, Any] = _record(
    {
        "queries": _text_list(),
        "summary_query": _text(),
        "include_text": _text_list(),
        "search_type": _choice("auto", "neural", "fast", "deep"),
        "category": _text(),
    }
)

# JSON schema passed to the LLM as the expected shape of a generated site brief:
# four required top-level sections plus one shared "$defs" entry.
SITE_BRIEF_SCHEMA: Dict[str, Any] = {
    "type": "object",
    "properties": {
        "site_brief": _SITE_BRIEF_SECTION,
        "content_plan": _CONTENT_PLAN_SECTION,
        "exa_query_map": _EXA_QUERY_MAP_SECTION,
        "quality_flags": _QUALITY_FLAGS_SECTION,
    },
    "required": ["site_brief", "content_plan", "exa_query_map", "quality_flags"],
    "$defs": {
        "exaSection": _EXA_SECTION_DEF,
    },
}
|
||||
|
||||
|
||||
class WebsiteIntakeService:
    """Generate site briefs and Exa query maps from minimal intake inputs.

    ``generate_site_brief`` asks ``llm_text_gen`` for JSON shaped like
    ``SITE_BRIEF_SCHEMA`` and, if that call raises or yields an unusable
    value, returns a locally built template brief instead.
    """

    # Enum values copied from SITE_BRIEF_SCHEMA so the fallback brief only
    # emits values the schema allows.
    _TEMPLATE_TYPES = ("blog", "profile", "shop", "dont_know")
    _AUDIENCE_TYPES = ("B2B", "B2C", "Both", "dont_know")

    def _normalize_list(self, value: Any) -> list:
        """Coerce ``value`` into a list of non-empty, stripped strings.

        * falsy input (``None``, ``""``, ``[]``, ``0``) -> ``[]``
        * ``str`` -> split on commas
        * ``list`` / ``tuple`` -> each element stringified; ``None`` elements
          are skipped instead of becoming the literal text ``"None"``
        * anything else -> a single-element list holding ``str(value)``
        """
        if not value:
            return []
        if isinstance(value, str):
            candidates = value.split(",")
        elif isinstance(value, (list, tuple)):
            candidates = [item for item in value if item is not None]
        else:
            candidates = [value]
        stripped = (str(item).strip() for item in candidates)
        return [text for text in stripped if text]

    def _extract_product_assets(self, intake: Dict[str, Any]) -> Dict[str, list]:
        """Return ``{"urls": [...], "asset_ids": [...]}`` taken from ``intake``.

        Reads the ``product_asset_urls`` and ``product_asset_ids`` keys and
        normalises each through ``_normalize_list``.
        """
        return {
            "urls": self._normalize_list(intake.get("product_asset_urls")),
            "asset_ids": self._normalize_list(intake.get("product_asset_ids")),
        }

    @staticmethod
    def _text_or_default(value: Any, default: str) -> str:
        """Return ``value`` as stripped text, or ``default`` if it is None/blank."""
        if value is None:
            return default
        text = str(value).strip()
        return text or default

    @staticmethod
    def _choice_or_default(value: Any, allowed: tuple, default: str) -> str:
        """Return ``value`` when it is one of ``allowed``, otherwise ``default``."""
        if isinstance(value, str) and value in allowed:
            return value
        return default

    @staticmethod
    def _exa_section(queries: list, summary_query: str, include_text: list) -> Dict[str, Any]:
        """Build one fallback Exa query section with the fixed search settings."""
        return {
            "queries": list(queries),
            "summary_query": summary_query,
            "include_text": list(include_text),
            "search_type": "auto",
            "category": "business",
        }

    def build_prompt(self, intake: Dict[str, Any]) -> str:
        """Build the LLM prompt that embeds the intake inputs.

        The intake is embedded as JSON (``null`` rather than ``None``, double
        quotes) because the prompt asks the model for JSON output. Values
        that are not JSON-serialisable are stringified; if serialisation still
        fails (e.g. unsupported key types) the plain ``str()`` form is used so
        this method never raises on odd input.
        """
        import json  # function-scope import keeps this block self-contained

        try:
            serialized = json.dumps(intake, indent=2, ensure_ascii=False, default=str)
        except (TypeError, ValueError):
            serialized = str(intake)

        return (
            "You are creating a website brief and research plan for a non-technical user. "
            "Use the inputs below, keep assumptions minimal, and prefer 'dont_know' when unsure. "
            "Ensure at least 5 content items across required pages.\n\n"
            f"INTAKE INPUTS:\n{serialized}\n\n"
            "Output structured JSON that matches the schema exactly."
        )

    def generate_site_brief(self, intake: Dict[str, Any], user_id: Optional[str] = None) -> Dict[str, Any]:
        """Generate a site brief for ``intake`` via the LLM, with a local fallback.

        Args:
            intake: Raw intake answers (read with ``.get``; see the fallback
                builder for the keys consulted).
            user_id: Forwarded unchanged to ``llm_text_gen``.

        Returns:
            * the LLM result dict, with ``site_brief.product_assets`` overridden
              by the caller-supplied assets when any were provided;
            * ``{"error": "invalid_response", "raw": <str>}`` when the LLM
              returned a plain string;
            * the fallback brief when the LLM call raised or returned a value
              that is neither ``str`` nor ``dict``.
        """
        logger.info("Generating site brief and Exa query map from intake")

        try:
            prompt = self.build_prompt(intake)
            # NOTE(review): llm_text_gen is defined elsewhere; presumably it
            # returns a dict when json_struct is supplied - confirm.
            result = llm_text_gen(prompt=prompt, json_struct=SITE_BRIEF_SCHEMA, user_id=user_id)

            if isinstance(result, str):
                logger.warning("LLM returned string response; expected structured JSON")
                return {"error": "invalid_response", "raw": result}

            if not isinstance(result, dict):
                # Previously this fell through to an AttributeError on
                # ``result.setdefault``; handle it explicitly instead.
                logger.warning(
                    f"LLM returned {type(result).__name__}; expected structured JSON"
                )
                return self._generate_fallback_site_brief(intake)

            product_assets = self._extract_product_assets(intake)
            if product_assets["urls"] or product_assets["asset_ids"]:
                site_brief = result.get("site_brief")
                if not isinstance(site_brief, dict):
                    # A missing or malformed section must not discard the rest
                    # of an otherwise usable LLM result.
                    site_brief = {}
                    result["site_brief"] = site_brief
                # Caller-supplied assets win over whatever the LLM produced.
                site_brief["product_assets"] = product_assets

            logger.success(f"Generated site brief for user {user_id}")
            return result

        except Exception as e:
            # Deliberate best-effort boundary: any failure degrades to the
            # template brief. logger.exception keeps the traceback.
            logger.exception(f"Failed to generate site brief: {str(e)}")
            return self._generate_fallback_site_brief(intake)

    def _generate_fallback_site_brief(self, intake: Dict[str, Any]) -> Dict[str, Any]:
        """Generate a fallback site brief when LLM is not available."""
        logger.info("Generating fallback site brief")
        return self._build_fallback_site_brief(intake)

    def _build_fallback_site_brief(self, intake: Dict[str, Any]) -> Dict[str, Any]:
        """Assemble the template brief from ``intake`` without any logging or I/O.

        Placeholder defaults are applied when a key is absent *or* its value is
        ``None``/blank, and enum-like fields are checked against the values
        allowed by the schema. A non-dict ``intake`` is treated as empty.
        """
        if not isinstance(intake, dict):
            intake = {}

        business_name = self._text_or_default(intake.get("business_name"), "Your Business")
        business_summary = self._text_or_default(intake.get("business_summary"), "Business description")
        template_type = self._choice_or_default(
            intake.get("template_type"), self._TEMPLATE_TYPES, "blog"
        )
        # Avoid the nonsensical tagline "Professional dont_know website".
        if template_type == "dont_know":
            tagline = "Professional website"
        else:
            tagline = f"Professional {template_type} website"

        site_brief = {
            "business_name": business_name,
            "tagline": tagline,
            "template_type": template_type,
            "geo_scope": "global",
            "primary_offerings": self._normalize_list(intake.get("primary_offerings")) or ["Services"],
            "product_assets": self._extract_product_assets(intake),
            "audience": {
                "segment": self._text_or_default(intake.get("target_audience"), "General audience"),
                "b2b_b2c": self._choice_or_default(
                    intake.get("audience_type"), self._AUDIENCE_TYPES, "Both"
                ),
                "persona_notes": self._text_or_default(
                    intake.get("target_audience"), "General audience description"
                ),
            },
            "brand_voice": {
                "tone": self._text_or_default(intake.get("brand_tone"), "professional"),
                "adjectives": self._normalize_list(intake.get("brand_adjectives"))
                or ["professional", "reliable"],
                "avoid": self._normalize_list(intake.get("avoid_terms")),
            },
            "contact": {
                "email": self._text_or_default(intake.get("contact_email"), "contact@example.com"),
                "phone": intake.get("contact_phone"),
                "location": intake.get("contact_location"),
            },
            "competitor_urls": self._normalize_list(intake.get("competitor_urls")),
        }

        content_plan = {
            "required_pages": [
                {
                    "page": "home",
                    "goal": "Welcome visitors and introduce the business",
                    "key_points": [business_name, business_summary],
                    "cta": "Get Started",
                },
                {
                    "page": "about",
                    "goal": "Share business story and values",
                    "key_points": ["Our story", "Our mission", "Our values"],
                    "cta": "Learn More",
                },
                {
                    "page": "contact",
                    "goal": "Enable visitors to get in touch",
                    "key_points": ["Contact information", "Business hours", "Location"],
                    "cta": "Contact Us",
                },
            ],
            "optional_sections": ["blog", "faq", "testimonials"],
            "min_content_items": 5,
        }

        exa_query_map = {
            "home": self._exa_section(
                [f"{business_name} website", f"{business_name} services"],
                f"What is {business_name} and what do they offer?",
                ["services", "about", "contact"],
            ),
            "about": self._exa_section(
                [f"{business_name} about us", f"{business_name} story"],
                f"Tell me about {business_name}'s history and mission",
                ["about", "story", "mission", "values"],
            ),
            "services_or_products": self._exa_section(
                [f"{business_name} services", f"{business_name} products"],
                f"What services and products does {business_name} offer?",
                ["services", "products", "offerings"],
            ),
            "contact": self._exa_section(
                [f"{business_name} contact", f"{business_name} location"],
                f"How can I contact {business_name}?",
                ["contact", "phone", "email", "address"],
            ),
            "competitor_optional": self._exa_section(
                [f"{business_name} competitors", f"alternatives to {business_name}"],
                f"Who are the main competitors of {business_name}?",
                ["competitors", "alternatives"],
            ),
        }

        return {
            "site_brief": site_brief,
            "content_plan": content_plan,
            "exa_query_map": exa_query_map,
            "quality_flags": {
                "confidence": 0.8,
                "missing_fields": [],
                "followup_questions": [],
            },
        }
|
||||
|
||||
|
||||
# Singleton instance
|
||||
# Created once at import time; the class defines no __init__ and stores no
# per-instance state, so importers can share this one object.
website_intake_service = WebsiteIntakeService()
|
||||
Reference in New Issue
Block a user