# - Use canonical Clerk user id (clerk_user_id) across all onboarding entrypoints to ensure consistent OnboardingSession.user_id lookup
# - Fix API key persistence in api_key_manager.py to use correct APIKey model columns (session_id, provider, key)
# - Increase Node heap for frontend build to 8GB and add build:nomap script to disable sourcemaps and reduce memory usage
# - Update onboarding endpoints (endpoints_core.py, onboarding_control_service.py, step_management_service.py) to prefer clerk_user_id over id
# - Fix frontend workflowStore.ts TypeScript error by returning WorkflowError instance
# - Add website_automation_service.py for onboarding automation
286 lines
13 KiB
Python
286 lines
13 KiB
Python
"""Website Intake Service for generating site briefs from business information."""
|
|
from typing import Dict, Any, Optional
|
|
from loguru import logger
|
|
|
|
from services.llm_providers.main_text_generation import llm_text_gen
|
|
|
|
|
|
def _build_site_brief_schema() -> Dict[str, Any]:
    """Assemble the JSON schema handed to the LLM as its structured-output spec.

    The schema has four required top-level objects -- ``site_brief``,
    ``content_plan``, ``exa_query_map`` and ``quality_flags`` -- plus a shared
    ``exaSection`` definition under ``$defs`` that every ``exa_query_map``
    entry references. Every helper below returns a fresh dict so no two
    schema nodes alias the same object.
    """

    def text() -> Dict[str, Any]:
        return {"type": "string"}

    def nullable_text() -> Dict[str, Any]:
        return {"type": ["string", "null"]}

    def text_list() -> Dict[str, Any]:
        return {"type": "array", "items": text()}

    def choice(*options: str) -> Dict[str, Any]:
        return {"type": "string", "enum": list(options)}

    def record(fields: Dict[str, Any], mandatory: list) -> Dict[str, Any]:
        return {"type": "object", "properties": fields, "required": list(mandatory)}

    def exa_ref() -> Dict[str, Any]:
        return {"$ref": "#/$defs/exaSection"}

    product_assets = record(
        {"urls": text_list(), "asset_ids": text_list()},
        ["urls", "asset_ids"],
    )
    audience = record(
        {
            "segment": text(),
            "b2b_b2c": choice("B2B", "B2C", "Both", "dont_know"),
            "persona_notes": text(),
        },
        ["segment", "b2b_b2c", "persona_notes"],
    )
    brand_voice = record(
        {"tone": text(), "adjectives": text_list(), "avoid": text_list()},
        ["tone", "adjectives", "avoid"],
    )
    contact = record(
        {"email": text(), "phone": nullable_text(), "location": nullable_text()},
        ["email", "phone", "location"],
    )

    # ``product_assets`` is declared as a property but is not in the required list.
    site_brief = record(
        {
            "business_name": text(),
            "tagline": text(),
            "template_type": choice("blog", "profile", "shop", "dont_know"),
            "geo_scope": choice("global", "local", "hyper_local", "dont_know"),
            "primary_offerings": text_list(),
            "product_assets": product_assets,
            "audience": audience,
            "brand_voice": brand_voice,
            "contact": contact,
            "competitor_urls": text_list(),
        },
        [
            "business_name",
            "tagline",
            "template_type",
            "geo_scope",
            "primary_offerings",
            "audience",
            "brand_voice",
            "contact",
            "competitor_urls",
        ],
    )

    page_entry = record(
        {
            "page": choice("home", "about", "services", "products", "contact", "blog", "faq"),
            "goal": text(),
            "key_points": text_list(),
            "cta": text(),
        },
        ["page", "goal", "key_points", "cta"],
    )
    content_plan = record(
        {
            "required_pages": {"type": "array", "items": page_entry},
            "optional_sections": text_list(),
            "min_content_items": {"type": "integer"},
        },
        ["required_pages", "optional_sections", "min_content_items"],
    )

    exa_sections = ["home", "about", "services_or_products", "contact", "competitor_optional"]
    exa_query_map = record({section: exa_ref() for section in exa_sections}, exa_sections)

    quality_flags = record(
        {
            "confidence": {"type": "number"},
            "missing_fields": text_list(),
            "followup_questions": text_list(),
        },
        ["confidence", "missing_fields", "followup_questions"],
    )

    exa_section_def = record(
        {
            "queries": text_list(),
            "summary_query": text(),
            "include_text": text_list(),
            "search_type": choice("auto", "neural", "fast", "deep"),
            "category": text(),
        },
        ["queries", "summary_query", "include_text", "search_type", "category"],
    )

    root = record(
        {
            "site_brief": site_brief,
            "content_plan": content_plan,
            "exa_query_map": exa_query_map,
            "quality_flags": quality_flags,
        },
        ["site_brief", "content_plan", "exa_query_map", "quality_flags"],
    )
    root["$defs"] = {"exaSection": exa_section_def}
    return root


SITE_BRIEF_SCHEMA: Dict[str, Any] = _build_site_brief_schema()
|
|
|
|
|
|
class WebsiteIntakeService:
    """Generate site briefs and Exa query maps from minimal intake inputs."""

    def _normalize_list(self, value: Any) -> list:
        """Coerce a loosely-typed intake value into a list of non-empty strings.

        Args:
            value: ``None``/falsy, a list or tuple of items, a comma-separated
                string, or any other scalar.

        Returns:
            A list of stripped strings. Falsy input yields ``[]``; ``None``
            items and items that are blank after stripping are dropped.
        """
        if not value:
            return []
        if isinstance(value, (list, tuple)):
            # Skip None explicitly: str(None) is the truthy string "None" and
            # would otherwise leak into the output as a bogus entry.
            stripped = (str(item).strip() for item in value if item is not None)
            return [text for text in stripped if text]
        if isinstance(value, str):
            return [part.strip() for part in value.split(",") if part.strip()]
        text = str(value).strip()
        return [text] if text else []

    def _extract_product_assets(self, intake: Dict[str, Any]) -> Dict[str, list]:
        """Collect product asset URLs and ids from the intake payload.

        Args:
            intake: Intake mapping; reads ``product_asset_urls`` and
                ``product_asset_ids``.

        Returns:
            ``{"urls": [...], "asset_ids": [...]}`` with both lists normalized.
        """
        return {
            "urls": self._normalize_list(intake.get("product_asset_urls")),
            "asset_ids": self._normalize_list(intake.get("product_asset_ids")),
        }

    def build_prompt(self, intake: Dict[str, Any]) -> str:
        """Build the LLM prompt that embeds the raw intake inputs.

        Args:
            intake: Intake mapping; interpolated into the prompt via ``str()``.

        Returns:
            The full prompt text.
        """
        return (
            "You are creating a website brief and research plan for a non-technical user. "
            "Use the inputs below, keep assumptions minimal, and prefer 'dont_know' when unsure. "
            "Ensure at least 5 content items across required pages.\n\n"
            f"INTAKE INPUTS:\n{intake}\n\n"
            "Output structured JSON that matches the schema exactly."
        )

    def generate_site_brief(self, intake: Dict[str, Any], user_id: Optional[str] = None) -> Dict[str, Any]:
        """Generate a site brief and Exa query map from intake inputs.

        Calls ``llm_text_gen`` with ``SITE_BRIEF_SCHEMA`` and, when the intake
        carries product asset URLs or ids, writes them into
        ``result["site_brief"]["product_assets"]``.

        Args:
            intake: Intake mapping supplied by the caller.
            user_id: Optional user identifier forwarded to ``llm_text_gen``.

        Returns:
            The structured result dict on success;
            ``{"error": "invalid_response", "raw": <result>}`` when the LLM
            returns anything other than a dict; or the static fallback brief
            when any exception is raised along the way.
        """
        logger.info("Generating site brief and Exa query map from intake")

        try:
            prompt = self.build_prompt(intake)
            result = llm_text_gen(prompt=prompt, json_struct=SITE_BRIEF_SCHEMA, user_id=user_id)

            # Anything that is not a dict cannot be a structured brief. Without
            # this guard a None/list result would be returned as-is whenever
            # the intake had no product assets, breaking the declared return type.
            if not isinstance(result, dict):
                if isinstance(result, str):
                    logger.warning("LLM returned string response; expected structured JSON")
                else:
                    logger.warning(
                        f"LLM returned {type(result).__name__} response; expected structured JSON"
                    )
                return {"error": "invalid_response", "raw": result}

            product_assets = self._extract_product_assets(intake)
            if product_assets.get("urls") or product_assets.get("asset_ids"):
                # User-supplied assets take precedence over whatever the LLM produced.
                result.setdefault("site_brief", {})
                result["site_brief"]["product_assets"] = product_assets

            logger.success(f"Generated site brief for user {user_id}")
            return result

        except Exception as e:
            logger.error(f"Failed to generate site brief: {str(e)}")
            # Return a fallback site brief for development
            return self._generate_fallback_site_brief(intake)

    def _generate_fallback_site_brief(self, intake: Dict[str, Any]) -> Dict[str, Any]:
        """Generate a fallback site brief when LLM is not available.

        Builds a static brief with the same top-level keys as the schema
        (``site_brief``, ``content_plan``, ``exa_query_map``,
        ``quality_flags``), filling in defaults for any intake value that is
        missing, ``None`` or empty.

        Args:
            intake: Intake mapping; ``None`` is treated as an empty mapping.

        Returns:
            The fallback brief dict.
        """
        logger.info("Generating fallback site brief")

        # Tolerate a missing payload so the fallback path itself cannot raise.
        intake = intake or {}

        def pick(key: str, default: Any) -> Any:
            # `or` (not dict.get's default) so that keys present with None/""
            # also fall back instead of producing e.g. "None website".
            return intake.get(key) or default

        def exa_section(queries: list, summary_query: str, include_text: list) -> Dict[str, Any]:
            # Every fallback section shares the same search_type and category.
            return {
                "queries": queries,
                "summary_query": summary_query,
                "include_text": include_text,
                "search_type": "auto",
                "category": "business",
            }

        business_name = pick("business_name", "Your Business")
        business_summary = pick("business_summary", "Business description")
        template_type = pick("template_type", "blog")

        fallback_brief = {
            "site_brief": {
                "business_name": business_name,
                "tagline": f"Professional {template_type} website",
                "template_type": template_type,
                "geo_scope": "global",
                "primary_offerings": self._normalize_list(intake.get("primary_offerings")) or ["Services"],
                "product_assets": self._extract_product_assets(intake),
                "audience": {
                    "segment": pick("target_audience", "General audience"),
                    "b2b_b2c": pick("audience_type", "Both"),
                    "persona_notes": pick("target_audience", "General audience description"),
                },
                "brand_voice": {
                    "tone": pick("brand_tone", "professional"),
                    "adjectives": self._normalize_list(intake.get("brand_adjectives"))
                    or ["professional", "reliable"],
                    "avoid": self._normalize_list(intake.get("avoid_terms")),
                },
                "contact": {
                    "email": pick("contact_email", "contact@example.com"),
                    "phone": intake.get("contact_phone"),
                    "location": intake.get("contact_location"),
                },
                "competitor_urls": self._normalize_list(intake.get("competitor_urls")),
            },
            "content_plan": {
                "required_pages": [
                    {
                        "page": "home",
                        "goal": "Welcome visitors and introduce the business",
                        "key_points": [business_name, business_summary],
                        "cta": "Get Started",
                    },
                    {
                        "page": "about",
                        "goal": "Share business story and values",
                        "key_points": ["Our story", "Our mission", "Our values"],
                        "cta": "Learn More",
                    },
                    {
                        "page": "contact",
                        "goal": "Enable visitors to get in touch",
                        "key_points": ["Contact information", "Business hours", "Location"],
                        "cta": "Contact Us",
                    },
                ],
                "optional_sections": ["blog", "faq", "testimonials"],
                "min_content_items": 5,
            },
            "exa_query_map": {
                "home": exa_section(
                    [f"{business_name} website", f"{business_name} services"],
                    f"What is {business_name} and what do they offer?",
                    ["services", "about", "contact"],
                ),
                "about": exa_section(
                    [f"{business_name} about us", f"{business_name} story"],
                    f"Tell me about {business_name}'s history and mission",
                    ["about", "story", "mission", "values"],
                ),
                "services_or_products": exa_section(
                    [f"{business_name} services", f"{business_name} products"],
                    f"What services and products does {business_name} offer?",
                    ["services", "products", "offerings"],
                ),
                "contact": exa_section(
                    [f"{business_name} contact", f"{business_name} location"],
                    f"How can I contact {business_name}?",
                    ["contact", "phone", "email", "address"],
                ),
                "competitor_optional": exa_section(
                    [f"{business_name} competitors", f"alternatives to {business_name}"],
                    f"Who are the main competitors of {business_name}?",
                    ["competitors", "alternatives"],
                ),
            },
            "quality_flags": {
                "confidence": 0.8,
                "missing_fields": [],
                "followup_questions": [],
            },
        }

        return fallback_brief
|
|
|
|
|
|
# Singleton instance
|
|
# Created once at import time and shared by every importer of this module;
# WebsiteIntakeService defines no __init__ and sets no attributes on self.
website_intake_service = WebsiteIntakeService()
|