Research component integration, Copilotkit implementation, SEO copilotkit implementation, Wix SEO metadata complete, Wix SEO metadata review
This commit is contained in:
@@ -2,4 +2,14 @@
|
||||
Wix integration modular services package.
|
||||
"""
|
||||
|
||||
from services.integrations.wix.seo import build_seo_data
|
||||
from services.integrations.wix.ricos_converter import markdown_to_html, convert_via_wix_api
|
||||
from services.integrations.wix.blog_publisher import create_blog_post
|
||||
|
||||
__all__ = [
|
||||
'build_seo_data',
|
||||
'markdown_to_html',
|
||||
'convert_via_wix_api',
|
||||
'create_blog_post',
|
||||
]
|
||||
|
||||
|
||||
@@ -20,6 +20,40 @@ class WixBlogService:
|
||||
return h
|
||||
|
||||
def create_draft_post(self, access_token: str, payload: Dict[str, Any], extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
    """
    POST a draft-post payload to the Wix Blog API and return the decoded JSON reply.

    Before sending, the payload structure is logged for debugging (top-level
    keys, draftPost keys, richContent summary, the first LIST_ITEM found among
    the first 10 top-level nodes, and a truncated JSON dump). The diagnostics
    are defensive: a malformed payload never makes the logging itself raise.

    Args:
        access_token: Token passed to ``self.headers`` to build request headers
        payload: Request body; a ``draftPost`` entry is inspected if present
        extra_headers: Optional additional headers forwarded to ``self.headers``

    Returns:
        The parsed JSON body of the response

    Raises:
        requests.HTTPError: If the response status is 4xx/5xx (``raise_for_status``)
    """
    import json

    endpoint = f"{self.base_url}/blog/v3/draft-posts"

    # Log the exact payload being sent for debugging
    logger.warning("📤 Sending to Wix Blog API:")
    logger.warning(f" Endpoint: {endpoint}")
    logger.warning(f" Payload top-level keys: {list(payload.keys())}")

    dp = payload.get('draftPost')
    # Only inspect draftPost when it is a dict; otherwise .keys() would raise
    # inside what is purely diagnostic code.
    if isinstance(dp, dict):
        logger.warning(f" draftPost keys: {list(dp.keys())}")
        if 'richContent' in dp:
            rc = dp['richContent']
            logger.warning(f" richContent keys: {list(rc.keys()) if isinstance(rc, dict) else 'N/A'}")
            if isinstance(rc, dict) and 'nodes' in rc:
                nodes = rc['nodes']
                logger.warning(f" richContent.nodes count: {len(nodes) if isinstance(nodes, list) else 'N/A'}")
                # Slicing is only valid for a list; skip inspection otherwise.
                if isinstance(nodes, list):
                    # Inspect first LIST_ITEM node if any
                    for i, node in enumerate(nodes[:10]):
                        if not (isinstance(node, dict) and node.get('type') == 'LIST_ITEM'):
                            continue
                        logger.warning(f" Found LIST_ITEM at index {i}:")
                        logger.warning(f" Keys: {list(node.keys())}")
                        logger.warning(f" Has listItemData: {'listItemData' in node}")
                        if 'listItemData' in node:
                            logger.warning(f" listItemData type: {type(node['listItemData'])}, value: {node['listItemData']}")
                        if 'nodes' in node:
                            nested = node['nodes']
                            logger.warning(f" Nested nodes count: {len(nested) if isinstance(nested, list) else 'N/A'}")
                            if isinstance(nested, list):
                                for j, n_node in enumerate(nested[:3]):
                                    if isinstance(n_node, dict):
                                        logger.warning(f" Nested node {j}: type={n_node.get('type')}, keys={list(n_node.keys())}")
                                        if n_node.get('type') == 'PARAGRAPH' and 'paragraphData' in n_node:
                                            logger.warning(f" paragraphData type: {type(n_node['paragraphData'])}, value: {n_node['paragraphData']}")
                        break  # Only inspect first LIST_ITEM

    # A payload that cannot be serialized must not fail here, in the logging
    # step; let the actual request below report the problem instead.
    try:
        payload_preview = json.dumps(payload, indent=2, ensure_ascii=False)[:8000]
    except (TypeError, ValueError) as exc:
        logger.warning(f" Could not serialize payload for logging: {exc}")
    else:
        logger.warning(f" Full Payload JSON (first 8000 chars):\n{payload_preview}...")

    # NOTE(review): no timeout is passed, so this call can block indefinitely
    # if the server never answers - consider adding one.
    response = requests.post(endpoint, headers=self.headers(access_token, extra_headers), json=payload)
    response.raise_for_status()
    return response.json()
|
||||
|
||||
716
backend/services/integrations/wix/blog_publisher.py
Normal file
716
backend/services/integrations/wix/blog_publisher.py
Normal file
@@ -0,0 +1,716 @@
|
||||
"""
|
||||
Blog Post Publisher for Wix
|
||||
|
||||
Handles blog post creation, validation, and publishing to Wix.
|
||||
"""
|
||||
|
||||
import json
|
||||
import uuid
|
||||
import requests
|
||||
import jwt
|
||||
from typing import Dict, Any, Optional, List
|
||||
from loguru import logger
|
||||
from services.integrations.wix.blog import WixBlogService
|
||||
from services.integrations.wix.content import convert_content_to_ricos
|
||||
from services.integrations.wix.ricos_converter import convert_via_wix_api
|
||||
from services.integrations.wix.seo import build_seo_data
|
||||
|
||||
|
||||
# Per-node data fields from which None-valued entries are stripped.
_RICOS_DATA_FIELDS = (
    'headingData', 'paragraphData', 'blockquoteData', 'bulletedListData',
    'orderedListData', 'textData', 'linkData', 'imageData',
)

# Node types that must carry a 'nodes' list and whose children are validated.
_RICOS_CONTAINER_TYPES = (
    'HEADING', 'PARAGRAPH', 'BLOCKQUOTE', 'LIST_ITEM', 'LINK',
    'BULLETED_LIST', 'ORDERED_LIST',
)


def _validate_ricos_node(node: Any, path: str = "root") -> None:
    """
    Validate and repair one Ricos node in place, then recurse into its children.

    Repairs applied:
    1. Missing 'type' defaults to 'PARAGRAPH'; missing 'id' gets a fresh UUID
    2. 'listItemData' is removed from LIST_ITEM nodes
    3. None-valued entries are removed from the known data fields
    4. HEADING nodes get a default ``headingData.level`` of 1; TEXT nodes get
       an empty ``textData`` when it is missing or not a dict
    5. Container nodes get a list-typed 'nodes' field, and each child is
       validated recursively

    LINK and IMAGE nodes with missing data are only logged, not repaired.
    Nodes that are not dicts are logged and left untouched. Children of
    non-container node types are not visited.

    Args:
        node: The node to validate (expected to be a dict)
        path: Human-readable location of the node, used in log messages
    """
    if not isinstance(node, dict):
        logger.error(f"{path}: Node is not a dict: {type(node)}")
        return

    # Ensure type and id exist
    if 'type' not in node:
        logger.error(f"{path}: Missing 'type' field - REQUIRED")
        node['type'] = 'PARAGRAPH'  # Default fallback
    if 'id' not in node:
        node['id'] = str(uuid.uuid4())
        logger.debug(f"{path}: Added missing 'id'")

    node_type = node.get('type')

    # Data fields such as paragraphData or bulletedListData are treated as
    # optional and are never added as empty {} here; only fields with a
    # required property (headingData.level, textData) get defaults below.

    # Special handling: Remove listItemData if it exists (not in Wix API schema)
    if node_type == 'LIST_ITEM' and 'listItemData' in node:
        logger.debug(f"{path}: Removing incorrect listItemData field from LIST_ITEM")
        del node['listItemData']

    # Strip None values BEFORE applying defaults. Doing it afterwards would turn
    # headingData={'level': None} into {} and leave the heading without a level.
    for data_field in _RICOS_DATA_FIELDS:
        data_value = node.get(data_field)
        if not isinstance(data_value, dict):
            continue
        keys_to_remove = [k for k, v in data_value.items() if v is None]
        if keys_to_remove:
            logger.debug(f"{path} ({node_type}): Removing None values from {data_field}: {keys_to_remove}")
            for key in keys_to_remove:
                del data_value[key]

    # HEADING nodes require headingData with a level property
    if node_type == 'HEADING':
        if not isinstance(node.get('headingData'), dict):
            logger.warning(f"{path} (HEADING): Missing headingData, adding default level 1")
            node['headingData'] = {'level': 1}
        elif 'level' not in node['headingData']:
            logger.warning(f"{path} (HEADING): Missing level in headingData, adding default")
            node['headingData']['level'] = 1

    # TEXT nodes must have textData
    if node_type == 'TEXT' and not isinstance(node.get('textData'), dict):
        logger.error(f"{path} (TEXT): Missing/invalid textData - node will be problematic")
        node['textData'] = {'text': '', 'decorations': []}

    # LINK and IMAGE nodes must have their data fields (reported, not repaired)
    if node_type == 'LINK' and not isinstance(node.get('linkData'), dict):
        logger.error(f"{path} (LINK): Missing/invalid linkData - node will be problematic")
    if node_type == 'IMAGE' and not isinstance(node.get('imageData'), dict):
        logger.error(f"{path} (IMAGE): Missing/invalid imageData - node will be problematic")

    # Ensure 'nodes' field exists for container nodes
    if node_type in _RICOS_CONTAINER_TYPES:
        if 'nodes' not in node:
            logger.warning(f"{path} ({node_type}): Missing 'nodes' field, adding empty array")
            node['nodes'] = []
        elif not isinstance(node['nodes'], list):
            logger.error(f"{path} ({node_type}): Invalid 'nodes' field (not a list), fixing")
            node['nodes'] = []

        # Recursively validate all nested nodes
        for nested_idx, nested_node in enumerate(node['nodes']):
            _validate_ricos_node(nested_node, f"{path}.nodes[{nested_idx}]")


def validate_ricos_content(ricos_content: Dict[str, Any]) -> Dict[str, Any]:
    """
    Validate and normalize Ricos document structure.

    The document is modified in place and also returned. Top-level 'type' is
    forced to 'DOCUMENT', a string 'id' is ensured, a missing 'nodes' field
    becomes an empty list, every node is validated recursively, and default
    'metadata' and 'documentStyle' objects are added when absent or not dicts.

    Args:
        ricos_content: Ricos document dict

    Returns:
        Validated and normalized Ricos document

    Raises:
        ValueError: If ``ricos_content`` is empty or not a dict, or if its
            'nodes' field is present but not a list
    """
    # Validate Ricos document structure before using
    if not ricos_content or not isinstance(ricos_content, dict):
        logger.error("Invalid Ricos content - not a dict")
        raise ValueError("Failed to convert content to valid Ricos format")

    if 'type' not in ricos_content:
        ricos_content['type'] = 'DOCUMENT'
        logger.debug("Added missing richContent type 'DOCUMENT'")
    elif ricos_content.get('type') != 'DOCUMENT':
        logger.warning(f"richContent type expected 'DOCUMENT', got {ricos_content.get('type')}, correcting")
        ricos_content['type'] = 'DOCUMENT'

    if not isinstance(ricos_content.get('id'), str):
        ricos_content['id'] = str(uuid.uuid4())
        logger.debug("Added missing richContent id")

    if 'nodes' not in ricos_content:
        logger.warning("Ricos document missing 'nodes' field, adding empty nodes array")
        ricos_content['nodes'] = []

    # Check the type before calling len() so a non-list 'nodes' (e.g. None)
    # produces the intended ValueError rather than a TypeError from len().
    if not isinstance(ricos_content['nodes'], list):
        raise ValueError(f"richContent.nodes must be a list, got {type(ricos_content['nodes'])}")

    logger.debug(f"Ricos document structure: nodes={len(ricos_content['nodes'])}")

    # Validate all top-level nodes recursively
    for idx, node in enumerate(ricos_content['nodes']):
        _validate_ricos_node(node, f"nodes[{idx}]")

    # Ensure metadata exists and is a dict with 'version' and 'id'
    if not isinstance(ricos_content.get('metadata'), dict):
        ricos_content['metadata'] = {'version': 1, 'id': str(uuid.uuid4())}
        logger.debug("Added default metadata to richContent")
    else:
        ricos_content['metadata'].setdefault('version', 1)
        ricos_content['metadata'].setdefault('id', str(uuid.uuid4()))

    # Ensure documentStyle exists and is a dict
    if not isinstance(ricos_content.get('documentStyle'), dict):
        ricos_content['documentStyle'] = {
            'paragraph': {
                'decorations': [],
                'nodeStyle': {},
                'lineHeight': '1.5'
            }
        }
        logger.debug("Added default documentStyle to richContent")

    logger.debug(f"✅ Validated richContent: {len(ricos_content['nodes'])} nodes, has_metadata={bool(ricos_content.get('metadata'))}, has_documentStyle={bool(ricos_content.get('documentStyle'))}")

    return ricos_content
|
||||
|
||||
|
||||
def validate_payload_no_none(obj: Any, path: str = "") -> None:
    """
    Recursively validate that no None values exist in the payload.

    Dicts are traversed by value; lists and tuples are traversed by index
    (tuples are included because they serialize to JSON arrays just like
    lists). Any other value is accepted as a leaf.

    Args:
        obj: The payload, or a nested part of it, to check
        path: Location of ``obj`` within the payload, used in the error message

    Raises:
        ValueError: On the first None encountered; the message names its path,
            e.g. ``blog_data.draftPost.tagIds[2]``
    """
    if obj is None:
        raise ValueError(f"Found None value at path: {path}")

    if isinstance(obj, dict):
        for key, value in obj.items():
            # str(key) keeps the path a string, so a falsy key such as 0 does
            # not make nested paths lose their prefix.
            validate_payload_no_none(value, f"{path}.{key}" if path else str(key))
    elif isinstance(obj, (list, tuple)):
        for idx, item in enumerate(obj):
            validate_payload_no_none(item, f"{path}[{idx}]" if path else f"[{idx}]")
|
||||
|
||||
|
||||
def create_blog_post(
|
||||
blog_service: WixBlogService,
|
||||
access_token: str,
|
||||
title: str,
|
||||
content: str,
|
||||
member_id: str,
|
||||
cover_image_url: str = None,
|
||||
category_ids: List[str] = None,
|
||||
tag_ids: List[str] = None,
|
||||
publish: bool = True,
|
||||
seo_metadata: Dict[str, Any] = None,
|
||||
import_image_func = None,
|
||||
lookup_categories_func = None,
|
||||
lookup_tags_func = None,
|
||||
base_url: str = 'https://www.wixapis.com'
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Create and optionally publish a blog post on Wix
|
||||
|
||||
Args:
|
||||
blog_service: WixBlogService instance
|
||||
access_token: Valid access token
|
||||
title: Blog post title
|
||||
content: Blog post content (markdown)
|
||||
member_id: Required for third-party apps - the member ID of the post author
|
||||
cover_image_url: Optional cover image URL
|
||||
category_ids: Optional list of category IDs or names
|
||||
tag_ids: Optional list of tag IDs or names
|
||||
publish: Whether to publish immediately or save as draft
|
||||
seo_metadata: Optional SEO metadata dict
|
||||
import_image_func: Function to import images (optional)
|
||||
lookup_categories_func: Function to lookup/create categories (optional)
|
||||
lookup_tags_func: Function to lookup/create tags (optional)
|
||||
base_url: Wix API base URL
|
||||
|
||||
Returns:
|
||||
Created blog post information
|
||||
"""
|
||||
if not member_id:
|
||||
raise ValueError("memberId is required for third-party apps creating blog posts")
|
||||
|
||||
headers = {
|
||||
'Authorization': f'Bearer {access_token}',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
|
||||
# Build valid Ricos rich content
|
||||
# Ensure content is not empty
|
||||
if not content or not content.strip():
|
||||
content = "This is a post from ALwrity."
|
||||
logger.warning("⚠️ Content was empty, using default text")
|
||||
|
||||
# Try Wix API first (more reliable), fall back to custom parser
|
||||
ricos_content = None
|
||||
try:
|
||||
logger.warning("🔄 Attempting to convert markdown to Ricos via Wix API...")
|
||||
ricos_content = convert_via_wix_api(content, access_token, base_url)
|
||||
logger.warning(f"✅ Wix API conversion successful. Ricos document has {len(ricos_content.get('nodes', []))} nodes")
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ Wix Ricos API conversion failed: {e}. Falling back to custom parser...")
|
||||
# Fall back to custom parser
|
||||
ricos_content = convert_content_to_ricos(content, None)
|
||||
logger.warning(f"✅ Custom parser conversion complete. Ricos document has {len(ricos_content.get('nodes', []))} nodes")
|
||||
|
||||
# Validate Ricos content
|
||||
ricos_content = validate_ricos_content(ricos_content)
|
||||
|
||||
# Minimal payload per Wix docs: title, memberId, and richContent
|
||||
# CRITICAL: Only include fields that have valid values (no None, no empty strings for required fields)
|
||||
blog_data = {
|
||||
'draftPost': {
|
||||
'title': str(title).strip() if title else "Untitled",
|
||||
'memberId': str(member_id).strip(), # Required for third-party apps (validated above)
|
||||
'richContent': ricos_content, # Must be a valid Ricos document object
|
||||
},
|
||||
'publish': bool(publish),
|
||||
'fieldsets': ['URL'] # Simplified fieldsets
|
||||
}
|
||||
|
||||
# Add excerpt only if content exists and is not empty (avoid None or empty strings)
|
||||
excerpt = (content or '').strip()[:200] if content else None
|
||||
if excerpt and len(excerpt) > 0:
|
||||
blog_data['draftPost']['excerpt'] = str(excerpt)
|
||||
|
||||
# Add cover image if provided
|
||||
if cover_image_url and import_image_func:
|
||||
try:
|
||||
media_id = import_image_func(access_token, cover_image_url, f'Cover: {title}')
|
||||
# Ensure media_id is a string and not None
|
||||
if media_id and isinstance(media_id, str):
|
||||
blog_data['draftPost']['media'] = {
|
||||
'wixMedia': {
|
||||
'image': {'id': str(media_id).strip()}
|
||||
},
|
||||
'displayed': True,
|
||||
'custom': True
|
||||
}
|
||||
else:
|
||||
logger.warning(f"Invalid media_id type or value: {type(media_id)}, skipping media")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to import cover image: {e}")
|
||||
|
||||
# Handle categories - can be either IDs (list of strings) or names (for lookup)
|
||||
category_ids_to_use = None
|
||||
if category_ids:
|
||||
# Check if these are IDs (UUIDs) or names
|
||||
if isinstance(category_ids, list) and len(category_ids) > 0:
|
||||
# Assume IDs if first item looks like UUID (has hyphens and is long)
|
||||
first_item = str(category_ids[0])
|
||||
if '-' in first_item and len(first_item) > 30:
|
||||
category_ids_to_use = category_ids
|
||||
elif lookup_categories_func:
|
||||
# These are names, need to lookup/create
|
||||
extra_headers = {}
|
||||
if 'wix-site-id' in headers:
|
||||
extra_headers['wix-site-id'] = headers['wix-site-id']
|
||||
category_ids_to_use = lookup_categories_func(
|
||||
access_token, category_ids, extra_headers if extra_headers else None
|
||||
)
|
||||
|
||||
# Handle tags - can be either IDs (list of strings) or names (for lookup)
|
||||
tag_ids_to_use = None
|
||||
if tag_ids:
|
||||
# Check if these are IDs (UUIDs) or names
|
||||
if isinstance(tag_ids, list) and len(tag_ids) > 0:
|
||||
# Assume IDs if first item looks like UUID (has hyphens and is long)
|
||||
first_item = str(tag_ids[0])
|
||||
if '-' in first_item and len(first_item) > 30:
|
||||
tag_ids_to_use = tag_ids
|
||||
elif lookup_tags_func:
|
||||
# These are names, need to lookup/create
|
||||
extra_headers = {}
|
||||
if 'wix-site-id' in headers:
|
||||
extra_headers['wix-site-id'] = headers['wix-site-id']
|
||||
tag_ids_to_use = lookup_tags_func(
|
||||
access_token, tag_ids, extra_headers if extra_headers else None
|
||||
)
|
||||
|
||||
# Add categories if we have IDs (must be non-empty list of strings)
|
||||
# CRITICAL: Wix API rejects empty arrays or arrays with None/empty strings
|
||||
if category_ids_to_use and isinstance(category_ids_to_use, list) and len(category_ids_to_use) > 0:
|
||||
# Filter out None, empty strings, and ensure all are valid UUID strings
|
||||
valid_category_ids = [str(cid).strip() for cid in category_ids_to_use if cid and str(cid).strip()]
|
||||
if valid_category_ids:
|
||||
blog_data['draftPost']['categoryIds'] = valid_category_ids
|
||||
logger.debug(f"Added {len(valid_category_ids)} category IDs")
|
||||
else:
|
||||
logger.warning("All category IDs were invalid, not including categoryIds in payload")
|
||||
|
||||
# Add tags if we have IDs (must be non-empty list of strings)
|
||||
# CRITICAL: Wix API rejects empty arrays or arrays with None/empty strings
|
||||
if tag_ids_to_use and isinstance(tag_ids_to_use, list) and len(tag_ids_to_use) > 0:
|
||||
# Filter out None, empty strings, and ensure all are valid UUID strings
|
||||
valid_tag_ids = [str(tid).strip() for tid in tag_ids_to_use if tid and str(tid).strip()]
|
||||
if valid_tag_ids:
|
||||
blog_data['draftPost']['tagIds'] = valid_tag_ids
|
||||
logger.debug(f"Added {len(valid_tag_ids)} tag IDs")
|
||||
else:
|
||||
logger.warning("All tag IDs were invalid, not including tagIds in payload")
|
||||
|
||||
# Build SEO data from metadata if provided
|
||||
# TESTING: Skip SEO data temporarily to confirm richContent fix
|
||||
test_skip_seo = True
|
||||
if test_skip_seo:
|
||||
logger.warning("🧪 TESTING: Skipping SEO data to isolate richContent vs seoData issue")
|
||||
seo_data = None
|
||||
elif seo_metadata:
|
||||
logger.warning(f"📊 Building SEO data from metadata. Keys: {list(seo_metadata.keys())}")
|
||||
seo_data = build_seo_data(seo_metadata, title)
|
||||
if seo_data:
|
||||
# Log detailed SEO structure
|
||||
logger.warning(f"📋 SEO data built: {len(seo_data.get('tags', []))} tags, {len(seo_data.get('settings', {}).get('keywords', []))} keywords")
|
||||
|
||||
# Log each SEO tag for debugging (key ones only to avoid too much output)
|
||||
if seo_data.get('tags'):
|
||||
for idx, tag in enumerate(seo_data['tags'][:3]): # First 3 tags only
|
||||
tag_type = tag.get('type')
|
||||
if tag_type == 'title':
|
||||
logger.warning(f" SEO tag {idx+1}: type={tag_type}, children={str(tag.get('children', ''))[:50]}...")
|
||||
else:
|
||||
props = tag.get('props', {})
|
||||
content_preview = str(props.get('content', props.get('href', props.get('name', ''))))[:50]
|
||||
logger.warning(f" SEO tag {idx+1}: type={tag_type}, props={list(props.keys())}, content={content_preview}...")
|
||||
if len(seo_data['tags']) > 3:
|
||||
logger.warning(f" ... and {len(seo_data['tags']) - 3} more SEO tags")
|
||||
|
||||
blog_data['draftPost']['seoData'] = seo_data
|
||||
logger.warning(f"✅ Added seoData to blog post with {len(seo_data.get('tags', []))} tags")
|
||||
else:
|
||||
logger.warning("⚠️ SEO data was empty after building - check build_seo_data function")
|
||||
|
||||
# Add SEO slug if provided (separate field from seoData)
|
||||
if seo_metadata and seo_metadata.get('url_slug'):
|
||||
blog_data['draftPost']['seoSlug'] = str(seo_metadata.get('url_slug')).strip()
|
||||
logger.warning(f"✅ Added SEO slug: {blog_data['draftPost']['seoSlug']}")
|
||||
|
||||
if test_skip_seo:
|
||||
logger.warning("⚠️ SEO data skipped for testing - will add back once richContent is confirmed working")
|
||||
elif not seo_metadata:
|
||||
logger.warning("⚠️ No SEO metadata provided to create_blog_post")
|
||||
|
||||
# Log the payload structure for debugging (without sensitive data)
|
||||
logger.warning(f"📝 Creating blog post with title: '{title}'")
|
||||
logger.warning(f"📋 Draft post fields: {list(blog_data['draftPost'].keys())}")
|
||||
|
||||
# Detailed SEO logging
|
||||
if 'seoData' in blog_data['draftPost']:
|
||||
seo_data_debug = blog_data['draftPost']['seoData']
|
||||
logger.warning(f"📊 SEO data in payload: {len(seo_data_debug.get('tags', []))} tags, {len(seo_data_debug.get('settings', {}).get('keywords', []))} keywords")
|
||||
|
||||
# Log sample SEO tags (first 2 only to avoid too much output)
|
||||
if seo_data_debug.get('tags'):
|
||||
logger.warning("📋 SEO Tags sample:")
|
||||
for i, tag in enumerate(seo_data_debug['tags'][:2]): # First 2 tags
|
||||
logger.warning(f" Tag {i+1}: type={tag.get('type')}, custom={tag.get('custom')}, disabled={tag.get('disabled')}")
|
||||
if len(seo_data_debug['tags']) > 2:
|
||||
logger.warning(f" ... and {len(seo_data_debug['tags']) - 2} more tags")
|
||||
|
||||
if seo_data_debug.get('settings', {}).get('keywords'):
|
||||
keywords_list = [k.get('term') for k in seo_data_debug['settings']['keywords'][:3]]
|
||||
logger.warning(f"🔑 Keywords: {keywords_list}")
|
||||
|
||||
# Log FULL seoData structure for debugging
|
||||
import json
|
||||
try:
|
||||
seo_json = json.dumps(seo_data_debug, indent=2, ensure_ascii=False)
|
||||
logger.warning(f"📄 FULL seoData JSON:\n{seo_json[:2000]}...") # First 2000 chars
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to serialize seoData: {e}")
|
||||
else:
|
||||
logger.warning("⚠️ No seoData in draft post payload!")
|
||||
|
||||
try:
|
||||
# Add wix-site-id header if we can extract it from token
|
||||
extra_headers = {}
|
||||
try:
|
||||
token_str = str(access_token)
|
||||
if token_str and token_str.startswith('OauthNG.JWS.'):
|
||||
jwt_part = token_str[12:]
|
||||
payload = jwt.decode(jwt_part, options={"verify_signature": False, "verify_aud": False})
|
||||
data_payload = payload.get('data', {})
|
||||
if isinstance(data_payload, str):
|
||||
try:
|
||||
data_payload = json.loads(data_payload)
|
||||
except:
|
||||
pass
|
||||
instance_data = data_payload.get('instance', {})
|
||||
meta_site_id = instance_data.get('metaSiteId')
|
||||
if isinstance(meta_site_id, str) and meta_site_id:
|
||||
extra_headers['wix-site-id'] = meta_site_id
|
||||
headers['wix-site-id'] = meta_site_id
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract site ID from token: {e}")
|
||||
|
||||
# Make the API call
|
||||
logger.warning(f"🚀 Calling Wix API: POST /blog/v3/draft-posts")
|
||||
logger.warning(f"📦 Payload: title='{blog_data['draftPost'].get('title')}', has_seoData={'seoData' in blog_data['draftPost']}, has_richContent={'richContent' in blog_data['draftPost']}")
|
||||
|
||||
# Validate payload structure before sending
|
||||
draft_post = blog_data.get('draftPost', {})
|
||||
if not isinstance(draft_post, dict):
|
||||
raise ValueError("draftPost must be a dict object")
|
||||
|
||||
# Validate richContent structure
|
||||
if 'richContent' in draft_post:
|
||||
rc = draft_post['richContent']
|
||||
if not isinstance(rc, dict):
|
||||
raise ValueError(f"richContent must be a dict, got {type(rc)}")
|
||||
if 'nodes' not in rc:
|
||||
raise ValueError("richContent missing 'nodes' field")
|
||||
if not isinstance(rc['nodes'], list):
|
||||
raise ValueError(f"richContent.nodes must be a list, got {type(rc['nodes'])}")
|
||||
logger.debug(f"✅ richContent validation passed: {len(rc.get('nodes', []))} nodes")
|
||||
|
||||
# Validate seoData structure if present
|
||||
if 'seoData' in draft_post:
|
||||
seo = draft_post['seoData']
|
||||
if not isinstance(seo, dict):
|
||||
raise ValueError(f"seoData must be a dict, got {type(seo)}")
|
||||
if 'tags' in seo and not isinstance(seo['tags'], list):
|
||||
raise ValueError(f"seoData.tags must be a list, got {type(seo.get('tags'))}")
|
||||
if 'settings' in seo and not isinstance(seo['settings'], dict):
|
||||
raise ValueError(f"seoData.settings must be a dict, got {type(seo.get('settings'))}")
|
||||
logger.debug(f"✅ seoData validation passed: {len(seo.get('tags', []))} tags")
|
||||
|
||||
# Final validation: Ensure no None values in any nested objects
|
||||
# Wix API rejects None values and expects proper types
|
||||
try:
|
||||
validate_payload_no_none(blog_data, "blog_data")
|
||||
logger.debug("✅ Payload validation passed: No None values found")
|
||||
except ValueError as e:
|
||||
logger.error(f"❌ Payload validation failed: {e}")
|
||||
raise
|
||||
|
||||
# Log full payload structure for debugging (sanitized)
|
||||
logger.warning(f"📦 Full payload structure validation:")
|
||||
logger.warning(f" - draftPost type: {type(draft_post)}")
|
||||
logger.warning(f" - draftPost keys: {list(draft_post.keys())}")
|
||||
logger.warning(f" - richContent type: {type(draft_post.get('richContent'))}")
|
||||
if 'richContent' in draft_post:
|
||||
rc = draft_post['richContent']
|
||||
logger.warning(f" - richContent keys: {list(rc.keys()) if isinstance(rc, dict) else 'N/A'}")
|
||||
logger.warning(f" - richContent.nodes type: {type(rc.get('nodes'))}, count: {len(rc.get('nodes', []))}")
|
||||
logger.warning(f" - richContent.metadata type: {type(rc.get('metadata'))}")
|
||||
logger.warning(f" - richContent.documentStyle type: {type(rc.get('documentStyle'))}")
|
||||
logger.warning(f" - seoData type: {type(draft_post.get('seoData'))}")
|
||||
if 'seoData' in draft_post:
|
||||
seo = draft_post['seoData']
|
||||
logger.warning(f" - seoData keys: {list(seo.keys()) if isinstance(seo, dict) else 'N/A'}")
|
||||
logger.warning(f" - seoData.tags type: {type(seo.get('tags'))}, count: {len(seo.get('tags', []))}")
|
||||
logger.warning(f" - seoData.settings type: {type(seo.get('settings'))}")
|
||||
if 'categoryIds' in draft_post:
|
||||
logger.warning(f" - categoryIds type: {type(draft_post.get('categoryIds'))}, count: {len(draft_post.get('categoryIds', []))}")
|
||||
if 'tagIds' in draft_post:
|
||||
logger.warning(f" - tagIds type: {type(draft_post.get('tagIds'))}, count: {len(draft_post.get('tagIds', []))}")
|
||||
|
||||
# Log a sample of the payload JSON to see exact structure (first 3000 chars)
|
||||
try:
|
||||
import json
|
||||
payload_json = json.dumps(blog_data, indent=2, ensure_ascii=False)
|
||||
logger.warning(f"📄 Payload JSON preview (first 3000 chars):\n{payload_json[:3000]}...")
|
||||
|
||||
# Also log a deep structure inspection of richContent.nodes (first few nodes)
|
||||
if 'richContent' in blog_data['draftPost']:
|
||||
nodes = blog_data['draftPost']['richContent'].get('nodes', [])
|
||||
if nodes:
|
||||
logger.warning(f"🔍 Inspecting first 5 richContent.nodes:")
|
||||
for i, node in enumerate(nodes[:5]):
|
||||
logger.warning(f" Node {i+1}: type={node.get('type')}, keys={list(node.keys())}")
|
||||
# Check for any None values in node
|
||||
for key, value in node.items():
|
||||
if value is None:
|
||||
logger.error(f" ⚠️ Node {i+1}.{key} is None!")
|
||||
elif isinstance(value, dict):
|
||||
for k, v in value.items():
|
||||
if v is None:
|
||||
logger.error(f" ⚠️ Node {i+1}.{key}.{k} is None!")
|
||||
# Deep check: if it's a list-type node, inspect list items
|
||||
if node.get('type') in ['BULLETED_LIST', 'ORDERED_LIST']:
|
||||
list_items = node.get('nodes', [])
|
||||
if list_items:
|
||||
logger.warning(f" List has {len(list_items)} items, checking first LIST_ITEM:")
|
||||
first_item = list_items[0]
|
||||
logger.warning(f" LIST_ITEM keys: {list(first_item.keys())}")
|
||||
# Verify listItemData is NOT present (correct per Wix API spec)
|
||||
if 'listItemData' in first_item:
|
||||
logger.error(f" ❌ LIST_ITEM incorrectly has listItemData!")
|
||||
else:
|
||||
logger.debug(f" ✅ LIST_ITEM correctly has no listItemData")
|
||||
# Check nested PARAGRAPH nodes
|
||||
nested_nodes = first_item.get('nodes', [])
|
||||
if nested_nodes:
|
||||
logger.warning(f" LIST_ITEM has {len(nested_nodes)} nested nodes")
|
||||
for n_idx, n_node in enumerate(nested_nodes[:2]):
|
||||
logger.warning(f" Nested node {n_idx+1}: type={n_node.get('type')}, keys={list(n_node.keys())}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not serialize payload for logging: {e}")
|
||||
|
||||
# Note: All node validation is done by validate_ricos_content() which runs earlier
|
||||
# The recursive validation ensures all required data fields are present at any depth
|
||||
|
||||
# Final deep validation: Serialize and deserialize to catch any JSON-serialization issues
|
||||
# This will raise an error if there are any objects that can't be serialized
|
||||
try:
|
||||
import json
|
||||
test_json = json.dumps(blog_data, ensure_ascii=False)
|
||||
test_parsed = json.loads(test_json)
|
||||
logger.debug("✅ Payload JSON serialization test passed")
|
||||
except (TypeError, ValueError) as e:
|
||||
logger.error(f"❌ Payload JSON serialization failed: {e}")
|
||||
raise ValueError(f"Payload contains non-serializable data: {e}")
|
||||
|
||||
# Final check: Ensure documentStyle and metadata are valid objects (not None, not empty strings)
|
||||
rc = blog_data['draftPost']['richContent']
|
||||
if 'documentStyle' in rc:
|
||||
doc_style = rc['documentStyle']
|
||||
if doc_style is None or doc_style == "":
|
||||
logger.warning("⚠️ documentStyle is None or empty string, removing it")
|
||||
del rc['documentStyle']
|
||||
elif not isinstance(doc_style, dict):
|
||||
logger.warning(f"⚠️ documentStyle is not a dict ({type(doc_style)}), removing it")
|
||||
del rc['documentStyle']
|
||||
|
||||
if 'metadata' in rc:
|
||||
metadata = rc['metadata']
|
||||
if metadata is None or metadata == "":
|
||||
logger.warning("⚠️ metadata is None or empty string, removing it")
|
||||
del rc['metadata']
|
||||
elif not isinstance(metadata, dict):
|
||||
logger.warning(f"⚠️ metadata is not a dict ({type(metadata)}), removing it")
|
||||
del rc['metadata']
|
||||
|
||||
# Check for any None values in critical nested structures
|
||||
def check_none_in_dict(d, path=""):
|
||||
"""Recursively check for None values that shouldn't be there"""
|
||||
issues = []
|
||||
if isinstance(d, dict):
|
||||
for key, value in d.items():
|
||||
current_path = f"{path}.{key}" if path else key
|
||||
if value is None:
|
||||
# Some fields can legitimately be None, but most shouldn't
|
||||
if key not in ['decorations', 'nodeStyle', 'props']:
|
||||
issues.append(current_path)
|
||||
elif isinstance(value, dict):
|
||||
issues.extend(check_none_in_dict(value, current_path))
|
||||
elif isinstance(value, list):
|
||||
for i, item in enumerate(value):
|
||||
if item is None:
|
||||
issues.append(f"{current_path}[{i}]")
|
||||
elif isinstance(item, dict):
|
||||
issues.extend(check_none_in_dict(item, f"{current_path}[{i}]"))
|
||||
return issues
|
||||
|
||||
none_issues = check_none_in_dict(blog_data['draftPost']['richContent'])
|
||||
if none_issues:
|
||||
logger.error(f"❌ Found None values in richContent at: {none_issues[:10]}") # Limit to first 10
|
||||
# Remove None values from critical paths
|
||||
for issue_path in none_issues[:5]: # Fix first 5
|
||||
parts = issue_path.split('.')
|
||||
try:
|
||||
obj = blog_data['draftPost']['richContent']
|
||||
for part in parts[:-1]:
|
||||
if '[' in part:
|
||||
key, idx = part.split('[')
|
||||
idx = int(idx.rstrip(']'))
|
||||
obj = obj[key][idx]
|
||||
else:
|
||||
obj = obj[part]
|
||||
final_key = parts[-1]
|
||||
if '[' in final_key:
|
||||
key, idx = final_key.split('[')
|
||||
idx = int(idx.rstrip(']'))
|
||||
obj[key][idx] = {}
|
||||
else:
|
||||
obj[final_key] = {}
|
||||
logger.warning(f"Fixed None value at {issue_path}")
|
||||
except:
|
||||
pass
|
||||
|
||||
# Log the final payload structure one more time before sending
|
||||
logger.warning(f"📤 Final payload ready - draftPost keys: {list(blog_data['draftPost'].keys())}")
|
||||
logger.warning(f"📤 RichContent nodes count: {len(blog_data['draftPost']['richContent'].get('nodes', []))}")
|
||||
logger.warning(f"📤 RichContent has metadata: {bool(blog_data['draftPost']['richContent'].get('metadata'))}")
|
||||
logger.warning(f"📤 RichContent has documentStyle: {bool(blog_data['draftPost']['richContent'].get('documentStyle'))}")
|
||||
|
||||
# Try sending WITHOUT SEO data first to isolate the issue
|
||||
test_without_seo = False # Disabled - listItemData issue fixed
|
||||
if test_without_seo and 'seoData' in blog_data['draftPost']:
|
||||
logger.warning("🧪 TESTING WITHOUT SEO DATA to isolate issue...")
|
||||
# Clone the payload without SEO data
|
||||
test_payload_no_seo = {
|
||||
'draftPost': {
|
||||
'title': blog_data['draftPost']['title'],
|
||||
'memberId': blog_data['draftPost']['memberId'],
|
||||
'richContent': blog_data['draftPost']['richContent'],
|
||||
'excerpt': blog_data['draftPost'].get('excerpt', '')
|
||||
},
|
||||
'publish': False,
|
||||
'fieldsets': ['URL']
|
||||
}
|
||||
try:
|
||||
logger.warning("🧪 Attempting without SEO data...")
|
||||
test_result = blog_service.create_draft_post(access_token, test_payload_no_seo, extra_headers or None)
|
||||
logger.warning(f"✅ WITHOUT SEO DATA SUCCEEDED! Post ID: {test_result.get('draftPost', {}).get('id')}")
|
||||
logger.error("⚠️⚠️⚠️ ISSUE IS WITH SEO DATA STRUCTURE!")
|
||||
# If this succeeds, don't send the full payload, just return this result
|
||||
return test_result
|
||||
except Exception as e:
|
||||
logger.warning(f"❌ WITHOUT SEO DATA ALSO FAILED: {e}")
|
||||
logger.warning("⚠️ Issue is NOT with SEO data, continuing with full payload...")
|
||||
|
||||
# Try sending with minimal structure first to isolate the issue
|
||||
# Create a test payload with just required fields
|
||||
minimal_test = False # Set to True to test with minimal payload
|
||||
if minimal_test:
|
||||
logger.warning("🧪 TESTING WITH MINIMAL PAYLOAD (title + memberId + simple richContent)")
|
||||
test_payload = {
|
||||
'draftPost': {
|
||||
'title': blog_data['draftPost']['title'],
|
||||
'memberId': blog_data['draftPost']['memberId'],
|
||||
'richContent': {
|
||||
'nodes': [
|
||||
{
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'PARAGRAPH',
|
||||
'nodes': [
|
||||
{
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'textData': {
|
||||
'text': 'Test paragraph',
|
||||
'decorations': []
|
||||
}
|
||||
}
|
||||
],
|
||||
'paragraphData': {}
|
||||
}
|
||||
],
|
||||
'metadata': {'version': 1, 'id': str(uuid.uuid4())},
|
||||
'documentStyle': {}
|
||||
}
|
||||
},
|
||||
'publish': False,
|
||||
'fieldsets': ['URL']
|
||||
}
|
||||
logger.warning("🧪 Attempting minimal payload first...")
|
||||
try:
|
||||
test_result = blog_service.create_draft_post(access_token, test_payload, extra_headers or None)
|
||||
logger.warning(f"✅ MINIMAL PAYLOAD SUCCEEDED! Post ID: {test_result.get('draftPost', {}).get('id')}")
|
||||
logger.warning("⚠️ Issue is with complex content, not basic structure")
|
||||
except Exception as e:
|
||||
logger.error(f"❌ MINIMAL PAYLOAD ALSO FAILED: {e}")
|
||||
logger.error("⚠️ Issue is with basic structure or permissions")
|
||||
|
||||
result = blog_service.create_draft_post(access_token, blog_data, extra_headers or None)
|
||||
|
||||
# Log response
|
||||
draft_post = result.get('draftPost', {})
|
||||
logger.warning(f"✅ Blog post created successfully! Post ID: {draft_post.get('id', 'N/A')}")
|
||||
|
||||
# Check if SEO data was preserved in response
|
||||
if 'seoData' in draft_post:
|
||||
seo_response = draft_post['seoData']
|
||||
logger.warning(f"✅ SEO data confirmed in response: {len(seo_response.get('tags', []))} tags, {len(seo_response.get('settings', {}).get('keywords', []))} keywords")
|
||||
else:
|
||||
logger.warning("⚠️ No seoData in response - it may have been filtered out by Wix API")
|
||||
logger.warning(f"📋 Response fields: {list(draft_post.keys())}")
|
||||
|
||||
return result
|
||||
except requests.RequestException as e:
|
||||
logger.error(f"Failed to create blog post: {e}")
|
||||
if hasattr(e, 'response') and e.response is not None:
|
||||
logger.error(f"Response body: {e.response.text}")
|
||||
raise
|
||||
|
||||
@@ -1,58 +1,460 @@
|
||||
import re
|
||||
import uuid
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
def _make_text_node(text: str, decorations: List[str] = None) -> Dict[str, Any]:
    """Build a Ricos TEXT node with a fresh id and its own decorations list."""
    return {
        'id': str(uuid.uuid4()),
        'type': 'TEXT',
        'textData': {'text': text, 'decorations': list(decorations or [])}
    }


def _apply_decoration(nodes: List[Dict[str, Any]], decoration: str) -> List[Dict[str, Any]]:
    """Add *decoration* to every TEXT node in *nodes*; other node types are left untouched."""
    for node in nodes:
        if node['type'] == 'TEXT':
            decorations = list(node['textData'].get('decorations', []))
            if decoration not in decorations:
                decorations.append(decoration)
            node['textData']['decorations'] = decorations
    return nodes


def parse_markdown_inline(text: str) -> List[Dict[str, Any]]:
    """
    Parse inline markdown formatting (bold, italic, links, code) into Ricos nodes.

    Handles **bold**, *italic*, [links](url), `code`, and combinations. Bold,
    italic and link labels are parsed recursively, so nested formatting inside
    them is preserved. A marker without a matching closer is kept as literal
    text and does not split the surrounding plain text into separate nodes.

    Args:
        text: Inline markdown text (a single paragraph, heading or list item).

    Returns:
        A non-empty list of nodes. Each is either a TEXT node
        ({'id', 'type', 'textData': {'text', 'decorations'}}) or a LINK node
        wrapping the parsed label nodes. Empty input yields one empty TEXT node.
    """
    if not text:
        return [_make_text_node('')]

    nodes: List[Dict[str, Any]] = []
    plain: List[str] = []  # literal characters collected since the last emitted node
    length = len(text)
    i = 0

    while i < length:
        parsed = None  # nodes produced by a fully matched construct at position i
        next_i = i

        # Bold must be tested before the single '*' italic check.
        if text.startswith('**', i):
            close = text.find('**', i + 2)
            if close != -1:
                parsed = _apply_decoration(parse_markdown_inline(text[i + 2:close]), 'BOLD')
                next_i = close + 2

        elif text[i] == '[':
            label_end = text.find(']', i)
            if label_end != -1 and text.startswith('(', label_end + 1):
                url_start = label_end + 2
                url_end = text.find(')', url_start)
                if url_end != -1:
                    parsed = [{
                        'id': str(uuid.uuid4()),
                        'type': 'LINK',
                        'nodes': parse_markdown_inline(text[i + 1:label_end]),
                        'linkData': {
                            'link': {
                                'url': text[url_start:url_end],
                                'target': '_blank'
                            }
                        }
                    }]
                    next_i = url_end + 1

        elif text[i] == '`':
            close = text.find('`', i + 1)
            if close != -1:
                # Code spans are taken verbatim: no nested parsing.
                parsed = [_make_text_node(text[i + 1:close], ['CODE'])]
                next_i = close + 1

        # A lone '*' (the '**' case was handled above) that does not trail another '*'.
        elif text[i] == '*' and (i == 0 or text[i - 1] != '*'):
            close = text.find('*', i + 1)
            # The closer must not be the first half of a '**'.
            if close != -1 and not text.startswith('*', close + 1):
                parsed = _apply_decoration(parse_markdown_inline(text[i + 1:close]), 'ITALIC')
                next_i = close + 1

        if parsed is None:
            # Regular character, or a marker with no matching closer.
            plain.append(text[i])
            i += 1
            continue

        if plain:
            nodes.append(_make_text_node(''.join(plain)))
            plain = []
        nodes.extend(parsed)
        i = next_i

    if plain:
        nodes.append(_make_text_node(''.join(plain)))

    return nodes
|
||||
|
||||
|
||||
_RICOS_ORDERED_ITEM_RE = re.compile(r'^\d+\.\s+')
_RICOS_IMAGE_RE = re.compile(r'!\[([^\]]*)\]\(([^)]+)\)')


def _bullet_item_text(stripped_line: str):
    """Return the item text if *stripped_line* is a bullet item ('- x', '* x', '-x', '*x'), else None."""
    if stripped_line.startswith(('- ', '* ')):
        return stripped_line[2:].strip()
    # Marker without a following space; a doubled marker ('--', '**') is not a bullet.
    if (len(stripped_line) > 1 and stripped_line[0] in '-*'
            and stripped_line[1] != stripped_line[0]):
        return stripped_line[1:].strip()
    return None


def _ricos_paragraph(text: str) -> Dict[str, Any]:
    """Wrap the inline-parsed *text* in a Ricos PARAGRAPH node."""
    return {
        'id': str(uuid.uuid4()),
        'type': 'PARAGRAPH',
        'nodes': parse_markdown_inline(text),
        'paragraphData': {}
    }


def _ricos_list(list_type: str, data_key: str, items: List[str]) -> Dict[str, Any]:
    """Build a BULLETED_LIST / ORDERED_LIST node whose LIST_ITEMs each wrap one PARAGRAPH."""
    # LIST_ITEM nodes carry no data field; their text must sit inside a PARAGRAPH.
    return {
        'id': str(uuid.uuid4()),
        'type': list_type,
        'nodes': [
            {
                'id': str(uuid.uuid4()),
                'type': 'LIST_ITEM',
                'nodes': [_ricos_paragraph(item_text)]
            }
            for item_text in items
        ],
        data_key: {}
    }


def convert_content_to_ricos(content: str, images: List[str] = None) -> Dict[str, Any]:
    """
    Convert markdown content into valid Ricos JSON format.

    Supports headings, blockquotes, bulleted and ordered lists (nested items
    are flattened into the parent list), standalone images, and paragraphs
    with inline bold, italic, links and code.

    Args:
        content: Markdown text. Empty or None content is replaced by a
            placeholder sentence.
        images: Accepted for interface compatibility; not used by this function.

    Returns:
        A Ricos DOCUMENT dict with 'type', 'id', 'nodes', 'metadata' and
        'documentStyle'. 'nodes' always contains at least one node.
    """
    if not content:
        content = "This is a post from ALwrity."

    nodes = []
    lines = content.split('\n')
    total = len(lines)

    i = 0
    while i < total:
        line = lines[i].strip()

        if not line:
            i += 1
            continue

        # Resolved up front so a malformed '![' line falls through to the
        # paragraph branch instead of being dropped.
        image_match = _RICOS_IMAGE_RE.match(line) if line.startswith('![') else None

        # Headings
        if line.startswith('#'):
            level = len(line) - len(line.lstrip('#'))
            nodes.append({
                'id': str(uuid.uuid4()),
                'type': 'HEADING',
                'nodes': parse_markdown_inline(line.lstrip('# ').strip()),
                'headingData': {'level': min(level, 6)}
            })
            i += 1

        # Blockquotes: consecutive '>' lines are joined into one quote
        elif line.startswith('>'):
            quote_lines = []
            while i < total and lines[i].strip().startswith('>'):
                quote_lines.append(lines[i].strip().lstrip('> ').strip())
                i += 1
            # TEXT nodes must be wrapped in a PARAGRAPH inside BLOCKQUOTE.
            nodes.append({
                'id': str(uuid.uuid4()),
                'type': 'BLOCKQUOTE',
                'nodes': [_ricos_paragraph(' '.join(quote_lines))],
                'blockquoteData': {}
            })

        # Unordered lists (indented items are matched after stripping, so
        # nested bullets are flattened into the same list)
        elif _bullet_item_text(line) is not None:
            items = []
            while i < total:
                item_text = _bullet_item_text(lines[i].strip())
                if item_text is None:
                    break
                items.append(item_text)
                i += 1
            nodes.append(_ricos_list('BULLETED_LIST', 'bulletedListData', items))

        # Ordered lists
        elif _RICOS_ORDERED_ITEM_RE.match(line):
            items = []
            while i < total and _RICOS_ORDERED_ITEM_RE.match(lines[i].strip()):
                items.append(_RICOS_ORDERED_ITEM_RE.sub('', lines[i].strip()))
                i += 1
            nodes.append(_ricos_list('ORDERED_LIST', 'orderedListData', items))

        # Images
        elif image_match:
            nodes.append({
                'id': str(uuid.uuid4()),
                'type': 'IMAGE',
                'nodes': [],
                'imageData': {
                    'image': {
                        'src': {'url': image_match.group(2)},
                        'altText': image_match.group(1)
                    },
                    'containerData': {
                        'alignment': 'CENTER',
                        'width': {'size': 'CONTENT'}
                    }
                }
            })
            i += 1

        # Regular paragraph: consecutive non-empty lines up to the next block element
        else:
            para_lines = [line]
            i += 1
            while i < total:
                next_line = lines[i].strip()
                if not next_line:
                    break
                if (next_line.startswith(('#', '- ', '* ', '>', '![')) or
                        _RICOS_ORDERED_ITEM_RE.match(next_line)):
                    break
                para_lines.append(next_line)
                i += 1
            nodes.append(_ricos_paragraph(' '.join(para_lines)))

    # Ensure at least one node exists
    if not nodes:
        nodes.append({
            'id': str(uuid.uuid4()),
            'type': 'PARAGRAPH',
            'nodes': [{
                'id': str(uuid.uuid4()),
                'type': 'TEXT',
                'textData': {
                    'text': content[:500] if content else "This is a post from ALwrity.",
                    'decorations': []
                }
            }],
            'paragraphData': {}
        })

    return {
        'type': 'DOCUMENT',
        'id': str(uuid.uuid4()),
        'nodes': nodes,
        'metadata': {'version': 1, 'id': str(uuid.uuid4())},
        'documentStyle': {
            'paragraph': {'decorations': [], 'nodeStyle': {}, 'lineHeight': '1.5'}
        }
    }
|
||||
|
||||
|
||||
@@ -7,6 +7,12 @@ class WixMediaService:
|
||||
self.base_url = base_url
|
||||
|
||||
def import_image(self, access_token: str, image_url: str, display_name: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Import external image to Wix Media Manager.
|
||||
|
||||
Official endpoint: https://www.wixapis.com/site-media/v1/files/import
|
||||
Reference: https://dev.wix.com/docs/rest/assets/media/media-manager/files/import-file
|
||||
"""
|
||||
headers = {
|
||||
'Authorization': f'Bearer {access_token}',
|
||||
'Content-Type': 'application/json',
|
||||
@@ -16,7 +22,9 @@ class WixMediaService:
|
||||
'mediaType': 'IMAGE',
|
||||
'displayName': display_name,
|
||||
}
|
||||
response = requests.post(f"{self.base_url}/media/v1/files/import", headers=headers, json=payload)
|
||||
# Correct endpoint per Wix API documentation
|
||||
endpoint = f"{self.base_url}/site-media/v1/files/import"
|
||||
response = requests.post(endpoint, headers=headers, json=payload)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
|
||||
277
backend/services/integrations/wix/ricos_converter.py
Normal file
277
backend/services/integrations/wix/ricos_converter.py
Normal file
@@ -0,0 +1,277 @@
|
||||
"""
|
||||
Ricos Document Converter for Wix
|
||||
|
||||
Converts markdown content to Wix Ricos JSON format using either:
|
||||
1. Wix's official Ricos Documents API (preferred)
|
||||
2. Custom markdown parser (fallback)
|
||||
"""
|
||||
|
||||
import json
|
||||
import requests
|
||||
import jwt
|
||||
from typing import Dict, Any, Optional
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def markdown_to_html(markdown_content: str) -> str:
    """
    Convert markdown content to HTML.

    Uses the `markdown` library when it can be imported; otherwise falls back
    to a small line-based converter that handles fenced code blocks, headings
    (h1-h6), bulleted and ordered lists, blockquotes and paragraphs, with
    inline bold, italic, links and code.

    Args:
        markdown_content: Markdown content to convert

    Returns:
        HTML string. In the fallback path, empty or whitespace-only input
        yields a single placeholder paragraph.
    """
    try:
        # Try using markdown library if available
        import markdown
        return markdown.markdown(markdown_content, extensions=['fenced_code', 'tables'])
    except ImportError:
        # Fallback: Simple regex-based conversion for basic markdown
        logger.warning("markdown library not available, using basic markdown-to-HTML conversion")

    import re
    from html import escape

    if not markdown_content or not markdown_content.strip():
        return "<p>This is a post from ALwrity.</p>"

    def render_inline(fragment: str) -> str:
        """Apply bold, italic, link and inline-code markup to one line of text."""
        fragment = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', fragment)
        fragment = re.sub(r'\*(.*?)\*', r'<em>\1</em>', fragment)
        fragment = re.sub(r'\[([^\]]+)\]\(([^\)]+)\)', r'<a href="\2">\1</a>', fragment)
        fragment = re.sub(r'`([^`]+)`', r'<code>\1</code>', fragment)
        return fragment

    def render_code_block(code_lines) -> str:
        """Join raw fence lines and escape them so code is shown, not interpreted as HTML."""
        # Joined outside the f-string: a backslash inside an f-string
        # expression is a SyntaxError before Python 3.12.
        code_text = escape("\n".join(code_lines), quote=False)
        return f'<pre><code>{code_text}</code></pre>'

    result = []
    open_list = None   # 'ul', 'ol', or None when no list is open
    code_lines = None  # list of raw lines while inside a ``` fence, else None

    for raw_line in markdown_content.split('\n'):
        line = raw_line.strip()

        # Handle code blocks first
        if line.startswith('```'):
            if code_lines is None:
                # A fence ends any open list so <pre> is not nested inside it.
                if open_list:
                    result.append(f'</{open_list}>')
                    open_list = None
                code_lines = []
            else:
                result.append(render_code_block(code_lines))
                code_lines = None
            continue

        if code_lines is not None:
            code_lines.append(raw_line)
            continue

        is_bullet = line.startswith(('- ', '* '))
        ordered_match = re.match(r'^\d+\.\s+(.*)', line)

        # Close any open list when the line is not a list item (blank lines included)
        if open_list and not (is_bullet or ordered_match):
            result.append(f'</{open_list}>')
            open_list = None

        if not line:
            continue

        # Headers: level is the number of leading '#', capped at 6
        if line.startswith('#'):
            level = min(len(line) - len(line.lstrip('#')), 6)
            heading_text = render_inline(line.lstrip('#').strip())
            result.append(f'<h{level}>{heading_text}</h{level}>')
        # Lists
        elif is_bullet or ordered_match:
            wanted = 'ul' if is_bullet else 'ol'
            if open_list != wanted:
                if open_list:
                    result.append(f'</{open_list}>')
                result.append(f'<{wanted}>')
                open_list = wanted
            item_text = line[2:].strip() if is_bullet else ordered_match.group(1)
            result.append(f'<li>{render_inline(item_text)}</li>')
        # Blockquotes
        elif line.startswith('>'):
            result.append(f'<blockquote><p>{render_inline(line[1:].strip())}</p></blockquote>')
        # Regular paragraphs
        else:
            result.append(f'<p>{render_inline(line)}</p>')

    # An unterminated fence still emits what was collected instead of losing it
    if code_lines is not None:
        result.append(render_code_block(code_lines))

    # Close any open lists
    if open_list:
        result.append(f'</{open_list}>')

    # Ensure we have at least one paragraph
    if not result:
        result.append('<p>This is a post from ALwrity.</p>')

    html = '\n'.join(result)

    logger.debug(f"Converted {len(markdown_content)} chars markdown to {len(html)} chars HTML")
    return html
|
||||
|
||||
|
||||
def _extract_meta_site_id(access_token: str) -> Optional[str]:
    """Return the 'metaSiteId' claim of an 'OauthNG.JWS.' access token, or None if it is not present."""
    token_str = str(access_token)
    if not token_str or not token_str.startswith('OauthNG.JWS.'):
        return None

    # Signature is not verified: the claim is only read to fill a request header.
    claims = jwt.decode(token_str[12:], options={"verify_signature": False, "verify_aud": False})
    data_payload = claims.get('data', {})
    if isinstance(data_payload, str):
        try:
            data_payload = json.loads(data_payload)
        except ValueError:
            # 'data' is a plain string, not embedded JSON: nothing to read from it.
            return None
    if not isinstance(data_payload, dict):
        return None

    instance_data = data_payload.get('instance', {})
    if not isinstance(instance_data, dict):
        return None

    meta_site_id = instance_data.get('metaSiteId')
    if isinstance(meta_site_id, str) and meta_site_id:
        return meta_site_id
    return None


def convert_via_wix_api(markdown_content: str, access_token: str, base_url: str = 'https://www.wixapis.com') -> Dict[str, Any]:
    """
    Convert markdown to Ricos using Wix's official Ricos Documents API.

    The markdown is first converted to HTML with markdown_to_html() and the
    HTML is posted to the convert/to-ricos endpoint.

    Reference: https://dev.wix.com/docs/api-reference/assets/rich-content/ricos-documents/convert-to-ricos-document

    Args:
        markdown_content: Markdown content to convert (will be converted to HTML)
        access_token: Wix access token
        base_url: Wix API base URL (default: https://www.wixapis.com)

    Returns:
        Ricos JSON document

    Raises:
        ValueError: If the markdown is empty or whitespace-only, or the API
            response does not contain a Ricos document.
        requests.RequestException: If the HTTP request fails or returns an
            error status (logged, then re-raised).
    """
    # Validate content is not empty
    markdown_stripped = markdown_content.strip() if markdown_content else ""
    if not markdown_stripped:
        logger.error("Markdown content is empty or whitespace-only")
        raise ValueError("Content cannot be empty for Wix Ricos API conversion")

    logger.debug(f"Converting markdown to HTML: input_length={len(markdown_stripped)} chars")

    # Convert markdown to HTML before sending it to the Wix API
    html_content = markdown_to_html(markdown_stripped)

    # Validate HTML content is not empty - CRITICAL for Wix API
    html_stripped = html_content.strip() if html_content else ""
    if not html_stripped:
        logger.error(f"HTML conversion produced empty content! Markdown length: {len(markdown_stripped)}")
        logger.error(f"Markdown sample: {markdown_stripped[:500]}...")
        logger.error(f"HTML result: '{html_content}' (type: {type(html_content)})")
        # Fallback: use a minimal valid HTML if conversion failed
        html_content = "<p>Content from ALwrity blog writer.</p>"
        logger.warning("Using fallback HTML due to empty conversion result")
    else:
        html_content = html_stripped

    logger.debug(f"✅ Converted markdown to HTML: {len(html_content)} chars, preview: {html_content[:200]}...")

    headers = {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json'
    }

    # Add wix-site-id if available from token (best effort: failure only loses the header)
    try:
        meta_site_id = _extract_meta_site_id(access_token)
        if meta_site_id:
            headers['wix-site-id'] = meta_site_id
    except Exception as e:
        logger.debug(f"Could not extract site ID from token: {e}")

    # Call Wix Ricos Documents API: Convert to Ricos Document
    # Official endpoint: https://www.wixapis.com/ricos/v1/ricos-document/convert/to-ricos
    # Reference: https://dev.wix.com/docs/rest/assets/rich-content/ricos-documents/convert-to-ricos-document
    endpoint = f"{base_url}/ricos/v1/ricos-document/convert/to-ricos"

    # Payload structure per Wix API: html/markdown/plainText field at root, optional plugins
    payload = {
        'html': html_content,  # Direct field, not nested in options
        'plugins': []  # Optional: empty array uses default plugins
    }

    logger.warning(f"📤 Sending to Wix Ricos API: html_length={len(payload['html'])}, plugins_count={len(payload['plugins'])}")
    logger.debug(f"HTML preview (first 300 chars): {html_content[:300]}...")

    try:
        # Log the exact payload being sent (for debugging)
        logger.warning(f"📤 Wix Ricos API Request:")
        logger.warning(f"  Endpoint: {endpoint}")
        logger.warning(f"  Payload keys: {list(payload.keys())}")
        logger.warning(f"  HTML length: {len(payload.get('html', ''))}")
        logger.warning(f"  Plugins: {payload.get('plugins', [])}")
        logger.debug(f"  Full payload (first 500 chars of HTML): {str(payload)[:500]}")

        response = requests.post(
            endpoint,
            headers=headers,
            json=payload,
            timeout=30
        )
        response.raise_for_status()
        result = response.json()

        # Extract the ricos document from response
        # Response structure: { "document": { "nodes": [...], "metadata": {...}, "documentStyle": {...} } }
        ricos_document = result.get('document')
        if not ricos_document:
            # Fallback: try other possible response fields
            ricos_document = result.get('ricosDocument') or result.get('ricos') or result

        if not ricos_document:
            logger.error(f"Unexpected response structure from Wix API: {list(result.keys())}")
            logger.error(f"Response: {result}")
            raise ValueError("Wix API did not return a valid Ricos document")

        logger.warning(f"✅ Successfully converted HTML to Ricos via Wix API: {len(ricos_document.get('nodes', []))} nodes")
        return ricos_document

    except requests.RequestException as e:
        logger.error(f"❌ Wix Ricos API conversion failed: {e}")
        if hasattr(e, 'response') and e.response is not None:
            logger.error(f"  Response status: {e.response.status_code}")
            logger.error(f"  Response headers: {dict(e.response.headers)}")
            try:
                error_body = e.response.json()
                logger.error(f"  Response JSON: {error_body}")
            except ValueError:
                # Body is not JSON; log it verbatim instead.
                logger.error(f"  Response text: {e.response.text}")
        logger.error(f"  Request payload was: {json.dumps(payload, indent=2)[:1000]}...")  # First 1000 chars
        raise
|
||||
|
||||
300
backend/services/integrations/wix/seo.py
Normal file
300
backend/services/integrations/wix/seo.py
Normal file
@@ -0,0 +1,300 @@
|
||||
"""
|
||||
SEO Data Builder for Wix Blog Posts
|
||||
|
||||
Builds Wix-compatible seoData objects from ALwrity SEO metadata.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, Optional
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def build_seo_data(
    seo_metadata: Dict[str, Any],
    default_title: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """
    Build a Wix seoData object from our SEO metadata format.

    Args:
        seo_metadata: SEO metadata dict. Keys read by this function:
            - seo_title: SEO optimized title
            - meta_description: Meta description
            - focus_keyword: Main keyword (emitted first, with isMain=True)
            - blog_tags: List of tag strings (additional keywords)
            - social_hashtags: List of hashtags (leading '#' is removed)
            - open_graph: dict with optional title / description / image / url
            - twitter_card: dict with optional title / description / image / card
            - canonical_url: Canonical URL
            ``None`` is treated as an empty dict.
        default_title: Fallback title if seo_title is missing or blank.

    Returns:
        A dict with a 'tags' list and, only when at least one keyword was
        found, 'settings': {'keywords': [...]}. Returns None when there are
        neither keywords nor tags. Because og:type and twitter:card are
        always emitted while open_graph / twitter_card are dicts (both
        default to {} when absent), None is only returned when both of those
        keys hold non-dict values and nothing else produced output.
    """
    if seo_metadata is None:
        seo_metadata = {}

    keywords = _collect_keywords(seo_metadata)
    if not keywords:
        logger.warning("No keywords found in SEO metadata, omitting settings from seoData")

    tags = []

    # Blank or whitespace-only values are treated as missing everywhere, so
    # no tag with empty content/href/children is ever produced.
    meta_description = _text_or_none(seo_metadata.get('meta_description'))
    if meta_description:
        tags.append(_meta_tag('name', 'description', meta_description))

    # Title tags use 'children', not 'props.content'.
    seo_title = _text_or_none(seo_metadata.get('seo_title')) or _text_or_none(default_title)
    if seo_title:
        tags.append({
            'type': 'title',
            'children': seo_title,
            'custom': True,
            'disabled': False,
        })

    canonical_url = _text_or_none(seo_metadata.get('canonical_url'))

    # Open Graph tags; title/description fall back to the SEO values.
    open_graph = seo_metadata.get('open_graph', {})
    if isinstance(open_graph, dict):
        og_title = _text_or_none(open_graph.get('title')) or seo_title
        if og_title:
            tags.append(_meta_tag('property', 'og:title', og_title))

        og_description = _text_or_none(open_graph.get('description')) or meta_description
        if og_description:
            tags.append(_meta_tag('property', 'og:description', og_description))

        # Skip base64 / non-URL images for OG tags (Wix needs URLs).
        og_image = open_graph.get('image')
        if _is_http_url(og_image):
            tags.append(_meta_tag('property', 'og:image', og_image))

        tags.append(_meta_tag('property', 'og:type', 'article'))

        og_url = _text_or_none(open_graph.get('url')) or canonical_url
        if og_url:
            tags.append(_meta_tag('property', 'og:url', og_url))

    # Twitter Card tags, with the same fallbacks as Open Graph.
    twitter_card = seo_metadata.get('twitter_card', {})
    if isinstance(twitter_card, dict):
        twitter_title = _text_or_none(twitter_card.get('title')) or seo_title
        if twitter_title:
            tags.append(_meta_tag('name', 'twitter:title', twitter_title))

        twitter_description = _text_or_none(twitter_card.get('description')) or meta_description
        if twitter_description:
            tags.append(_meta_tag('name', 'twitter:description', twitter_description))

        twitter_image = twitter_card.get('image')
        if _is_http_url(twitter_image):
            tags.append(_meta_tag('name', 'twitter:image', twitter_image))

        # `or` (not a .get default) so an explicit None/'' card does not end
        # up as the literal content 'None' or an empty tag.
        card_type = _text_or_none(twitter_card.get('card')) or 'summary_large_image'
        tags.append(_meta_tag('name', 'twitter:card', card_type))

    # Canonical URL as link tag
    if canonical_url:
        tags.append({
            'type': 'link',
            'props': {
                'rel': 'canonical',
                'href': canonical_url,
            },
            'custom': True,
            'disabled': False,
        })

    logger.debug(f"Built SEO data: {len(tags)} tags, {len(keywords)} keywords")

    # Only return seoData if we have at least keywords or tags
    if not keywords and not tags:
        return None

    # Omit empty structures instead of sending them: 'settings' is only
    # included when there is at least one keyword.
    seo_data: Dict[str, Any] = {}
    if keywords:
        seo_data['settings'] = {'keywords': keywords}
    seo_data['tags'] = tags
    return seo_data


def _text_or_none(value: Any) -> Optional[str]:
    """Return str(value) when value is truthy and not blank, otherwise None."""
    if not value:
        return None
    text = str(value)
    return text if text.strip() else None


def _is_http_url(value: Any) -> bool:
    """Return True when value is a string starting with http:// or https://."""
    return isinstance(value, str) and value.startswith(('http://', 'https://'))


def _meta_tag(attr: str, key: str, content: Any) -> Dict[str, Any]:
    """Build one custom, enabled meta tag; attr is 'name' or 'property'."""
    return {
        'type': 'meta',
        'props': {
            attr: key,
            'content': str(content),
        },
        'custom': True,
        'disabled': False,
    }


def _collect_keywords(seo_metadata: Dict[str, Any]) -> list:
    """Return de-duplicated keyword entries, focus keyword first and flagged isMain."""
    keywords = []
    seen = set()

    def add(raw: Any, is_main: bool) -> None:
        if raw is None:
            return
        term = str(raw).strip()
        # Case-insensitive de-duplication; the first spelling seen wins.
        key = term.casefold()
        if term and key not in seen:
            seen.add(key)
            keywords.append({'term': term, 'isMain': is_main})

    focus_keyword = seo_metadata.get('focus_keyword')
    if focus_keyword:
        add(focus_keyword, True)

    blog_tags = seo_metadata.get('blog_tags', [])
    if isinstance(blog_tags, list):
        for tag in blog_tags:
            add(tag, False)

    social_hashtags = seo_metadata.get('social_hashtags', [])
    if isinstance(social_hashtags, list):
        for hashtag in social_hashtags:
            if hashtag is not None:
                # Remove # if present
                add(str(hashtag).strip().lstrip('#'), False)

    return keywords
|
||||
|
||||
Reference in New Issue
Block a user