Research component integration, Copilotkit implementation, SEO copilotkit implementation, Wix SEO metadata complete, Wix SEO metadata review

This commit is contained in:
ajaysi
2025-11-03 16:01:44 +05:30
parent de4328175d
commit e69107b07c
94 changed files with 9748 additions and 1565 deletions

View File

@@ -2,4 +2,14 @@
Wix integration modular services package.
"""
from services.integrations.wix.seo import build_seo_data
from services.integrations.wix.ricos_converter import markdown_to_html, convert_via_wix_api
from services.integrations.wix.blog_publisher import create_blog_post
# Public API of this package: exactly the names imported above from the
# seo, ricos_converter and blog_publisher submodules.
__all__ = [
'build_seo_data',
'markdown_to_html',
'convert_via_wix_api',
'create_blog_post',
]

View File

@@ -20,6 +20,40 @@ class WixBlogService:
return h
def create_draft_post(self, access_token: str, payload: Dict[str, Any], extra_headers: Optional[Dict[str, str]] = None, timeout: float = 30.0) -> Dict[str, Any]:
    """
    POST a draft post payload to the Wix Blog API and return the decoded response.

    Args:
        access_token: Token passed to ``self.headers`` to build the request headers.
        payload: JSON body sent to ``/blog/v3/draft-posts``.
        extra_headers: Optional additional headers, forwarded to ``self.headers``.
        timeout: Seconds ``requests`` waits for the server before giving up.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.HTTPError: The API answered with an error status.
        requests.Timeout: No response arrived within ``timeout`` seconds.
    """
    import json

    endpoint = f"{self.base_url}/blog/v3/draft-posts"

    # Diagnostics only. Every container is type-checked before it is sliced or
    # iterated, and the whole section is guarded, so an unexpected payload shape
    # can never abort the request from inside the logging code.
    try:
        logger.warning("📤 Sending to Wix Blog API:")
        logger.warning(f" Endpoint: {endpoint}")
        logger.warning(f" Payload top-level keys: {list(payload.keys())}")
        draft = payload.get('draftPost')
        if isinstance(draft, dict):
            logger.warning(f" draftPost keys: {list(draft.keys())}")
            if 'richContent' in draft:
                rich = draft['richContent']
                logger.warning(f" richContent keys: {list(rich.keys()) if isinstance(rich, dict) else 'N/A'}")
                if isinstance(rich, dict) and 'nodes' in rich:
                    nodes = rich['nodes']
                    logger.warning(f" richContent.nodes count: {len(nodes) if isinstance(nodes, list) else 'N/A'}")
                    top_nodes = nodes[:10] if isinstance(nodes, list) else []
                    # Inspect the first LIST_ITEM among the leading nodes, if any
                    for i, node in enumerate(top_nodes):
                        if not (isinstance(node, dict) and node.get('type') == 'LIST_ITEM'):
                            continue
                        logger.warning(f" Found LIST_ITEM at index {i}:")
                        logger.warning(f" Keys: {list(node.keys())}")
                        logger.warning(f" Has listItemData: {'listItemData' in node}")
                        if 'listItemData' in node:
                            logger.warning(f" listItemData type: {type(node['listItemData'])}, value: {node['listItemData']}")
                        if 'nodes' in node:
                            nested = node['nodes']
                            logger.warning(f" Nested nodes count: {len(nested) if isinstance(nested, list) else 'N/A'}")
                            for j, n_node in enumerate(nested[:3] if isinstance(nested, list) else []):
                                if isinstance(n_node, dict):
                                    logger.warning(f" Nested node {j}: type={n_node.get('type')}, keys={list(n_node.keys())}")
                                    if n_node.get('type') == 'PARAGRAPH' and 'paragraphData' in n_node:
                                        logger.warning(f" paragraphData type: {type(n_node['paragraphData'])}, value: {n_node['paragraphData']}")
                        break  # Only inspect first LIST_ITEM
        logger.warning(f" Full Payload JSON (first 8000 chars):\n{json.dumps(payload, indent=2, ensure_ascii=False)[:8000]}...")
    except Exception as diag_err:
        logger.debug(f"Skipping payload diagnostics: {diag_err}")

    # Without a timeout, requests would block indefinitely on a stalled connection.
    response = requests.post(endpoint, headers=self.headers(access_token, extra_headers), json=payload, timeout=timeout)
    response.raise_for_status()
    return response.json()

View File

@@ -0,0 +1,716 @@
"""
Blog Post Publisher for Wix
Handles blog post creation, validation, and publishing to Wix.
"""
import json
import uuid
import requests
import jwt
from typing import Dict, Any, Optional, List
from loguru import logger
from services.integrations.wix.blog import WixBlogService
from services.integrations.wix.content import convert_content_to_ricos
from services.integrations.wix.ricos_converter import convert_via_wix_api
from services.integrations.wix.seo import build_seo_data
# Per-node data fields that are scrubbed of None-valued keys (Wix API rejects None).
_RICOS_DATA_FIELDS = (
    'headingData', 'paragraphData', 'blockquoteData', 'bulletedListData',
    'orderedListData', 'textData', 'linkData', 'imageData',
)

# Node types for which a 'nodes' list is enforced and recursed into.
# Kept as a tuple (not a set) so an unhashable 'type' value cannot raise.
_RICOS_CONTAINER_TYPES = (
    'HEADING', 'PARAGRAPH', 'BLOCKQUOTE', 'LIST_ITEM', 'LINK',
    'BULLETED_LIST', 'ORDERED_LIST',
)


def _validate_ricos_node(node: Any, path: str = "root") -> None:
    """
    Validate and repair a single Ricos node in place, then recurse into its children.

    Non-dict nodes are reported and left untouched. ``path`` is only used to
    make log messages locatable.
    """
    if not isinstance(node, dict):
        logger.error(f"{path}: Node is not a dict: {type(node)}")
        return

    # Ensure type and id exist
    if 'type' not in node:
        logger.error(f"{path}: Missing 'type' field - REQUIRED")
        node['type'] = 'PARAGRAPH'  # Default fallback
    if 'id' not in node:
        node['id'] = str(uuid.uuid4())
        logger.debug(f"{path}: Added missing 'id'")
    node_type = node.get('type')

    # listItemData is not part of the Wix API schema for LIST_ITEM nodes
    if node_type == 'LIST_ITEM' and 'listItemData' in node:
        logger.debug(f"{path}: Removing incorrect listItemData field from LIST_ITEM")
        del node['listItemData']

    # Strip None values FIRST, so that a required property that was present
    # but None (e.g. headingData.level) is re-defaulted by the checks below
    # instead of being silently deleted after them.
    for data_field in _RICOS_DATA_FIELDS:
        data_value = node.get(data_field)
        if isinstance(data_value, dict):
            none_keys = [k for k, v in data_value.items() if v is None]
            if none_keys:
                logger.debug(f"{path} ({node_type}): Removing None values from {data_field}: {none_keys}")
                for key in none_keys:
                    del data_value[key]

    # Data fields such as paragraphData are optional and are left out when
    # absent; only node types with required properties are repaired here.
    if node_type == 'HEADING':
        heading_data = node.get('headingData')
        if not isinstance(heading_data, dict):
            logger.warning(f"{path} (HEADING): Missing headingData, adding default level 1")
            node['headingData'] = {'level': 1}
        elif 'level' not in heading_data:
            logger.warning(f"{path} (HEADING): Missing level in headingData, adding default")
            heading_data['level'] = 1

    if node_type == 'TEXT' and not isinstance(node.get('textData'), dict):
        logger.error(f"{path} (TEXT): Missing/invalid textData - node will be problematic")
        node['textData'] = {'text': '', 'decorations': []}

    # LINK and IMAGE cannot be given a meaningful default; report only
    if node_type == 'LINK' and not isinstance(node.get('linkData'), dict):
        logger.error(f"{path} (LINK): Missing/invalid linkData - node will be problematic")
    if node_type == 'IMAGE' and not isinstance(node.get('imageData'), dict):
        logger.error(f"{path} (IMAGE): Missing/invalid imageData - node will be problematic")

    if node_type in _RICOS_CONTAINER_TYPES:
        if 'nodes' not in node:
            logger.warning(f"{path} ({node_type}): Missing 'nodes' field, adding empty array")
            node['nodes'] = []
        elif not isinstance(node['nodes'], list):
            logger.error(f"{path} ({node_type}): Invalid 'nodes' field (not a list), fixing")
            node['nodes'] = []
        for child_idx, child in enumerate(node['nodes']):
            _validate_ricos_node(child, f"{path}.nodes[{child_idx}]")


def validate_ricos_content(ricos_content: Dict[str, Any]) -> Dict[str, Any]:
    """
    Validate and normalize Ricos document structure.

    The document is repaired in place: missing ``type``/``id``/``nodes``/
    ``metadata``/``documentStyle`` are filled with defaults and every node is
    checked recursively.

    Args:
        ricos_content: Ricos document dict

    Returns:
        The same dict object, validated and normalized

    Raises:
        ValueError: If the document is empty or not a dict, or if ``nodes``
            is present but not a list.
    """
    if not ricos_content or not isinstance(ricos_content, dict):
        logger.error("Invalid Ricos content - empty or not a dict")
        raise ValueError("Failed to convert content to valid Ricos format")

    if 'type' not in ricos_content:
        ricos_content['type'] = 'DOCUMENT'
        logger.debug("Added missing richContent type 'DOCUMENT'")
    elif ricos_content['type'] != 'DOCUMENT':
        logger.warning(f"richContent type expected 'DOCUMENT', got {ricos_content.get('type')}, correcting")
        ricos_content['type'] = 'DOCUMENT'

    if not isinstance(ricos_content.get('id'), str):
        ricos_content['id'] = str(uuid.uuid4())
        logger.debug("Added missing richContent id")

    # Per Wix API: richContent must be a RichContent object with nodes array
    if 'nodes' not in ricos_content:
        logger.warning("Ricos document missing 'nodes' field, adding empty nodes array")
        ricos_content['nodes'] = []
    # Check the type before taking len(), so a bad value yields ValueError
    if not isinstance(ricos_content['nodes'], list):
        raise ValueError(f"richContent.nodes must be a list, got {type(ricos_content['nodes'])}")
    logger.debug(f"Ricos document structure: nodes={len(ricos_content['nodes'])}")

    # Validate all top-level nodes recursively
    for idx, node in enumerate(ricos_content['nodes']):
        _validate_ricos_node(node, f"nodes[{idx}]")

    # Ensure metadata exists and carries a version and an id
    if not isinstance(ricos_content.get('metadata'), dict):
        ricos_content['metadata'] = {'version': 1, 'id': str(uuid.uuid4())}
        logger.debug("Added default metadata to richContent")
    else:
        ricos_content['metadata'].setdefault('version', 1)
        ricos_content['metadata'].setdefault('id', str(uuid.uuid4()))

    # Ensure documentStyle exists and is a dict
    if not isinstance(ricos_content.get('documentStyle'), dict):
        ricos_content['documentStyle'] = {
            'paragraph': {
                'decorations': [],
                'nodeStyle': {},
                'lineHeight': '1.5'
            }
        }
        logger.debug("Added default documentStyle to richContent")

    logger.debug(f"✅ Validated richContent: {len(ricos_content['nodes'])} nodes, has_metadata={bool(ricos_content.get('metadata'))}, has_documentStyle={bool(ricos_content.get('documentStyle'))}")
    return ricos_content
def validate_payload_no_none(obj, path=""):
    """
    Recursively validate that no None values exist in the payload.

    Dicts, lists and tuples are descended into (tuples serialise to JSON
    arrays exactly like lists, so a None inside one must be caught too).

    Args:
        obj: Payload (or payload fragment) to check.
        path: Location prefix used in the error message, e.g. ``"blog_data"``.

    Raises:
        ValueError: At the first None encountered; the message carries the
            dotted/indexed path to it.
    """
    if obj is None:
        raise ValueError(f"Found None value at path: {path}")
    if isinstance(obj, dict):
        for key, value in obj.items():
            validate_payload_no_none(value, f"{path}.{key}" if path else key)
    elif isinstance(obj, (list, tuple)):
        for idx, item in enumerate(obj):
            # Both branches of the original conditional produced path + "[idx]"
            validate_payload_no_none(item, f"{path}[{idx}]")
_WIX_OAUTH_TOKEN_PREFIX = 'OauthNG.JWS.'


def _extract_site_headers(access_token: str) -> Dict[str, str]:
    """Best-effort: return {'wix-site-id': <metaSiteId>} read from the token, else {}."""
    try:
        token_str = str(access_token)
        if not token_str.startswith(_WIX_OAUTH_TOKEN_PREFIX):
            return {}
        # Signature is not verified here; the claims are only read to find the site ID.
        claims = jwt.decode(
            token_str[len(_WIX_OAUTH_TOKEN_PREFIX):],
            options={"verify_signature": False, "verify_aud": False},
        )
        data_payload = claims.get('data', {})
        if isinstance(data_payload, str):
            # The 'data' claim may itself be a JSON-encoded string
            data_payload = json.loads(data_payload)
        meta_site_id = data_payload.get('instance', {}).get('metaSiteId')
        if isinstance(meta_site_id, str) and meta_site_id:
            return {'wix-site-id': meta_site_id}
    except Exception as e:
        logger.debug(f"Could not extract site ID from token: {e}")
    return {}


def _resolve_ids(values, lookup_func, access_token: str, extra_headers: Dict[str, str]):
    """
    Turn a list of category/tag IDs-or-names into IDs.

    The list is used as-is when its first item looks like a UUID (contains a
    hyphen and is longer than 30 characters); otherwise ``lookup_func`` is
    asked to resolve the names. Returns None when nothing can be resolved.
    """
    if not values or not isinstance(values, list):
        return None
    first_item = str(values[0])
    if '-' in first_item and len(first_item) > 30:
        return values
    if lookup_func:
        return lookup_func(access_token, values, extra_headers or None)
    return None


def _log_payload_diagnostics(blog_data: Dict[str, Any]) -> None:
    """Log the shape of the outgoing payload. Purely diagnostic; never raises."""
    try:
        draft_post = blog_data['draftPost']
        logger.warning(f"📋 Draft post fields: {list(draft_post.keys())}")

        if 'seoData' in draft_post:
            seo = draft_post['seoData']
            tags = seo.get('tags') or []
            keywords = seo.get('settings', {}).get('keywords') or []
            logger.warning(f"📊 SEO data in payload: {len(tags)} tags, {len(keywords)} keywords")
            if tags:
                logger.warning("📋 SEO Tags sample:")
                for i, tag in enumerate(tags[:2]):
                    logger.warning(f" Tag {i+1}: type={tag.get('type')}, custom={tag.get('custom')}, disabled={tag.get('disabled')}")
                if len(tags) > 2:
                    logger.warning(f" ... and {len(tags) - 2} more tags")
            if keywords:
                logger.warning(f"🔑 Keywords: {[k.get('term') for k in keywords[:3]]}")
            seo_json = json.dumps(seo, indent=2, ensure_ascii=False)
            logger.warning(f"📄 FULL seoData JSON:\n{seo_json[:2000]}...")
            logger.warning(f" - seoData keys: {list(seo.keys())}")
        else:
            logger.warning("⚠️ No seoData in draft post payload!")

        logger.warning(f"📦 Payload: title='{draft_post.get('title')}', has_seoData={'seoData' in draft_post}, has_richContent={'richContent' in draft_post}")
        for id_field in ('categoryIds', 'tagIds'):
            if id_field in draft_post:
                logger.warning(f" - {id_field} type: {type(draft_post.get(id_field))}, count: {len(draft_post.get(id_field, []))}")

        rc = draft_post.get('richContent')
        logger.warning(f" - richContent type: {type(rc)}")
        nodes = []
        if isinstance(rc, dict):
            nodes = rc.get('nodes') or []
            logger.warning(f" - richContent keys: {list(rc.keys())}")
            logger.warning(f" - richContent.nodes type: {type(rc.get('nodes'))}, count: {len(nodes)}")
            logger.warning(f" - richContent.metadata type: {type(rc.get('metadata'))}")
            logger.warning(f" - richContent.documentStyle type: {type(rc.get('documentStyle'))}")

        payload_json = json.dumps(blog_data, indent=2, ensure_ascii=False)
        logger.warning(f"📄 Payload JSON preview (first 3000 chars):\n{payload_json[:3000]}...")

        if nodes:
            logger.warning("🔍 Inspecting first 5 richContent.nodes:")
        for i, node in enumerate(nodes[:5]):
            logger.warning(f" Node {i+1}: type={node.get('type')}, keys={list(node.keys())}")
            for key, value in node.items():
                if value is None:
                    logger.error(f" ⚠️ Node {i+1}.{key} is None!")
                elif isinstance(value, dict):
                    for k, v in value.items():
                        if v is None:
                            logger.error(f" ⚠️ Node {i+1}.{key}.{k} is None!")
            # For list nodes, look at the first LIST_ITEM and its first children
            list_items = node.get('nodes', []) if node.get('type') in ('BULLETED_LIST', 'ORDERED_LIST') else []
            if list_items:
                first_item = list_items[0]
                logger.warning(f" List has {len(list_items)} items, checking first LIST_ITEM:")
                logger.warning(f" LIST_ITEM keys: {list(first_item.keys())}")
                if 'listItemData' in first_item:
                    logger.error(" ❌ LIST_ITEM incorrectly has listItemData!")
                else:
                    logger.debug(" ✅ LIST_ITEM correctly has no listItemData")
                nested_nodes = first_item.get('nodes', [])
                if nested_nodes:
                    logger.warning(f" LIST_ITEM has {len(nested_nodes)} nested nodes")
                    for n_idx, n_node in enumerate(nested_nodes[:2]):
                        logger.warning(f" Nested node {n_idx+1}: type={n_node.get('type')}, keys={list(n_node.keys())}")
    except Exception as e:
        logger.warning(f"Could not serialize payload for logging: {e}")


def _validate_outgoing_payload(blog_data: Dict[str, Any]) -> None:
    """Raise ValueError if the payload is structurally unfit to send to Wix."""
    draft_post = blog_data.get('draftPost', {})
    if not isinstance(draft_post, dict):
        raise ValueError("draftPost must be a dict object")

    if 'richContent' in draft_post:
        rc = draft_post['richContent']
        if not isinstance(rc, dict):
            raise ValueError(f"richContent must be a dict, got {type(rc)}")
        if 'nodes' not in rc:
            raise ValueError("richContent missing 'nodes' field")
        if not isinstance(rc['nodes'], list):
            raise ValueError(f"richContent.nodes must be a list, got {type(rc['nodes'])}")
        logger.debug(f"✅ richContent validation passed: {len(rc['nodes'])} nodes")

    if 'seoData' in draft_post:
        seo = draft_post['seoData']
        if not isinstance(seo, dict):
            raise ValueError(f"seoData must be a dict, got {type(seo)}")
        if 'tags' in seo and not isinstance(seo['tags'], list):
            raise ValueError(f"seoData.tags must be a list, got {type(seo.get('tags'))}")
        if 'settings' in seo and not isinstance(seo['settings'], dict):
            raise ValueError(f"seoData.settings must be a dict, got {type(seo.get('settings'))}")
        logger.debug(f"✅ seoData validation passed: {len(seo.get('tags', []))} tags")

    # Wix API rejects None values and expects proper types
    try:
        validate_payload_no_none(blog_data, "blog_data")
        logger.debug("✅ Payload validation passed: No None values found")
    except ValueError as e:
        logger.error(f"❌ Payload validation failed: {e}")
        raise

    # Anything json cannot serialise would otherwise fail inside the HTTP client
    try:
        json.dumps(blog_data, ensure_ascii=False)
        logger.debug("✅ Payload JSON serialization test passed")
    except (TypeError, ValueError) as e:
        logger.error(f"❌ Payload JSON serialization failed: {e}")
        raise ValueError(f"Payload contains non-serializable data: {e}") from e


def create_blog_post(
    blog_service: WixBlogService,
    access_token: str,
    title: str,
    content: str,
    member_id: str,
    cover_image_url: Optional[str] = None,
    category_ids: Optional[List[str]] = None,
    tag_ids: Optional[List[str]] = None,
    publish: bool = True,
    seo_metadata: Optional[Dict[str, Any]] = None,
    import_image_func=None,
    lookup_categories_func=None,
    lookup_tags_func=None,
    base_url: str = 'https://www.wixapis.com',
    skip_seo_data: bool = True,
) -> Dict[str, Any]:
    """
    Create and optionally publish a blog post on Wix

    Args:
        blog_service: WixBlogService instance
        access_token: Valid access token
        title: Blog post title
        content: Blog post content (markdown)
        member_id: Required for third-party apps - the member ID of the post author
        cover_image_url: Optional cover image URL
        category_ids: Optional list of category IDs or names
        tag_ids: Optional list of tag IDs or names
        publish: Whether to publish immediately or save as draft
        seo_metadata: Optional SEO metadata dict
        import_image_func: Function to import images (optional)
        lookup_categories_func: Function to lookup/create categories (optional)
        lookup_tags_func: Function to lookup/create tags (optional)
        base_url: Wix API base URL
        skip_seo_data: When True (the default, matching the previously
            hard-coded testing switch) no seoData is built or sent. Pass
            False to send seoData built from ``seo_metadata``. The SEO slug
            is sent whenever ``seo_metadata['url_slug']`` is set.

    Returns:
        Created blog post information

    Raises:
        ValueError: ``member_id`` is missing, or the assembled payload fails validation.
        requests.RequestException: The Wix API call failed (logged, then re-raised).
    """
    if not member_id:
        raise ValueError("memberId is required for third-party apps creating blog posts")

    # Ensure content is not empty
    if not content or not content.strip():
        content = "This is a post from ALwrity."
        logger.warning("⚠️ Content was empty, using default text")

    # Resolve the site header BEFORE the category/tag lookups: they receive it
    # as extra headers, which previously were always empty because the header
    # was only extracted right before the final API call.
    extra_headers = _extract_site_headers(access_token)

    # Try Wix API first (more reliable), fall back to custom parser
    try:
        logger.warning("🔄 Attempting to convert markdown to Ricos via Wix API...")
        ricos_content = convert_via_wix_api(content, access_token, base_url)
        logger.warning(f"✅ Wix API conversion successful. Ricos document has {len(ricos_content.get('nodes', []))} nodes")
    except Exception as e:
        logger.warning(f"⚠️ Wix Ricos API conversion failed: {e}. Falling back to custom parser...")
        ricos_content = convert_content_to_ricos(content, None)
        logger.warning(f"✅ Custom parser conversion complete. Ricos document has {len(ricos_content.get('nodes', []))} nodes")

    ricos_content = validate_ricos_content(ricos_content)

    # Minimal payload per Wix docs: title, memberId, and richContent.
    # Only fields with valid values are included (no None, no empty strings).
    draft_post: Dict[str, Any] = {
        'title': str(title).strip() if title else "Untitled",
        'memberId': str(member_id).strip(),
        'richContent': ricos_content,
    }
    blog_data: Dict[str, Any] = {
        'draftPost': draft_post,
        'publish': bool(publish),
        'fieldsets': ['URL'],
    }

    excerpt = content.strip()[:200]
    if excerpt:
        draft_post['excerpt'] = str(excerpt)

    # Add cover image if provided; a failed import must not block the post
    if cover_image_url and import_image_func:
        try:
            media_id = import_image_func(access_token, cover_image_url, f'Cover: {title}')
            if media_id and isinstance(media_id, str):
                draft_post['media'] = {
                    'wixMedia': {
                        'image': {'id': str(media_id).strip()}
                    },
                    'displayed': True,
                    'custom': True
                }
            else:
                logger.warning(f"Invalid media_id type or value: {type(media_id)}, skipping media")
        except Exception as e:
            logger.warning(f"Failed to import cover image: {e}")

    # Categories/tags may be IDs or names; names go through the lookup functions.
    # Wix API rejects empty arrays or arrays with None/empty strings, so the
    # field is only added when at least one non-blank ID remains.
    resolved = (
        ('categoryIds', 'category', _resolve_ids(category_ids, lookup_categories_func, access_token, extra_headers)),
        ('tagIds', 'tag', _resolve_ids(tag_ids, lookup_tags_func, access_token, extra_headers)),
    )
    for field, label, raw_ids in resolved:
        if not raw_ids or not isinstance(raw_ids, list):
            continue
        valid_ids = [str(item).strip() for item in raw_ids if item and str(item).strip()]
        if valid_ids:
            draft_post[field] = valid_ids
            logger.debug(f"Added {len(valid_ids)} {label} IDs")
        else:
            logger.warning(f"All {label} IDs were invalid, not including {field} in payload")

    # Build SEO data from metadata if provided
    if skip_seo_data:
        logger.warning("🧪 TESTING: Skipping SEO data to isolate richContent vs seoData issue")
        logger.warning("⚠️ SEO data skipped for testing - will add back once richContent is confirmed working")
    elif seo_metadata:
        logger.warning(f"📊 Building SEO data from metadata. Keys: {list(seo_metadata.keys())}")
        seo_data = build_seo_data(seo_metadata, title)
        if seo_data:
            seo_tags = seo_data.get('tags') or []
            logger.warning(f"📋 SEO data built: {len(seo_tags)} tags, {len(seo_data.get('settings', {}).get('keywords', []))} keywords")
            for idx, tag in enumerate(seo_tags[:3]):  # First 3 tags only
                tag_type = tag.get('type')
                if tag_type == 'title':
                    logger.warning(f" SEO tag {idx+1}: type={tag_type}, children={str(tag.get('children', ''))[:50]}...")
                else:
                    props = tag.get('props', {})
                    content_preview = str(props.get('content', props.get('href', props.get('name', ''))))[:50]
                    logger.warning(f" SEO tag {idx+1}: type={tag_type}, props={list(props.keys())}, content={content_preview}...")
            if len(seo_tags) > 3:
                logger.warning(f" ... and {len(seo_tags) - 3} more SEO tags")
            draft_post['seoData'] = seo_data
            logger.warning(f"✅ Added seoData to blog post with {len(seo_tags)} tags")
        else:
            logger.warning("⚠️ SEO data was empty after building - check build_seo_data function")
    else:
        logger.warning("⚠️ No SEO metadata provided to create_blog_post")

    # Add SEO slug if provided (separate field from seoData)
    if seo_metadata and seo_metadata.get('url_slug'):
        draft_post['seoSlug'] = str(seo_metadata.get('url_slug')).strip()
        logger.warning(f"✅ Added SEO slug: {draft_post['seoSlug']}")

    logger.warning(f"📝 Creating blog post with title: '{title}'")
    _log_payload_diagnostics(blog_data)
    _validate_outgoing_payload(blog_data)

    logger.warning("🚀 Calling Wix API: POST /blog/v3/draft-posts")
    try:
        result = blog_service.create_draft_post(access_token, blog_data, extra_headers or None)
    except requests.RequestException as e:
        logger.error(f"Failed to create blog post: {e}")
        if getattr(e, 'response', None) is not None:
            logger.error(f"Response body: {e.response.text}")
        raise

    created = result.get('draftPost', {})
    logger.warning(f"✅ Blog post created successfully! Post ID: {created.get('id', 'N/A')}")
    # Check if SEO data was preserved in response
    if 'seoData' in created:
        seo_response = created['seoData']
        logger.warning(f"✅ SEO data confirmed in response: {len(seo_response.get('tags', []))} tags, {len(seo_response.get('settings', {}).get('keywords', []))} keywords")
    else:
        logger.warning("⚠️ No seoData in response - it may have been filtered out by Wix API")
    logger.warning(f"📋 Response fields: {list(created.keys())}")
    return result

View File

@@ -1,58 +1,460 @@
import re
import uuid
from typing import Any, Dict, List
def _ricos_text_node(text: str, decorations: List[str] = None) -> Dict[str, Any]:
    """Build a TEXT node with a fresh id and its own decorations list."""
    return {
        'id': str(uuid.uuid4()),
        'type': 'TEXT',
        'textData': {
            'text': text,
            'decorations': list(decorations) if decorations else [],
        },
    }


def _add_decoration(nodes: List[Dict[str, Any]], decoration: str) -> List[Dict[str, Any]]:
    """Add *decoration* (at most once) to every top-level TEXT node in *nodes*."""
    for node in nodes:
        if node['type'] == 'TEXT':
            decorations = list(node['textData'].get('decorations', []))
            if decoration not in decorations:
                decorations.append(decoration)
            node['textData']['decorations'] = decorations
    return nodes


def parse_markdown_inline(text: str) -> List[Dict[str, Any]]:
    """
    Parse inline markdown formatting into a flat list of nodes.

    Handles **bold**, *italic*, [links](url) and `code`. Bold, italic and link
    labels are parsed recursively, so nested formatting is supported; the
    content of a code span is taken literally.

    Args:
        text: A single line/paragraph of markdown without block-level markers.

    Returns:
        A non-empty list of nodes. Plain, bold, italic and code runs are TEXT
        nodes whose ``textData.decorations`` holds the strings 'BOLD',
        'ITALIC' and/or 'CODE'; links are LINK nodes wrapping the parsed label
        with ``linkData.link.url`` and ``target='_blank'``. Empty input yields
        one TEXT node with empty text.

    A marker without a matching closing delimiter is kept as literal text.
    Pending plain text is only emitted once a construct is confirmed, so
    unmatched markers no longer split a plain run into several TEXT nodes.

    NOTE(review): the Wix Ricos schema appears to describe decorations as
    objects (e.g. {'type': 'BOLD'}) and links as LINK decorations rather than
    nodes - confirm whether a later step normalizes this output.
    """
    if not text:
        return [_ricos_text_node('')]

    nodes: List[Dict[str, Any]] = []
    pending: List[str] = []  # plain characters not yet emitted as a TEXT node

    def flush_pending() -> None:
        if pending:
            nodes.append(_ricos_text_node(''.join(pending)))
            pending.clear()

    length = len(text)
    i = 0
    while i < length:
        ch = text[i]

        # Bold must be tested before italic so '**' is not read as two '*'.
        if text.startswith('**', i):
            close = text.find('**', i + 2)
            if close != -1:
                flush_pending()
                inner = parse_markdown_inline(text[i + 2:close])
                nodes.extend(_add_decoration(inner, 'BOLD'))
                i = close + 2
                continue

        # Link: [label](url)
        elif ch == '[':
            label_end = text.find(']', i)
            if label_end != -1 and text.startswith('(', label_end + 1):
                url_end = text.find(')', label_end + 2)
                if url_end != -1:
                    flush_pending()
                    nodes.append({
                        'id': str(uuid.uuid4()),
                        'type': 'LINK',
                        'nodes': parse_markdown_inline(text[i + 1:label_end]),
                        'linkData': {
                            'link': {
                                'url': text[label_end + 2:url_end],
                                'target': '_blank',
                            }
                        },
                    })
                    i = url_end + 1
                    continue

        # Inline code: `code` (content is not parsed further)
        elif ch == '`':
            close = text.find('`', i + 1)
            if close != -1:
                flush_pending()
                nodes.append(_ricos_text_node(text[i + 1:close], ['CODE']))
                i = close + 1
                continue

        # Italic: a single '*' that is not adjacent to another '*'
        elif (ch == '*'
              and (i == 0 or text[i - 1] != '*')
              and (i == length - 1 or text[i + 1] != '*')):
            close = text.find('*', i + 1)
            # The closing '*' must not be the first half of a '**'.
            if close != -1 and (close == length - 1 or text[close + 1] != '*'):
                flush_pending()
                inner = parse_markdown_inline(text[i + 1:close])
                nodes.extend(_add_decoration(inner, 'ITALIC'))
                i = close + 1
                continue

        # Regular character, or a marker that turned out to be unmatched.
        pending.append(ch)
        i += 1

    flush_pending()
    return nodes
_ORDERED_ITEM_RE = re.compile(r'^\d+\.\s+')
_IMAGE_LINE_RE = re.compile(r'!\[([^\]]*)\]\(([^)]+)\)')


def _is_bullet_line(stripped: str) -> bool:
    """Return True for '- item' / '* item' and the compact '-item' / '*item' forms.

    A doubled marker ('--', '**') is rejected so rules and bold text are not
    mistaken for list items.
    """
    if stripped.startswith(('- ', '* ')):
        return True
    return len(stripped) > 1 and stripped[0] in '-*' and stripped[1] != stripped[0]


def _strip_bullet_marker(stripped: str) -> str:
    """Remove the leading bullet marker (and optional space) from a bullet line."""
    if stripped.startswith(('- ', '* ')):
        return stripped[2:].strip()
    return stripped[1:].strip()


def _ricos_list_node(node_id: str, list_type: str, data_key: str, items: List[str]) -> Dict[str, Any]:
    """Build a list node whose LIST_ITEM children each wrap one PARAGRAPH.

    LIST_ITEM nodes deliberately carry no data field, matching the original
    implementation's note about the Wix schema.
    """
    item_nodes = []
    for item_text in items:
        item_nodes.append({
            'id': str(uuid.uuid4()),
            'type': 'LIST_ITEM',
            'nodes': [{
                'id': str(uuid.uuid4()),
                'type': 'PARAGRAPH',
                'nodes': parse_markdown_inline(item_text),
                'paragraphData': {}
            }]
        })
    return {
        'id': node_id,
        'type': list_type,
        'nodes': item_nodes,
        data_key: {}
    }


def convert_content_to_ricos(content: str, images: List[str] = None) -> Dict[str, Any]:
    """
    Convert markdown content into a Ricos document dict.

    Supports headings, blockquotes, bulleted and ordered lists (nested items
    are flattened into the parent list), standalone image lines and
    paragraphs. Inline formatting is delegated to ``parse_markdown_inline``.

    Args:
        content: Markdown text. Empty/None falls back to a default sentence.
        images: Accepted for interface compatibility; not used by this function.

    Returns:
        Dict with 'type' == 'DOCUMENT', a fresh 'id', the block 'nodes',
        'metadata' and 'documentStyle'. 'nodes' always holds at least one node.
    """
    if not content:
        content = "This is a post from ALwrity."

    nodes = []
    lines = content.split('\n')
    i = 0
    while i < len(lines):
        line = lines[i].strip()
        if not line:
            i += 1
            continue

        node_id = str(uuid.uuid4())
        # A '![' line that is not a well-formed image falls through to the
        # paragraph branch instead of being silently dropped.
        img_match = _IMAGE_LINE_RE.match(line) if line.startswith('![') else None

        # Headings
        if line.startswith('#'):
            level = len(line) - len(line.lstrip('#'))
            # Slice off only the marker; lstrip('# ') would also eat '#'
            # characters that belong to the heading text itself.
            heading_text = line[level:].strip()
            nodes.append({
                'id': node_id,
                'type': 'HEADING',
                'nodes': parse_markdown_inline(heading_text),
                'headingData': {'level': min(level, 6)}
            })
            i += 1

        # Blockquotes: consecutive '>' lines are merged into one quote
        elif line.startswith('>'):
            quote_lines = []
            while i < len(lines) and lines[i].strip().startswith('>'):
                quote_lines.append(lines[i].strip().lstrip('> ').strip())
                i += 1
            # TEXT nodes must be wrapped in a PARAGRAPH inside BLOCKQUOTE.
            nodes.append({
                'id': node_id,
                'type': 'BLOCKQUOTE',
                'nodes': [{
                    'id': str(uuid.uuid4()),
                    'type': 'PARAGRAPH',
                    'nodes': parse_markdown_inline(' '.join(quote_lines)),
                    'paragraphData': {}
                }],
                'blockquoteData': {}
            })

        # Unordered lists ('- ' / '* ' and the compact '-item' / '*item')
        elif _is_bullet_line(line):
            list_items = []
            # Lines are stripped before testing, so indented (nested) items
            # are collected here too and flattened into this list.
            while i < len(lines) and _is_bullet_line(lines[i].strip()):
                list_items.append(_strip_bullet_marker(lines[i].strip()))
                i += 1
            nodes.append(_ricos_list_node(node_id, 'BULLETED_LIST', 'bulletedListData', list_items))

        # Ordered lists
        elif _ORDERED_ITEM_RE.match(line):
            list_items = []
            while i < len(lines) and _ORDERED_ITEM_RE.match(lines[i].strip()):
                list_items.append(_ORDERED_ITEM_RE.sub('', lines[i].strip()))
                i += 1
            nodes.append(_ricos_list_node(node_id, 'ORDERED_LIST', 'orderedListData', list_items))

        # Standalone image line: ![alt](url)
        elif img_match:
            nodes.append({
                'id': node_id,
                'type': 'IMAGE',
                'nodes': [],
                'imageData': {
                    'image': {
                        'src': {'url': img_match.group(2)},
                        'altText': img_match.group(1)
                    },
                    'containerData': {
                        'alignment': 'CENTER',
                        'width': {'size': 'CONTENT'}
                    }
                }
            })
            i += 1

        # Regular paragraph: join consecutive plain lines with spaces
        else:
            para_lines = [line]
            i += 1
            while i < len(lines):
                next_line = lines[i].strip()
                if not next_line:
                    break
                # Stop when the next line starts another block element.
                if (next_line.startswith(('#', '- ', '* ', '>', '!['))
                        or _ORDERED_ITEM_RE.match(next_line)):
                    break
                para_lines.append(next_line)
                i += 1
            nodes.append({
                'id': node_id,
                'type': 'PARAGRAPH',
                'nodes': parse_markdown_inline(' '.join(para_lines)),
                'paragraphData': {}
            })

    # Ensure at least one node exists (e.g. whitespace-only content).
    if not nodes:
        nodes.append({
            'id': str(uuid.uuid4()),
            'type': 'PARAGRAPH',
            'nodes': [{
                'id': str(uuid.uuid4()),
                'type': 'TEXT',
                'textData': {
                    'text': content[:500] if content else "This is a post from ALwrity.",
                    'decorations': []
                }
            }],
            'paragraphData': {}
        })

    return {
        'type': 'DOCUMENT',
        'id': str(uuid.uuid4()),
        'nodes': nodes,
        'metadata': {'version': 1, 'id': str(uuid.uuid4())},
        'documentStyle': {
            'paragraph': {'decorations': [], 'nodeStyle': {}, 'lineHeight': '1.5'}
        }
    }

View File

@@ -7,6 +7,12 @@ class WixMediaService:
self.base_url = base_url
def import_image(self, access_token: str, image_url: str, display_name: str) -> Dict[str, Any]:
"""
Import external image to Wix Media Manager.
Official endpoint: https://www.wixapis.com/site-media/v1/files/import
Reference: https://dev.wix.com/docs/rest/assets/media/media-manager/files/import-file
"""
headers = {
'Authorization': f'Bearer {access_token}',
'Content-Type': 'application/json',
@@ -16,7 +22,9 @@ class WixMediaService:
'mediaType': 'IMAGE',
'displayName': display_name,
}
response = requests.post(f"{self.base_url}/media/v1/files/import", headers=headers, json=payload)
# Correct endpoint per Wix API documentation
endpoint = f"{self.base_url}/site-media/v1/files/import"
response = requests.post(endpoint, headers=headers, json=payload)
response.raise_for_status()
return response.json()

View File

@@ -0,0 +1,277 @@
"""
Ricos Document Converter for Wix
Converts markdown content to Wix Ricos JSON format using either:
1. Wix's official Ricos Documents API (preferred)
2. Custom markdown parser (fallback)
"""
import json
import requests
import jwt
from typing import Dict, Any, Optional
from loguru import logger
def _basic_markdown_to_html(markdown_content: str) -> str:
    """
    Convert a small markdown subset to HTML using only the standard library.

    Supported: ATX headings (levels 1-6), '- ' / '* ' bullet lists, 'N. '
    ordered lists, single-line blockquotes, fenced code blocks and paragraphs.
    Bold, italic, links and inline code are converted in every text element.
    Each non-blank line outside lists and code becomes its own element.

    Returns the placeholder paragraph when the input is empty or produces no
    output.
    """
    import re
    from html import escape

    if not markdown_content or not markdown_content.strip():
        return "<p>This is a post from ALwrity.</p>"

    def inline(text: str) -> str:
        # Bold must run before italic so '**' is not consumed as two '*'.
        text = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', text)
        text = re.sub(r'\*(.*?)\*', r'<em>\1</em>', text)
        text = re.sub(r'\[([^\]]+)\]\(([^\)]+)\)', r'<a href="\2">\1</a>', text)
        return re.sub(r'`([^`]+)`', r'<code>\1</code>', text)

    def code_block(code_lines) -> str:
        # Joined outside an f-string: a backslash inside an f-string
        # expression is a SyntaxError before Python 3.12. The content is
        # escaped so '<' and '&' in code are shown literally.
        body = escape('\n'.join(code_lines), quote=False)
        return '<pre><code>' + body + '</code></pre>'

    result = []
    open_list = None   # 'ul', 'ol' or None
    code_lines = None  # list of raw lines while inside a fenced block

    def close_list() -> None:
        nonlocal open_list
        if open_list:
            result.append(f'</{open_list}>')
            open_list = None

    def ensure_list(kind: str) -> None:
        nonlocal open_list
        if open_list != kind:
            close_list()
            result.append(f'<{kind}>')
            open_list = kind

    for raw_line in markdown_content.split('\n'):
        line = raw_line.strip()

        # Fenced code blocks take priority over every other construct.
        if line.startswith('```'):
            if code_lines is None:
                close_list()  # keep <pre> from being nested inside a list
                code_lines = []
            else:
                result.append(code_block(code_lines))
                code_lines = None
            continue
        if code_lines is not None:
            code_lines.append(raw_line)  # preserve indentation inside code
            continue

        ordered = re.match(r'^\d+\.\s+(.*)', line)
        bulleted = line.startswith(('- ', '* '))
        if not (ordered or bulleted):
            close_list()  # any non-item line (including blank) ends the list
        if not line:
            continue

        heading = re.match(r'^(#+)\s*(.*)$', line)
        if heading:
            level = min(len(heading.group(1)), 6)
            result.append(f'<h{level}>{inline(heading.group(2).strip())}</h{level}>')
        elif bulleted:
            ensure_list('ul')
            result.append(f'<li>{inline(line[2:].strip())}</li>')
        elif ordered:
            ensure_list('ol')
            result.append(f'<li>{inline(ordered.group(1))}</li>')
        elif line.startswith('>'):
            result.append(f'<blockquote><p>{inline(line[1:].strip())}</p></blockquote>')
        else:
            result.append(f'<p>{inline(line)}</p>')

    # An unterminated fence still emits what was collected.
    if code_lines is not None:
        result.append(code_block(code_lines))
    close_list()

    # Ensure we have at least one paragraph
    if not result:
        result.append('<p>This is a post from ALwrity.</p>')

    html_text = '\n'.join(result)
    logger.debug(f"Converted {len(markdown_content)} chars markdown to {len(html_text)} chars HTML")
    return html_text


def markdown_to_html(markdown_content: str) -> str:
    """
    Convert markdown content to HTML.

    Uses the ``markdown`` package (with the 'fenced_code' and 'tables'
    extensions) when it can be imported; otherwise falls back to a basic
    built-in converter and logs a warning.

    Args:
        markdown_content: Markdown content to convert

    Returns:
        HTML string
    """
    try:
        import markdown
        return markdown.markdown(markdown_content, extensions=['fenced_code', 'tables'])
    except ImportError:
        logger.warning("markdown library not available, using basic markdown-to-HTML conversion")
        return _basic_markdown_to_html(markdown_content)
def convert_via_wix_api(markdown_content: str, access_token: str, base_url: str = 'https://www.wixapis.com') -> Dict[str, Any]:
    """
    Convert markdown to Ricos using Wix's Ricos Documents API.

    The markdown is first converted to HTML with ``markdown_to_html`` and the
    HTML is posted to ``{base_url}/ricos/v1/ricos-document/convert/to-ricos``.
    Reference: https://dev.wix.com/docs/api-reference/assets/rich-content/ricos-documents/convert-to-ricos-document

    Args:
        markdown_content: Markdown content to convert (will be converted to HTML)
        access_token: Wix access token. For 'OauthNG.JWS.' tokens the
            metaSiteId claim, when present, is sent as the wix-site-id header.
        base_url: Wix API base URL (default: https://www.wixapis.com)

    Returns:
        Ricos JSON document

    Raises:
        ValueError: If the content is empty/whitespace-only, or the API
            response contains no document.
        requests.RequestException: If the HTTP request fails; the response
            details are logged before the exception is re-raised.
    """
    # Validate content is not empty
    markdown_stripped = markdown_content.strip() if markdown_content else ""
    if not markdown_stripped:
        logger.error("Markdown content is empty or whitespace-only")
        raise ValueError("Content cannot be empty for Wix Ricos API conversion")

    logger.debug(f"Converting markdown to HTML: input_length={len(markdown_stripped)} chars")
    html_content = markdown_to_html(markdown_stripped)

    # The API rejects empty content, so substitute a minimal valid document
    # if the conversion produced nothing.
    html_stripped = html_content.strip() if html_content else ""
    if not html_stripped:
        logger.error(f"HTML conversion produced empty content! Markdown length: {len(markdown_stripped)}")
        logger.error(f"Markdown sample: {markdown_stripped[:500]}...")
        logger.error(f"HTML result: '{html_content}' (type: {type(html_content)})")
        html_stripped = "<p>Content from ALwrity blog writer.</p>"
        logger.warning("Using fallback HTML due to empty conversion result")
    else:
        logger.debug(f"✅ Converted markdown to HTML: {len(html_stripped)} chars, preview: {html_stripped[:200]}...")

    headers = {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json'
    }

    # Add wix-site-id if available from token. This is best-effort: any
    # failure to decode the token only skips the header.
    try:
        token_str = str(access_token)
        if token_str.startswith('OauthNG.JWS.'):
            claims = jwt.decode(token_str[12:], options={"verify_signature": False, "verify_aud": False})
            data_claim = claims.get('data', {})
            if isinstance(data_claim, str):
                try:
                    data_claim = json.loads(data_claim)
                except ValueError:
                    data_claim = {}
            instance_data = data_claim.get('instance', {}) if isinstance(data_claim, dict) else {}
            meta_site_id = instance_data.get('metaSiteId') if isinstance(instance_data, dict) else None
            if isinstance(meta_site_id, str) and meta_site_id:
                headers['wix-site-id'] = meta_site_id
    except Exception as e:
        logger.debug(f"Could not extract site ID from token: {e}")

    endpoint = f"{base_url}/ricos/v1/ricos-document/convert/to-ricos"

    # Payload structure per Wix API: html/markdown/plainText field at root, optional plugins
    payload = {
        'html': html_stripped,  # Direct field, not nested in options
        'plugins': []  # Optional: empty array uses default plugins
    }
    logger.warning(f"📤 Sending to Wix Ricos API: html_length={len(payload['html'])}, plugins_count={len(payload['plugins'])}")
    logger.debug(f"HTML preview (first 300 chars): {html_stripped[:300]}...")

    try:
        # Log the exact payload being sent (for debugging)
        logger.warning(f"📤 Wix Ricos API Request:")
        logger.warning(f" Endpoint: {endpoint}")
        logger.warning(f" Payload keys: {list(payload.keys())}")
        logger.warning(f" HTML length: {len(payload.get('html', ''))}")
        logger.warning(f" Plugins: {payload.get('plugins', [])}")
        logger.debug(f" Full payload (first 500 chars of HTML): {str(payload)[:500]}")

        response = requests.post(
            endpoint,
            headers=headers,
            json=payload,
            timeout=30
        )
        response.raise_for_status()
        result = response.json()

        # Response structure: { "document": { "nodes": [...], "metadata": {...}, "documentStyle": {...} } }
        ricos_document = result.get('document')
        if not ricos_document:
            # Fallback: try other possible response fields
            ricos_document = result.get('ricosDocument') or result.get('ricos') or result
        if not ricos_document:
            logger.error(f"Unexpected response structure from Wix API: {list(result.keys())}")
            logger.error(f"Response: {result}")
            raise ValueError("Wix API did not return a valid Ricos document")

        logger.warning(f"✅ Successfully converted HTML to Ricos via Wix API: {len(ricos_document.get('nodes', []))} nodes")
        return ricos_document
    except requests.RequestException as e:
        logger.error(f"❌ Wix Ricos API conversion failed: {e}")
        if getattr(e, 'response', None) is not None:
            logger.error(f" Response status: {e.response.status_code}")
            logger.error(f" Response headers: {dict(e.response.headers)}")
            try:
                logger.error(f" Response JSON: {e.response.json()}")
            except ValueError:
                # Body is not JSON; log it verbatim instead.
                logger.error(f" Response text: {e.response.text}")
        logger.error(f" Request payload was: {json.dumps(payload, indent=2)[:1000]}...")  # First 1000 chars
        raise

View File

@@ -0,0 +1,300 @@
"""
SEO Data Builder for Wix Blog Posts
Builds Wix-compatible seoData objects from ALwrity SEO metadata.
"""
from typing import Dict, Any, Optional
from loguru import logger
def build_seo_data(seo_metadata: Dict[str, Any], default_title: str = None) -> Optional[Dict[str, Any]]:
    """
    Build Wix seoData object from our SEO metadata format.

    Args:
        seo_metadata: SEO metadata dict with fields like:
            - seo_title: SEO optimized title
            - meta_description: Meta description
            - focus_keyword: Main keyword
            - blog_tags: List of tag strings (for keywords)
            - social_hashtags: List of hashtags (leading '#' is removed)
            - open_graph: Open Graph data dict
            - twitter_card: Twitter card data dict
            - canonical_url: Canonical URL
            A non-dict value (e.g. None) is treated as empty metadata.
        default_title: Fallback title if seo_title not provided

    Returns:
        Dict with a 'tags' list and, only when at least one keyword exists,
        'settings' == {'keywords': [...]}. Returns None when there are
        neither keywords nor tags.

    Keyword terms are de-duplicated case-insensitively, so a tag or hashtag
    that repeats the focus keyword (or another tag) is emitted only once.
    """
    metadata = seo_metadata if isinstance(seo_metadata, dict) else {}

    # ---- keywords -------------------------------------------------------
    keywords = []
    seen_terms = set()

    def add_keyword(raw: Any, is_main: bool = False, strip_hash: bool = False) -> None:
        term = str(raw).strip()
        if strip_hash:
            term = term.lstrip('#').strip()
        key = term.casefold()
        if not term or key in seen_terms:
            return
        seen_terms.add(key)
        keywords.append({'term': term, 'isMain': is_main})

    focus_keyword = metadata.get('focus_keyword')
    if focus_keyword:
        add_keyword(focus_keyword, is_main=True)

    blog_tags = metadata.get('blog_tags', [])
    if isinstance(blog_tags, list):
        for tag in blog_tags:
            add_keyword(tag)

    social_hashtags = metadata.get('social_hashtags', [])
    if isinstance(social_hashtags, list):
        for hashtag in social_hashtags:
            add_keyword(hashtag, strip_hash=True)

    # ---- tags -----------------------------------------------------------
    tags = []

    def add_meta(attr: str, key: str, value: Any) -> None:
        """Append a <meta> tag unless the content is missing or blank."""
        if not value:
            return
        content = str(value)
        if not content.strip():
            logger.warning(f"Skipping meta tag with missing/empty 'content': {{'{attr}': '{key}'}}")
            return
        tags.append({
            'type': 'meta',
            'props': {attr: key, 'content': content},
            'custom': True,
            'disabled': False
        })

    def is_http_url(value: Any) -> bool:
        # Base64/data URIs are skipped: only absolute http(s) URLs are emitted.
        return isinstance(value, str) and value.startswith(('http://', 'https://'))

    meta_description = metadata.get('meta_description')
    add_meta('name', 'description', meta_description)

    # 'title' tags carry their text in 'children', not in 'props.content'.
    seo_title = metadata.get('seo_title') or default_title
    if seo_title:
        tags.append({
            'type': 'title',
            'children': str(seo_title),
            'custom': True,
            'disabled': False
        })

    # Open Graph tags
    open_graph = metadata.get('open_graph', {})
    if isinstance(open_graph, dict):
        add_meta('property', 'og:title', open_graph.get('title') or seo_title)
        add_meta('property', 'og:description', open_graph.get('description') or meta_description)
        og_image = open_graph.get('image')
        if is_http_url(og_image):
            add_meta('property', 'og:image', og_image)
        add_meta('property', 'og:type', 'article')
        add_meta('property', 'og:url', open_graph.get('url') or metadata.get('canonical_url'))

    # Twitter Card tags
    twitter_card = metadata.get('twitter_card', {})
    if isinstance(twitter_card, dict):
        add_meta('name', 'twitter:title', twitter_card.get('title') or seo_title)
        add_meta('name', 'twitter:description', twitter_card.get('description') or meta_description)
        twitter_image = twitter_card.get('image')
        if is_http_url(twitter_image):
            add_meta('name', 'twitter:image', twitter_image)
        add_meta('name', 'twitter:card', twitter_card.get('card', 'summary_large_image'))

    # Canonical URL as link tag
    canonical_url = metadata.get('canonical_url')
    if canonical_url:
        href = str(canonical_url)
        if href.strip():
            tags.append({
                'type': 'link',
                'props': {'rel': 'canonical', 'href': href},
                'custom': True,
                'disabled': False
            })
        else:
            logger.warning("Skipping link tag with missing/empty 'href': {'rel': 'canonical'}")

    # ---- assemble -------------------------------------------------------
    # Empty structures are omitted rather than sent as {}: 'settings' is only
    # included when there is at least one keyword.
    seo_data: Dict[str, Any] = {}
    if keywords:
        seo_data['settings'] = {'keywords': keywords}
    else:
        logger.debug("No keywords found, omitting settings from seoData")
    seo_data['tags'] = tags

    logger.debug(f"Built SEO data: {len(tags)} tags, {len(keywords)} keywords")

    # Only return seoData if we have at least keywords or tags
    if keywords or tags:
        return seo_data
    return None