Research component integration, Copilotkit implementation, SEO copilotkit implementation, Wix SEO metadata complete, Wix SEO metadata review
This commit is contained in:
277
backend/services/integrations/wix/ricos_converter.py
Normal file
277
backend/services/integrations/wix/ricos_converter.py
Normal file
@@ -0,0 +1,277 @@
"""
Ricos Document Converter for Wix

Converts markdown content to Wix Ricos JSON format using either:
1. Wix's official Ricos Documents API (preferred)
2. Custom markdown parser (fallback)
"""
|
||||
|
||||
import json
|
||||
import requests
|
||||
import jwt
|
||||
from typing import Dict, Any, Optional
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def markdown_to_html(markdown_content: str) -> str:
    """
    Convert markdown content to HTML.

    The third-party ``markdown`` package is used when it can be imported
    (with the ``fenced_code`` and ``tables`` extensions). If it is not
    installed, a small line-based converter is used instead. The fallback
    understands:

    * fenced code blocks (```), emitted as ``<pre><code>`` with the content
      HTML-escaped; an unterminated fence is closed at end of input
    * headings ``#`` .. ``######`` (``<h1>`` .. ``<h6>``)
    * unordered (``- `` / ``* ``) and ordered (``1. ``) lists
    * blockquotes (``>``)
    * paragraphs (one per non-empty line)
    * inline bold, italic, links and code in every text-bearing block

    Args:
        markdown_content: Markdown content to convert

    Returns:
        HTML string. In the fallback path, empty or whitespace-only input
        (or input that produces no blocks) yields a placeholder paragraph.
    """
    try:
        # Try using markdown library if available
        import markdown
        return markdown.markdown(markdown_content, extensions=['fenced_code', 'tables'])
    except ImportError:
        # Fallback: Simple regex-based conversion for basic markdown
        logger.warning("markdown library not available, using basic markdown-to-HTML conversion")

    # Function-scope imports: only the fallback path needs them.
    import re
    from html import escape

    if not markdown_content or not markdown_content.strip():
        return "<p>This is a post from ALwrity.</p>"

    heading_re = re.compile(r'^(#{1,6})\s*(.*)$')
    ordered_re = re.compile(r'^\d+\.\s+(.*)')
    code_span_re = re.compile(r'`([^`]+)`')
    placeholder_re = re.compile('\x00(\\d+)\x00')

    def render_inline(text: str) -> str:
        """Apply inline markdown (code, bold, italic, links) to one line of text."""
        stashed = []

        def stash(match):
            # Code spans are rendered first and swapped for NUL-delimited
            # placeholders so the emphasis/link regexes cannot rewrite
            # characters that belong to the code.
            stashed.append(f'<code>{escape(match.group(1), quote=False)}</code>')
            return f'\x00{len(stashed) - 1}\x00'

        # Drop stray NULs so user text can never collide with a placeholder.
        text = code_span_re.sub(stash, text.replace('\x00', ''))
        text = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', text)
        text = re.sub(r'\*(.*?)\*', r'<em>\1</em>', text)
        text = re.sub(r'\[([^\]]+)\]\(([^\)]+)\)', r'<a href="\2">\1</a>', text)
        return placeholder_re.sub(lambda m: stashed[int(m.group(1))], text)

    def render_code_block(code_lines) -> str:
        # Join outside the f-string: a backslash inside an f-string
        # expression is a SyntaxError before Python 3.12.
        body = escape("\n".join(code_lines), quote=False)
        return f'<pre><code>{body}</code></pre>'

    blocks = []
    open_list = None   # 'ul', 'ol', or None when no list is open
    code_lines = None  # list of raw lines while inside a fenced block

    for raw_line in markdown_content.split('\n'):
        line = raw_line.strip()

        # Handle code blocks first
        if line.startswith('```'):
            if code_lines is None:
                # A fence terminates any open list so <pre> is not nested
                # directly inside <ul>/<ol>.
                if open_list:
                    blocks.append(f'</{open_list}>')
                    open_list = None
                code_lines = []
            else:
                blocks.append(render_code_block(code_lines))
                code_lines = None
            continue

        if code_lines is not None:
            # Keep the unstripped line so indentation inside code survives.
            code_lines.append(raw_line)
            continue

        is_bullet = line.startswith(('- ', '* '))
        ordered_match = ordered_re.match(line)
        if is_bullet:
            wanted_list = 'ul'
        elif ordered_match:
            wanted_list = 'ol'
        else:
            wanted_list = None

        # Close the open list when this line is not an item of the same kind
        # (this includes blank lines, matching the original behaviour).
        if open_list and open_list != wanted_list:
            blocks.append(f'</{open_list}>')
            open_list = None

        if not line:
            continue

        heading_match = heading_re.match(line)
        if heading_match:
            level = len(heading_match.group(1))
            title = render_inline(heading_match.group(2).strip())
            blocks.append(f'<h{level}>{title}</h{level}>')
        elif wanted_list:
            if open_list is None:
                blocks.append(f'<{wanted_list}>')
                open_list = wanted_list
            item_text = line[2:].strip() if is_bullet else ordered_match.group(1)
            blocks.append(f'<li>{render_inline(item_text)}</li>')
        elif line.startswith('>'):
            quote_text = render_inline(line[1:].strip())
            blocks.append(f'<blockquote><p>{quote_text}</p></blockquote>')
        else:
            blocks.append(f'<p>{render_inline(line)}</p>')

    # An unterminated fence would otherwise silently drop its content.
    if code_lines is not None:
        blocks.append(render_code_block(code_lines))

    # Close any open lists
    if open_list:
        blocks.append(f'</{open_list}>')

    # Ensure we have at least one paragraph
    if not blocks:
        blocks.append('<p>This is a post from ALwrity.</p>')

    html_output = '\n'.join(blocks)

    logger.debug(f"Converted {len(markdown_content)} chars markdown to {len(html_output)} chars HTML")
    return html_output
|
||||
|
||||
|
||||
# Prefix Wix puts in front of the JWT in its OAuth access tokens.
_WIX_OAUTH_JWS_PREFIX = 'OauthNG.JWS.'


def _extract_meta_site_id(access_token: Any) -> Optional[str]:
    """Best-effort read of ``data.instance.metaSiteId`` from a Wix access token; None if unavailable."""
    try:
        token_str = str(access_token)
        if not token_str.startswith(_WIX_OAUTH_JWS_PREFIX):
            return None

        # The signature is deliberately not verified: the claims are only
        # used to populate a request header, not to make any authorization
        # decision in this process.
        claims = jwt.decode(
            token_str[len(_WIX_OAUTH_JWS_PREFIX):],
            options={"verify_signature": False, "verify_aud": False},
        )
        data_payload = claims.get('data', {})
        if isinstance(data_payload, str):
            # 'data' may be a JSON-encoded string; a decode failure is
            # handled by the outer handler like any other extraction failure.
            data_payload = json.loads(data_payload)

        meta_site_id = data_payload.get('instance', {}).get('metaSiteId')
        if isinstance(meta_site_id, str) and meta_site_id:
            return meta_site_id
    except Exception as e:
        # Extraction is optional; never let it block the conversion request.
        logger.debug(f"Could not extract site ID from token: {e}")
    return None


def convert_via_wix_api(markdown_content: str, access_token: str, base_url: str = 'https://www.wixapis.com') -> Dict[str, Any]:
    """
    Convert markdown to Ricos using Wix's official Ricos Documents API.

    The markdown is first converted to HTML with ``markdown_to_html`` and the
    HTML is posted to the ``convert/to-ricos`` endpoint. If the HTML
    conversion yields nothing, a minimal placeholder paragraph is sent
    instead. When the access token is an ``OauthNG.JWS.`` token carrying a
    ``metaSiteId``, it is forwarded as the ``wix-site-id`` header.

    Reference: https://dev.wix.com/docs/api-reference/assets/rich-content/ricos-documents/convert-to-ricos-document

    Args:
        markdown_content: Markdown content to convert (will be converted to HTML)
        access_token: Wix access token
        base_url: Wix API base URL (default: https://www.wixapis.com)

    Returns:
        Ricos JSON document

    Raises:
        ValueError: If ``markdown_content`` is empty/whitespace-only, or the
            API response does not contain a usable Ricos document.
        requests.RequestException: If the HTTP request fails or returns an
            error status (logged, then re-raised).
    """
    # Validate content is not empty
    markdown_stripped = markdown_content.strip() if markdown_content else ""
    if not markdown_stripped:
        logger.error("Markdown content is empty or whitespace-only")
        raise ValueError("Content cannot be empty for Wix Ricos API conversion")

    logger.debug(f"Converting markdown to HTML: input_length={len(markdown_stripped)} chars")

    # Convert markdown to HTML for better reliability with Wix API
    # HTML format is more structured and less prone to parsing errors
    html_content = markdown_to_html(markdown_stripped)

    # Validate HTML content is not empty - CRITICAL for Wix API
    html_stripped = html_content.strip() if html_content else ""
    if not html_stripped:
        logger.error(f"HTML conversion produced empty content! Markdown length: {len(markdown_stripped)}")
        logger.error(f"Markdown sample: {markdown_stripped[:500]}...")
        logger.error(f"HTML result: '{html_content}' (type: {type(html_content)})")
        # Fallback: use a minimal valid HTML if conversion failed
        html_stripped = "<p>Content from ALwrity blog writer.</p>"
        logger.warning("Using fallback HTML due to empty conversion result")

    # From here on html_stripped is guaranteed non-empty, so no second
    # emptiness check is needed before building the payload.
    logger.debug(f"✅ Converted markdown to HTML: {len(html_stripped)} chars, preview: {html_stripped[:200]}...")

    headers = {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json'
    }

    # Add wix-site-id if available from token
    meta_site_id = _extract_meta_site_id(access_token)
    if meta_site_id:
        headers['wix-site-id'] = meta_site_id

    # Call Wix Ricos Documents API: Convert to Ricos Document
    # Official endpoint: https://www.wixapis.com/ricos/v1/ricos-document/convert/to-ricos
    # Reference: https://dev.wix.com/docs/rest/assets/rich-content/ricos-documents/convert-to-ricos-document
    # rstrip avoids a double slash when base_url is given with a trailing '/'.
    endpoint = f"{base_url.rstrip('/')}/ricos/v1/ricos-document/convert/to-ricos"

    # Payload structure per Wix API: html/markdown/plainText field at root, optional plugins
    payload = {
        'html': html_stripped,  # Direct field, not nested in options
        'plugins': []  # Optional: empty array uses default plugins
    }

    logger.warning(f"📤 Sending to Wix Ricos API: html_length={len(payload['html'])}, plugins_count={len(payload['plugins'])}")
    logger.debug(f"HTML preview (first 300 chars): {html_stripped[:300]}...")

    # Log the exact payload being sent (for debugging)
    logger.warning(f"📤 Wix Ricos API Request:")
    logger.warning(f"  Endpoint: {endpoint}")
    logger.warning(f"  Payload keys: {list(payload.keys())}")
    logger.warning(f"  HTML length: {len(payload.get('html', ''))}")
    logger.warning(f"  Plugins: {payload.get('plugins', [])}")
    logger.debug(f"  Full payload (first 500 chars of HTML): {str(payload)[:500]}")

    try:
        response = requests.post(
            endpoint,
            headers=headers,
            json=payload,
            timeout=30
        )
        response.raise_for_status()
        result = response.json()
    except requests.RequestException as e:
        logger.error(f"❌ Wix Ricos API conversion failed: {e}")
        if e.response is not None:
            logger.error(f"  Response status: {e.response.status_code}")
            logger.error(f"  Response headers: {dict(e.response.headers)}")
            try:
                error_body = e.response.json()
                logger.error(f"  Response JSON: {error_body}")
            except ValueError:
                # Body is not JSON; log it verbatim instead.
                logger.error(f"  Response text: {e.response.text}")
            logger.error(f"  Request payload was: {json.dumps(payload, indent=2)[:1000]}...")  # First 1000 chars
        raise

    # Extract the ricos document from response
    # Response structure: { "document": { "nodes": [...], "metadata": {...}, "documentStyle": {...} } }
    ricos_document = None
    if isinstance(result, dict):
        # Fallback: try other possible response fields, then the body itself
        ricos_document = (
            result.get('document')
            or result.get('ricosDocument')
            or result.get('ricos')
            or result
        )

    if not ricos_document:
        shape = list(result.keys()) if isinstance(result, dict) else type(result).__name__
        logger.error(f"Unexpected response structure from Wix API: {shape}")
        logger.error(f"Response: {result}")
        raise ValueError("Wix API did not return a valid Ricos document")

    logger.warning(f"✅ Successfully converted HTML to Ricos via Wix API: {len(ricos_document.get('nodes', []))} nodes")
    return ricos_document
|
||||
|
||||
Reference in New Issue
Block a user