Research component integration, Copilotkit implementation, SEO copilotkit implementation, Wix SEO metadata complete, Wix SEO metadata review
This commit is contained in:
@@ -1,58 +1,460 @@
|
||||
import re
|
||||
import uuid
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
def parse_markdown_inline(text: str) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Parse inline markdown formatting (bold, italic, links) into Ricos text nodes.
|
||||
Returns a list of text nodes with decorations.
|
||||
Handles: **bold**, *italic*, [links](url), `code`, and combinations.
|
||||
"""
|
||||
if not text:
|
||||
return [{
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'textData': {'text': '', 'decorations': []}
|
||||
}]
|
||||
|
||||
nodes = []
|
||||
|
||||
# Process text character by character to handle nested/adjacent formatting
|
||||
# This is more robust than regex for complex cases
|
||||
i = 0
|
||||
current_text = ''
|
||||
current_decorations = []
|
||||
|
||||
while i < len(text):
|
||||
# Check for bold **text** (must come before single * check)
|
||||
if i < len(text) - 1 and text[i:i+2] == '**':
|
||||
# Save any accumulated text
|
||||
if current_text:
|
||||
nodes.append({
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'textData': {
|
||||
'text': current_text,
|
||||
'decorations': current_decorations.copy()
|
||||
}
|
||||
})
|
||||
current_text = ''
|
||||
|
||||
# Find closing **
|
||||
end_bold = text.find('**', i + 2)
|
||||
if end_bold != -1:
|
||||
bold_text = text[i + 2:end_bold]
|
||||
# Recursively parse the bold text for nested formatting
|
||||
bold_nodes = parse_markdown_inline(bold_text)
|
||||
# Add BOLD decoration to all text nodes within
|
||||
for node in bold_nodes:
|
||||
if node['type'] == 'TEXT':
|
||||
node_decorations = node['textData'].get('decorations', []).copy()
|
||||
if 'BOLD' not in node_decorations:
|
||||
node_decorations.append('BOLD')
|
||||
node['textData']['decorations'] = node_decorations
|
||||
nodes.append(node)
|
||||
i = end_bold + 2
|
||||
continue
|
||||
|
||||
# Check for link [text](url)
|
||||
elif text[i] == '[':
|
||||
# Save any accumulated text
|
||||
if current_text:
|
||||
nodes.append({
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'textData': {
|
||||
'text': current_text,
|
||||
'decorations': current_decorations.copy()
|
||||
}
|
||||
})
|
||||
current_text = ''
|
||||
current_decorations = []
|
||||
|
||||
# Find matching ]
|
||||
link_end = text.find(']', i)
|
||||
if link_end != -1 and link_end < len(text) - 1 and text[link_end + 1] == '(':
|
||||
link_text = text[i + 1:link_end]
|
||||
url_start = link_end + 2
|
||||
url_end = text.find(')', url_start)
|
||||
if url_end != -1:
|
||||
url = text[url_start:url_end]
|
||||
# Create link node
|
||||
link_node_id = str(uuid.uuid4())
|
||||
text_node_id = str(uuid.uuid4())
|
||||
link_text_nodes = parse_markdown_inline(link_text)
|
||||
# Wrap link text in LINK node
|
||||
nodes.append({
|
||||
'id': link_node_id,
|
||||
'type': 'LINK',
|
||||
'nodes': link_text_nodes if link_text_nodes else [{
|
||||
'id': text_node_id,
|
||||
'type': 'TEXT',
|
||||
'textData': {'text': link_text, 'decorations': []}
|
||||
}],
|
||||
'linkData': {
|
||||
'link': {
|
||||
'url': url,
|
||||
'target': '_blank'
|
||||
}
|
||||
}
|
||||
})
|
||||
i = url_end + 1
|
||||
continue
|
||||
|
||||
# Check for code `text`
|
||||
elif text[i] == '`':
|
||||
# Save any accumulated text
|
||||
if current_text:
|
||||
nodes.append({
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'textData': {
|
||||
'text': current_text,
|
||||
'decorations': current_decorations.copy()
|
||||
}
|
||||
})
|
||||
current_text = ''
|
||||
current_decorations = []
|
||||
|
||||
# Find closing `
|
||||
code_end = text.find('`', i + 1)
|
||||
if code_end != -1:
|
||||
code_text = text[i + 1:code_end]
|
||||
nodes.append({
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'textData': {
|
||||
'text': code_text,
|
||||
'decorations': ['CODE']
|
||||
}
|
||||
})
|
||||
i = code_end + 1
|
||||
continue
|
||||
|
||||
# Check for italic *text* (only if not part of **)
|
||||
elif text[i] == '*' and (i == 0 or text[i-1] != '*') and (i == len(text) - 1 or text[i+1] != '*'):
|
||||
# Save any accumulated text
|
||||
if current_text:
|
||||
nodes.append({
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'textData': {
|
||||
'text': current_text,
|
||||
'decorations': current_decorations.copy()
|
||||
}
|
||||
})
|
||||
current_text = ''
|
||||
current_decorations = []
|
||||
|
||||
# Find closing * (but not **)
|
||||
italic_end = text.find('*', i + 1)
|
||||
if italic_end != -1:
|
||||
# Make sure it's not part of **
|
||||
if italic_end == len(text) - 1 or text[italic_end + 1] != '*':
|
||||
italic_text = text[i + 1:italic_end]
|
||||
italic_nodes = parse_markdown_inline(italic_text)
|
||||
# Add ITALIC decoration
|
||||
for node in italic_nodes:
|
||||
if node['type'] == 'TEXT':
|
||||
node_decorations = node['textData'].get('decorations', []).copy()
|
||||
if 'ITALIC' not in node_decorations:
|
||||
node_decorations.append('ITALIC')
|
||||
node['textData']['decorations'] = node_decorations
|
||||
nodes.append(node)
|
||||
i = italic_end + 1
|
||||
continue
|
||||
|
||||
# Regular character
|
||||
current_text += text[i]
|
||||
i += 1
|
||||
|
||||
# Add any remaining text
|
||||
if current_text:
|
||||
nodes.append({
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'textData': {
|
||||
'text': current_text,
|
||||
'decorations': current_decorations.copy()
|
||||
}
|
||||
})
|
||||
|
||||
# If no nodes created, return single plain text node
|
||||
if not nodes:
|
||||
nodes.append({
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'textData': {
|
||||
'text': text,
|
||||
'decorations': []
|
||||
}
|
||||
})
|
||||
|
||||
return nodes
|
||||
|
||||
|
||||
def convert_content_to_ricos(content: str, images: List[str] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Convert simple markdown-like text into minimal valid Ricos JSON.
|
||||
Convert markdown content into valid Ricos JSON format.
|
||||
Supports headings, paragraphs, lists, bold, italic, links, and images.
|
||||
"""
|
||||
paragraphs = content.split('\n\n')
|
||||
if not content:
|
||||
content = "This is a post from ALwrity."
|
||||
|
||||
nodes = []
|
||||
|
||||
import uuid
|
||||
|
||||
for paragraph in paragraphs:
|
||||
text = paragraph.strip()
|
||||
if not text:
|
||||
lines = content.split('\n')
|
||||
|
||||
i = 0
|
||||
while i < len(lines):
|
||||
line = lines[i].strip()
|
||||
|
||||
if not line:
|
||||
i += 1
|
||||
continue
|
||||
|
||||
node_id = str(uuid.uuid4())
|
||||
text_node_id = str(uuid.uuid4())
|
||||
|
||||
if text.startswith('#'):
|
||||
level = len(text) - len(text.lstrip('#'))
|
||||
heading_text = text.lstrip('# ').strip()
|
||||
|
||||
# Check for headings
|
||||
if line.startswith('#'):
|
||||
level = len(line) - len(line.lstrip('#'))
|
||||
heading_text = line.lstrip('# ').strip()
|
||||
text_nodes = parse_markdown_inline(heading_text)
|
||||
nodes.append({
|
||||
'id': node_id,
|
||||
'type': 'HEADING',
|
||||
'nodes': [{
|
||||
'id': text_node_id,
|
||||
'type': 'TEXT',
|
||||
'textData': {
|
||||
'text': heading_text,
|
||||
'decorations': []
|
||||
}
|
||||
}],
|
||||
'headingData': { 'level': min(level, 6) }
|
||||
'nodes': text_nodes,
|
||||
'headingData': {'level': min(level, 6)}
|
||||
})
|
||||
else:
|
||||
nodes.append({
|
||||
'id': node_id,
|
||||
i += 1
|
||||
|
||||
# Check for blockquotes
|
||||
elif line.startswith('>'):
|
||||
quote_text = line.lstrip('> ').strip()
|
||||
# Continue reading consecutive blockquote lines
|
||||
quote_lines = [quote_text]
|
||||
i += 1
|
||||
while i < len(lines) and lines[i].strip().startswith('>'):
|
||||
quote_lines.append(lines[i].strip().lstrip('> ').strip())
|
||||
i += 1
|
||||
quote_content = ' '.join(quote_lines)
|
||||
text_nodes = parse_markdown_inline(quote_content)
|
||||
# CRITICAL: TEXT nodes must be wrapped in PARAGRAPH nodes within BLOCKQUOTE
|
||||
paragraph_node = {
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'PARAGRAPH',
|
||||
'nodes': [{
|
||||
'id': text_node_id,
|
||||
'type': 'TEXT',
|
||||
'textData': {
|
||||
'text': text,
|
||||
'decorations': []
|
||||
}
|
||||
}],
|
||||
'nodes': text_nodes,
|
||||
'paragraphData': {}
|
||||
})
|
||||
|
||||
}
|
||||
blockquote_node = {
|
||||
'id': node_id,
|
||||
'type': 'BLOCKQUOTE',
|
||||
'nodes': [paragraph_node],
|
||||
'blockquoteData': {}
|
||||
}
|
||||
nodes.append(blockquote_node)
|
||||
|
||||
# Check for unordered lists (handle both '- ' and '* ' markers)
|
||||
elif (line.startswith('- ') or line.startswith('* ') or
|
||||
(line.startswith('-') and len(line) > 1 and line[1] != '-') or
|
||||
(line.startswith('*') and len(line) > 1 and line[1] != '*')):
|
||||
list_items = []
|
||||
list_marker = '- ' if line.startswith('-') else '* '
|
||||
# Process list items
|
||||
while i < len(lines):
|
||||
current_line = lines[i].strip()
|
||||
# Check if this is a list item
|
||||
is_list_item = (current_line.startswith('- ') or current_line.startswith('* ') or
|
||||
(current_line.startswith('-') and len(current_line) > 1 and current_line[1] != '-') or
|
||||
(current_line.startswith('*') and len(current_line) > 1 and current_line[1] != '*'))
|
||||
|
||||
if not is_list_item:
|
||||
break
|
||||
|
||||
# Extract item text (handle both '- ' and '-item' formats)
|
||||
if current_line.startswith('- ') or current_line.startswith('* '):
|
||||
item_text = current_line[2:].strip()
|
||||
elif current_line.startswith('-'):
|
||||
item_text = current_line[1:].strip()
|
||||
elif current_line.startswith('*'):
|
||||
item_text = current_line[1:].strip()
|
||||
else:
|
||||
item_text = current_line
|
||||
|
||||
list_items.append(item_text)
|
||||
i += 1
|
||||
|
||||
# Check for nested items (indented with 2+ spaces)
|
||||
while i < len(lines):
|
||||
next_line = lines[i]
|
||||
# Must be indented and be a list marker
|
||||
if next_line.startswith(' ') and (next_line.strip().startswith('- ') or
|
||||
next_line.strip().startswith('* ') or
|
||||
(next_line.strip().startswith('-') and len(next_line.strip()) > 1) or
|
||||
(next_line.strip().startswith('*') and len(next_line.strip()) > 1)):
|
||||
nested_text = next_line.strip()
|
||||
if nested_text.startswith('- ') or nested_text.startswith('* '):
|
||||
nested_text = nested_text[2:].strip()
|
||||
elif nested_text.startswith('-'):
|
||||
nested_text = nested_text[1:].strip()
|
||||
elif nested_text.startswith('*'):
|
||||
nested_text = nested_text[1:].strip()
|
||||
list_items.append(nested_text)
|
||||
i += 1
|
||||
else:
|
||||
break
|
||||
|
||||
# Build list items with proper formatting
|
||||
# CRITICAL: TEXT nodes must be wrapped in PARAGRAPH nodes within LIST_ITEM
|
||||
# NOTE: LIST_ITEM nodes do NOT have a data field per Wix API schema
|
||||
# Wix API: omit empty data objects, don't include them as {}
|
||||
list_node_items = []
|
||||
for item_text in list_items:
|
||||
item_node_id = str(uuid.uuid4())
|
||||
text_nodes = parse_markdown_inline(item_text)
|
||||
paragraph_node = {
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'PARAGRAPH',
|
||||
'nodes': text_nodes,
|
||||
'paragraphData': {}
|
||||
}
|
||||
list_item_node = {
|
||||
'id': item_node_id,
|
||||
'type': 'LIST_ITEM',
|
||||
'nodes': [paragraph_node]
|
||||
}
|
||||
list_node_items.append(list_item_node)
|
||||
|
||||
bulleted_list_node = {
|
||||
'id': node_id,
|
||||
'type': 'BULLETED_LIST',
|
||||
'nodes': list_node_items,
|
||||
'bulletedListData': {}
|
||||
}
|
||||
nodes.append(bulleted_list_node)
|
||||
|
||||
# Check for ordered lists
|
||||
elif re.match(r'^\d+\.\s+', line):
|
||||
list_items = []
|
||||
while i < len(lines) and re.match(r'^\d+\.\s+', lines[i].strip()):
|
||||
item_text = re.sub(r'^\d+\.\s+', '', lines[i].strip())
|
||||
list_items.append(item_text)
|
||||
i += 1
|
||||
# Check for nested items
|
||||
while i < len(lines) and lines[i].strip().startswith(' ') and re.match(r'^\s+\d+\.\s+', lines[i].strip()):
|
||||
nested_text = re.sub(r'^\s+\d+\.\s+', '', lines[i].strip())
|
||||
list_items.append(nested_text)
|
||||
i += 1
|
||||
|
||||
# CRITICAL: TEXT nodes must be wrapped in PARAGRAPH nodes within LIST_ITEM
|
||||
# NOTE: LIST_ITEM nodes do NOT have a data field per Wix API schema
|
||||
# Wix API: omit empty data objects, don't include them as {}
|
||||
list_node_items = []
|
||||
for item_text in list_items:
|
||||
item_node_id = str(uuid.uuid4())
|
||||
text_nodes = parse_markdown_inline(item_text)
|
||||
paragraph_node = {
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'PARAGRAPH',
|
||||
'nodes': text_nodes,
|
||||
'paragraphData': {}
|
||||
}
|
||||
list_item_node = {
|
||||
'id': item_node_id,
|
||||
'type': 'LIST_ITEM',
|
||||
'nodes': [paragraph_node]
|
||||
}
|
||||
list_node_items.append(list_item_node)
|
||||
|
||||
ordered_list_node = {
|
||||
'id': node_id,
|
||||
'type': 'ORDERED_LIST',
|
||||
'nodes': list_node_items,
|
||||
'orderedListData': {}
|
||||
}
|
||||
nodes.append(ordered_list_node)
|
||||
|
||||
# Check for images
|
||||
elif line.startswith('!['):
|
||||
img_match = re.match(r'!\[([^\]]*)\]\(([^)]+)\)', line)
|
||||
if img_match:
|
||||
alt_text = img_match.group(1)
|
||||
img_url = img_match.group(2)
|
||||
nodes.append({
|
||||
'id': node_id,
|
||||
'type': 'IMAGE',
|
||||
'nodes': [],
|
||||
'imageData': {
|
||||
'image': {
|
||||
'src': {'url': img_url},
|
||||
'altText': alt_text
|
||||
},
|
||||
'containerData': {
|
||||
'alignment': 'CENTER',
|
||||
'width': {'size': 'CONTENT'}
|
||||
}
|
||||
}
|
||||
})
|
||||
i += 1
|
||||
|
||||
# Regular paragraph
|
||||
else:
|
||||
# Collect consecutive non-empty lines as paragraph content
|
||||
para_lines = [line]
|
||||
i += 1
|
||||
while i < len(lines):
|
||||
next_line = lines[i].strip()
|
||||
if not next_line:
|
||||
break
|
||||
# Stop if next line is a special markdown element
|
||||
if (next_line.startswith('#') or
|
||||
next_line.startswith('- ') or
|
||||
next_line.startswith('* ') or
|
||||
next_line.startswith('>') or
|
||||
next_line.startswith('![') or
|
||||
re.match(r'^\d+\.\s+', next_line)):
|
||||
break
|
||||
para_lines.append(next_line)
|
||||
i += 1
|
||||
|
||||
para_text = ' '.join(para_lines)
|
||||
text_nodes = parse_markdown_inline(para_text)
|
||||
|
||||
# Only add paragraph if there are text nodes
|
||||
if text_nodes:
|
||||
paragraph_node = {
|
||||
'id': node_id,
|
||||
'type': 'PARAGRAPH',
|
||||
'nodes': text_nodes,
|
||||
'paragraphData': {}
|
||||
}
|
||||
nodes.append(paragraph_node)
|
||||
|
||||
# Ensure at least one node exists
|
||||
# Wix API: omit empty data objects, don't include them as {}
|
||||
if not nodes:
|
||||
fallback_paragraph = {
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'PARAGRAPH',
|
||||
'nodes': [{
|
||||
'id': str(uuid.uuid4()),
|
||||
'type': 'TEXT',
|
||||
'textData': {
|
||||
'text': content[:500] if content else "This is a post from ALwrity.",
|
||||
'decorations': []
|
||||
}
|
||||
}],
|
||||
'paragraphData': {}
|
||||
}
|
||||
nodes.append(fallback_paragraph)
|
||||
|
||||
return {
|
||||
'type': 'DOCUMENT',
|
||||
'id': str(uuid.uuid4()),
|
||||
'nodes': nodes,
|
||||
'metadata': { 'version': 1, 'id': str(uuid.uuid4()) },
|
||||
'metadata': {'version': 1, 'id': str(uuid.uuid4())},
|
||||
'documentStyle': {
|
||||
'paragraph': { 'decorations': [], 'nodeStyle': {}, 'lineHeight': '1.5' }
|
||||
'paragraph': {'decorations': [], 'nodeStyle': {}, 'lineHeight': '1.5'}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user