- Blog writer enhancements and bug fixes - Wix integration improvements - Frontend UI updates - GSC dashboard docs cleanup - Image studio assets - LinkedIn requirements file - Various dependency updates
582 lines
21 KiB
Python
582 lines
21 KiB
Python
import re
|
|
import uuid
|
|
from typing import Any, Dict, List
|
|
|
|
|
|
def parse_markdown_inline(text: str) -> List[Dict[str, Any]]:
    """
    Parse inline markdown formatting into a flat list of Ricos TEXT nodes.

    Handles: **bold**, *italic*, [links](url), `code`, ~~strikethrough~~
    (the single-tilde form ~strikethrough~ is accepted too), and nested
    combinations of bold, italic and strikethrough.

    Args:
        text: One logical line of markdown (no block-level syntax).

    Returns:
        A non-empty list of TEXT nodes. Each node carries the literal text in
        ``textData.text`` and its formatting in ``textData.decorations``.
        Empty input yields a single node with empty text.

    Notes:
        - Link text is emitted verbatim (it is not parsed for nested markup)
          and links are given ``target: 'BLANK'``.
        - Inline code is emitted as undecorated text; no CODE decoration is
          produced.
        - A marker without a matching closing marker is kept as literal text.
    """

    def make_text_node(value, decorations=None):
        # Single place that defines the TEXT node shape used by every branch.
        return {
            'id': str(uuid.uuid4()),
            'type': 'TEXT',
            'nodes': [],
            'textData': {
                'text': value,
                'decorations': [] if decorations is None else decorations,
            },
        }

    if not text:
        return [make_text_node('')]

    nodes = []
    pending = []  # literal characters not yet emitted as a node

    def flush_text():
        if pending:
            nodes.append(make_text_node(''.join(pending)))
            pending.clear()

    def emit_decorated(inner, decoration_type):
        # Parse the span recursively so nested markup survives, then stamp
        # the outer decoration on every resulting TEXT node (once per node).
        for node in parse_markdown_inline(inner):
            if node['type'] == 'TEXT':
                decs = list(node['textData'].get('decorations', []))
                already = any(
                    isinstance(d, dict) and d.get('type') == decoration_type
                    for d in decs
                )
                if not already:
                    decs.append({'type': decoration_type})
                node['textData']['decorations'] = decs
            nodes.append(node)

    length = len(text)
    i = 0
    while i < length:
        ch = text[i]

        # NOTE: pending text is flushed only after a closing marker has been
        # found, so an unmatched marker does not split plain text in two.

        # Bold **text**
        if text.startswith('**', i):
            end = text.find('**', i + 2)
            if end != -1:
                flush_text()
                emit_decorated(text[i + 2:end], 'BOLD')
                i = end + 2
                continue

        # Strikethrough ~~text~~ (GFM) or ~text~
        elif ch == '~':
            # Prefer the two-character marker; otherwise "~~x~~" would be
            # read as two empty single-tilde spans around plain "x".
            marker = '~~' if text.startswith('~~', i) else '~'
            inner_start = i + len(marker)
            end = text.find(marker, inner_start)
            if end != -1:
                flush_text()
                emit_decorated(text[inner_start:end], 'STRIKETHROUGH')
                i = end + len(marker)
                continue

        # Link [text](url)
        elif ch == '[':
            close = text.find(']', i)
            if close != -1 and close < length - 1 and text[close + 1] == '(':
                url_start = close + 2
                url_end = text.find(')', url_start)
                if url_end != -1:
                    flush_text()
                    link_decoration = {
                        'type': 'LINK',
                        'linkData': {
                            'link': {
                                'url': text[url_start:url_end],
                                'target': 'BLANK'
                            }
                        }
                    }
                    nodes.append(make_text_node(text[i + 1:close], [link_decoration]))
                    i = url_end + 1
                    continue

        # Inline code `text`
        elif ch == '`':
            end = text.find('`', i + 1)
            if end != -1:
                flush_text()
                # CODE is not emitted as a decoration; only the text is kept.
                nodes.append(make_text_node(text[i + 1:end]))
                i = end + 1
                continue

        # Italic *text* (a lone '*', i.e. not part of a '**' pair)
        elif (ch == '*'
              and (i == 0 or text[i - 1] != '*')
              and (i == length - 1 or text[i + 1] != '*')):
            end = text.find('*', i + 1)
            # The closing '*' must not itself be the start of a '**' marker.
            if end != -1 and (end == length - 1 or text[end + 1] != '*'):
                flush_text()
                emit_decorated(text[i + 1:end], 'ITALIC')
                i = end + 1
                continue

        # Regular character (also the fallback for unmatched markers)
        pending.append(ch)
        i += 1

    flush_text()

    # Defensive: never return an empty list for non-empty input.
    if not nodes:
        nodes.append(make_text_node(text))

    return nodes
|
|
|
|
|
|
def _make_code_block_node(code_text: str, language: str = '') -> Dict[str, Any]:
|
|
"""Create a Ricos CODE_BLOCK node."""
|
|
lines = code_text.split('\n')
|
|
text_nodes = []
|
|
for line in lines:
|
|
text_nodes.append({
|
|
'id': str(uuid.uuid4()),
|
|
'type': 'TEXT',
|
|
'nodes': [],
|
|
'textData': {'text': line, 'decorations': []}
|
|
})
|
|
|
|
return {
|
|
'id': str(uuid.uuid4()),
|
|
'type': 'CODE_BLOCK',
|
|
'nodes': text_nodes,
|
|
'codeBlockData': {
|
|
'language': language or 'text',
|
|
'textWrap': True
|
|
}
|
|
}
|
|
|
|
|
|
def _make_horizontal_rule_node() -> Dict[str, Any]:
|
|
"""Create a Ricos DIVIDER node."""
|
|
return {
|
|
'id': str(uuid.uuid4()),
|
|
'type': 'DIVIDER',
|
|
'nodes': [],
|
|
'dividerData': {
|
|
'type': 'LINE',
|
|
'lineStyle': {
|
|
'width': 'LARGE',
|
|
'alignment': 'CENTER'
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
def _parse_markdown_table(lines: List[str], start_idx: int) -> tuple:
|
|
"""
|
|
Parse a markdown table starting at start_idx.
|
|
Returns (table_rows, alignments, next_idx) where table_rows is a list of lists of cell text,
|
|
and alignments is a list of column alignments ('left', 'center', 'right', None).
|
|
|
|
Markdown tables look like:
|
|
| Header 1 | Header 2 |
|
|
|----------|----------|
|
|
| Cell 1 | Cell 2 |
|
|
|
|
Alignment is detected from the separator row:
|
|
|:--------|:--------:|--------:|
|
|
"""
|
|
rows = []
|
|
alignments = None
|
|
i = start_idx
|
|
|
|
while i < len(lines):
|
|
line = lines[i].strip()
|
|
if not line or '|' not in line:
|
|
break
|
|
|
|
cells = [cell.strip() for cell in line.strip('|').split('|')]
|
|
|
|
# Detect separator row (contains only dashes, colons, pipes, spaces)
|
|
if i > start_idx and all(
|
|
set(cell.strip()) <= set('-:| ') for cell in cells
|
|
):
|
|
alignments = []
|
|
for cell in cells:
|
|
cell = cell.strip()
|
|
if cell.startswith(':') and cell.endswith(':'):
|
|
alignments.append('center')
|
|
elif cell.endswith(':'):
|
|
alignments.append('right')
|
|
elif cell.startswith(':'):
|
|
alignments.append('left')
|
|
else:
|
|
alignments.append(None)
|
|
i += 1
|
|
continue
|
|
|
|
rows.append(cells)
|
|
i += 1
|
|
|
|
return rows, alignments or [None] * (len(rows[0]) if rows else 1), i
|
|
|
|
|
|
def _make_table_node(header_row: List[str], body_rows: List[List[str]], alignments: List) -> Dict[str, Any]:
    """
    Create a Ricos TABLE node with header and body rows, with formatting.

    Args:
        header_row: Cell texts of the first row; its TEXT nodes are bolded.
        body_rows: Cell texts of the remaining rows.
        alignments: Per-column alignment values; a truthy entry is copied
            into that column's cell style as ``textAlign``.

    Returns:
        A TABLE node whose children are TABLE_ROW nodes of TABLE_CELL nodes,
        each cell wrapping one PARAGRAPH of inline-parsed text.

    Rows shorter than the widest row are padded with empty cells so every
    TABLE_ROW has exactly ``tableData.cols`` cells.
    """
    all_rows = [header_row] + body_rows
    num_cols = max(len(row) for row in all_rows) if all_rows else 1

    table_rows = []
    for row_idx, row_cells in enumerate(all_rows):
        is_header = row_idx == 0
        # Pad ragged rows so the cell count matches the reported column count.
        padded_cells = list(row_cells) + [''] * (num_cols - len(row_cells))

        cell_nodes = []
        for col_idx, cell_text in enumerate(padded_cells):
            text_nodes = parse_markdown_inline(cell_text)

            # Bold header row cells (without duplicating an existing BOLD).
            if is_header and text_nodes:
                for node in text_nodes:
                    if node.get('type') != 'TEXT':
                        continue
                    decs = node['textData'].get('decorations', [])
                    has_bold = any(
                        isinstance(d, dict) and d.get('type') == 'BOLD'
                        for d in decs
                    )
                    if not has_bold:
                        node['textData']['decorations'] = list(decs) + [{'type': 'BOLD'}]

            if not text_nodes:
                # Fallback so the paragraph always has a TEXT child.
                text_nodes = [{
                    'id': str(uuid.uuid4()),
                    'type': 'TEXT',
                    'nodes': [],
                    'textData': {'text': cell_text or ' ', 'decorations': []}
                }]

            paragraph_node = {
                'id': str(uuid.uuid4()),
                'type': 'PARAGRAPH',
                'nodes': text_nodes,
            }

            cell_style = {'verticalAlign': 'top'}
            if is_header:
                cell_style['borderWidth'] = {'top': 2, 'bottom': 1, 'left': 1, 'right': 1}
            # Apply column alignment
            if alignments and col_idx < len(alignments) and alignments[col_idx]:
                cell_style['textAlign'] = alignments[col_idx]

            cell_nodes.append({
                'id': str(uuid.uuid4()),
                'type': 'TABLE_CELL',
                'nodes': [paragraph_node],
                'tableCellData': {'style': cell_style},
            })

        table_rows.append({
            'id': str(uuid.uuid4()),
            'type': 'TABLE_ROW',
            'nodes': cell_nodes,
        })

    return {
        'id': str(uuid.uuid4()),
        'type': 'TABLE',
        'nodes': table_rows,
        'tableData': {
            'cols': num_cols,
            'rows': len(table_rows),
            'headerRow': 0 if header_row else -1,
        },
    }
|
|
|
|
|
|
def convert_content_to_ricos(content: str, images: List[str] = None) -> Dict[str, Any]:
    """
    Convert markdown content into a Ricos document dict (``{'nodes': [...]}``).

    Supports:
    - Headings (# to ######)
    - Paragraphs with inline formatting
    - Unordered lists (-, *), including task items ([ ] / [x])
    - Ordered lists (1., 2.)
    - Blockquotes (>)
    - Code blocks (```language ... ```)
    - Standalone images (![alt](url))
    - Horizontal rules (---, ***, ___)
    - Tables (| Header | Header |)

    Args:
        content: Markdown source. When empty, a default placeholder sentence
            is converted instead.
        images: Accepted for interface compatibility; not read here.

    Returns:
        A dict with a single ``'nodes'`` key holding at least one block node.

    Notes:
        - A list marker must be followed by whitespace. Lines such as
          ``*italic* text`` or ``-5 degrees`` are paragraphs, not list items.
        - Indented (nested) list items are flattened into the enclosing list.
        - An unterminated code fence consumes the rest of the document.
    """
    if not content:
        content = "This is a post from ALwrity."

    hr_re = re.compile(r'^(---+|\*\*\*+|___+)$')
    bullet_re = re.compile(r'^[-*]\s+')
    ordered_re = re.compile(r'^\d+\.\s+')
    task_re = re.compile(r'^\[([ xX])\]\s*(.*)')
    image_re = re.compile(r'!\[([^\]]*)\]\(([^)]+)\)')

    def make_paragraph(text):
        # PARAGRAPH wrapper around inline-parsed text.
        return {
            'id': str(uuid.uuid4()),
            'type': 'PARAGRAPH',
            'nodes': parse_markdown_inline(text),
        }

    def make_list(list_type, item_texts):
        # BULLETED_LIST / ORDERED_LIST with one LIST_ITEM per text.
        items = [
            {
                'id': str(uuid.uuid4()),
                'type': 'LIST_ITEM',
                'nodes': [make_paragraph(item_text)]
            }
            for item_text in item_texts
        ]
        return {
            'id': str(uuid.uuid4()),
            'type': list_type,
            'nodes': items,
        }

    nodes = []
    lines = content.split('\n')
    total = len(lines)
    i = 0

    while i < total:
        stripped = lines[i].strip()

        if not stripped:
            i += 1
            continue

        # Code blocks (```language ... ```)
        if stripped.startswith('```'):
            language = stripped[3:].strip() or ''
            code_lines = []
            i += 1
            while i < total:
                if lines[i].strip() == '```':
                    i += 1
                    break
                code_lines.append(lines[i])
                i += 1
            code_text = '\n'.join(code_lines)
            # Whitespace-only fences are dropped rather than emitted empty.
            if code_text.strip():
                nodes.append(_make_code_block_node(code_text, language))
            continue

        # Horizontal rules (checked before lists so '***' is not a bullet)
        if hr_re.match(stripped):
            nodes.append(_make_horizontal_rule_node())
            i += 1
            continue

        # Markdown tables (lines starting with |, followed by another | line)
        if stripped.startswith('|') and i + 1 < total and '|' in lines[i + 1]:
            table_rows, alignments, next_idx = _parse_markdown_table(lines, i)
            if table_rows:
                nodes.append(_make_table_node(table_rows[0], table_rows[1:], alignments))
                i = next_idx
                continue

        # Headings
        if stripped.startswith('#'):
            level = len(stripped) - len(stripped.lstrip('#'))
            heading_text = stripped.lstrip('# ').strip()
            nodes.append({
                'id': str(uuid.uuid4()),
                'type': 'HEADING',
                'nodes': parse_markdown_inline(heading_text),
                'headingData': {'level': min(level, 6)}
            })
            i += 1
            continue

        # Blockquotes: consecutive '>' lines are joined into one paragraph
        if stripped.startswith('>'):
            quote_lines = []
            while i < total and lines[i].strip().startswith('>'):
                quote_lines.append(lines[i].strip().lstrip('> ').strip())
                i += 1
            nodes.append({
                'id': str(uuid.uuid4()),
                'type': 'BLOCKQUOTE',
                'nodes': [make_paragraph(' '.join(quote_lines))],
            })
            continue

        # Unordered lists (including task lists)
        if bullet_re.match(stripped):
            item_texts = []
            # Lines are stripped before matching, so indented (nested) items
            # are collected by this same loop and end up in the flat list.
            while i < total:
                candidate = lines[i].strip()
                if not bullet_re.match(candidate):
                    break
                item_text = bullet_re.sub('', candidate, count=1)
                # Task list items: "[ ] task" or "[x] task" become a checkbox glyph.
                task_match = task_re.match(item_text)
                if task_match:
                    checked = task_match.group(1).lower() == 'x'
                    prefix = '☑ ' if checked else '☐ '
                    item_text = prefix + task_match.group(2)
                item_texts.append(item_text)
                i += 1
            nodes.append(make_list('BULLETED_LIST', item_texts))
            continue

        # Ordered lists (indented items are matched after stripping, too)
        if ordered_re.match(stripped):
            item_texts = []
            while i < total and ordered_re.match(lines[i].strip()):
                item_texts.append(ordered_re.sub('', lines[i].strip(), count=1))
                i += 1
            nodes.append(make_list('ORDERED_LIST', item_texts))
            continue

        # Images (a line that starts with an image; otherwise falls through)
        if stripped.startswith('!['):
            img_match = image_re.match(stripped)
            if img_match:
                nodes.append({
                    'id': str(uuid.uuid4()),
                    'type': 'IMAGE',
                    'nodes': [],
                    'imageData': {
                        'image': {
                            'src': {'url': img_match.group(2)},
                            'altText': img_match.group(1)
                        },
                        'containerData': {
                            'alignment': 'CENTER',
                            'width': {'size': 'CONTENT'}
                        }
                    }
                })
                i += 1
                continue

        # Regular paragraph: join following lines until a blank line or the
        # start of another block element.
        para_lines = [stripped]
        i += 1
        while i < total:
            candidate = lines[i].strip()
            if not candidate:
                break
            if (candidate.startswith(('#', '>', '![', '```', '|'))
                    or bullet_re.match(candidate)
                    or hr_re.match(candidate)
                    or ordered_re.match(candidate)):
                break
            para_lines.append(candidate)
            i += 1

        text_nodes = parse_markdown_inline(' '.join(para_lines))
        if text_nodes:
            nodes.append({
                'id': str(uuid.uuid4()),
                'type': 'PARAGRAPH',
                'nodes': text_nodes,
            })

    # Ensure at least one node exists
    if not nodes:
        nodes.append({
            'id': str(uuid.uuid4()),
            'type': 'PARAGRAPH',
            'nodes': [{
                'id': str(uuid.uuid4()),
                'type': 'TEXT',
                'nodes': [],
                'textData': {
                    'text': content[:500] if content else "This is a post from ALwrity.",
                    'decorations': []
                }
            }],
        })

    return {'nodes': nodes}
|