# ALwrity/backend/services/integrations/wix/content.py

import re
import uuid
from typing import Any, Dict, List


def _inline_text_node(text: str) -> Dict[str, Any]:
    """Build an undecorated Ricos TEXT node with a fresh id."""
    return {
        'id': str(uuid.uuid4()),
        'type': 'TEXT',
        'nodes': [],
        'textData': {'text': text, 'decorations': []},
    }


def _inline_decorate(nodes: List[Dict[str, Any]], decoration_type: str) -> List[Dict[str, Any]]:
    """Add ``{'type': decoration_type}`` to every TEXT node that does not carry it yet."""
    for node in nodes:
        if node['type'] != 'TEXT':
            continue
        decs = node['textData'].get('decorations', []).copy()
        if not any(d.get('type') == decoration_type for d in decs if isinstance(d, dict)):
            decs.append({'type': decoration_type})
        node['textData']['decorations'] = decs
    return nodes


def parse_markdown_inline(text: str) -> List[Dict[str, Any]]:
    """
    Parse inline markdown formatting into a flat list of Ricos TEXT nodes.

    Handles: **bold**, *italic*, ***bold italic***, [links](url), `code`
    (including multi-backtick spans such as ``code``), ~strikethrough~ and
    ~~strikethrough~~, plus nested combinations. Link text is parsed
    recursively, so ``[**text**](url)`` yields a node that is both BOLD and LINK.

    Args:
        text: A single line / run of inline markdown.

    Returns:
        A non-empty list of TEXT nodes. Empty input yields one node with
        empty text. Markers without a matching closer, and delimiter pairs
        with nothing between them (e.g. ``****``), are kept as literal text
        instead of producing empty nodes.
    """
    if not text:
        return [_inline_text_node('')]

    nodes: List[Dict[str, Any]] = []
    plain: List[str] = []  # literal characters collected since the last emitted node
    n = len(text)
    i = 0

    def flush_plain() -> None:
        if plain:
            nodes.append(_inline_text_node(''.join(plain)))
            plain.clear()

    while i < n:
        ch = text[i]
        # (span_nodes, index_after_span) once a complete construct is recognised at i
        matched = None

        # Bold **text**
        if text.startswith('**', i):
            end = text.find('**', i + 2)
            if end != -1 and text.startswith('***', i):
                # For ***x*** close on the LAST '**' of the asterisk run so the
                # inner '*x*' is left intact and parsed as italic.
                while end + 2 < n and text[end + 2] == '*':
                    end += 1
            if end > i + 2:  # rejects both "not found" (-1) and empty content
                inner = parse_markdown_inline(text[i + 2:end])
                matched = (_inline_decorate(inner, 'BOLD'), end + 2)

        # Strikethrough ~~text~~ (GFM) or ~text~
        elif ch == '~':
            for marker in ('~~', '~'):
                if not text.startswith(marker, i):
                    continue
                start = i + len(marker)
                end = text.find(marker, start)
                if end > start:
                    inner = parse_markdown_inline(text[start:end])
                    matched = (_inline_decorate(inner, 'STRIKETHROUGH'), end + len(marker))
                    break

        # Link [text](url)
        elif ch == '[':
            close = text.find(']', i)
            if close != -1 and close + 1 < n and text[close + 1] == '(':
                url_end = text.find(')', close + 2)
                if url_end != -1:
                    url = text[close + 2:url_end]
                    link_nodes = parse_markdown_inline(text[i + 1:close])
                    for node in link_nodes:
                        # A fresh decoration dict per node avoids shared nested state.
                        node['textData']['decorations'] = node['textData']['decorations'] + [{
                            'type': 'LINK',
                            'linkData': {
                                'link': {
                                    'url': url,
                                    'target': 'BLANK'
                                }
                            }
                        }]
                    matched = (link_nodes, url_end + 1)

        # Inline code `text` -- the closer must be a backtick run of the same length
        elif ch == '`':
            run = 1
            while i + run < n and text[i + run] == '`':
                run += 1
            end = text.find('`' * run, i + run)
            if end != -1:
                # CODE is not emitted as a decoration; the span's text is kept
                # verbatim (its content is deliberately not parsed further).
                matched = ([_inline_text_node(text[i + run:end])], end + run)

        # Italic *text* (a lone asterisk; '**' was handled above)
        elif ch == '*' and (i == 0 or text[i - 1] != '*') and (i == n - 1 or text[i + 1] != '*'):
            end = text.find('*', i + 1)
            # The closer must not be the first half of a '**' marker.
            if end != -1 and (end == n - 1 or text[end + 1] != '*'):
                inner = parse_markdown_inline(text[i + 1:end])
                matched = (_inline_decorate(inner, 'ITALIC'), end + 1)

        if matched is not None:
            # Flush only once a construct is confirmed, so an unmatched marker
            # does not split the surrounding plain text into several nodes.
            flush_plain()
            span_nodes, i = matched
            nodes.extend(span_nodes)
            continue

        # Regular character (including markers that failed to match)
        plain.append(ch)
        i += 1

    flush_plain()

    # Defensive: non-empty input always yields at least one node above.
    return nodes or [_inline_text_node(text)]


def _make_code_block_node(code_text: str, language: str = '') -> Dict[str, Any]:
    """Create a Ricos CODE_BLOCK node."""
    lines = code_text.split('\n')
    text_nodes = []
    for line in lines:
        text_nodes.append({
            'id': str(uuid.uuid4()),
            'type': 'TEXT',
            'nodes': [],
            'textData': {'text': line, 'decorations': []}
        })

    return {
        'id': str(uuid.uuid4()),
        'type': 'CODE_BLOCK',
        'nodes': text_nodes,
        'codeBlockData': {
            'language': language or 'text',
            'textWrap': True
        }
    }


def _make_horizontal_rule_node() -> Dict[str, Any]:
    """Create a Ricos DIVIDER node."""
    return {
        'id': str(uuid.uuid4()),
        'type': 'DIVIDER',
        'nodes': [],
        'dividerData': {
            'type': 'LINE',
            'lineStyle': {
                'width': 'LARGE',
                'alignment': 'CENTER'
            }
        }
    }


def _parse_markdown_table(lines: List[str], start_idx: int) -> tuple:
    """
    Parse a markdown table starting at start_idx.
    Returns (table_rows, alignments, next_idx) where table_rows is a list of lists of cell text,
    and alignments is a list of column alignments ('left', 'center', 'right', None).

    Markdown tables look like:
    | Header 1 | Header 2 |
    |----------|----------|
    | Cell 1   | Cell 2   |

    Alignment is detected from the separator row:
    |:--------|:--------:|--------:|
    """
    rows = []
    alignments = None
    i = start_idx

    while i < len(lines):
        line = lines[i].strip()
        if not line or '|' not in line:
            break

        cells = [cell.strip() for cell in line.strip('|').split('|')]

        # Detect separator row (contains only dashes, colons, pipes, spaces)
        if i > start_idx and all(
            set(cell.strip()) <= set('-:| ') for cell in cells
        ):
            alignments = []
            for cell in cells:
                cell = cell.strip()
                if cell.startswith(':') and cell.endswith(':'):
                    alignments.append('center')
                elif cell.endswith(':'):
                    alignments.append('right')
                elif cell.startswith(':'):
                    alignments.append('left')
                else:
                    alignments.append(None)
            i += 1
            continue

        rows.append(cells)
        i += 1

    return rows, alignments or [None] * (len(rows[0]) if rows else 1), i


def _make_table_node(header_row: List[str], body_rows: List[List[str]], alignments: List) -> Dict[str, Any]:
    """
    Create a Ricos TABLE node from a header row and body rows.

    Args:
        header_row: Cell texts of the first row; its text nodes are made BOLD
            and its cells get a heavier top border.
        body_rows: Remaining rows as lists of cell text (None is treated as
            no body rows).
        alignments: Per-column alignment values ('left', 'center', 'right' or
            None); a truthy entry is copied into the cell style as 'textAlign'.

    Returns:
        A TABLE node whose TABLE_ROW children each contain one TABLE_CELL per
        column. Rows shorter than the widest row are padded with empty cells
        so every row matches the reported 'cols' count.
    """
    all_rows = [header_row] + list(body_rows or [])
    num_cols = max(len(row) for row in all_rows) if all_rows else 1

    table_rows = []
    for row_idx, row_cells in enumerate(all_rows):
        is_header = row_idx == 0
        # Pad ragged rows; a copy is used so the caller's list is not mutated.
        padded_cells = list(row_cells) + [''] * (num_cols - len(row_cells))

        cell_nodes = []
        for col_idx, cell_text in enumerate(padded_cells):
            text_nodes = parse_markdown_inline(cell_text)

            # Bold header row cells (unless the markdown already made them bold)
            if is_header and text_nodes:
                for node in text_nodes:
                    if node.get('type') == 'TEXT':
                        decs = node['textData'].get('decorations', [])
                        if not any(d.get('type') == 'BOLD' for d in decs if isinstance(d, dict)):
                            decs_copy = decs.copy()
                            decs_copy.append({'type': 'BOLD'})
                            node['textData']['decorations'] = decs_copy

            paragraph_node = {
                'id': str(uuid.uuid4()),
                'type': 'PARAGRAPH',
                # Fallback only applies if the inline parser returns nothing.
                'nodes': text_nodes if text_nodes else [{
                    'id': str(uuid.uuid4()),
                    'type': 'TEXT',
                    'nodes': [],
                    'textData': {'text': cell_text or ' ', 'decorations': []}
                }],
            }

            cell_style = {'verticalAlign': 'top'}
            if is_header:
                cell_style['borderWidth'] = {'top': 2, 'bottom': 1, 'left': 1, 'right': 1}
            # Apply column alignment
            if alignments and col_idx < len(alignments) and alignments[col_idx]:
                cell_style['textAlign'] = alignments[col_idx]

            cell_nodes.append({
                'id': str(uuid.uuid4()),
                'type': 'TABLE_CELL',
                'nodes': [paragraph_node],
                'tableCellData': {'style': cell_style},
            })

        table_rows.append({
            'id': str(uuid.uuid4()),
            'type': 'TABLE_ROW',
            'nodes': cell_nodes,
        })

    return {
        'id': str(uuid.uuid4()),
        'type': 'TABLE',
        'nodes': table_rows,
        'tableData': {
            'cols': num_cols,
            'rows': len(table_rows),
            'headerRow': 0 if header_row else -1,
        },
    }


# Block-level patterns, compiled once because they are tested for every line.
_RICOS_HR_RE = re.compile(r'^(---+|\*\*\*+|___+)$')
_RICOS_ORDERED_RE = re.compile(r'^\d+\.\s+')
_RICOS_IMAGE_RE = re.compile(r'!\[([^\]]*)\]\(([^)]+)\)')
# "[ ] task" / "[x] task"; the lookahead keeps "[x](url)" links from being
# mistaken for a checked checkbox.
_RICOS_TASK_RE = re.compile(r'^\[([ xX])\](?!\()\s*(.*)')


def _ricos_bullet_text(stripped: str):
    """Return the item text if *stripped* is a bullet line ('- x', '* x', '-x', '*x'), else None."""
    if len(stripped) < 2 or stripped[0] not in '-*':
        return None
    marker, second = stripped[0], stripped[1]
    if second == ' ':
        return stripped[2:].strip()
    if second == marker:
        # '--' / '**' open a rule or bold text, never a bullet.
        return None
    if marker == '*' and '*' in stripped[1:]:
        # '*emphasis* ...' is inline italic at the start of a paragraph.
        return None
    # Lenient form without a space after the marker.
    return stripped[1:].strip()


def _ricos_paragraph_node(text: str) -> Dict[str, Any]:
    """Wrap inline-parsed *text* in a PARAGRAPH node."""
    return {
        'id': str(uuid.uuid4()),
        'type': 'PARAGRAPH',
        'nodes': parse_markdown_inline(text),
    }


def _ricos_list_node(list_type: str, item_texts: List[str]) -> Dict[str, Any]:
    """Build a list node of *list_type* with one LIST_ITEM (holding a PARAGRAPH) per text."""
    return {
        'id': str(uuid.uuid4()),
        'type': list_type,
        'nodes': [
            {
                'id': str(uuid.uuid4()),
                'type': 'LIST_ITEM',
                'nodes': [_ricos_paragraph_node(item_text)],
            }
            for item_text in item_texts
        ],
    }


def convert_content_to_ricos(content: str, images: List[str] = None) -> Dict[str, Any]:
    """
    Convert markdown content into a Ricos document dict of the form {'nodes': [...]}.

    Supports:
    - Headings (# to ######)
    - Paragraphs with inline formatting
    - Unordered lists (-, *), including task items ([ ] / [x])
    - Ordered lists (1., 2.)
    - Blockquotes (>)
    - Code blocks (```language ... ```)
    - Images (![alt](url)) at the start of a line
    - Horizontal rules (---, ***, ___)
    - Tables (| Header | Header |)

    Args:
        content: Markdown source. Empty/None content is replaced by the
            placeholder text "This is a post from ALwrity.".
        images: Accepted for interface compatibility; not used by this function.

    Returns:
        A dict with a single 'nodes' key holding at least one block node.
        Indented (nested) list items are flattened into their parent list.
    """
    if not content:
        content = "This is a post from ALwrity."

    nodes = []
    # Normalise CRLF so code-block lines do not carry a trailing '\r'.
    lines = content.replace('\r\n', '\n').split('\n')
    total = len(lines)
    i = 0

    while i < total:
        stripped = lines[i].strip()

        if not stripped:
            i += 1
            continue

        node_id = str(uuid.uuid4())

        # Code blocks (```language ... ```); an unclosed fence runs to the end
        if stripped.startswith('```'):
            language = stripped[3:].strip()
            code_lines = []
            i += 1
            while i < total:
                if lines[i].strip() == '```':
                    i += 1
                    break
                code_lines.append(lines[i])
                i += 1
            code_text = '\n'.join(code_lines)
            if code_text.strip():
                nodes.append(_make_code_block_node(code_text, language))
            continue

        # Horizontal rules
        if _RICOS_HR_RE.match(stripped):
            nodes.append(_make_horizontal_rule_node())
            i += 1
            continue

        # Markdown tables (line starting with | followed by another pipe line)
        if stripped.startswith('|') and i + 1 < total and '|' in lines[i + 1]:
            table_rows, alignments, next_idx = _parse_markdown_table(lines, i)
            if table_rows:
                nodes.append(_make_table_node(table_rows[0], table_rows[1:], alignments))
                i = next_idx
                continue

        # Headings
        if stripped.startswith('#'):
            level = len(stripped) - len(stripped.lstrip('#'))
            # Slice off only the marker: lstrip('# ') would also eat a '#'
            # that belongs to the heading text (e.g. "# #1 tip").
            heading_text = stripped[level:].strip()
            nodes.append({
                'id': node_id,
                'type': 'HEADING',
                'nodes': parse_markdown_inline(heading_text),
                'headingData': {'level': min(level, 6)}
            })
            i += 1
            continue

        # Blockquotes: consecutive '>' lines are joined into one paragraph
        if stripped.startswith('>'):
            quote_lines = [stripped.lstrip('> ').strip()]
            i += 1
            while i < total and lines[i].strip().startswith('>'):
                quote_lines.append(lines[i].strip().lstrip('> ').strip())
                i += 1
            nodes.append({
                'id': node_id,
                'type': 'BLOCKQUOTE',
                'nodes': [_ricos_paragraph_node(' '.join(quote_lines))],
            })
            continue

        # Unordered lists (including task lists)
        if _ricos_bullet_text(stripped) is not None:
            item_texts = []
            # Lines are stripped before matching, so indented (nested) bullets
            # are collected here too and flattened into the same list.
            while i < total:
                item_text = _ricos_bullet_text(lines[i].strip())
                if item_text is None:
                    break
                task_match = _RICOS_TASK_RE.match(item_text)
                if task_match:
                    checked = task_match.group(1).lower() == 'x'
                    item_text = ('☑ ' if checked else '☐ ') + task_match.group(2)
                item_texts.append(item_text)
                i += 1
            nodes.append(_ricos_list_node('BULLETED_LIST', item_texts))
            continue

        # Ordered lists (indented items are flattened as well)
        if _RICOS_ORDERED_RE.match(stripped):
            item_texts = []
            while i < total:
                candidate = lines[i].strip()
                marker = _RICOS_ORDERED_RE.match(candidate)
                if not marker:
                    break
                item_texts.append(candidate[marker.end():])
                i += 1
            nodes.append(_ricos_list_node('ORDERED_LIST', item_texts))
            continue

        # Images: one or more ![alt](url) at the start of the line
        if stripped.startswith('!['):
            rest = stripped
            image_found = False
            while True:
                img_match = _RICOS_IMAGE_RE.match(rest)
                if not img_match:
                    break
                image_found = True
                nodes.append({
                    'id': str(uuid.uuid4()),
                    'type': 'IMAGE',
                    'nodes': [],
                    'imageData': {
                        'image': {
                            'src': {'url': img_match.group(2)},
                            'altText': img_match.group(1)
                        },
                        'containerData': {
                            'alignment': 'CENTER',
                            'width': {'size': 'CONTENT'}
                        }
                    }
                })
                rest = rest[img_match.end():].strip()
            if image_found:
                # Text following the image(s) is kept as its own paragraph.
                if rest:
                    nodes.append(_ricos_paragraph_node(rest))
                i += 1
                continue
            # Malformed image syntax: fall through and keep the line as text
            # instead of silently dropping it.

        # Regular paragraph: join following lines until a blank line or a
        # line that starts another block element.
        para_lines = [stripped]
        i += 1
        while i < total:
            next_line = lines[i].strip()
            if not next_line:
                break
            if (next_line.startswith(('#', '- ', '* ', '>', '![', '```', '|')) or
                    _RICOS_HR_RE.match(next_line) or
                    _RICOS_ORDERED_RE.match(next_line)):
                break
            para_lines.append(next_line)
            i += 1

        text_nodes = parse_markdown_inline(' '.join(para_lines))
        if text_nodes:
            nodes.append({
                'id': node_id,
                'type': 'PARAGRAPH',
                'nodes': text_nodes,
            })

    # Ensure at least one node exists (e.g. whitespace-only content)
    if not nodes:
        nodes.append({
            'id': str(uuid.uuid4()),
            'type': 'PARAGRAPH',
            'nodes': [{
                'id': str(uuid.uuid4()),
                'type': 'TEXT',
                'nodes': [],
                'textData': {
                    'text': content[:500] if content else "This is a post from ALwrity.",
                    'decorations': []
                }
            }],
        })

    return {'nodes': nodes}