# dealplustech-astroreal/scripts/inject_product_schema.py

"""
Inject `product={...}` prop into <BaseLayout> calls in product pages.

For pages that ALREADY have a Product JSON-LD block, this script also
removes that block (since the layout's `product` prop replaces it).

Run from project root:  python3 scripts/inject_product_schema.py
"""
import os
import re
from pathlib import Path

# Project root: two directory levels above this script's resolved location.
ROOT = Path(__file__).resolve().parent.parent
# Directory whose top-level *.astro files are scanned by main().
PAGES = ROOT / "src" / "pages"

# Map page slug -> brand name for product schema.
# The slug is the page's file name without the .astro extension (path.stem).
# Anything not listed falls back to "ดีล พลัส เทค"
BRAND_MAP = {
    "ท่อ-ppr-thai-ppr": "Thai PPR",
    "ท่อ-ppr-scg": "SCG",
    "ท่อ-hdpe": "HDPE",
    "ท่อ-upvc": "UPVC",
    "ท่อ-syler": "Syler",
    "ท่อ-xy-lent": "XYLENT",
    "เครื่องเชื่อม-hdpe": "HDPE",
    "เครื่องเชื่อม-ppr": "PPR",
    "pipe-coupling": "SMC",
    "เม็กกรู๊ฟ-คับปลิ้ง": "MECH",
    "วาล์ว-valve": "Generic",
    "water-pump": "Generic",
    "water-treatment": "Generic",
    "realflex": "Realflex",
    "armflex": "Armacell",
    "aeroflex": "Aerocell",
    "maxflex": "Maxflex",
    "เทอร์โมเบรค-thermobreak": "Thermobreak",
    "หัวจ่าย-ball-jet": "SAPA",
    "grilles": "Generic",
    "durgo-avvs": "DURGO",
    "ตู้ดับเพลิง": "Generic",
    "รั้วเทวดา": "Tevada",
    "ระบบรั้วไวน์แมน": "Vineman",
}

# File names (with extension) of pages that are NOT products; main() skips
# any page whose name appears here.
SKIP_PAGES = {
    "index.astro", "all-products.astro", "about-us.astro", "contact-us.astro",
    "portfolio.astro", "privacy-policy.astro", "terms-and-conditions.astro",
    "ระบบน้ำ.astro",  # category overview, not a product
}

# Matches an <img> tag whose double-quoted src starts with "/images/" and
# captures that path. Single-quoted or expression-valued src attributes and
# any src outside /images/ (e.g. data URIs) do not match.
IMG_RE = re.compile(r'<img[^>]*\bsrc="(/images/[^"]+)"')


# Per-character replacements applied by escape_for_jsx() in a single pass:
# backslash and single quote get a leading backslash, line breaks become spaces.
_JSX_ESCAPES = str.maketrans({
    "\\": "\\\\",
    "'": "\\'",
    "\n": " ",
    "\r": " ",
})


def escape_for_jsx(value: str) -> str:
    """Return *value* escaped for use inside a single-quoted JSX/JS string.

    Backslashes are doubled, single quotes are backslash-escaped, and every
    ``\\n`` or ``\\r`` character is replaced by one space (so a CRLF pair
    yields two spaces). All other characters pass through unchanged.
    """
    return value.translate(_JSX_ESCAPES)


def jsx_string(value: str) -> str:
    """Return *value* as a single-quoted string literal, escaped via escape_for_jsx()."""
    return f"'{escape_for_jsx(value)}'"


def first_image_src(content: str) -> str | None:
    """Return the path captured by the first IMG_RE match in *content*.

    IMG_RE only accepts ``<img>`` tags whose double-quoted ``src`` begins with
    ``/images/``, so data URIs and other sources are never returned.
    Returns ``None`` when no such tag exists.
    """
    found = IMG_RE.search(content)
    if found is None:
        return None
    return found.group(1)


def remove_existing_product_jsonld(content: str) -> str:
    """Return *content* with every standalone Product JSON-LD script removed.

    A block is removed when it is a ``<script type="application/ld+json">``
    element whose JSON object declares ``"@type": "Product"`` before any
    nested brace. The match runs up to the first ``}`` that is followed
    (ignoring whitespace) by ``</script>``, and whitespace trailing the
    closing tag is removed as well. Everything else is left as is.
    """
    product_jsonld = (
        r'<script[^>]*type="application/ld\+json"[^>]*>\s*'
        r'\{[^{}]*"@type"\s*:\s*"Product"[\s\S]*?'
        r'\}\s*</script>\s*'
    )
    return re.sub(product_jsonld, '', content, flags=re.MULTILINE)


def extract_title_and_desc(content: str) -> tuple[str | None, str | None]:
    """Parse <BaseLayout title="..." description="..."> attrs (single or double quoted)."""
    # Title may contain " - ดีล พลัส เทค" suffix
    title_m = re.search(r'title=(?:"([^"]+)"|\'([^\']+)\')', content)
    desc_m = re.search(r'description=(?:"([^"]+)"|\'([^\']+)\')', content)
    title = (title_m.group(1) or title_m.group(2)) if title_m else None
    desc = (desc_m.group(1) or desc_m.group(2)) if desc_m else None
    return title, desc


def _find_tag_close(content: str, start: int) -> int | None:
    """Return the index of the ``>`` ending the tag scanned from *start*.

    ``>`` characters inside quoted attribute values (``"``, ``'`` or backtick)
    or inside ``{...}`` expressions are skipped. Returns ``None`` when the
    end of *content* is reached without finding a closing ``>``.
    """
    depth = 0  # nesting level of {...} expressions
    quote = ""  # active quote character, "" while outside a string
    skip_next = False
    for pos in range(start, len(content)):
        ch = content[pos]
        if skip_next:
            skip_next = False
        elif quote:
            # Backslash escapes exist only in JS strings (inside braces);
            # plain quoted attribute values have no escape syntax.
            if ch == "\\" and depth:
                skip_next = True
            elif ch == quote:
                quote = ""
        elif ch in "\"'`":
            quote = ch
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth = max(depth - 1, 0)
        elif ch == ">" and depth == 0:
            return pos
    return None


def inject_product_prop(content: str, image: str, title: str, slug: str) -> str:
    """Add a ``product={{...}}`` prop to the FIRST ``<BaseLayout>`` tag.

    Args:
        content: Full text of the page.
        image: Image path stored in the prop's ``image`` field.
        title: Text stored in the prop's ``name`` field.
        slug: Page slug used to look up the brand in BRAND_MAP; unknown slugs
            fall back to "ดีล พลัส เทค".

    Returns:
        The updated text. *content* is returned unchanged when there is no
        ``<BaseLayout>`` tag, when the tag's closing ``>`` cannot be located,
        or when the tag already carries a ``product={`` prop (so re-running
        the script does not add a duplicate).
    """
    brand = BRAND_MAP.get(slug, "ดีล พลัส เทค")
    prop_block = (
        "\n  product={{\n"
        f"    name: {jsx_string(title)},\n"
        f"    image: {jsx_string(image)},\n"
        f"    brand: {jsx_string(brand)},\n"
        "  }}"
    )

    tag = re.search(r'<BaseLayout\b', content)
    if not tag:
        return content  # No <BaseLayout> tag — leave file untouched.

    # Locate the real end of the tag; a plain "[^>]*>" would stop at a ">"
    # inside a quoted value or a {...} expression.
    close = _find_tag_close(content, tag.end())
    if close is None:
        return content  # Unterminated tag — safer not to guess.

    attrs = content[tag.end():close]
    if re.search(r'(?<![\w-])product=\{', attrs):
        return content  # Already injected on a previous run.

    if content[close - 1] == "/":
        # Self-closing tag: the prop must go before "/>", not between "/" and ">".
        slash = close - 1
        return content[:slash].rstrip() + prop_block + " " + content[slash:]

    return content[:close] + prop_block + content[close:]


def process_file(path: Path) -> str:
    """Inject the product prop into one page file and return a status line.

    The file is read as UTF-8. The page is skipped when no title attribute or
    no ``/images/`` image is found. Otherwise the product prop is injected
    and, only if that injection changed the text, an existing Product JSON-LD
    ``<script>`` block is stripped before the file is written back.

    Args:
        path: The ``.astro`` page to process; its stem is used as the slug.

    Returns:
        One of ``SKIP (no title): <name>``, ``SKIP (no image): <name>``,
        ``NO-CHANGE: <name>``, ``OK (injected): <name>`` or
        ``OK (migrated): <name>``. "migrated" is reported only when a JSON-LD
        block was actually removed.
    """
    original = path.read_text(encoding='utf-8')

    title, _ = extract_title_and_desc(original)
    if not title:
        return f"SKIP (no title): {path.name}"

    image = first_image_src(original)
    if not image:
        return f"SKIP (no image): {path.name}"

    # path.stem is the page slug, e.g. "ท่อ-hdpe"
    injected = inject_product_prop(original, image, title, path.stem)
    if injected == original:
        # Nothing was injected, so any existing Product JSON-LD may be the
        # page's only product schema; stripping it here would lose data.
        return f"NO-CHANGE: {path.name}"

    updated = injected
    if '"Product"' in original and '<script type="application/ld+json"' in original:
        updated = remove_existing_product_jsonld(injected)

    path.write_text(updated, encoding='utf-8')
    # Report "migrated" only if the removal really matched something.
    action = "migrated" if updated != injected else "injected"
    return f"OK ({action}): {path.name}"


def main() -> None:
    """Run process_file() on each non-skipped page and print its status.

    Pages are the ``*.astro`` files directly inside PAGES whose file name is
    not listed in SKIP_PAGES, handled in sorted path order.
    """
    product_pages = (
        page for page in PAGES.glob("*.astro") if page.name not in SKIP_PAGES
    )
    for page in sorted(product_pages):
        print(process_file(page))


if __name__ == "__main__":
    main()