Files
dealplustech-astroreal/scripts/inject_product_schema.py
hermes b34f8fc2fb feat(blog): Phase 5 SEO/GEO content with 5 new blog posts
Add 5 long-form Thai blog posts (1,200-2,500 words each) with SEO + GEO
optimization for the dealplustech water-systems site. Each post targets
a specific audience (contractors, engineers, project managers) and
follows a content-quality workflow: source real product specs, verify
Thai text, dedupe images, link back to product pages.

## New blog posts (src/content/blog/)
- thermobreak-guide.md (Thermobreak closed-cell insulation overview)
- plastic-grilles-guide.md (ABS plastic grilles for HVAC)
- ppr-pipe-guide.md (PPR pipe properties + heat-fusion welding)
- ppr-vs-hdpe-vs-upvc.md (3-way pipe comparison with PE80/PE100)
- thermobreak-series-guide.md (Thermobreak LS vs Solar series)
- 10-things-checklist-pipe-ordering.md (10-point pre-order checklist)

## Removed legacy posts
- pipe-knowledge.md, valve-guide.md, welcome-post.md (orphans)

## Hero images (public/images/blog/)
~20 product photos sourced from manufacturers (Thermobreak, Thai PPR,
thaiconsupply) plus Nano Banana Pro infographics. All resized to
3:2 aspect ratio per user preference. Source folder preserved for
re-derivation.

## Astro layout/SEO work
- src/components/seo/SEO.astro, JsonLd.astro (new SEO components)
- src/layouts/BaseLayout.astro, Layout.astro (OG/Twitter/JSON-LD wiring)
- src/pages/404.astro
- Product pages (8): added #pricelist anchors + schema work
- src/styles/global.css: scroll-padding for sticky-header anchors

## Automation scripts (scripts/)
- build_og_image.py (OG image builder)
- inject_faq_schema.py, inject_product_schema.py (JSON-LD injection)

## Misc
- public/robots.txt, public/images/og/default-og.jpg
- .gitignore: exclude scripts/__pycache__/
2026-06-08 12:45:32 +07:00

159 lines
5.2 KiB
Python

"""
Inject `product={...}` prop into <BaseLayout> calls in product pages.
For pages that ALREADY have a Product JSON-LD block, this script also
removes that block (since the layout's `product` prop replaces it).
Run from project root: python3 scripts/inject_product_schema.py
"""
import os
import re
from pathlib import Path
# Project root: two directory levels above this script file.
ROOT = Path(__file__).resolve().parent.parent
# Directory holding the top-level Astro page files that main() scans.
PAGES = ROOT.joinpath("src", "pages")
# Map page slug -> brand name for the injected product schema.
# The slug is the page's file name without the ".astro" extension (process_file
# passes `path.stem`). Anything not listed falls back to "ดีล พลัส เทค" via the
# BRAND_MAP.get() call in inject_product_prop.
BRAND_MAP = {
    "ท่อ-ppr-thai-ppr": "Thai PPR",
    "ท่อ-ppr-scg": "SCG",
    "ท่อ-hdpe": "HDPE",
    "ท่อ-upvc": "UPVC",
    "ท่อ-syler": "Syler",
    "ท่อ-xy-lent": "XYLENT",
    "เครื่องเชื่อม-hdpe": "HDPE",
    "เครื่องเชื่อม-ppr": "PPR",
    "pipe-coupling": "SMC",
    "เม็กกรู๊ฟ-คับปลิ้ง": "MECH",
    "วาล์ว-valve": "Generic",
    "water-pump": "Generic",
    "water-treatment": "Generic",
    "realflex": "Realflex",
    "armflex": "Armacell",
    "aeroflex": "Aerocell",
    "maxflex": "Maxflex",
    "เทอร์โมเบรค-thermobreak": "Thermobreak",
    "หัวจ่าย-ball-jet": "SAPA",
    "grilles": "Generic",
    "durgo-avvs": "DURGO",
    "ตู้ดับเพลิง": "Generic",
    "รั้วเทวดา": "Tevada",
    "ระบบรั้วไวน์แมน": "Vineman",
}
# Pages that are NOT products. main() filters these file names out before
# calling process_file, so they never receive a `product={...}` prop.
SKIP_PAGES = {
    "index.astro", "all-products.astro", "about-us.astro", "contact-us.astro",
    "portfolio.astro", "privacy-policy.astro", "terms-and-conditions.astro",
    "404.astro",  # error page, not a product
    "ระบบน้ำ.astro",  # category overview, not a product
}
# Matches an <img> tag whose double-quoted src starts with "/images/".
# Group 1 captures the path itself (without the surrounding quotes).
IMG_RE = re.compile(
    r'<img[^>]*'                 # tag opening plus any attributes before src
    r'\bsrc="(/images/[^"]+)"'   # src attribute; group 1 = site-relative path
)
def escape_for_jsx(value: str) -> str:
    """Return *value* escaped for use inside a single-quoted JS string literal.

    Backslashes are doubled, single quotes get a leading backslash, and each
    newline or carriage-return character is replaced by one space.
    """
    # One pass over the text; every source character maps independently, which
    # yields the same result as applying the replacements one after another.
    substitutions = str.maketrans({
        "\\": "\\\\",
        "'": "\\'",
        "\n": " ",
        "\r": " ",
    })
    return value.translate(substitutions)
def jsx_string(value: str) -> str:
    """Render *value* as a single-quoted string literal, escaped via escape_for_jsx."""
    return f"'{escape_for_jsx(value)}'"
def first_image_src(content: str) -> str | None:
    """Return the path of the first <img> that IMG_RE matches, or None.

    IMG_RE only accepts src values starting with "/images/", so data URIs and
    other sources are passed over.
    """
    found = IMG_RE.search(content)
    if found is None:
        return None
    return found.group(1)
def remove_existing_product_jsonld(content: str) -> str:
    """Delete standalone Product JSON-LD <script> blocks, keeping everything else.

    A block is removed when its <script type="application/ld+json"> body opens
    with an object whose "@type" is "Product" before any nested brace. The
    match runs lazily up to the first "}" that is directly followed (ignoring
    whitespace) by </script>; trailing whitespace after the tag is removed too.
    """
    product_script = (
        r'<script[^>]*type="application/ld\+json"[^>]*>\s*'
        r'\{[^{}]*"@type"\s*:\s*"Product"[\s\S]*?'
        r'\}\s*</script>\s*'
    )
    return re.sub(product_script, '', content, flags=re.MULTILINE)
def extract_title_and_desc(content: str) -> tuple[str | None, str | None]:
"""Parse <BaseLayout title="..." description="..."> attrs (single or double quoted)."""
# Title may contain " - ดีล พลัส เทค" suffix
title_m = re.search(r'title=(?:"([^"]+)"|\'([^\']+)\')', content)
desc_m = re.search(r'description=(?:"([^"]+)"|\'([^\']+)\')', content)
title = (title_m.group(1) or title_m.group(2)) if title_m else None
desc = (desc_m.group(1) or desc_m.group(2)) if desc_m else None
return title, desc
def inject_product_prop(content: str, image: str, title: str, slug: str) -> str:
"""Add product={...} prop to the FIRST <BaseLayout> tag."""
brand = BRAND_MAP.get(slug, "ดีล พลัส เทค")
prop_block = (
f'\n product={{{{\n'
f' name: {jsx_string(title)},\n'
f' image: {jsx_string(image)},\n'
f' brand: {jsx_string(brand)},\n'
f' }}}}'
)
# Find first <BaseLayout ... > — we insert product={...} before the closing >
# Use a non-greedy match up to the first > that is NOT inside a quoted attr.
m = re.search(r'<BaseLayout\b[^>]*>', content)
if not m:
return content # No <BaseLayout> tag — leave file untouched.
insert_at = m.end() - 1 # position of the closing >
return content[:insert_at] + prop_block + content[insert_at:]
def process_file(path: Path) -> str:
    """Inject the product prop into one page file and return a status line.

    The file is read as UTF-8 and rewritten in place only when the injection
    actually changed it. If the page also carried a standalone Product JSON-LD
    <script> block, that block is removed in the same write.

    Returns one of:
        "SKIP (no title): <name>"  - no title attribute was found
        "SKIP (no image): <name>"  - no matching <img> was found
        "NO-CHANGE: <name>"        - injection left the text unchanged
        "OK (migrated): <name>"    - prop injected, old JSON-LD block removed
        "OK (injected): <name>"    - prop injected, no old JSON-LD present
    """
    content = path.read_text(encoding='utf-8')
    slug = path.stem  # e.g. "ท่อ-hdpe"

    title, _ = extract_title_and_desc(content)
    if not title:
        return f"SKIP (no title): {path.name}"
    image = first_image_src(content)
    if not image:
        return f"SKIP (no image): {path.name}"

    updated = inject_product_prop(content, image, title, slug)
    if updated == content:
        # Nothing was injected, so any existing JSON-LD must stay: removing it
        # here would leave the page with no product schema at all.
        return f"NO-CHANGE: {path.name}"

    has_existing_jsonld = (
        '"Product"' in content
        and '<script type="application/ld+json"' in content
    )
    if has_existing_jsonld:
        updated = remove_existing_product_jsonld(updated)

    path.write_text(updated, encoding='utf-8')
    action = "migrated" if has_existing_jsonld else "injected"
    return f"OK ({action}): {path.name}"
def main() -> None:
    """Run process_file on every non-skipped page and print each status line.

    Candidates are the "*.astro" files directly inside PAGES whose file name
    is not in SKIP_PAGES, visited in sorted path order.
    """
    product_pages = [
        page for page in PAGES.glob("*.astro") if page.name not in SKIP_PAGES
    ]
    product_pages.sort()
    for page in product_pages:
        status = process_file(page)
        print(status)


if __name__ == "__main__":
    main()