feat(blog): Phase 5 SEO/GEO content with 5 new blog posts
Add 5 long-form Thai blog posts (1,200-2,500 words each) with SEO + GEO optimization for the dealplustech water-systems site. Each post targets a specific audience (contractors, engineers, project managers) and follows a content-quality workflow: source real product specs, verify Thai text, dedupe images, link back to product pages. ## New blog posts (src/content/blog/) - thermobreak-guide.md (Thermobreak closed-cell insulation overview) - plastic-grilles-guide.md (ABS plastic grilles for HVAC) - ppr-pipe-guide.md (PPR pipe properties + heat-fusion welding) - ppr-vs-hdpe-vs-upvc.md (3-way pipe comparison with PE80/PE100) - thermobreak-series-guide.md (Thermobreak LS vs Solar series) - 10-things-checklist-pipe-ordering.md (10-point pre-order checklist) ## Removed legacy posts - pipe-knowledge.md, valve-guide.md, welcome-post.md (orphans) ## Hero images (public/images/blog/) ~20 product photos sourced from manufacturers (Thermobreak, Thai PPR, thaiconsupply) plus Nano Banana Pro infographics. All resized to 3:2 aspect ratio per user preference. Source folder preserved for re-derivation. ## Astro layout/SEO work - src/components/seo/SEO.astro, JsonLd.astro (new SEO components) - src/layouts/BaseLayout.astro, Layout.astro (OG/Twitter/JSON-LD wiring) - src/pages/404.astro - Product pages (8): added #pricelist anchors + schema work - src/styles/global.css: scroll-padding for sticky-header anchors ## Automation scripts (scripts/) - build_og_image.py (OG image builder) - inject_faq_schema.py, inject_product_schema.py (JSON-LD injection) ## Misc - public/robots.txt, public/images/og/default-og.jpg - .gitignore: exclude scripts/__pycache__/
This commit is contained in:
152
scripts/inject_faq_schema.py
Normal file
152
scripts/inject_faq_schema.py
Normal file
@@ -0,0 +1,152 @@
|
||||
"""
|
||||
Extract FAQ Q&A pairs from product pages and inject them as a `faq={[...]}` prop.

Primary pattern detected:
    <h3 ...>Q: ...question...</h3>
    <p ...>...answer...</p>

Only operates on pages that have a FAQ section (detected by searching for 'คำถามที่พบบ่อย').
|
||||
"""
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
# Repository root: this script sits one directory below it.
ROOT = Path(__file__).resolve().parent.parent
# Directory that holds the Astro page sources.
PAGES = ROOT.joinpath("src", "pages")

# Product pages with FAQ UI (from earlier audit: lines >= 240 and grep -c FAQ > 0).
# File names are relative to PAGES and are processed in the order listed.
TARGETS = [
    "pipe-coupling.astro",
    "ท่อ-syler.astro",
    "หัวจ่าย-ball-jet.astro",
    "เม็กกรู๊ฟ-คับปลิ้ง.astro",
    "เครื่องเชื่อม-hdpe.astro",
    "เครื่องเชื่อม-ppr.astro",
    "เทอร์โมเบรค-thermobreak.astro",
    "realflex.astro",
    "water-treatment.astro",
    "วาล์ว-valve.astro",
    "รั้วเทวดา.astro",
    "ระบบรั้วไวน์แมน.astro",
    "durgo-avvs.astro",
    "ตู้ดับเพลิง.astro",
    "water-pump.astro",
    "grilles.astro",
    "ท่อ-upvc.astro",
    "armflex.astro",
    "aeroflex.astro",
    "maxflex.astro",
]
|
||||
|
||||
|
||||
def extract_faq(content: str) -> list[tuple[str, str]]:
    """Collect (question, answer) tuples from the FAQ section of a page.

    The search is restricted to the slice of *content* that begins at the
    first occurrence of "คำถามที่พบบ่อย" and stops at the earliest terminator
    marker found after it (a Contact CTA marker, an End FAQ comment, or a
    closing section followed by a comment). When no terminator is present
    the slice runs to the end of the text. Restricting the search keeps the
    prefix-less heading pattern from picking up feature lists elsewhere.

    Patterns are tried in this order; the first one that yields any pair wins:
        A/B. <h2-h4>Q: ...</h2-h4><p>...</p>  or  <h2-h4>1. ...</h2-h4><p>...</p>
        D.   <h2-h4>question</h2-h4><p>answer</p>   (no prefix)
        C.   <details><summary>...</summary> ... <div>...</div></details>

    Args:
        content: Full text of the page source.

    Returns:
        Stripped (question, answer) pairs in document order, or an empty list
        when the FAQ heading text is absent or nothing matches. Answers keep
        any inner markup that the match captured.
    """
    faq_start = content.find('คำถามที่พบบ่อย')
    if faq_start == -1:
        return []

    # The earliest terminator after the heading bounds the FAQ block.
    terminators = ('<!-- Contact CTA', 'Contact CTA', '<!-- End FAQ', '</section>\n <!--')
    positions = (content.find(marker, faq_start) for marker in terminators)
    hits = [pos for pos in positions if pos != -1]
    block = content[faq_start:min(hits, default=len(content))]

    def heading_pairs(pattern: str) -> list[tuple[str, str]]:
        # Every match exposes named groups "q" and "a".
        return [
            (m['q'].strip(), m['a'].strip())
            for m in re.finditer(pattern, block, re.DOTALL)
        ]

    # Patterns A + B: heading text carries a "Q:" or "<number>." prefix.
    found = heading_pairs(
        r'<h[234]\b[^>]*>\s*(?:Q:|\d+\.\s*)(?P<q>[^<]+?)\s*</h[234]>\s*<p\b[^>]*>(?P<a>.*?)</p>'
    )
    if found:
        return found

    # Pattern D: heading without any prefix, followed by a paragraph.
    found = heading_pairs(
        r'<h[234]\b[^>]*>\s*(?P<q>[^<]+?)\s*</h[234]>\s*<p\b[^>]*>(?P<a>.*?)</p>'
    )
    if found:
        return found

    # Pattern C: <details>/<summary> accordions.
    accordion = (
        r'<details\b[^>]*>\s*<summary\b[^>]*>(?P<sum>.*?)</summary>'
        r'.*?<div\b[^>]*>(?P<a>.*?)</div>\s*</details>'
    )
    collected: list[tuple[str, str]] = []
    for m in re.finditer(accordion, block, re.DOTALL):
        summary_html = m['sum']
        span = re.search(r'<span\b[^>]*>(?P<q>.*?)</span>', summary_html, re.DOTALL)
        if span is None:
            # No <span> wrapper: use the summary text with its tags removed.
            question = re.sub(r'<[^>]+>', '', summary_html).strip()
        else:
            question = span['q'].strip()
        collected.append((question, m['a'].strip()))
    return collected
|
||||
|
||||
|
||||
def js_string(value: str) -> str:
    """Return *value* as a single-quoted JS string literal for Astro JSX.

    Backslashes and single quotes are escaped, and so are line terminators
    (CR, LF, U+2028, U+2029). Answers captured by extract_faq are matched
    with re.DOTALL and may span several lines; a raw line break inside a
    single-quoted JavaScript literal is a syntax error, so it must be written
    as an escape sequence instead.

    Args:
        value: Text to embed.

    Returns:
        The quoted, escaped literal (including the surrounding quotes).
    """
    escaped = (
        value.replace("\\", "\\\\")  # must run first so later escapes stay intact
        .replace("'", "\\'")
        .replace("\r", "\\r")
        .replace("\n", "\\n")
        .replace("\u2028", "\\u2028")
        .replace("\u2029", "\\u2029")
    )
    return "'" + escaped + "'"
|
||||
|
||||
|
||||
def build_faq_prop(pairs: list[tuple[str, str]]) -> str:
    """Render *pairs* as the text of a multi-line `faq={[ ... ]}` prop.

    The result starts with a newline, holds one `{ question, answer }`
    object literal per pair (each followed by a comma), and ends with the
    closing `]}`. Both fields are quoted through js_string.

    Args:
        pairs: (question, answer) tuples in the order they should appear.

    Returns:
        The prop text, ready to be spliced into an opening tag.
    """
    entries = [
        f' {{ question: {js_string(question)}, answer: {js_string(answer)} }},'
        for question, answer in pairs
    ]
    return '\n'.join(['\n faq={[', *entries, ' ]}'])
|
||||
|
||||
|
||||
def inject_prop(content: str, prop_block: str) -> str:
    """Insert *prop_block* at the end of the first <BaseLayout> opening tag.

    The text is placed immediately before the tag's closing `>`. If the tag
    is self-closing (`<BaseLayout ... />`), it is placed before the `/` so
    the result stays well-formed rather than splitting `/>` apart.

    The tag is located with a regex that stops at the first `>`, so a `>`
    inside an attribute value (for example an arrow function or a comparison
    in an expression) is not supported.

    Args:
        content: Page source text.
        prop_block: Attribute text to insert.

    Returns:
        The updated text, or *content* unchanged when no <BaseLayout> tag
        is found.
    """
    m = re.search(r'<BaseLayout\b[^>]*>', content)
    if not m:
        return content
    insert_at = m.end() - 1  # index of the closing '>'
    # The match is at least "<BaseLayout>", so insert_at - 1 is always valid.
    if content[insert_at - 1] == '/':
        insert_at -= 1  # self-closing tag: go in front of "/>"
    return content[:insert_at] + prop_block + content[insert_at:]
|
||||
|
||||
|
||||
def process_file(path: Path) -> str:
    """Inject a FAQ prop into one page file and report what happened.

    The file is read as UTF-8. It is rewritten only when FAQ pairs were
    extracted and injecting them actually changed the text.

    Args:
        path: Page file to process.

    Returns:
        A one-line status message ending with the file name:
        "SKIP (already has faq prop)", "SKIP (no FAQ)", "NO-CHANGE",
        or "OK (<n> pairs)".
    """
    name = path.name
    original = path.read_text(encoding='utf-8')

    # Idempotence guard: a page that already carries the prop is left alone.
    if 'faq={[' in original:
        return f"SKIP (already has faq prop): {name}"

    faq_pairs = extract_faq(original)
    if not faq_pairs:
        return f"SKIP (no FAQ): {name}"

    updated = inject_prop(original, build_faq_prop(faq_pairs))
    if updated == original:
        # inject_prop returned its input unchanged, so there is nothing to write.
        return f"NO-CHANGE: {name}"

    path.write_text(updated, encoding='utf-8')
    return f"OK ({len(faq_pairs)} pairs): {name}"
|
||||
|
||||
|
||||
def main() -> None:
    """Process every page listed in TARGETS and print one status line each.

    Names that do not exist under PAGES are reported as MISSING and skipped.
    """
    for name in TARGETS:
        page = PAGES / name
        if page.exists():
            print(process_file(page))
        else:
            print(f"MISSING: {name}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user