Files
dealplustech/crawl.js
2026-02-25 22:04:30 +07:00

157 lines
6.9 KiB
JavaScript

const { chromium } = require('playwright');
const fs = require('fs');
// Origin shared by every seed page of the crawl.
const SITE_ORIGIN = 'https://www.dealplustech.co.th';

// One [path, display name] pair per category page, listed in crawl order.
// Paths are kept percent-encoded exactly as the site serves them.
const CATEGORY_PAGES = [
  ['/pipe/', 'ท่อ | Pipe'],
  ['/%e0%b8%97%e0%b9%88%e0%b8%ad-ppr-thai-ppr/', 'ท่อ PPR | Thai PPR'],
  ['/%e0%b8%97%e0%b9%88%e0%b8%ad%e0%b8%9e%e0%b8%b5%e0%b8%9e%e0%b8%b5%e0%b8%ad%e0%b8%b2%e0%b8%a3%e0%b9%8c%e0%b8%95%e0%b8%a3%e0%b8%b2%e0%b8%8a%e0%b9%89%e0%b8%b2%e0%b8%87/', 'ท่อพีพีอาร์ (ท่อ PPR) ตราช้าง'],
  ['/pp-r-pp-rct-poloplast/', 'PP-R / PP-RCT POLOPLAST'],
  ['/%e0%b9%80%e0%b8%84%e0%b8%a3%e0%b8%b7%e0%b9%88%e0%b8%ad%e0%b8%87%e0%b9%80%e0%b8%8a%e0%b8%b7%e0%b9%88%e0%b8%ad%e0%b8%a1%e0%b8%97%e0%b9%88%e0%b8%ad%e0%b8%9e%e0%b8%b5%e0%b8%9e%e0%b8%b5%e0%b8%ad%e0%b8%b2/', 'เครื่องเชื่อมท่อพีพีอาร์'],
  ['/%e0%b8%97%e0%b9%88%e0%b8%adhdpe/', 'ท่อเอชดีพีอี (HDPE Pipe)'],
  ['/%e0%b8%97%e0%b9%88%e0%b8%ad-upvc/', 'ท่อ uPVC ทนทานต่อทุกสภาวะอากาศ'],
  ['/%e0%b8%97%e0%b9%88%e0%b8%ad%e0%b9%84%e0%b8%8b%e0%b9%80%e0%b8%a5%e0%b8%ad%e0%b8%a3%e0%b9%8c/', 'ท่อไซเลอร์ (Syler) ท่อเหล็กเหล็กบุพีอี'],
  ['/%e0%b8%97%e0%b9%88%e0%b8%ad%e0%b9%81%e0%b8%a5%e0%b8%b0%e0%b8%82%e0%b9%89%e0%b8%ad%e0%b8%95%e0%b9%88%e0%b8%adpvc/', 'ท่อและข้อต่อพีวีซี (PVC pipe)'],
  ['/%e0%b8%97%e0%b9%88%e0%b8%ad%e0%b8%a3%e0%b8%b0%e0%b8%9a%e0%b8%b2%e0%b8%a2%e0%b8%99%e0%b9%89%e0%b8%b3-3-%e0%b8%8a%e0%b8%b1%e0%b9%89%e0%b8%99-%e0%b9%84%e0%b8%8b%e0%b9%80%e0%b8%a5%e0%b8%99%e0%b8%97/', 'ท่อระบายน้ำ 3 ชั้น ไซเลนท์ | XYLENT'],
];

// Absolute URLs of the category pages to crawl, in order.
const URLs = CATEGORY_PAGES.map(([path]) => `${SITE_ORIGIN}${path}`);

// Lookup from absolute page URL to its human-readable category name.
const categoryNames = {};
for (const [path, label] of CATEGORY_PAGES) {
  categoryNames[`${SITE_ORIGIN}${path}`] = label;
}
/**
 * Crawl a single category page and extract its metadata and internal links.
 *
 * A headless Chromium instance is launched for the call and is always closed
 * before the function returns, whether the crawl succeeded or failed.
 *
 * Links are classified by their resolved pathname:
 *  - product: path contains `/product/` or ends in `-<digits>` (optional `/`);
 *  - subcategory: a single path segment (optional trailing `/`) that does not
 *    contain `/category/`.
 * Links carrying a query string or fragment, links whose text is shorter than
 * two characters, non-http(s) links and links to other hosts are ignored.
 *
 * @param {string} url - Absolute URL of the page to visit.
 * @returns {Promise<object>} On success
 *   `{ category, url, description, image, subcategories, products }`;
 *   on a page-level failure `{ category, url, error }` with the error message.
 *   Rejects only if the browser itself cannot be launched.
 */
async function crawlPage(url) {
  console.log(`Crawling: ${url}`);
  const browser = await chromium.launch({ headless: true });
  try {
    const page = await browser.newPage();
    page.on('console', (msg) => console.log('PAGE LOG:', msg.text()));
    await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });

    // The callback below is serialized and run inside the page, so it must be
    // self-contained: it cannot reference anything from this Node.js scope.
    const data = await page.evaluate(() => {
      const SITE_HOST = 'dealplustech.co.th';
      const PRODUCT_SLUG = /-\d+\/?$/;
      const CATEGORY_PATH = /^\/[a-z0-9ก-๙%.-]+\/?$/i;

      // Read the `content` attribute of the first element matching `selector`.
      const readContent = (selector) => {
        const el = document.querySelector(selector);
        return el ? el.getAttribute('content') || '' : '';
      };

      // Keep only the first entry seen for each URL, preserving order.
      const dedupeByUrl = (items) => {
        const seen = new Set();
        return items.filter((item) => {
          if (seen.has(item.url)) return false;
          seen.add(item.url);
          return true;
        });
      };

      const products = [];
      const subcategories = [];

      for (const anchor of document.querySelectorAll('a[href]')) {
        const href = anchor.getAttribute('href');
        const name = anchor.textContent.trim();
        if (!href || href.includes('?') || href.includes('#')) continue;
        if (name.length < 2) continue;

        let target;
        try {
          target = new URL(href, window.location.href);
        } catch (e) {
          continue; // Unparseable href: nothing useful to record.
        }

        // Compare the real hostname instead of substring-matching the href,
        // so foreign URLs that merely mention the domain are not accepted.
        const isWeb = target.protocol === 'http:' || target.protocol === 'https:';
        const isSiteHost =
          target.hostname === SITE_HOST || target.hostname.endsWith(`.${SITE_HOST}`);
        if (!isWeb || !isSiteHost) continue;

        // Classify on the normalized path so absolute and relative links to
        // the same page are treated identically.
        const path = target.pathname;
        const absoluteUrl = new URL(path, window.location.origin).href;

        if (path.includes('/product/') || PRODUCT_SLUG.test(path)) {
          const img = anchor.querySelector('img');
          products.push({
            name,
            url: absoluteUrl,
            image: img ? img.getAttribute('src') || img.getAttribute('data-src') || '' : '',
          });
        } else if (CATEGORY_PATH.test(path) && !path.includes('/category/')) {
          subcategories.push({ name, url: absoluteUrl });
        }
      }

      const result = {
        category: '',
        url: window.location.href,
        description: readContent('meta[name="description"]'),
        image: readContent('meta[property="og:image"]'),
        subcategories: dedupeByUrl(subcategories),
        products: dedupeByUrl(products),
      };
      console.log(` Found ${result.products.length} products, ${result.subcategories.length} subcategories`);
      return result;
    });

    // The final URL can differ from the requested one after a redirect, so
    // fall back to the requested URL before giving up on a friendly name.
    data.category = categoryNames[data.url] || categoryNames[url] || data.url;
    return data;
  } catch (error) {
    console.error(` Error crawling ${url}:`, error.message);
    return { category: categoryNames[url] || url, url, error: error.message };
  } finally {
    // Release the browser on every path; a failed close must not discard the
    // result that was already computed.
    try {
      await browser.close();
    } catch (closeError) {
      console.error(` Error closing browser for ${url}:`, closeError.message);
    }
  }
}
/**
 * Crawl every seed URL one after another and save the collected records.
 *
 * Pages are fetched sequentially with a one-second pause between requests;
 * no pause is added after the final page. The results, in crawl order, are
 * written as pretty-printed JSON to `pipe-categories.json` in the current
 * working directory.
 *
 * @returns {Promise<void>} Resolves once the output file has been written.
 */
async function main() {
  const results = [];
  for (const [index, url] of URLs.entries()) {
    // Throttle between requests only; waiting after the last page would just
    // delay the output.
    if (index > 0) {
      await new Promise((resolve) => setTimeout(resolve, 1000));
    }
    results.push(await crawlPage(url));
  }
  fs.writeFileSync('pipe-categories.json', JSON.stringify(results, null, 2));
  console.log('\nData saved to pipe-categories.json');
}
// Script entry point. An unexpected failure is logged and the process is
// marked as failed so shells and CI do not see a success exit status.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});