157 lines
6.9 KiB
JavaScript
157 lines
6.9 KiB
JavaScript
const { chromium } = require('playwright');
|
|
const fs = require('fs');
|
|
|
|
/**
 * Display name for every category page to crawl, keyed by the page's
 * absolute URL (percent-encoded exactly as it is requested).
 *
 * This map is the single source of truth: the crawl list below is derived
 * from its keys, so a page can never be crawled without a name or vice versa.
 */
const categoryNames = {
  'https://www.dealplustech.co.th/pipe/': 'ท่อ | Pipe',
  'https://www.dealplustech.co.th/%e0%b8%97%e0%b9%88%e0%b8%ad-ppr-thai-ppr/': 'ท่อ PPR | Thai PPR',
  'https://www.dealplustech.co.th/%e0%b8%97%e0%b9%88%e0%b8%ad%e0%b8%9e%e0%b8%b5%e0%b8%9e%e0%b8%b5%e0%b8%ad%e0%b8%b2%e0%b8%a3%e0%b9%8c%e0%b8%95%e0%b8%a3%e0%b8%b2%e0%b8%8a%e0%b9%89%e0%b8%b2%e0%b8%87/': 'ท่อพีพีอาร์ (ท่อ PPR) ตราช้าง',
  'https://www.dealplustech.co.th/pp-r-pp-rct-poloplast/': 'PP-R / PP-RCT POLOPLAST',
  'https://www.dealplustech.co.th/%e0%b9%80%e0%b8%84%e0%b8%a3%e0%b8%b7%e0%b9%88%e0%b8%ad%e0%b8%87%e0%b9%80%e0%b8%8a%e0%b8%b7%e0%b9%88%e0%b8%ad%e0%b8%a1%e0%b8%97%e0%b9%88%e0%b8%ad%e0%b8%9e%e0%b8%b5%e0%b8%9e%e0%b8%b5%e0%b8%ad%e0%b8%b2/': 'เครื่องเชื่อมท่อพีพีอาร์',
  'https://www.dealplustech.co.th/%e0%b8%97%e0%b9%88%e0%b8%adhdpe/': 'ท่อเอชดีพีอี (HDPE Pipe)',
  'https://www.dealplustech.co.th/%e0%b8%97%e0%b9%88%e0%b8%ad-upvc/': 'ท่อ uPVC ทนทานต่อทุกสภาวะอากาศ',
  'https://www.dealplustech.co.th/%e0%b8%97%e0%b9%88%e0%b8%ad%e0%b9%84%e0%b8%8b%e0%b9%80%e0%b8%a5%e0%b8%ad%e0%b8%a3%e0%b9%8c/': 'ท่อไซเลอร์ (Syler) ท่อเหล็กเหล็กบุพีอี',
  'https://www.dealplustech.co.th/%e0%b8%97%e0%b9%88%e0%b8%ad%e0%b9%81%e0%b8%a5%e0%b8%b0%e0%b8%82%e0%b9%89%e0%b8%ad%e0%b8%95%e0%b9%88%e0%b8%adpvc/': 'ท่อและข้อต่อพีวีซี (PVC pipe)',
  'https://www.dealplustech.co.th/%e0%b8%97%e0%b9%88%e0%b8%ad%e0%b8%a3%e0%b8%b0%e0%b8%9a%e0%b8%b2%e0%b8%a2%e0%b8%99%e0%b9%89%e0%b8%b3-3-%e0%b8%8a%e0%b8%b1%e0%b9%89%e0%b8%99-%e0%b9%84%e0%b8%8b%e0%b9%80%e0%b8%a5%e0%b8%99%e0%b8%97/': 'ท่อระบายน้ำ 3 ชั้น ไซเลนท์ | XYLENT',
};

/**
 * Pages to crawl, in crawl order. Object.keys() returns these (non-integer)
 * string keys in insertion order, so the order matches the map above.
 */
const URLs = Object.keys(categoryNames);
|
|
|
|
/**
 * Crawl one category page in a fresh headless Chromium instance and collect
 * its meta description, Open Graph image and internal links.
 *
 * Links are split into two de-duplicated lists:
 *   - products:      path contains `/product/` or ends in `-<digits>` (optional `/`)
 *   - subcategories: single-segment paths such as `/pipe` or `/pipe/`
 *
 * @param {string} url - Absolute URL of the page to crawl.
 * @returns {Promise<Object>} On success
 *   `{ category, url, description, image, subcategories, products }`;
 *   when page creation, navigation or extraction fails the error is logged
 *   and `{ category, url, error }` is returned instead of throwing.
 */
async function crawlPage(url) {
  console.log(`Crawling: ${url}`);

  const browser = await chromium.launch({ headless: true });

  try {
    const page = await browser.newPage();

    // Forward console output from the page to the Node console.
    page.on('console', msg => console.log('PAGE LOG:', msg.text()));

    await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });

    // The callback runs inside the page, so it must be self-contained and
    // cannot reference anything from this module's scope.
    const data = await page.evaluate(() => {
      const result = {
        category: '',
        url: window.location.href,
        description: '',
        image: '',
        subcategories: [],
        products: []
      };

      const metaDesc = document.querySelector('meta[name="description"]');
      if (metaDesc) {
        result.description = metaDesc.getAttribute('content') || '';
      }

      const ogImage = document.querySelector('meta[property="og:image"]');
      if (ogImage) {
        result.image = ogImage.getAttribute('content') || '';
      }

      // Keep the first entry seen for each URL, preserving document order.
      const dedupeByUrl = (entries) => {
        const seen = new Set();
        return entries.filter((entry) => {
          if (seen.has(entry.url)) return false;
          seen.add(entry.url);
          return true;
        });
      };

      const productPattern = /-\d+\/?$/;
      // One path segment, optionally followed by a trailing slash.
      const categoryPattern = /^\/[a-z0-9ก-๙%.-]+\/?$/i;

      const productLinks = [];
      const categoryLinks = [];

      for (const a of document.querySelectorAll('a[href]')) {
        const href = a.getAttribute('href');
        const text = a.textContent.trim();

        if (!href) continue;

        const isInternal = href.startsWith('/') || href.includes('dealplustech.co.th');
        if (!isInternal || href.includes('?') || href.includes('#')) continue;
        if (text.length < 2) continue;

        // Reduce absolute links to their path so that absolute and
        // root-relative links are classified by the same rules.
        let path = href;
        if (href.startsWith('http')) {
          try {
            path = new URL(href).pathname;
          } catch (e) {
            continue;
          }
        }

        // A malformed href must only skip this link, not abort the page.
        let target;
        try {
          target = new URL(path, window.location.origin);
        } catch (e) {
          continue;
        }

        if (path.includes('/product/') || productPattern.test(path)) {
          const img = a.querySelector('img');
          productLinks.push({
            name: text,
            url: target.href,
            image: img ? (img.getAttribute('src') || img.getAttribute('data-src') || '') : ''
          });
        } else if (categoryPattern.test(path) && !path.includes('/category/')) {
          categoryLinks.push({
            name: text,
            url: target.href,
          });
        }
      }

      result.products = dedupeByUrl(productLinks);
      result.subcategories = dedupeByUrl(categoryLinks);

      console.log(` Found ${result.products.length} products, ${result.subcategories.length} subcategories`);

      return result;
    });

    // Prefer the requested URL for the name lookup: the final location may
    // differ after a redirect, while the map is keyed by the requested URL.
    data.category = categoryNames[url] || categoryNames[data.url] || data.url;

    return data;
  } catch (error) {
    console.error(` Error crawling ${url}:`, error.message);
    return { category: categoryNames[url] || url, url, error: error.message };
  } finally {
    // Always release the browser process, on success and on failure alike.
    await browser.close();
  }
}
|
|
|
|
/**
 * Crawl every configured page one after another, waiting one second after
 * each page, then write all results to `pipe-categories.json` as
 * pretty-printed JSON.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const pause = (ms) => new Promise((done) => setTimeout(done, ms));
  const collected = [];

  for (let i = 0; i < URLs.length; i += 1) {
    const pageData = await crawlPage(URLs[i]);
    collected.push(pageData);

    // Fixed delay between requests.
    await pause(1000);
  }

  const serialized = JSON.stringify(collected, null, 2);
  fs.writeFileSync('pipe-categories.json', serialized);
  console.log('\nData saved to pipe-categories.json');
}
|
|
|
|
// Script entry point: run the crawl and, on an unhandled failure, log it and
// mark the process as failed so shells and CI do not see a zero exit status.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|