Files
dealplustech/crawl-hanger-clamp.js
2026-02-25 22:04:30 +07:00

192 lines
11 KiB
JavaScript

const { chromium } = require('playwright');
const fs = require('fs');
// Pages on www.dealplustech.co.th to crawl; main() visits them in this order.
// The paths are percent-encoded slugs, kept exactly as written because the same
// strings are used as lookup keys in `categoryNames`.
const URLs = [
'https://www.dealplustech.co.th/%e0%b9%81%e0%b8%ae%e0%b8%87%e0%b9%80%e0%b8%81%e0%b8%ad%e0%b8%a3%e0%b9%8c-%e0%b9%81%e0%b8%84%e0%b8%a5%e0%b9%89%e0%b8%a1-%e0%b9%82%e0%b8%9a%e0%b8%a5%e0%b8%97%e0%b9%8c-%e0%b9%81%e0%b8%ab%e0%b8%a7/',
'https://www.dealplustech.co.th/%e0%b8%aa%e0%b8%9b%e0%b8%a3%e0%b8%b4%e0%b8%97%e0%b8%a3%e0%b8%b4%e0%b8%87%e0%b9%81%e0%b8%ae%e0%b8%87%e0%b9%80%e0%b8%81%e0%b8%ad%e0%b8%a3%e0%b9%8c-sr19-adjustable-split-ring-hanger/',
'https://www.dealplustech.co.th/%e0%b9%80%e0%b8%84%e0%b8%a7%e0%b8%b4%e0%b8%aa%e0%b9%81%e0%b8%ae%e0%b8%87%e0%b9%80%e0%b8%81%e0%b8%ad%e0%b8%a3%e0%b9%8c/',
'https://www.dealplustech.co.th/%e0%b9%81%e0%b8%84%e0%b8%a5%e0%b9%89%e0%b8%a1%e0%b8%9f%e0%b8%b1%e0%b8%99%e0%b8%88%e0%b8%a3%e0%b8%b0%e0%b9%80%e0%b8%82%e0%b9%89-beam-clamp/',
'https://www.dealplustech.co.th/%e0%b9%81%e0%b8%84%e0%b8%a5%e0%b9%89%e0%b8%a1%e0%b8%ab%e0%b8%a2%e0%b8%94%e0%b8%99%e0%b9%89%e0%b8%b3-adjustable-band-hanger/',
'https://www.dealplustech.co.th/%e0%b9%81%e0%b8%84%e0%b8%a5%e0%b9%89%e0%b8%a1%e0%b9%80%e0%b8%a5%e0%b9%80%e0%b8%a7%e0%b8%a5-level-clamp/',
'https://www.dealplustech.co.th/%e0%b9%80%e0%b8%88%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c-%e0%b9%81%e0%b8%ad%e0%b8%a5%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c-j-bolt-l-bolt/',
'https://www.dealplustech.co.th/%e0%b8%a2%e0%b8%b9%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c-u-bolt/',
'https://www.dealplustech.co.th/%e0%b8%a2%e0%b8%b9%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c-%e0%b8%9b%e0%b8%a3%e0%b8%b0%e0%b8%81%e0%b8%b1%e0%b8%9a-u-bolt-clamp/',
'https://www.dealplustech.co.th/%e0%b8%a2%e0%b8%b9%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c%e0%b9%80%e0%b8%ab%e0%b8%a5%e0%b9%87%e0%b8%81%e0%b9%81%e0%b8%9c%e0%b9%88%e0%b8%99-%e0%b8%a2%e0%b8%b9%e0%b9%81%e0%b8%9a%e0%b8%99-strap/',
'https://www.dealplustech.co.th/%e0%b8%99%e0%b9%87%e0%b8%ad%e0%b8%95-%e0%b9%81%e0%b8%ab%e0%b8%a7%e0%b8%99-%e0%b8%aa%e0%b8%81%e0%b8%a3%e0%b8%b9-hex-nut-flat-washer-hexagon-head-screw/',
'https://www.dealplustech.co.th/%e0%b8%9e%e0%b8%b8%e0%b8%81%e0%b8%95%e0%b9%88%e0%b8%b2%e0%b8%87%e0%b9%86/',
'https://www.dealplustech.co.th/%e0%b8%9e%e0%b8%b8%e0%b8%81%e0%b9%80%e0%b8%ab%e0%b8%a5%e0%b9%87%e0%b8%81-sleeve-anchor-bolt/',
'https://www.dealplustech.co.th/%e0%b8%aa%e0%b8%95%e0%b8%b1%e0%b8%94%e0%b9%80%e0%b8%81%e0%b8%a5%e0%b8%b5%e0%b8%a2%e0%b8%a7%e0%b8%95%e0%b8%a5%e0%b8%ad%e0%b8%94-%e0%b9%80%e0%b8%ab%e0%b8%a5%e0%b9%87%e0%b8%81-threaded-rod/'
];
// Display label for each crawled page, keyed by page URL.
// crawlPage() prefers these labels over the heading text scraped from the page,
// and also uses them to label the error record when a crawl fails.
// NOTE(review): several labels do not appear to match the Thai words that are
// percent-encoded in their URL slugs (most visibly the entries whose label has
// no English suffix) — confirm the labels against the live site.
const categoryNames = {
'https://www.dealplustech.co.th/%e0%b9%81%e0%b8%ae%e0%b8%87%e0%b9%80%e0%b8%81%e0%b8%ad%e0%b8%a3%e0%b9%8c-%e0%b9%81%e0%b8%84%e0%b8%a5%e0%b9%89%e0%b8%a1-%e0%b9%82%e0%b8%9a%e0%b8%a5%e0%b8%97%e0%b9%8c-%e0%b9%81%e0%b8%ab%e0%b8%a7/': 'แฮงเกอร์/แคล้ม/โบลท์ | Hanger/Clamp/Bolt',
'https://www.dealplustech.co.th/%e0%b8%aa%e0%b8%9b%e0%b8%a3%e0%b8%b4%e0%b8%97%e0%b8%a3%e0%b8%b4%e0%b8%87%e0%b9%81%e0%b8%ae%e0%b8%87%e0%b9%80%e0%b8%81%e0%b8%ad%e0%b8%a3%e0%b9%8c-sr19-adjustable-split-ring-hanger/': 'สปริงท์ซิงแฮงเกอร์ SR19 | SR19 Adjustable Split Ring Hanger',
'https://www.dealplustech.co.th/%e0%b9%80%e0%b8%84%e0%b8%a7%e0%b8%b4%e0%b8%aa%e0%b9%81%e0%b8%ae%e0%b8%87%e0%b9%80%e0%b8%81%e0%b8%ad%e0%b8%a3%e0%b9%8c/': 'เครื่องแฮงเกอร์',
'https://www.dealplustech.co.th/%e0%b9%81%e0%b8%84%e0%b8%a5%e0%b9%89%e0%b8%a1%e0%b8%9f%e0%b8%b1%e0%b8%99%e0%b8%88%e0%b8%a3%e0%b8%b0%e0%b9%80%e0%b8%82%e0%b9%89-beam-clamp/': 'แคล้มหัวเสา Beam Clamp',
'https://www.dealplustech.co.th/%e0%b9%81%e0%b8%84%e0%b8%a5%e0%b9%89%e0%b8%a1%e0%b8%ab%e0%b8%a2%e0%b8%94%e0%b8%99%e0%b9%89%e0%b8%b3-adjustable-band-hanger/': 'แคล้มหยดน้ำ Adjustable Band Hanger',
'https://www.dealplustech.co.th/%e0%b9%81%e0%b8%84%e0%b8%a5%e0%b9%89%e0%b8%a1%e0%b9%80%e0%b8%a5%e0%b9%80%e0%b8%a7%e0%b8%a5-level-clamp/': 'แคล้มเลเวล Level Clamp',
'https://www.dealplustech.co.th/%e0%b9%80%e0%b8%88%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c-%e0%b9%81%e0%b8%ad%e0%b8%a5%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c-j-bolt-l-bolt/': 'เจโบลท์/แอลโบลท์ | J Bolt / L Bolt',
'https://www.dealplustech.co.th/%e0%b8%a2%e0%b8%b9%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c-u-bolt/': 'ยูโบลท์ | U Bolt',
'https://www.dealplustech.co.th/%e0%b8%a2%e0%b8%b9%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c-%e0%b8%9b%e0%b8%a3%e0%b8%b0%e0%b8%81%e0%b8%b1%e0%b8%9a-u-bolt-clamp/': 'ยูโบลท์แบบหนีบ | U Bolt Clamp',
'https://www.dealplustech.co.th/%e0%b8%a2%e0%b8%b9%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c%e0%b9%80%e0%b8%ab%e0%b8%a5%e0%b9%87%e0%b8%81%e0%b9%81%e0%b8%9c%e0%b9%88%e0%b8%99-%e0%b8%a2%e0%b8%b9%e0%b9%81%e0%b8%9a%e0%b8%99-strap/': 'ยูโบลท์เหล็กแผ่น ยูแบนด์สแตรป',
'https://www.dealplustech.co.th/%e0%b8%99%e0%b9%87%e0%b8%ad%e0%b8%95-%e0%b9%81%e0%b8%ab%e0%b8%a7%e0%b8%99-%e0%b8%aa%e0%b8%81%e0%b8%a3%e0%b8%b9-hex-nut-flat-washer-hexagon-head-screw/': 'น็อต/แหวน/สกรู | Hex Nut / Flat Washer / Hexagon Head Screw',
'https://www.dealplustech.co.th/%e0%b8%9e%e0%b8%b8%e0%b8%81%e0%b8%95%e0%b9%88%e0%b8%b2%e0%b8%87%e0%b9%86/': 'สกรูตัวเต็ม',
'https://www.dealplustech.co.th/%e0%b8%9e%e0%b8%b8%e0%b8%81%e0%b9%80%e0%b8%ab%e0%b8%a5%e0%b9%87%e0%b8%81-sleeve-anchor-bolt/': 'สกรูเหล็ก Sleeve Anchor Bolt',
'https://www.dealplustech.co.th/%e0%b8%aa%e0%b8%95%e0%b8%b1%e0%b8%94%e0%b9%80%e0%b8%81%e0%b8%a5%e0%b8%b5%e0%b8%a2%e0%b8%a7%e0%b8%95%e0%b8%a5%e0%b8%ad%e0%b8%94-%e0%b9%80%e0%b8%ab%e0%b8%a5%e0%b9%87%e0%b8%81-threaded-rod/': 'สแตนเลสเจาะเกลียว Threaded Rod'
};
/**
 * Crawls one page in headless Chromium and extracts its metadata and product links.
 *
 * A fresh browser is launched for every call and is always closed before the
 * function settles, whether the crawl succeeded or failed.
 *
 * @param {string} url - Absolute URL of the page to crawl.
 * @returns {Promise<object>} On success `{ category, url, description, image, products }`,
 *   where `products` is an array of `{ name, url, image, price }`. When opening,
 *   loading or scraping the page fails, the error is logged and
 *   `{ category, url, error }` is returned instead of throwing.
 * @throws Whatever `chromium.launch` throws; a browser that cannot start is an
 *   environment problem and is left for the caller to handle.
 */
async function crawlPage(url) {
  console.log(`Crawling: ${url}`);
  const browser = await chromium.launch({ headless: true });
  try {
    // Page creation lives inside the try so that a failure here still closes the browser.
    const page = await browser.newPage();
    page.on('console', (msg) => console.log('PAGE LOG:', msg.text()));
    await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });

    // This callback is executed inside the page, so it must be self-contained:
    // it cannot see Node-side variables such as `url` or `categoryNames`.
    const data = await page.evaluate(() => {
      const CARD_SELECTOR = '.product, .item, .post, article';
      const GRID_SELECTOR =
        '.products .product, .product-list .product, .woocommerce ul.products li.product, .grid-view .item';

      const imageSrcOf = (img) =>
        (img ? img.getAttribute('src') || img.getAttribute('data-src') || '' : '');
      const textOf = (el) => (el ? el.textContent.trim() : '');
      const parseHref = (href) => {
        try {
          return new URL(href, document.baseURI);
        } catch {
          return null;
        }
      };
      // Absolute hrefs are kept verbatim; everything else is resolved against the document.
      const toAbsolute = (href) => {
        if (href.startsWith('http')) return href;
        const parsed = parseHref(href);
        return parsed ? parsed.href : href;
      };

      // Hosts are compared without a leading "www." so both spellings of the site match.
      const siteHost = window.location.hostname.replace(/^www\./, '');
      const isSameSite = (host) => host === siteHost || host.endsWith(`.${siteHost}`);

      const result = {
        category: '',
        url: window.location.href,
        description: '',
        image: '',
        products: []
      };

      // Page title: first <h1>, falling back to <title>
      result.category = textOf(document.querySelector('h1') || document.querySelector('title'));

      const metaDesc = document.querySelector('meta[name="description"]');
      if (metaDesc) {
        result.description = metaDesc.getAttribute('content') || '';
      }

      const ogImage = document.querySelector('meta[property="og:image"]');
      if (ogImage) {
        result.image = ogImage.getAttribute('content') || '';
      }

      // Without an og:image, fall back to the first matching featured/product image.
      // Separate lookups (not one selector list) keep this priority order.
      if (!result.image) {
        const mainImage = document.querySelector('.wp-post-image') ||
          document.querySelector('.attachment-large') ||
          document.querySelector('.product-image img') ||
          document.querySelector('.featured-image img');
        result.image = imageSrcOf(mainImage);
      }

      // Collect product-looking links, keeping the first occurrence of each URL
      const seenUrls = new Set();
      for (const a of document.querySelectorAll('a[href]')) {
        const href = a.getAttribute('href');
        const text = a.textContent.trim();
        if (!href || text.length < 2) continue;
        // Skip pagination and query params
        if (href.includes('?') || href.includes('#')) continue;

        // Compare parsed hostnames rather than substrings, so protocol-relative
        // links to other hosts are rejected and relative links are accepted.
        const target = parseHref(href);
        if (!target || !isSameSite(target.hostname)) continue;

        // A product link contains /product/ or ends with "-<digits>" (optional trailing slash)
        const isProduct = href.includes('/product/') || /-\d+\/?$/.test(href);
        if (!isProduct) continue;

        const productUrl = toAbsolute(href);
        if (seenUrls.has(productUrl)) continue;
        seenUrls.add(productUrl);

        const card = a.closest(CARD_SELECTOR);
        const img = a.querySelector('img') || card?.querySelector('img');
        const priceEl = card?.querySelector('.price, .amount, .product-price');
        result.products.push({
          name: text,
          url: productUrl,
          image: imageSrcOf(img),
          price: textOf(priceEl)
        });
      }

      // Fallback: when no product links were recognised, read product loops/grids
      if (result.products.length === 0) {
        for (const el of document.querySelectorAll(GRID_SELECTOR)) {
          const link = el.querySelector('a');
          const nameEl = el.querySelector('h2, h3, .title, .product-title, .name');
          if (!link || !nameEl) continue;
          const rawHref = link.getAttribute('href') || '';
          result.products.push({
            name: nameEl.textContent.trim(),
            // Resolved to an absolute URL, consistent with the link scan above
            url: rawHref ? toAbsolute(rawHref) : '',
            image: imageSrcOf(el.querySelector('img')),
            price: textOf(el.querySelector('.price, .amount'))
          });
        }
      }

      console.log(` Found ${result.products.length} products`);
      return result;
    });

    // Prefer the curated label. The final URL can differ from the requested one
    // after a redirect, so the requested URL is tried as a second key.
    data.category = categoryNames[data.url] || categoryNames[url] || data.category;
    return data;
  } catch (error) {
    console.error(` Error crawling ${url}:`, error.message);
    return { category: categoryNames[url] || url, url, error: error.message };
  } finally {
    // A failure to close must not discard the result computed above.
    try {
      await browser.close();
    } catch (closeError) {
      console.error(` Error closing browser for ${url}:`, closeError.message);
    }
  }
}
/**
 * Crawls every entry of `URLs` one at a time and writes the collected results,
 * as pretty-printed JSON, to `hanger-clamp-products.json` (a relative path,
 * so it is resolved against the current working directory).
 *
 * @returns {Promise<void>} Resolves once the file has been written.
 */
async function main() {
  const results = [];
  for (const [index, url] of URLs.entries()) {
    // Wait between requests only: pausing after the final page just delays the output.
    if (index > 0) {
      await new Promise((resolve) => setTimeout(resolve, 1500));
    }
    results.push(await crawlPage(url));
  }
  fs.writeFileSync('hanger-clamp-products.json', JSON.stringify(results, null, 2));
  console.log('\nData saved to hanger-clamp-products.json');
  console.log(`Total pages crawled: ${results.length}`);
}
// Script entry point. A fatal error is logged and also reflected in the exit
// status, so shells and CI jobs can tell that the crawl did not complete.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});