192 lines
11 KiB
JavaScript
192 lines
11 KiB
JavaScript
const { chromium } = require('playwright');
|
|
const fs = require('fs');
|
|
|
|
/**
 * Curated display names for the category pages to crawl, keyed by the exact
 * page URL (percent-encoded, with trailing slash).
 *
 * This object is the single source of truth for what gets crawled: `URLs`
 * below is derived from its keys, so a page cannot be crawled without a name
 * or named without being crawled.
 *
 * NOTE(review): some labels do not appear to match the Thai words in their
 * URL slug (for example the slug that decodes to 'พุกเหล็ก' is labelled
 * 'สกรูเหล็ก') — confirm the labels with whoever owns the catalogue.
 */
const categoryNames = {
  'https://www.dealplustech.co.th/%e0%b9%81%e0%b8%ae%e0%b8%87%e0%b9%80%e0%b8%81%e0%b8%ad%e0%b8%a3%e0%b9%8c-%e0%b9%81%e0%b8%84%e0%b8%a5%e0%b9%89%e0%b8%a1-%e0%b9%82%e0%b8%9a%e0%b8%a5%e0%b8%97%e0%b9%8c-%e0%b9%81%e0%b8%ab%e0%b8%a7/': 'แฮงเกอร์/แคล้ม/โบลท์ | Hanger/Clamp/Bolt',
  'https://www.dealplustech.co.th/%e0%b8%aa%e0%b8%9b%e0%b8%a3%e0%b8%b4%e0%b8%97%e0%b8%a3%e0%b8%b4%e0%b8%87%e0%b9%81%e0%b8%ae%e0%b8%87%e0%b9%80%e0%b8%81%e0%b8%ad%e0%b8%a3%e0%b9%8c-sr19-adjustable-split-ring-hanger/': 'สปริงท์ซิงแฮงเกอร์ SR19 | SR19 Adjustable Split Ring Hanger',
  'https://www.dealplustech.co.th/%e0%b9%80%e0%b8%84%e0%b8%a7%e0%b8%b4%e0%b8%aa%e0%b9%81%e0%b8%ae%e0%b8%87%e0%b9%80%e0%b8%81%e0%b8%ad%e0%b8%a3%e0%b9%8c/': 'เครื่องแฮงเกอร์',
  'https://www.dealplustech.co.th/%e0%b9%81%e0%b8%84%e0%b8%a5%e0%b9%89%e0%b8%a1%e0%b8%9f%e0%b8%b1%e0%b8%99%e0%b8%88%e0%b8%a3%e0%b8%b0%e0%b9%80%e0%b8%82%e0%b9%89-beam-clamp/': 'แคล้มหัวเสา Beam Clamp',
  'https://www.dealplustech.co.th/%e0%b9%81%e0%b8%84%e0%b8%a5%e0%b9%89%e0%b8%a1%e0%b8%ab%e0%b8%a2%e0%b8%94%e0%b8%99%e0%b9%89%e0%b8%b3-adjustable-band-hanger/': 'แคล้มหยดน้ำ Adjustable Band Hanger',
  'https://www.dealplustech.co.th/%e0%b9%81%e0%b8%84%e0%b8%a5%e0%b9%89%e0%b8%a1%e0%b9%80%e0%b8%a5%e0%b9%80%e0%b8%a7%e0%b8%a5-level-clamp/': 'แคล้มเลเวล Level Clamp',
  'https://www.dealplustech.co.th/%e0%b9%80%e0%b8%88%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c-%e0%b9%81%e0%b8%ad%e0%b8%a5%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c-j-bolt-l-bolt/': 'เจโบลท์/แอลโบลท์ | J Bolt / L Bolt',
  'https://www.dealplustech.co.th/%e0%b8%a2%e0%b8%b9%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c-u-bolt/': 'ยูโบลท์ | U Bolt',
  'https://www.dealplustech.co.th/%e0%b8%a2%e0%b8%b9%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c-%e0%b8%9b%e0%b8%a3%e0%b8%b0%e0%b8%81%e0%b8%b1%e0%b8%9a-u-bolt-clamp/': 'ยูโบลท์แบบหนีบ | U Bolt Clamp',
  'https://www.dealplustech.co.th/%e0%b8%a2%e0%b8%b9%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c%e0%b9%80%e0%b8%ab%e0%b8%a5%e0%b9%87%e0%b8%81%e0%b9%81%e0%b8%9c%e0%b9%88%e0%b8%99-%e0%b8%a2%e0%b8%b9%e0%b9%81%e0%b8%9a%e0%b8%99-strap/': 'ยูโบลท์เหล็กแผ่น ยูแบนด์สแตรป',
  'https://www.dealplustech.co.th/%e0%b8%99%e0%b9%87%e0%b8%ad%e0%b8%95-%e0%b9%81%e0%b8%ab%e0%b8%a7%e0%b8%99-%e0%b8%aa%e0%b8%81%e0%b8%a3%e0%b8%b9-hex-nut-flat-washer-hexagon-head-screw/': 'น็อต/แหวน/สกรู | Hex Nut / Flat Washer / Hexagon Head Screw',
  'https://www.dealplustech.co.th/%e0%b8%9e%e0%b8%b8%e0%b8%81%e0%b8%95%e0%b9%88%e0%b8%b2%e0%b8%87%e0%b9%86/': 'สกรูตัวเต็ม',
  'https://www.dealplustech.co.th/%e0%b8%9e%e0%b8%b8%e0%b8%81%e0%b9%80%e0%b8%ab%e0%b8%a5%e0%b9%87%e0%b8%81-sleeve-anchor-bolt/': 'สกรูเหล็ก Sleeve Anchor Bolt',
  'https://www.dealplustech.co.th/%e0%b8%aa%e0%b8%95%e0%b8%b1%e0%b8%94%e0%b9%80%e0%b8%81%e0%b8%a5%e0%b8%b5%e0%b8%a2%e0%b8%a7%e0%b8%95%e0%b8%a5%e0%b8%ad%e0%b8%94-%e0%b9%80%e0%b8%ab%e0%b8%a5%e0%b9%87%e0%b8%81-threaded-rod/': 'สแตนเลสเจาะเกลียว Threaded Rod',
};

/**
 * Category page URLs to crawl, in crawl order.
 *
 * Derived from the keys of `categoryNames`; string keys keep their insertion
 * order, so the order here is the order the entries are written above.
 */
const URLs = Object.keys(categoryNames);
|
|
|
|
/**
 * Collects category metadata and product listings from the current document.
 *
 * This function is handed to `page.evaluate`, so it runs inside the page and
 * not in Node. It must stay self-contained: it may only use browser globals
 * (`document`, `window`, `URL`, `console`) and helpers declared in its own body.
 *
 * @returns {{category: string, url: string, description: string, image: string,
 *   products: Array<{name: string, url: string, image: string, price: string}>}}
 */
function extractPageData() {
  const SITE_HOST = 'dealplustech.co.th';
  const CARD_SELECTOR = '.product, .item, .post, article';
  // Tried in order; the first selector that matches any element wins.
  const IMAGE_SELECTORS = [
    '.wp-post-image',
    '.attachment-large',
    '.product-image img',
    '.featured-image img',
  ];
  const pageUrl = window.location.href;

  const textOf = (el) => (el ? el.textContent.trim() : '');
  const imageOf = (img) =>
    img ? img.getAttribute('src') || img.getAttribute('data-src') || '' : '';
  // Resolve an href against the page URL; null when it cannot be parsed.
  const toAbsolute = (href) => {
    try {
      return new URL(href, pageUrl);
    } catch {
      return null;
    }
  };
  // Decide on the parsed hostname so that protocol-relative links to other
  // hosts, or foreign URLs that merely mention the domain, are not accepted.
  const isInternal = (parsed) =>
    parsed.hostname === window.location.hostname ||
    parsed.hostname === SITE_HOST ||
    parsed.hostname.endsWith(`.${SITE_HOST}`);

  // Page-level metadata: <h1> (or <title>), meta description, og:image.
  const heading = document.querySelector('h1') || document.querySelector('title');
  const metaDescription = document.querySelector('meta[name="description"]');
  const ogImage = document.querySelector('meta[property="og:image"]');

  let image = ogImage?.getAttribute('content') || '';
  if (!image) {
    // Fall back to the first featured-image candidate present in the page.
    for (const selector of IMAGE_SELECTORS) {
      const candidate = document.querySelector(selector);
      if (candidate) {
        image = imageOf(candidate);
        break;
      }
    }
  }

  // Pass 1: anchors whose target looks like a product page. Keyed by the
  // absolute URL so repeated links to the same product collapse to one entry
  // (the first occurrence is kept).
  const linkProducts = new Map();
  for (const anchor of document.querySelectorAll('a[href]')) {
    const rawHref = anchor.getAttribute('href');
    const name = anchor.textContent.trim();

    if (!rawHref || name.length < 2) continue;
    // Skip pagination, filters and in-page anchors.
    if (rawHref.includes('?') || rawHref.includes('#')) continue;

    const target = toAbsolute(rawHref);
    if (!target || !isInternal(target)) continue;

    // Product heuristic: a "/product/" path segment, or a path ending in "-<digits>".
    const looksLikeProduct =
      target.pathname.includes('/product/') || /-\d+\/?$/.test(target.pathname);
    if (!looksLikeProduct || linkProducts.has(target.href)) continue;

    const card = anchor.closest(CARD_SELECTOR);
    linkProducts.set(target.href, {
      name,
      url: target.href,
      image: imageOf(anchor.querySelector('img') || card?.querySelector('img')),
      price: textOf(card?.querySelector('.price, .amount, .product-price')),
    });
  }

  // Pass 2: only when pass 1 found nothing, read product cards from grid/loop markup.
  const products = [...linkProducts.values()];
  if (products.length === 0) {
    const cards = document.querySelectorAll(
      '.products .product, .product-list .product, .woocommerce ul.products li.product, .grid-view .item'
    );
    for (const card of cards) {
      const link = card.querySelector('a');
      const title = card.querySelector('h2, h3, .title, .product-title, .name');
      if (!link || !title) continue;

      const href = link.getAttribute('href');
      products.push({
        name: title.textContent.trim(),
        // Absolute like the link-derived entries; raw href kept if unparsable.
        url: href ? (toAbsolute(href)?.href ?? href) : '',
        image: imageOf(card.querySelector('img')),
        price: textOf(card.querySelector('.price, .amount')),
      });
    }
  }

  // Logged inside the page; crawlPage forwards it to Node with a "PAGE LOG:" prefix.
  console.log(` Found ${products.length} products`);

  return {
    category: textOf(heading),
    url: pageUrl,
    description: metaDescription?.getAttribute('content') || '',
    image,
    products,
  };
}

/**
 * Crawls one category page in a fresh headless Chromium instance.
 *
 * Errors raised while opening, loading or scraping the page are logged and
 * reported as an error record instead of being thrown. A failure to launch
 * the browser itself still rejects, as there is nothing to scrape or close.
 *
 * @param {string} url - Page URL to crawl; also the lookup key into `categoryNames`.
 * @returns {Promise<object>} On success the page data (`category`, `url`,
 *   `description`, `image`, `products`); on failure `{ category, url, error }`.
 */
async function crawlPage(url) {
  console.log(`Crawling: ${url}`);

  const browser = await chromium.launch({ headless: true });

  try {
    const page = await browser.newPage();
    page.on('console', (msg) => console.log('PAGE LOG:', msg.text()));

    await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });
    const data = await page.evaluate(extractPageData);

    // Prefer the curated name for the URL that was requested: the final
    // location reported by the page can differ from it (e.g. after a redirect),
    // which would otherwise miss the lookup.
    data.category = categoryNames[url] || categoryNames[data.url] || data.category;
    return data;
  } catch (error) {
    console.error(` Error crawling ${url}:`, error.message);
    return { category: categoryNames[url] || url, url, error: error.message };
  } finally {
    // Runs on success and on every failure after launch, so the browser
    // process is never left behind.
    await browser.close();
  }
}
|
|
|
|
/**
 * Crawls every URL in `URLs` one at a time and writes the collected records
 * to `hanger-clamp-products.json` as pretty-printed JSON.
 *
 * Pages are fetched sequentially with a fixed pause between consecutive
 * requests. The pause is only inserted between requests, so no time is spent
 * waiting after the last page.
 *
 * @returns {Promise<void>} Resolves once the file is written; rejects if a
 *   crawl rejects or the file cannot be written.
 */
async function main() {
  const REQUEST_GAP_MS = 1500;
  const results = [];

  for (const [index, url] of URLs.entries()) {
    if (index > 0) {
      // Wait between requests
      await new Promise((resolve) => setTimeout(resolve, REQUEST_GAP_MS));
    }
    results.push(await crawlPage(url));
  }

  fs.writeFileSync('hanger-clamp-products.json', JSON.stringify(results, null, 2));
  console.log('\nData saved to hanger-clamp-products.json');
  console.log(`Total pages crawled: ${results.length}`);
}
|
|
|
|
// Script entry point: run the crawl, and on a fatal error log it and make the
// process exit with a non-zero status so shells and CI can detect the failure.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|