Initial commit: New industrial design with green theme
This commit is contained in:
132
crawl-hanger-clamp-v2.js
Normal file
132
crawl-hanger-clamp-v2.js
Normal file
@@ -0,0 +1,132 @@
|
||||
const { chromium } = require('playwright');
|
||||
const fs = require('fs');
|
||||
|
||||
// Pages to crawl, visited in this order by main(). All entries are
// percent-encoded URLs on www.dealplustech.co.th.
const URLs = [
  'https://www.dealplustech.co.th/%e0%b9%81%e0%b8%ae%e0%b8%87%e0%b9%80%e0%b8%81%e0%b8%ad%e0%b8%a3%e0%b9%8c-%e0%b9%81%e0%b8%84%e0%b8%a5%e0%b9%89%e0%b8%a1-%e0%b9%82%e0%b8%9a%e0%b8%a5%e0%b8%97%e0%b9%8c-%e0%b9%81%e0%b8%ab%e0%b8%a7/',
  'https://www.dealplustech.co.th/%e0%b8%aa%e0%b8%9b%e0%b8%a3%e0%b8%b4%e0%b8%97%e0%b8%a3%e0%b8%b4%e0%b8%87%e0%b9%81%e0%b8%ae%e0%b8%87%e0%b9%80%e0%b8%81%e0%b8%ad%e0%b8%a3%e0%b9%8c-sr19-adjustable-split-ring-hanger/',
  'https://www.dealplustech.co.th/%e0%b9%80%e0%b8%84%e0%b8%a7%e0%b8%b4%e0%b8%aa%e0%b9%81%e0%b8%ae%e0%b8%87%e0%b9%80%e0%b8%81%e0%b8%ad%e0%b8%a3%e0%b9%8c/',
  'https://www.dealplustech.co.th/%e0%b9%81%e0%b8%84%e0%b8%a5%e0%b9%89%e0%b8%a1%e0%b8%9f%e0%b8%b1%e0%b8%99%e0%b8%88%e0%b8%a3%e0%b8%b0%e0%b9%80%e0%b8%82%e0%b9%89-beam-clamp/',
  'https://www.dealplustech.co.th/%e0%b9%81%e0%b8%84%e0%b8%a5%e0%b9%89%e0%b8%a1%e0%b8%ab%e0%b8%a2%e0%b8%94%e0%b8%99%e0%b9%89%e0%b8%b3-adjustable-band-hanger/',
  'https://www.dealplustech.co.th/%e0%b9%81%e0%b8%84%e0%b8%a5%e0%b9%89%e0%b8%a1%e0%b9%80%e0%b8%a5%e0%b9%80%e0%b8%a7%e0%b8%a5-level-clamp/',
  'https://www.dealplustech.co.th/%e0%b9%80%e0%b8%88%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c-%e0%b9%81%e0%b8%ad%e0%b8%a5%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c-j-bolt-l-bolt/',
  'https://www.dealplustech.co.th/%e0%b8%a2%e0%b8%b9%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c-u-bolt/',
  'https://www.dealplustech.co.th/%e0%b8%a2%e0%b8%b9%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c-%e0%b8%9b%e0%b8%a3%e0%b8%b0%e0%b8%81%e0%b8%b1%e0%b8%9a-u-bolt-clamp/',
  'https://www.dealplustech.co.th/%e0%b8%a2%e0%b8%b9%e0%b9%82%e0%b8%9b%e0%b8%a5%e0%b8%97%e0%b9%8c%e0%b9%80%e0%b8%ab%e0%b8%a5%e0%b9%87%e0%b8%81%e0%b9%81%e0%b8%9c%e0%b9%88%e0%b8%99-%e0%b8%a2%e0%b8%b9%e0%b9%81%e0%b8%9a%e0%b8%99-strap/',
  'https://www.dealplustech.co.th/%e0%b8%99%e0%b9%87%e0%b8%ad%e0%b8%95-%e0%b9%81%e0%b8%ab%e0%b8%a7%e0%b8%99-%e0%b8%aa%e0%b8%81%e0%b8%a3%e0%b8%b0-hex-nut-flat-washer-hexagon-head-screw/',
  'https://www.dealplustech.co.th/%e0%b8%9e%e0%b8%b8%e0%b8%81%e0%b8%95%e0%b9%88%e0%b8%b2%e0%b8%87%e0%b9%86/',
  'https://www.dealplustech.co.th/%e0%b8%9e%e0%b8%b8%e0%b8%81%e0%b9%80%e0%b8%ab%e0%b8%a5%e0%b9%87%e0%b8%81-sleeve-anchor-bolt/',
  'https://www.dealplustech.co.th/%e0%b8%aa%e0%b8%95%e0%b8%b1%e0%b8%94%e0%b9%80%e0%b8%81%e0%b8%a5%e0%b8%b5%e0%b8%a2%e0%b8%a7%e0%b8%95%e0%b8%a5%e0%b8%ad%e0%b8%94-%e0%b9%80%e0%b8%ab%e0%b8%a5%e0%b9%87%e0%b8%81-threaded-rod/'
];
|
||||
|
||||
/**
 * Crawl one page in a fresh headless Chromium instance and extract its data.
 *
 * The extraction itself runs inside the page (via `page.evaluate`) and
 * collects: the first `<h1>` text as the category, the meta description, a
 * representative image (og:image, then a featured-image selector, then the
 * first uploaded content image), and one record per row of every table that
 * has at least two rows.
 *
 * Any failure after the browser has launched (opening the page, navigation,
 * extraction) is logged and reported as an error record instead of being
 * thrown, so one bad page does not abort a whole crawl. The browser is always
 * closed, whichever path is taken. A failure of `chromium.launch` itself
 * still rejects.
 *
 * @param {string} url - Absolute URL of the page to crawl.
 * @returns {Promise<object>} On success
 *   `{ category, url, description, image, products }`; on failure
 *   `{ category: url, url, error }` where `error` is the error message.
 */
async function crawlPage(url) {
  console.log(`Crawling: ${url}`);

  const browser = await chromium.launch({ headless: true });

  try {
    // Opened inside the try so a failure here is reported like any other
    // per-page error and the browser is still closed by the finally below.
    const page = await browser.newPage();

    await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });

    // This callback is executed in the page context, so it must be
    // self-contained and may only use DOM APIs and its own locals.
    return await page.evaluate(() => {
      const result = {
        category: '',
        url: window.location.href,
        description: '',
        image: '',
        products: []
      };

      const h1 = document.querySelector('h1');
      result.category = h1 ? h1.textContent.trim() : '';

      const metaDesc = document.querySelector('meta[name="description"]');
      if (metaDesc) {
        // A meta tag without a content attribute yields null; keep a string.
        result.description = metaDesc.getAttribute('content') || '';
      }

      const ogImage = document.querySelector('meta[property="og:image"]');
      if (ogImage) {
        result.image = ogImage.getAttribute('content') || '';
      }

      // Fallback 1: the first element matching a featured-image selector.
      if (!result.image) {
        const featuredImg = document.querySelector('.wp-post-image, .attachment-large, .featured-image img, article img');
        if (featuredImg) {
          result.image = featuredImg.getAttribute('src') || '';
        }
      }

      // Fallback 2: the first uploaded content image whose src does not
      // contain "logo" or "QR".
      if (!result.image) {
        const contentImages = document.querySelectorAll('.entry-content img, .post-content img, main img');
        for (const img of contentImages) {
          const src = img.getAttribute('src') || '';
          if (src && src.includes('wp-content/uploads') && !src.includes('logo') && !src.includes('QR')) {
            result.image = src;
            break;
          }
        }
      }

      const tables = document.querySelectorAll('table');
      tables.forEach((table) => {
        const rows = table.querySelectorAll('tr');
        if (rows.length < 2) return;

        // Column names are taken from every <th> in the table, in document
        // order; cells beyond the known headers are keyed col<index>.
        const headers = [];
        table.querySelectorAll('th').forEach((th) => {
          headers.push(th.textContent.trim());
        });

        const products = [];
        let hasData = false;

        // NOTE(review): rows made of <th> cells are iterated too, so a header
        // row is emitted as a record mapping each header to itself, and
        // duplicate header texts overwrite each other. Confirm whether
        // downstream consumers expect that before changing it.
        rows.forEach((row) => {
          const cells = row.querySelectorAll('td, th');
          const rowData = {};

          cells.forEach((cell, cellIndex) => {
            const header = headers[cellIndex] || `col${cellIndex}`;
            const text = cell.textContent.trim();
            rowData[header] = text;
            if (text) hasData = true;
          });

          if (Object.keys(rowData).length > 0) {
            products.push(rowData);
          }
        });

        // Tables whose cells are all empty contribute nothing.
        if (hasData && products.length > 0) {
          result.products.push(...products);
        }
      });

      return result;
    });
  } catch (error) {
    console.error(` Error crawling ${url}:`, error.message);
    return { category: url, url, error: error.message };
  } finally {
    // Single cleanup point: runs after both the success and the error return.
    await browser.close();
  }
}
|
||||
|
||||
/**
 * Crawl every entry of URLs one after another and write the collected
 * records, in order, to hanger-clamp-products.json as pretty-printed JSON.
 *
 * A 1500 ms pause follows each page (including the last one) before the
 * next request is made.
 *
 * @returns {Promise<void>} Resolves once the file is written and the summary
 *   has been logged.
 */
async function main() {
  const collected = [];

  for (let index = 0; index < URLs.length; index += 1) {
    const pageData = await crawlPage(URLs[index]);
    collected.push(pageData);

    // Fixed pause between consecutive page loads.
    await new Promise((wake) => {
      setTimeout(wake, 1500);
    });
  }

  const serialized = JSON.stringify(collected, null, 2);
  fs.writeFileSync('hanger-clamp-products.json', serialized);

  console.log('\nData saved to hanger-clamp-products.json');
  console.log(`Total pages crawled: ${collected.length}`);
}
|
||||
|
||||
// Script entry point: run the crawl. An unexpected failure is logged and also
// reflected in the process exit status, so callers (shells, CI) can detect it.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user