1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
| const puppeteer = require('puppeteer'); const csvWriter = require('csv-write-stream'); const fs = require('fs'); const axios = require('axios'); const path = require('path');
/**
 * Downloads each image URL and stores it on disk as
 * `<outputDir>/<main|desc>/<sanitised title>/<URL basename>.webp`.
 *
 * Images are fetched one after another. The `.webp` suffix is always appended,
 * whatever extension the URL basename already carries.
 *
 * @param {string[]} images - Image URLs to download. Empty / non-string
 *   entries are skipped with a warning instead of aborting the batch.
 * @param {string} title - Name used for the per-item folder; `/`, `\` and
 *   spaces are removed from it.
 * @param {string} outputDir - Root output directory, with or without a
 *   trailing path separator.
 * @param {number} type - `0` stores under `main`, anything else under `desc`.
 * @returns {Promise<void>} Resolves once every image has been written.
 */
async function downloadImage(images, title, outputDir, type) {
  const subDir = type === 0 ? 'main' : 'desc';
  const safeTitle = title.replaceAll('/', '').replaceAll('\\', '').replaceAll(' ', '');
  // Join the segments separately so a missing trailing separator on
  // `outputDir` cannot glue it to the sub-directory name.
  const targetDir = path.join(outputDir, subDir, safeTitle);

  for (const img of images) {
    if (typeof img !== 'string' || img === '') {
      // `new URL('')` throws; skip this entry rather than losing the whole batch.
      console.warn('Skipping image with empty or invalid URL');
      continue;
    }

    const fileName = path.basename(new URL(img).pathname);
    const filePath = `${path.join(targetDir, fileName)}.webp`;
    const { data } = await axios.get(img, { responseType: 'arraybuffer' });

    // Created lazily (after a successful download) so no folder appears when
    // nothing is saved; `recursive: true` makes the call a no-op if it exists.
    fs.mkdirSync(targetDir, { recursive: true });
    fs.writeFileSync(filePath, data);
    console.log(`Downloaded image to ${filePath}`);
  }
}
/**
 * Writes the given rows to `template.csv` in the current working directory
 * through a `csv-write-stream` writer piped into a UTF-8 file stream.
 *
 * Synchronous failures are caught and logged. Stream failures are reported
 * asynchronously via `'error'` events, which a `try/catch` cannot see, so both
 * streams get an error listener that logs instead of crashing the process.
 *
 * @param {object[]} [_data=[]] - Rows to write, one object per CSV line.
 * @returns {import('fs').WriteStream | undefined} The file stream (listen for
 *   `'finish'` to know when the file is complete), or `undefined` if set-up
 *   threw synchronously.
 */
function createCSV(_data = []) {
  try {
    const csvWriterStream = csvWriter();
    const writeStream = fs.createWriteStream('template.csv', { encoding: 'utf8' });

    // Without listeners an emitted 'error' event becomes an uncaught exception.
    const logStreamError = (err) => {
      console.log(err);
    };
    csvWriterStream.on('error', logStreamError);
    writeStream.on('error', logStreamError);

    // Connect the CSV encoder to the output file.
    csvWriterStream.pipe(writeStream);

    for (const row of _data) {
      csvWriterStream.write(row);
    }
    console.log('写入到本地模版文件');

    // Finish writing; the file stream is closed once the pipe drains.
    csvWriterStream.end();
    return writeStream;
  } catch (e) {
    console.log(e);
    return undefined;
  }
}
/**
 * Script entry point: opens the product page in headless Chromium with a
 * mobile Safari user agent, scrapes title / price / description / image URLs,
 * downloads the images and writes one row to the CSV template.
 *
 * Any failure after the browser is launched is logged, and the browser is
 * closed exactly once in `finally`, so it is not leaked on error paths.
 */
(async () => {
  const browser = await puppeteer.launch();
  try {
    console.log('准备获取...');
    const page = await browser.newPage();
    await page.setUserAgent("Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1");

    await page.goto('商品页面URL');
    console.log('已打开商品页面');

    // Wait until the product title element has been rendered.
    await page.waitForSelector('.description-container .goods-name', { timeout: 5000 });
    console.log('获取到商品信息..');

    // Read text fields and image URLs from the page.
    const goodsTitle = await page.$eval('.description-container .goods-name', (element) => element.textContent);
    const goodsPrice = await page.$eval('.price-container .price', (element) => element.textContent);
    const goodsDesc = await page.$eval('.content-description', (element) => element.textContent);
    const images = await page.$$eval('.carousel-image', (elements) => elements.map((img) => img.src));
    const descImages = await page.$$eval('.content-container img', (elements) => elements.map((img) => img.src));

    console.log(`商品标题:${goodsTitle}`);
    console.log(`商品价格:${goodsPrice}`);
    console.log(`商品描述:${goodsDesc}`);
    console.log(`商品主图:${images}`);
    console.log(`商品描述图:${descImages}`);

    // type 0 -> carousel ("main") images, type 1 -> description images.
    await downloadImage(images, goodsTitle, "./images/", 0);
    await downloadImage(descImages, goodsTitle, "./images/", 1);

    createCSV([{ goodsTitle, goodsPrice, goodsDesc, images, descImages }]);
  } catch (e) {
    console.log('访问失败,可能出现验证码:');
    console.log(e);
  } finally {
    // Single close point for both the success and the failure path.
    await browser.close();
  }
})();
|