小红书店铺商品抓取爬虫(Node.js)



const puppeteer = require('puppeteer');
const csvWriter = require('csv-write-stream');
const fs = require('fs');
const axios = require('axios');
const path = require('path');

/**
 * Downloads every image URL in `images` into
 * `<outputDir>/<main|desc>/<sanitised title>/`, one request at a time.
 *
 * Each file is named after the last path segment of its URL with `.webp`
 * appended (the suffix is always added, even if the URL already has an
 * extension). Entries that cannot be parsed as a URL are skipped with a
 * warning instead of aborting the remaining downloads.
 *
 * @param {string[]} images - Absolute image URLs.
 * @param {string} title - Product title; "/", "\" and spaces are stripped
 *     before it is used as a directory name.
 * @param {string} outputDir - Base output directory, with or without a
 *     trailing path separator.
 * @param {number} type - 0 selects the "main" sub-directory, any other value
 *     selects "desc".
 * @returns {Promise<void>} Resolves once every image has been written;
 *     rejects if a download or a file write fails.
 */
async function downloadImage(images, title, outputDir, type) {
    const subDir = type === 0 ? 'main' : 'desc';
    const safeTitle = title.replaceAll('/', '').replaceAll('\\', '').replaceAll(' ', '');
    // path.join (rather than string concatenation) keeps the result correct
    // whether or not outputDir ends with a separator.
    const targetDir = path.join(outputDir, subDir, safeTitle);

    for (const img of images) {
        let fileName;
        try {
            fileName = path.basename(new URL(img).pathname);
        } catch {
            // e.g. an <img> element whose src is empty.
            console.warn(`Skipping invalid image URL: ${img}`);
            continue;
        }
        const output = path.join(targetDir, `${fileName}.webp`);
        const { data } = await axios.get(img, { responseType: 'arraybuffer' });
        // Created only after a successful download so an empty or failing run
        // leaves no directory behind; `recursive` makes repeated calls a no-op.
        fs.mkdirSync(targetDir, { recursive: true });
        fs.writeFileSync(output, data);
        console.log(`Downloaded image to ${output}`);
    }
}


/**
 * Writes `_data` as CSV rows to `filePath`.
 *
 * No explicit header list is passed to csv-write-stream, so the header row
 * comes from the library's default handling of the first row written.
 * Failures are logged rather than thrown; this covers both synchronous
 * errors and asynchronous stream errors, which a try/catch alone cannot see.
 *
 * @param {Object[]} [_data=[]] - Row objects to write.
 * @param {string} [filePath='template.csv'] - Destination CSV file.
 * @returns {void}
 */
function createCSV(_data = [], filePath = 'template.csv') {
    let writeStream;
    try {
        const csvWriterStream = csvWriter();
        writeStream = fs.createWriteStream(filePath, { encoding: 'utf8' });

        // Stream errors are emitted asynchronously; without listeners an
        // 'error' event would be thrown as an uncaught exception.
        csvWriterStream.on('error', (e) => console.log(e));
        writeStream.on('error', (e) => console.log(e));
        // Report success only once everything has been flushed to the file.
        writeStream.on('finish', () => console.log('写入到本地模版文件'));

        csvWriterStream.pipe(writeStream);
        for (const row of _data) {
            csvWriterStream.write(row);
        }
        // End the CSV stream; the pipe then ends the file stream as well.
        csvWriterStream.end();
    } catch (e) {
        console.log(e);
        // Do not leave a half-open file handle behind after a synchronous failure.
        if (writeStream) {
            writeStream.destroy();
        }
    }
}

// Entry point: open the product page in headless Chrome with a mobile user
// agent, scrape title / price / description / image URLs, download the images
// and finally write a one-row CSV.
(async () => {
    // Placeholder: replace with the real product page URL before running.
    const productUrl = '商品页面URL';
    const browser = await puppeteer.launch();
    let product = null;
    try {
        console.log('准备获取...');
        const page = await browser.newPage();
        await page.setUserAgent("Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1");
        await page.goto(productUrl);
        console.log('已打开商品页面');
        // Wait for the product name element to load; a timeout here lands in
        // the catch block below.
        await page.waitForSelector('.description-container .goods-name', { timeout: 5000 });
        console.log('获取到商品信息..');
        // Read the text fields and image URLs out of the rendered page.
        const goodsTitle = await page.$eval('.description-container .goods-name', element => element.textContent);
        const goodsPrice = await page.$eval('.price-container .price', element => element.textContent);
        const goodsDesc = await page.$eval('.content-description', element => element.textContent);
        const images = await page.$$eval('.carousel-image', elements => elements.map(img => img.src));
        const descImages = await page.$$eval('.content-container img', elements => elements.map(img => img.src));
        console.log(`商品标题:${goodsTitle}`);
        console.log(`商品价格:${goodsPrice}`);
        console.log(`商品描述:${goodsDesc}`);
        console.log(`商品主图:${images}`);
        console.log(`商品描述图:${descImages}`);
        await downloadImage(images, goodsTitle, "./images/", 0);
        await downloadImage(descImages, goodsTitle, "./images/", 1);
        // Only set once everything above succeeded, so a failed run writes no CSV.
        product = { goodsTitle, goodsPrice, goodsDesc, images, descImages };
    } catch (e) {
        console.log('访问失败,可能出现验证码:');
        console.log(e);
    } finally {
        // Always release the browser process, whichever step failed.
        await browser.close();
    }
    if (product !== null) {
        createCSV([product]);
    }
})().catch((e) => {
    // Reached when the browser cannot be launched or closed; avoid a floating
    // rejection and signal failure through the exit code.
    console.log(e);
    process.exitCode = 1;
});


文章作者: 2winter
文章链接: https://2winter.com
版权声明: 本博客所有文章除特别声明外,均采用 CC BY 4.0 许可协议。转载请注明来源 2winter!