// 小红书店铺商品抓取爬虫 (Node.js) — Xiaohongshu shop product scraper

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78

const puppeteer = require('puppeteer');
const csvWriter = require('csv-write-stream');
const fs = require('fs');
const axios = require('axios');
const path = require('path');

/**
 * Download every image URL in `images` into a per-product folder.
 *
 * Files are written to
 * `<outputDir>/<main|desc>/<sanitized title>/<URL path basename>.webp`.
 * The response bytes are written as received; the `.webp` suffix is always
 * appended, whatever the URL's own extension is.
 *
 * @param {string[]} images - Image URLs. A null/undefined list and entries
 *   that are not non-empty strings are skipped, so one blank `src` does not
 *   abort the whole run.
 * @param {string} title - Product title; "/", "\" and spaces are stripped to
 *   form the folder name.
 * @param {string} outputDir - Base output directory (trailing slash optional).
 * @param {number} type - `0` selects the "main" sub-folder; any other value
 *   selects "desc".
 * @returns {Promise<void>} Resolves once all images are written. Rejects on
 *   the first failed download or write.
 */
async function downloadImage(images, title, outputDir, type) {
  const urls = (images ?? []).filter((img) => typeof img === 'string' && img !== '');
  if (urls.length === 0) {
    // Nothing to fetch: avoid creating an empty directory.
    return;
  }

  const typeFolder = type === 0 ? 'main' : 'desc';
  const safeTitle = title.replaceAll('/', '').replaceAll('\\', '').replaceAll(' ', '');
  // path.join keeps the layout correct whether or not outputDir ends in a slash.
  const targetDir = path.join(outputDir, typeFolder, safeTitle);

  // The directory is the same for every image, so create it once up front.
  await fs.promises.mkdir(targetDir, { recursive: true });

  for (const img of urls) {
    const baseName = path.basename(new URL(img).pathname);
    const filePath = `${path.join(targetDir, baseName)}.webp`;
    const { data } = await axios.get(img, { responseType: 'arraybuffer' });
    await fs.promises.writeFile(filePath, data);
    // Log the path that was actually written (including the suffix).
    console.log(`Downloaded image to ${filePath}`);
  }
}


/**
 * Write the given rows to `template.csv` in the current working directory.
 *
 * No explicit header list is passed to `csvWriter()`.
 * NOTE(review): csv-write-stream is expected to derive the header row from
 * the keys of the first row written — confirm against its README.
 *
 * Writing is asynchronous: this function returns immediately and the
 * completion message is logged once the file stream has finished.
 *
 * @param {Object[]} [_data=[]] - Rows to write, one object per CSV line.
 * @returns {void}
 */
function createCSV(_data = []) {
  try {
    // CSV encoder stream.
    const csvWriterStream = csvWriter();
    // Destination file stream that the encoder is piped into.
    const writeStream = fs.createWriteStream('template.csv', { encoding: 'utf8' });

    // Stream failures are emitted as 'error' events, which the surrounding
    // try/catch cannot see; without listeners they would crash the process.
    csvWriterStream.on('error', (err) => {
      console.log(err);
      // pipe() does not close the destination when the source fails.
      writeStream.destroy();
    });
    writeStream.on('error', (err) => {
      console.log(err);
    });
    // Report success only after the data has actually been flushed.
    writeStream.on('finish', () => {
      console.log('写入到本地模版文件');
    });

    csvWriterStream.pipe(writeStream);

    for (const row of _data) {
      csvWriterStream.write(row);
    }

    // End the encoder; pipe() then ends the file stream.
    csvWriterStream.end();
  } catch (e) {
    // Synchronous failures only (e.g. _data is not iterable).
    console.log(e);
  }
}

// Script entry point: open the product page in headless Chromium with a mobile
// user agent, read title / price / description / image URLs from the DOM,
// download the images, and write one row to the CSV template.
(async () => {
  let browser;
  try {
    // Launch inside the try so a start-up failure is reported like any other
    // error instead of surfacing as an unhandled rejection.
    browser = await puppeteer.launch();
    console.log('准备获取...');
    const page = await browser.newPage();
    await page.setUserAgent("Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1");

    // Placeholder: replace with the real product page URL before running.
    await page.goto('商品页面URL');
    console.log('已打开商品页面');

    // Wait until the product name element has been rendered.
    await page.waitForSelector('.description-container .goods-name', { timeout: 5000 });
    console.log('获取到商品信息..');

    // Read the product fields from the page.
    const goodsTitle = await page.$eval('.description-container .goods-name', (el) => el.textContent);
    const goodsPrice = await page.$eval('.price-container .price', (el) => el.textContent);
    const goodsDesc = await page.$eval('.content-description', (el) => el.textContent);
    const images = await page.$$eval('.carousel-image', (els) => els.map((img) => img.src));
    const descImages = await page.$$eval('.content-container img', (els) => els.map((img) => img.src));

    console.log(`商品标题:${goodsTitle}`);
    console.log(`商品价格:${goodsPrice}`);
    console.log(`商品描述:${goodsDesc}`);
    console.log(`商品主图:${images}`);
    console.log(`商品描述图:${descImages}`);

    // type 0 -> "main" folder, type 1 -> "desc" folder.
    await downloadImage(images, goodsTitle, "./images/", 0);
    await downloadImage(descImages, goodsTitle, "./images/", 1);

    createCSV([{ goodsTitle, goodsPrice, goodsDesc, images, descImages }]);
  } catch (e) {
    console.log('访问失败,可能出现验证码:');
    console.log(e);
  } finally {
    // Close the browser exactly once on every path so no Chromium process is
    // left behind; a failure to close is logged rather than rethrown.
    if (browser) {
      await browser.close().catch((closeErr) => {
        console.log(closeErr);
      });
    }
  }
})();