返回
阅读【枫落迎雪、】的帖子
nodejs爬取笔趣阁小说
版块:开发交流
多说无益,直接上代码
var http = require("http");
var express = require("express");
var request = require('request');
var async = require("async");
var fs = require('fs');
var iconv = require('iconv-lite');
var cheerio = require('cheerio');
var app = express();
let k = 1;
let breakNum = 1;
down(k)
async function down(i) {
let old = 0;
request({
url: "https://www.biqugex.com/book_" + i + "/",//请求路径
method: "GET",//请求方式,默认为get
headers: {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
"Accept-Language": "zh-CN,zh;q=0.9",
"Cache-Control": "max-age=0",
"Connection": "keep - alive",
"Host": "www.biqugex.com",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36",
},
encoding: null,
}, await function (err, res, body) {
//转码
let html = iconv.decode(body, "gb2312");
//小说名
let bookName = html.match(/<h2>.*<\/h2>/)[0].replace(/<(\S*?)[^>]*>.*?|<.*? \/>/g, "");
//获取小说章节元素
let textBody = html.split("</dt>")[2].split("</dl>")[0];
//拆分元素获取地址
let url = textBody.split('"');
let urlList = [];
console.log(urlList.length)
for (let i = 1; i < url.length; i += 2) {
//把地址存入数组
urlList.push("https://www.biqugex.com" + url[i])
}
console.log(urlList.length)
fs.mkdir(__dirname + "/" + bookName + "/", function (err) {
if (err) {
return console.error(err);
}
console.log("目录创建成功。");
});
async.mapLimit(urlList, 50, function (url, callback) {
request({
url: url,//请求路径
method: "GET",//请求方式,默认为get
headers: {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
"Accept-Language": "zh-CN,zh;q=0.9",
"Cache-Control": "max-age=0",
"Connection": "keep - alive",
"Host": "www.biqugex.com",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36",
},
encoding: null,
}, function (err, res, body) {
if (err) {
if (err.code == "ENOTFOUND") {
breakNum++
return
}
console.log(url)
console.log(body)
console.log(err)
}
console.log(old, urlList.length - 1)
if (old == urlList.length - breakNum) {
console.log(121212)
if (k > 100) {
return
}
k++;
down(k)
}
let htmlBody = iconv.decode(body, "gb2312");
let $ = cheerio.load(htmlBody);
//过滤本章地址
let title = $("h1").text()
let testText = $("#content").text().replace(/(https?|ftp|file):\/\/[-A-Za-z0-9+&@#\/%?=~_|!:,.;]+[-A-Za-z0-9+&@#\/%=~_|]/g, "");
//过滤笔趣阁广告
let text = testText.replace("请记住本书首发域名:www.biqugex.com。笔趣阁手机版阅读网址:m.biqugex.com", "")
old++
fs.writeFile('./' + bookName + '/' + title + ".txt", text, function (err) {
})
callback(null, url)
})
}, function (error, results) {
console.log("result :");
console.log(results);
})
})
}
app.listen(3000);
一步一个坑的走过来的,虽然不到一百行代码,写起来可真是要命
(adsbygoogle = window.adsbygoogle || []).push({});
- 0
- 0收藏帖子
- 0关注帖子
- 0
本贴有需要购买的内容
共 15 条评论
排序:时间 降序
请文明交流~
14楼
顶一下
小孙带女友吃饭,吃完后,一算帐,78元,小孙打开钱包翻找着,女友不耐烦了,摸出一张一百的给了老板,老板跑到柜台找钱,女友对小强说:别找了。这话说得并不大声,却让老板听见了。老板忙说:谢谢啦。 +1枚云币
00
13楼
厉害
有个人在自家后花园的树上发现了一只大猩猩,赶紧给动物园打电话。 动物园派人来了,这人带条特凶悍的狗,还有一根棍子,外加一个特大型的手铐,一把手枪。 来到了后花园,主人问这个人怎么办,此人说:“我先拿着棍子上去,把它揍下来,接着这只训练有素的狗就会扑上去一顿狂咬,趁大猩猩用双手保护自己的时候,你就用手铐把它拷起来……” 主人:“那手枪……” 那人说:“万一我不小心掉下来……你赶紧把狗给我打死…… +4枚云币
00
最后一页