分享笔趣阁小说网Python爬虫技术
本文分享一个面向笔趣阁小说网的 Python 爬虫，它可以帮助用户从笔趣阁小说网上批量获取小说数据。这类爬虫通常使用 Python 编程语言编写，因为 Python 具有简单易学、功能强大的特点。在编写爬虫时，需要注意遵守网站的 robots.txt 协议，避免对网站造成不必要的影响。此外，还需要注意保护用户隐私，并尊重作品的版权。总之，笔趣阁小说网 Python 爬虫是一种非常实用的工具，它可以帮助用户快速获取所需的信息。如果您对此感兴趣，可以学习相关的编程知识，并尝试编写自己的爬虫程序。
# Full-book novel crawler for the Biquge site.
# Listing page: https://www.biquge.info/wanjiexiaoshuo/
import os
import random
import time
import webbrowser
from urllib.parse import urljoin

# NOTE: the third-party dependencies (requests, lxml) are imported inside
# _fetch_tree() so that the pure helpers in this module (e.g. strZ) can be
# imported and tested on a machine that does not have them installed.

# Request headers sent with every HTTP call.
header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/89.0.4389.128 Safari/537.36 Edg/89.0.774.77"
    )
}
# Characters that strZ() replaces in file and directory names: # / \ : * ? " < > |
noName = ['#', '/', '\\', ':', '*', '?', '"', '<', '>', '|']
# Root directory that downloaded novels are written to.
filePath = './保存小说'

# Listing page of the whole-site crawl; page N lives at _LIST_URL + str(N).
_LIST_URL = 'https://www.biquge.info/wanjiexiaoshuo/'
# Seconds to wait for a response; without a timeout a stalled server would
# block the crawl forever.
_TIMEOUT = 15


def strZ(_str):
    """Return ``_str`` with every character listed in ``noName`` replaced by a space."""
    return ''.join(' ' if ch in noName else ch for ch in _str)


def _first(nodes, default=None):
    """Return the first item of an XPath result list, or ``default`` if it is empty."""
    return nodes[0] if nodes else default


def _fetch_tree(url):
    """Download ``url`` and return its parsed HTML tree, or ``None`` on failure.

    A failure is a network error, a non-200 status code or an empty body; it
    is reported on stdout so one bad page does not abort the whole crawl.
    """
    import requests
    from lxml import etree

    try:
        resp = requests.get(url=url, headers=header, timeout=_TIMEOUT)
    except requests.RequestException as err:
        print('请求失败: %s (%s)' % (url, err))
        return None
    if resp.status_code != 200:
        print('请求失败: %s (HTTP %s)' % (url, resp.status_code))
        return None
    resp.encoding = 'utf-8'
    if not resp.text.strip():
        return None
    return etree.HTML(resp.text)


def _download_book(book_url, book_title, book_tree):
    """Save every chapter linked from a book index page as a UTF-8 ``.txt`` file.

    ``book_url`` is the index page URL (chapter links are resolved against it),
    ``book_title`` names the sub-directory of ``filePath`` and ``book_tree`` is
    the already-parsed index page.
    """
    book_dir = os.path.join(filePath, strZ(book_title).strip())
    os.makedirs(book_dir, exist_ok=True)
    for dd in book_tree.xpath('//div[@class="box_con"]/div[@id="list"]/dl/dd'):
        href = _first(dd.xpath('./a/@href'))
        title = _first(dd.xpath('./a/@title'))
        if not href or not title:
            # A <dd> without a usable link cannot be downloaded; skip it
            # instead of crashing on an empty XPath result.
            continue
        chapter_path = os.path.join(book_dir, strZ(title) + '.txt')
        chapter_tree = _fetch_tree(urljoin(book_url, href))
        if chapter_tree is None:
            continue
        lines = chapter_tree.xpath('//div[@id="content"]/text()')
        with open(chapter_path, 'w', encoding='utf-8') as fp:
            fp.write(''.join(line + '\n' for line in lines))
        print('%s成功下载到本地' % chapter_path)
        # Small random pause between chapters to avoid hammering the server.
        time.sleep(random.uniform(0.05, 0.2))


def main():
    """Open the site in a browser, ask for the crawl mode and run it."""
    webbrowser.open('https://www.biquwx.la/')
    os.makedirs(filePath, exist_ok=True)
    print('1.爬取指定小说')
    print('2.爬取整个站点')
    if input('使用哪种方式爬取小说?').strip() == '1':
        appintDown()
    else:
        allDown()
    input("按下任意键退出")


def appintDown():
    """Download a single novel whose index page URL is typed in by the user."""
    page_url = input('输入要爬取的小说网站(例如https://www.biquwx.la/10_10240/):').strip()
    page_tree = _fetch_tree(page_url)
    if page_tree is None:
        return
    page_title = _first(page_tree.xpath('//div[@id="info"]/h1/text()'))
    if not page_title:
        print('未找到小说标题: %s' % page_url)
        return
    _download_book(page_url, page_title, page_tree)


def allDown():
    """Walk every listing page of the site and download each novel found."""
    first_page = _fetch_tree(_LIST_URL)
    if first_page is None:
        return
    last_text = _first(
        first_page.xpath('//div[@class="pagelink"]/a[@class="last"]/text()')
    )
    try:
        page_count = int(last_text)
    except (TypeError, ValueError):
        print('无法解析总页数: %r' % (last_text,))
        return
    # The "last" link text is read as the number of the final listing page,
    # so the range is inclusive; otherwise that page would never be crawled.
    for page_i in range(1, page_count + 1):
        listing = _fetch_tree(_LIST_URL + str(page_i))
        if listing is None:
            continue
        for li in listing.xpath('//div[@class="novelslistss"]/ul/li'):
            book_href = _first(li.xpath('./span[@class="s2"]/a/@href'))
            book_title = _first(li.xpath('./span[@class="s2"]/a/text()'))
            if not book_href or not book_title:
                continue
            # urljoin leaves an absolute link untouched and resolves a
            # relative one against the listing URL.
            book_url = urljoin(_LIST_URL, book_href)
            book_tree = _fetch_tree(book_url)
            if book_tree is None:
                continue
            _download_book(book_url, book_title, book_tree)


if __name__ == '__main__':
    main()