使用Python编写的Wallhaven壁纸爬取脚本
以下是一个简单的Python爬取wallhaven壁纸脚本，使用了requests和BeautifulSoup库：

```python
import requests
from bs4 import BeautifulSoup

def get_wallpapers(query, page=1):
    url = f'https://wallhaven.cc/search?q={query}&categories=111&purity=100&atleast=1920x1080&sorting=random&order=desc&page={page}'
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    wallpapers = soup.find_all('a', class_='preview')
    return [wallpaper['href'] for wallpaper in wallpapers]

if __name__ == '__main__':
    query = '风景'
    wallpapers = get_wallpapers(query)
    print(wallpapers)
```
# -*- coding: utf-8 -*-
# @Time: 2022/4/8 0:02
# @Software: PyCharm
# @File: wallhavenBiZhi.py
"""Scraper that downloads full-size wallpapers from wallhaven.cc's "hot" listing."""
import os
import time

import requests
from lxml import etree


class BZ:
    """Wallhaven wallpaper downloader.

    Fetches listing pages, derives each thumbnail's full-image URL from the
    lazy-load thumbnail URL, and saves the image bytes to disk.
    """

    def tree(self, e):
        """Parse an HTML string into an lxml element tree."""
        return etree.HTML(e)

    def getBZ(self, url):
        """Crawl listing pages 1-9 of *url* (a format string with one ``{}``
        page placeholder) and download every wallpaper found.

        Full-image URLs are guessed as .jpg first; on a 404 the .png variant
        is tried. Downloads that still fail are skipped rather than saved.
        """
        # Loop-invariant request headers — build once, outside the page loop.
        headers = {
            # 'referer': 'https://wallhaven.cc/',
            'User-Agent': 'Mozilla/5.0(WindowsNT10.0;Win64;x64)AppleWebKit/537.36(KHTML,likeGecko)Chrome/92.0.4515.159Safari/537.36',
        }
        # Crawl pages 1..9 of the listing.
        for page in range(1, 10):
            print(time.strftime("%H:%M:%S"))
            # Alternative: pick a random page between 1 and 50.
            # page = random.randint(1, 50)
            print("第{}页".format(page))
            url1 = url.format(page)
            # Fetch and parse the listing (first-level) page.
            html = requests.get(url=url1, headers=headers, timeout=5.0).text
            data = self.tree(html)
            li_list = data.xpath('.//div[@id="thumbs"]//li')
            for li in li_list:
                img = li.xpath('.//img[@class="lazyload"]/@data-src')
                if not img:
                    continue
                img = img[0]
                # Thumbnail URLs end in "<id>.jpg"; the last 10 chars carry
                # the wallpaper id, and chars [-10:-8] are its shard folder.
                img_end = img[-10:]
                tupian_url = 'https://w.wallhaven.cc/full/' + img[-10:-8] + '/wallhaven-' + img_end
                tupian = requests.get(url=tupian_url, headers=headers, timeout=5.0)
                if tupian.status_code == 404:
                    # Some wallpapers are PNG, not JPG — retry with .png.
                    img_end = img_end[:-3] + 'png'
                    tupian_url = 'https://w.wallhaven.cc/full/' + img[-10:-8] + '/wallhaven-' + img_end
                    tupian = requests.get(url=tupian_url, headers=headers, timeout=5.0)
                if tupian.status_code != 200:
                    # Still not found — skip instead of saving an error body.
                    continue
                self.save_picture(img_end, tupian.content)
                print(tupian_url)

    def save_picture(self, img_end, content_picture, save_dir='C:/Users/19873/Pictures/bizhi/'):
        """Write the downloaded image bytes to *save_dir*/*img_end*.

        *save_dir* defaults to the original hard-coded path for backward
        compatibility; the directory is created if it does not exist.
        """
        os.makedirs(save_dir, exist_ok=True)
        with open(save_dir + img_end, 'wb') as file:
            file.write(content_picture)
            print('保存完成' + time.strftime("%H:%M:%S"))


if __name__ == '__main__':
    url = 'https://wallhaven.cc/hot?page={}'
    bz = BZ()
    bz.getBZ(url)