"知乎热榜Python爬虫实现"
以下是一个简单的 Python 爬虫代码,用于爬取知乎热榜:

```python
import requests
from bs4 import BeautifulSoup

url = "https://www.zhihu.com/hot"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')
hot_list = soup.find_all('div', class_='HotItem-content')
for hot in hot_list:
    title = hot.find('h2').text
```
现在的知乎必须要登录才能查看相关话题内容,给我们的日常造成了极大的不便,今天我就教大家如何利用简单的知乎热榜python爬虫代码,绕开知乎登录限制。
准备工作
配置好 Python 运行环境(推荐使用 PyCharm),并安装 requests 库(pip install requests)。
复制下面的源代码,运行,大功告成。
import requests


class Zhihu:
    """Fetch Zhihu's hot list and recommended feed through its API endpoints.

    Both feeds are requested once, when the instance is created. The raw
    JSON and the cleaned results are then available as attributes:

    * ``hot`` / ``recommend`` -- decoded JSON exactly as returned by the API.
    * ``hot_data`` / ``recommend_data`` -- lists of ``'[title](url)'``
      Markdown links built from that JSON.

    Args:
        timeout: Seconds to wait for each HTTP request before giving up.
            Without a timeout ``requests`` would wait indefinitely on a
            stalled connection.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with an HTTP error status.
        KeyError: If a response does not have the expected structure.
    """

    def __init__(self, timeout=10):
        self.hot_lists_api = 'https://api.zhihu.com/topstory/hot-lists/total'  # hot-list API
        self.recommend_lists_api = 'https://api.zhihu.com/topstory/recommend'  # recommendation API
        self.headers = {
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/87.0.4280.88 Safari/537.36'
            ),
        }
        # Must be set before the fetches below, which read it.
        self.timeout = timeout
        self.hot = self.get_hot_lists()  # raw hot-list JSON
        self.recommend = self.get_recommend_lists()  # raw recommendation JSON
        self.hot_data = self.wash_hot_lists()  # cleaned hot-list links
        self.recommend_data = self.wash_recommend_lists()  # cleaned recommendation links

    def _fetch_json(self, url, params):
        """Send a GET request with the shared headers and return the decoded JSON."""
        response = requests.get(
            url=url,
            headers=self.headers,
            params=params,
            timeout=self.timeout,
        )
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _markdown_link(title, url):
        """Format one entry as ``'[title](url)'``, rewriting the API URL to the website URL."""
        web_url = url.replace('api.zhihu.com/questions', 'zhihu.com/question')
        return f'[{title}]({web_url})'

    def get_hot_lists(self):
        """Request the hot list from the API.

        :return: decoded JSON response
        """
        params = {'limit': '10', 'is_browser_model': '0'}
        return self._fetch_json(self.hot_lists_api, params)

    def get_recommend_lists(self):
        """Request the recommended feed from the API.

        :return: decoded JSON response
        """
        params = {
            "action": "down",
            "ad_interval": "-10",
            "after_id": '1',  # TODO:
            "page_number": '1',  # TODO:
            # NOTE(review): hard-coded token; presumably tied to one session
            # and may stop working -- confirm.
            "session_token": "99872c210b53364be1ede4bf459e8005",
        }
        return self._fetch_json(self.recommend_lists_api, params)

    def wash_hot_lists(self):
        """Turn the raw hot-list JSON into Markdown links.

        :return: ['[title](url)', ...]
        """
        return [
            self._markdown_link(entry['target']['title'], entry['target']['url'])
            for entry in self.hot['data']
        ]

    def wash_recommend_lists(self):
        """Turn the raw recommendation JSON into Markdown links.

        Entries whose target has a nested ``question`` use the question's
        title and URL; all other entries use the target's own title and URL.

        :return: ['[title](url)', ...]
        """
        links = []
        for entry in self.recommend['data']:
            target = entry['target']
            try:
                question = target['question']
                title, url = question['title'], question['url']
            except KeyError:
                # No usable nested question: fall back to the target itself.
                title, url = target['title'], target['url']
            links.append(self._markdown_link(title, url))
        return links


zhihu = Zhihu()
使用教程
要获取当前知乎热榜数据,在源代码末尾添加下面这行代码,然后运行程序即可。
# Print the cleaned hot-list entries, each formatted as a '[title](url)' Markdown link.
print(zhihu.hot_data)
要想获取随机推荐话题,在源代码末尾添加下面这行代码,然后运行程序即可。
# Print the cleaned recommended-feed entries, each formatted as a '[title](url)' Markdown link.
print(zhihu.recommend_data)