Python百度图片搜索API源码解析
百度图片搜索的实现主要包括以下几个步骤:首先,导入所需的库,如 urllib、json 等。然后,构造百度图片搜索接口(image.baidu.com/search/acjson)的请求 URL 和参数,如搜索关键词、分页偏移、每页数量等。接着,发送 GET 请求,获取接口返回的数据。最后,解析返回的 JSON 数据,从中提取每张图片的缩略图地址(thumbURL)。完整示例代码见下文。
python百度图片搜索API源码,一段简单的图片爬虫程序。通过输入关键词,在百度图片中搜索相关图片,并返回一张随机的图片。
代码使用Flask框架搭建了一个简单的Web应用,将用户输入的关键词作为参数传递给爬虫程序,然后从百度图片中获取相关图片的URL并随机选择一张返回给用户。
用户可以通过访问Web应用的首页,在输入框中输入关键词进行搜索。如果找到相关图片,则会跳转到图片的URL,如果未找到图片,则会显示无法正常查找的提示信息。
将代码发布到服务器上,就可以用 Markdown 格式 `![image](服务器域名/?word=关键字)` 来显示搜索到的图片。
# -*- coding: utf-8 -*-
"""Tiny Baidu image-search web app.

Scrapes the ``image.baidu.com/search/acjson`` JSON endpoint for thumbnail
URLs matching a keyword, and exposes a single Flask route that redirects
the caller to a randomly chosen matching image.
"""
import argparse  # NOTE(review): unused here; kept from the original script
import json
import os  # NOTE(review): unused here; kept from the original script
import random
import re
import socket
import ssl
import sys  # NOTE(review): unused here; kept from the original script
import time
import urllib
import urllib.error
import urllib.parse
import urllib.request

from flask import Flask, make_response, redirect, request

# Global socket timeout so a stalled request to Baidu cannot hang a worker.
timeout = 5
socket.setdefaulttimeout(timeout)

# Baidu endpoints may fail local certificate verification; the original
# script disabled verification globally, so that behavior is preserved.
ssl._create_default_https_context = ssl._create_unverified_context


class Crawler:
    """Fetches image thumbnail URLs from Baidu image search."""

    # Delay (seconds) between successive requests to Baidu.
    __time_sleep = 0.1
    # Total number of results to fetch (set by start()).
    __amount = 0
    # Result offset to start fetching from.
    __start_amount = 0
    __counter = 0
    # Shared request headers; the Cookie value accumulates across requests.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) '
                      'Gecko/20100101 Firefox/23.0',
        'Cookie': '',
    }
    # Results requested per page.
    __per_page = 30

    def __init__(self, t=0.1):
        """:param t: delay in seconds between page downloads."""
        self.time_sleep = t

    @staticmethod
    def get_suffix(name):
        """Return the file extension of *name*, or ``'.jpeg'`` if unusable.

        Bug fix: the original called ``m.group(0)`` without checking that
        the regex matched, raising AttributeError for names with no dot.
        """
        m = re.search(r'\.[^\.]*$', name)
        if m and m.group(0) and len(m.group(0)) <= 5:
            return m.group(0)
        return '.jpeg'

    @staticmethod
    def handle_baidu_cookie(original_cookie, cookies):
        """Merge ``Set-Cookie`` response headers into a ``Cookie`` string.

        :param original_cookie: current ``Cookie`` header value.
        :param cookies: list of ``Set-Cookie`` header values, or None.
        :return: combined cookie string without a trailing semicolon.
        """
        if not cookies:
            return original_cookie
        result = original_cookie
        for cookie in cookies:
            # Keep only the name=value part; drop attributes (Path, Expires...).
            result += cookie.split(';')[0] + ';'
        # Bug fix: str.rstrip returns a new string; the original discarded it.
        return result.rstrip(';')

    def get_images(self, word):
        """Download result pages for *word* and collect thumbnail URLs.

        :param word: search keyword (not yet URL-encoded).
        :return: list of thumbnail URL strings (possibly empty).
        """
        search = urllib.parse.quote(word)
        pn = self.__start_amount
        image_urls = []
        while pn < self.__amount:
            # Bug fix: '&latest=&copyright=' — the original contained the
            # mojibake '©right=' (HTML-entity corruption of '&copy').
            url = ('https://image.baidu.com/search/acjson?tn=resultjson_com'
                   '&ipn=rj&ct=201326592&is=&fp=result&queryWord=%s&cl=2'
                   '&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=&hd='
                   '&latest=&copyright=&word=%s&s=&se=&tab=&width=&height='
                   '&face=0&istype=2&qc=&nc=1&fr=&expermode=&force=&pn=%s'
                   '&rn=%d&gsm=1e&1594447993172='
                   % (search, search, str(pn), self.__per_page))
            try:
                # Be polite: wait between requests.
                time.sleep(self.time_sleep)
                req = urllib.request.Request(url=url, headers=self.headers)
                page = urllib.request.urlopen(req)
                # Accumulate Baidu's session cookies for subsequent pages.
                self.headers['Cookie'] = self.handle_baidu_cookie(
                    self.headers['Cookie'], page.info().get_all('Set-Cookie'))
                rsp = page.read()
                page.close()
            except UnicodeDecodeError as e:
                print(e)
                print('-----UnicodeDecodeError url:', url)
                break  # bug fix: original retried forever without advancing pn
            except urllib.error.URLError as e:
                print(e)
                print("-----urlError url:", url)
                break  # bug fix: avoid infinite retry of the same page
            except socket.timeout as e:
                print(e)
                print("-----socket timout:", url)
                break  # bug fix: avoid infinite retry of the same page
            else:
                # Percent-decode every string value inside the JSON payload.
                rsp_data = json.loads(
                    rsp, strict=False,
                    object_hook=lambda d: {
                        k: urllib.parse.unquote(v) if isinstance(v, str) else v
                        for k, v in d.items()
                    })
                if 'data' not in rsp_data:
                    # Bug fix: the original did `continue` here without
                    # advancing pn, spinning forever on an empty response.
                    break
                for image_info in rsp_data['data']:
                    if 'thumbURL' in image_info:
                        image_urls.append(image_info['thumbURL'])
                pn += self.__per_page
        return image_urls

    def start(self, word):
        """Fetch one page (30 results) of thumbnail URLs for *word*."""
        self.__per_page = 30
        self.__start_amount = 0
        self.__amount = self.__per_page
        return self.get_images(word)


app = Flask(__name__)


@app.route("/")
def index():
    """Redirect to a random Baidu image for ``?word=<keyword>``.

    Falls back to a simple error page when no keyword is supplied or no
    image could be found.
    """
    word = request.args.get('word')
    if word:
        crawler = Crawler(0.1)  # 0.1 s delay between page downloads
        image_urls = crawler.start(word)
        if image_urls:
            return redirect(random.choice(image_urls))
    return """<!DOCTYPE html><html><body><h1>无法正常查找</h1></body></html>"""


if __name__ == '__main__':
    app.run()