登录百度OCR,创建应用
https://console.bce.baidu.com/ai/?fromai=1#/ai/ocr/overview/index
创建后,在应用详情中查看该应用的 API Key 和 Secret Key(即下文的 client_id 和 client_secret):
根据API文档调试:
先获取Access Token:
如(请将 client_id、client_secret 替换为自己应用的 API Key 和 Secret Key,不要公开真实密钥):https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=<你的API Key>&client_secret=<你的Secret Key>&
API调用:
测试代码1:
#coding:utf-8
__author__='vulsee.com'
import requests
import base64
def getToken():
    """Request an OAuth access token from the Baidu token endpoint.

    Posts to the module-level ``getTokenurl`` (assigned in this script's
    ``__main__`` section) and returns the ``access_token`` field of the
    JSON reply.

    Raises:
        RuntimeError: if the reply does not contain ``access_token``.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    reply = requests.post(getTokenurl, timeout=30).json()
    if 'access_token' not in reply:
        # Show what the server actually sent instead of a bare KeyError.
        raise RuntimeError('failed to get access token: %r' % (reply,))
    return reply['access_token']
def getImgbs64(imgfile):
    """Return the base64 encoding of the contents of the file ``imgfile``.

    The file is read in binary mode and closed before returning.
    """
    # ``with`` guarantees the handle is closed even if the read fails.
    with open(imgfile, 'rb') as f:
        return base64.b64encode(f.read())
def getResult(imgfile):
    """OCR a local image with Baidu's ``accurate_basic`` endpoint.

    Fetches a token with ``getToken()``, posts the base64-encoded image,
    joins the ``words`` field of every entry in ``words_result``, prints
    the joined text and returns it.

    Args:
        imgfile: path of the image file to recognise.

    Returns:
        The concatenated recognised text (the same string that is printed).

    Raises:
        RuntimeError: if the reply does not contain ``words_result``.
    """
    access_token = getToken()
    url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic?access_token=%s' % access_token
    headers = {'content-type': 'application/x-www-form-urlencoded'}
    params = {"image": getImgbs64(imgfile)}
    # Optional request fields, not needed by this script:
    #   language_type='auto_detect'
    #   detect_direction='false'
    #   paragraph='false'
    #   probability='false'
    reply = requests.post(url, headers=headers, data=params, timeout=30).json()
    if 'words_result' not in reply:
        # Show the server's reply instead of failing with a bare KeyError.
        raise RuntimeError('OCR request failed: %r' % (reply,))
    rs = ''.join(item['words'] for item in reply['words_result'])
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(rs)
    return rs
def start():
    """Run ``getResult`` on the hard-coded sample image file name."""
    sample_image = '105010.jpg'
    getResult(sample_image)
def main():
    """Script entry point: hand control to ``start()``."""
    start()
if __name__ == '__main__':
    # These are already module-level names, so no ``global`` statement is needed.
    # Replace the masked values with the credentials of your own application.
    grant_type = 'client_credentials'
    client_id = '*****'
    client_secret = '***'
    # getToken() reads ``getTokenurl`` from module scope.
    getTokenurl = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=%s&client_id=%s&client_secret=%s&' % (grant_type, client_id, client_secret)
    main()
处理结果:
测试代码2:
如果flag为file,则从文件读取;如果flag为url,则直接从web获取
#coding:utf-8
__author__='vulsee.com'
import requests
import base64
def getToken():
    """Request an OAuth access token from the Baidu token endpoint.

    Posts to the module-level ``getTokenurl`` (assigned in this script's
    ``__main__`` section) and returns the ``access_token`` field of the
    JSON reply.

    Raises:
        RuntimeError: if the reply does not contain ``access_token``.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    reply = requests.post(getTokenurl, timeout=30).json()
    if 'access_token' not in reply:
        # Show what the server actually sent instead of a bare KeyError.
        raise RuntimeError('failed to get access token: %r' % (reply,))
    return reply['access_token']
def getImgbs64(imgfile):
    """Return the base64 encoding of the contents of the file ``imgfile``.

    The file is read in binary mode and closed before returning.
    """
    # ``with`` guarantees the handle is closed even if the read fails.
    with open(imgfile, 'rb') as f:
        return base64.b64encode(f.read())
def getImgbs64Online(url):
    """Download ``url`` and return the response body base64-encoded.

    Args:
        url: address of the image to fetch with an HTTP GET.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    response = requests.get(url, timeout=30)
    # Without this check an HTTP error page would be encoded and passed on
    # as if it were the image.
    response.raise_for_status()
    return base64.b64encode(response.content)
def getResult(flag, imgfile, access_token):
    """OCR an image with Baidu's ``accurate_basic`` endpoint and print the text.

    Args:
        flag: ``'file'`` to read ``imgfile`` from disk; any other value
            makes ``imgfile`` be fetched as a URL.
        imgfile: local path or URL of the image, depending on ``flag``.
        access_token: token to append to the endpoint URL.

    Returns:
        The concatenated recognised text (the same text that is printed
        after the ``source:<flag>`` prefix).

    Raises:
        RuntimeError: if the reply does not contain ``words_result``.
    """
    url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic?access_token=%s' % access_token
    headers = {'content-type': 'application/x-www-form-urlencoded'}
    # The two sources differ only in how the image bytes are obtained, so
    # the request and result handling are shared.
    if flag == 'file':
        image = getImgbs64(imgfile)
    else:
        image = getImgbs64Online(imgfile)
    params = {"image": image}
    reply = requests.post(url, headers=headers, data=params, timeout=30).json()
    if 'words_result' not in reply:
        # Show the server's reply instead of failing with a bare KeyError.
        raise RuntimeError('OCR request failed: %r' % (reply,))
    rs = ''.join(item['words'] for item in reply['words_result'])
    # One pre-formatted argument gives the same "source:<flag> <text>" line
    # on Python 2 and Python 3.
    print('source:%s %s' % (flag, rs))
    return rs
def start(access_token, flag):
    """Run ``getResult`` on the hard-coded sample that matches ``flag``.

    ``'file'`` uses a local file name, ``'url'`` uses a web address; any
    other value just prints ``error``.
    """
    if flag == 'file':
        local_image = '105007.jpg'
        getResult(flag, local_image, access_token)
        return
    if flag == 'url':
        remote_image = 'https://****:***/api/api/*****?id=***&***=159980316534726'
        getResult('url', remote_image, access_token)
        return
    print('error')
def main():
    """Fetch an access token, then run ``start`` with the source flag set below."""
    token = getToken()
    # 'file' reads the image from a local file; 'url' fetches it from the web.
    source = 'url'
    start(token, source)
if __name__ == '__main__':
    # These are already module-level names, so no ``global`` statement is needed.
    # Replace the masked values with the credentials of your own application.
    grant_type = 'client_credentials'
    client_id = '****'
    client_secret = '******'
    # getToken() reads ``getTokenurl`` from module scope.
    getTokenurl = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=%s&client_id=%s&client_secret=%s&' % (grant_type, client_id, client_secret)
    main()