能正常访问API,但图片一直无法传过去识别。请问原因大概是出在哪里呢?
百度的技术文档地址:https://ai.baidu.com/ai-doc/OCR/Ekwkggqa5
import requests
import base64
import urllib.parse
import logging
from seatable_api import Base, context
from seatable_api.constants import ColumnTypes
from seatable_api.exception import AuthExpiredError
# 配置日志记录
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
def get_access_token(api_key, secret_key):
"""获取百度API的访问令牌"""
try:
url = "https://aip.baidubce.com/oauth/2.0/token"
params = {
"grant_type": "client_credentials",
"client_id": api_key,
"client_secret": secret_key
}
response = requests.post(url, params=params)
response.raise_for_status()
return response.json().get("access_token")
except Exception as e:
logger.error(f"获取访问令牌失败: {e}")
raise
def get_image_as_base64(image_url, api_token, server_url):
"""
从Seatable获取图片并转换为base64编码
:param image_url: 图片URL
:param api_token: SeaTable API令牌
:param server_url: SeaTable服务器URL
:return: base64编码的图片数据,或None(如果出错)
"""
try:
# 构建完整的图片URL(处理相对URL)
if not image_url.startswith(('http://', 'https://')):
image_url = f"{server_url.rstrip('/')}/{image_url.lstrip('/')}"
# 添加认证头
headers = {
"Authorization": f"Token {api_token}"
}
# 下载图片
response = requests.get(image_url, headers=headers)
response.raise_for_status()
# 转换为base64并进行urlencode
base64_data = base64.b64encode(response.content).decode("utf-8")
encoded_data = urllib.parse.quote_plus(base64_data)
return encoded_data
except Exception as e:
logger.error(f"图片转换为base64失败: {e}")
return None
def recognize_waybill(image_base64, access_token):
"""
调用百度API识别快递面单(使用base64编码的图片)
:param image_base64: base64编码并urlencode后的图片数据
:param access_token: 百度API访问令牌
:return: 识别结果字典
"""
try:
request_url = f"https://aip.baidubce.com/rest/2.0/ocr/v1/waybill?access_token={access_token}"
params = {
"image": image_base64,
"is_identify_virtual_waybill": "true" # 开启隐私面单识别
}
headers = {"Content-Type": "application/x-www-form-urlencoded"}
response = requests.post(request_url, data=params, headers=headers)
response.raise_for_status()
result = response.json()
if "error_code" in result:
logger.error(f"百度API错误: {result.get('error_msg', '未知错误')}")
return None
return result
except Exception as e:
logger.error(f"识别快递面单失败: {e}")
return None
def process_seatable():
"""处理SeaTable数据读写流程"""
try:
# 获取SeaTable认证信息
server_url = context.server_url
api_token = context.api_token
base = Base(api_token, server_url)
base.auth()
# 定义表名
source_table_name = "快递识别" # 源表名称
target_table_name = "识别结果" # 目标表名称
# 检查目标表是否存在,存在则删除重建
tables = base.list_tables()
for table in tables:
if table["name"] == target_table_name:
base.delete_table(target_table_name)
logger.info(f"已删除表: {target_table_name}")
# 创建目标表
base.add_table(target_table_name, lang='zh-cn')
logger.info(f"已创建表: {target_table_name}")
# 定义目标表所需列
target_columns = [
{"name": "原表ID", "column_type": ColumnTypes.TEXT},
{"name": "快递面单图片", "column_type": ColumnTypes.TEXT}, # 存储图片URL用于溯源
{"name": "条形码", "column_type": ColumnTypes.TEXT},
{"name": "快递运单号", "column_type": ColumnTypes.TEXT},
{"name": "三段码", "column_type": ColumnTypes.TEXT},
{"name": "收件人姓名", "column_type": ColumnTypes.TEXT},
{"name": "寄件人姓名", "column_type": ColumnTypes.TEXT},
{"name": "收件人电话", "column_type": ColumnTypes.TEXT},
{"name": "寄件人电话", "column_type": ColumnTypes.TEXT},
{"name": "识别状态", "column_type": ColumnTypes.TEXT},
{"name": "错误信息", "column_type": ColumnTypes.TEXT}
]
# 添加列到目标表
for column in target_columns:
base.insert_column(target_table_name, column["name"], column["column_type"])
logger.info(f"目标表列创建完成: {target_table_name}")
# 获取百度API凭证
BAIDU_APP_ID = "隐藏"
BAIDU_API_KEY = "隐藏"
BAIDU_SECRET_KEY = "隐藏"
access_token = get_access_token(BAIDU_API_KEY, BAIDU_SECRET_KEY)
logger.info("百度API访问令牌获取成功")
# 读取源表数据
source_rows = base.list_rows(source_table_name)
logger.info(f"从源表读取数据: {len(source_rows)} 条记录")
# 处理每条记录
for row in source_rows:
row_id = row.get("_id", "未知ID")
logger.info(f"处理行 {row_id}...")
# 获取图片URL
image_cell = row.get("快递面单")
image_url = ""
error_msg = ""
try:
if isinstance(image_cell, list) and image_cell:
first_image = image_cell[0]
if isinstance(first_image, dict):
image_url = first_image.get("url", "")
elif isinstance(first_image, str):
image_url = first_image
else:
error_msg = f"图片信息格式错误: {type(first_image).__name__}"
elif isinstance(image_cell, str):
image_url = image_cell
elif not image_cell:
error_msg = "无图片数据"
else:
error_msg = f"图片列类型不支持: {type(image_cell).__name__}"
except Exception as e:
error_msg = f"获取图片URL出错: {str(e)}"
if not image_url:
error_msg = error_msg or "无有效图片URL"
logger.warning(f"行 {row_id}: {error_msg}")
write_result(base, target_table_name, row_id, image_url, None, error_msg)
continue
# 将图片转换为base64
image_base64 = get_image_as_base64(image_url, api_token, server_url)
if not image_base64:
error_msg = "无法获取图片内容"
logger.warning(f"行 {row_id}: {error_msg}")
write_result(base, target_table_name, row_id, image_url, None, error_msg)
continue
# 调用百度API识别
result = recognize_waybill(image_base64, access_token)
write_result(base, target_table_name, row_id, image_url, result, "")
logger.info("所有记录处理完成")
except AuthExpiredError:
logger.warning("SeaTable认证过期,重新认证...")
base.auth() # 重新认证
process_seatable() # 递归重试
except Exception as e:
logger.error(f"脚本执行出错: {e}")
def write_result(base, table_name, source_id, image_url, recognition_result, error_msg):
"""将识别结果写入目标表"""
try:
# 初始化结果数据
result_data = {
"原表ID": source_id,
"快递面单图片": image_url,
"识别状态": "失败" if error_msg else "成功",
"错误信息": error_msg
}
# 提取识别结果
if recognition_result:
words_result = recognition_result.get("words_result", [{}])[0]
api_field_mapping = {
"条形码": "bar_code",
"快递运单号": "waybill_number",
"三段码": "three_segment_code",
"收件人姓名": "recipient_name",
"寄件人姓名": "sender_name",
"收件人电话": "recipient_phone",
"寄件人电话": "sender_phone"
}
for target_field, api_field in api_field_mapping.items():
if api_field in words_result and words_result[api_field]:
result_data[target_field] = words_result[api_field][0].get("word", "")
else:
result_data[target_field] = ""
# 写入数据(处理认证过期)
try:
base.append_row(table_name, result_data)
except AuthExpiredError:
base.auth() # 重新认证
base.append_row(table_name, result_data) # 再次尝试写入
except Exception as e:
logger.error(f"写入结果失败: {e}")
if __name__ == "__main__":
process_seatable()