使用 CANN Python API 快速构建 AI 推理服务
本文分四步完成:1. 安装 Python 依赖(bash);2. 编写推理类(python);3. 构建 Flask 服务(python);4. 测试服务(bash)。
·
对于大多数算法工程师和应用开发者而言,C++ 编程门槛较高。为此,CANN 提供了 Python 封装接口(通常称为 acllite 或 pyACL),大幅简化模型部署流程。本文将演示如何用不到 100 行 Python 代码构建一个 RESTful 推理服务。
1. 安装 Python 依赖
确保已安装 CANN 提供的 Python 包(通常位于 /usr/local/Ascend/ascend-toolkit/latest/python/site-packages):
bash
编辑
export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$PYTHONPATH
python3 -c "import acl; print('ACL Python loaded successfully')"
2. 编写推理类
python
编辑
# inference_engine.py
import acl
import numpy as np
class CANNInference:
    """Load one offline model (.om) through pyACL and run single-input inference.

    The instance owns the ACL runtime state it creates (ACL init, device,
    context, loaded model) and releases it in ``__del__``.

    NOTE(review): the ``acl.ACL_*`` constants and ``acl.ffi_void_p`` are kept
    exactly as the original listing used them; confirm they exist in the
    installed pyACL version (some releases expect user-defined integer
    constants and plain integer addresses instead).

    Args:
        model_path: Path of the ``.om`` model file to load.
        device_id: Device index passed to the ACL runtime (default 0).
        output_elements: Number of float32 values in the model's single
            output (default 1000).
    """

    def __init__(self, model_path, device_id=0, output_elements=1000):
        self.device_id = device_id
        self.output_elements = output_elements
        self.model_id = None
        self.context = None
        # Track how far initialisation got so __del__ only undoes what was done.
        self._acl_ready = False
        self._device_set = False
        self.load_model(model_path)

    @staticmethod
    def _check(ret, what):
        """Raise RuntimeError if an ACL call returned a non-success code.

        An explicit raise is used instead of ``assert`` because asserts are
        removed when Python runs with ``-O``.
        """
        if ret != acl.ACL_SUCCESS:
            raise RuntimeError(f"{what} (ACL error code {ret})")

    def load_model(self, model_path):
        """Initialise ACL, bind the device, create a context and load the model.

        Raises:
            RuntimeError: If any ACL call reports failure.
        """
        self._check(acl.init(), "ACL init failed")
        self._acl_ready = True

        self._check(acl.rt.set_device(self.device_id), "acl.rt.set_device failed")
        self._device_set = True

        context, ret = acl.rt.create_context(self.device_id)
        self._check(ret, "acl.rt.create_context failed")
        self.context = context

        model_id, ret = acl.mdl.load_from_file(model_path)
        self._check(ret, "acl.mdl.load_from_file failed")
        self.model_id = model_id
        print(f"Model loaded: {model_path}")

    def infer(self, input_data: np.ndarray) -> np.ndarray:
        """Run the model on one input tensor and return its output.

        Args:
            input_data: Input array; the original listing assumes shape
                (1, 3, 224, 224) and dtype float32. Its raw bytes are copied
                to the device unchanged.

        Returns:
            A 1-D float32 array with ``output_elements`` values.

        Raises:
            RuntimeError: If no model is loaded or an ACL call fails.
            ValueError: If ``input_data`` is empty.
        """
        if self.model_id is None:
            raise RuntimeError("Model is not loaded")

        # The raw buffer is copied byte-for-byte, so it must be C-contiguous.
        host_input = np.ascontiguousarray(input_data)
        input_size = host_input.nbytes
        if input_size == 0:
            raise ValueError("input_data must not be empty")

        output_host = np.empty(self.output_elements, dtype=np.float32)
        output_size = output_host.nbytes

        # Only successfully created resources are recorded here, so the
        # finally-block releases exactly what exists, even on failure.
        input_ptr = output_ptr = None
        input_buffer = output_buffer = None
        input_dataset = output_dataset = None
        try:
            # NOTE(review): an ACL context belongs to the thread that created
            # it; re-binding lets calls from other threads (e.g. web server
            # workers) use it — confirm against the pyACL documentation.
            self._check(acl.rt.set_context(self.context),
                        "acl.rt.set_context failed")

            ptr, ret = acl.rt.malloc(input_size, acl.ACL_MEM_MALLOC_NORMAL_ONLY)
            self._check(ret, "acl.rt.malloc for input failed")
            input_ptr = ptr

            # Copy host -> device
            ret = acl.rt.memcpy(input_ptr, input_size,
                                host_input.ctypes.data_as(acl.ffi_void_p),
                                input_size, acl.ACL_MEMCPY_HOST_TO_DEVICE)
            self._check(ret, "host-to-device memcpy failed")

            # Input dataset
            input_dataset = acl.mdl.create_dataset()
            input_buffer = acl.create_data_buffer(input_ptr, input_size)
            acl.mdl.add_dataset_buffer(input_dataset, input_buffer)

            # Output dataset
            ptr, ret = acl.rt.malloc(output_size, acl.ACL_MEM_MALLOC_NORMAL_ONLY)
            self._check(ret, "acl.rt.malloc for output failed")
            output_ptr = ptr
            output_dataset = acl.mdl.create_dataset()
            output_buffer = acl.create_data_buffer(output_ptr, output_size)
            acl.mdl.add_dataset_buffer(output_dataset, output_buffer)

            # Execute
            ret = acl.mdl.execute(self.model_id, input_dataset, output_dataset)
            self._check(ret, "acl.mdl.execute failed")

            # Copy device -> host
            ret = acl.rt.memcpy(output_host.ctypes.data_as(acl.ffi_void_p),
                                output_size, output_ptr, output_size,
                                acl.ACL_MEMCPY_DEVICE_TO_HOST)
            self._check(ret, "device-to-host memcpy failed")
        finally:
            # Release in reverse order of creation: buffer descriptors,
            # then datasets, then the device memory they pointed at.
            for buffer in (output_buffer, input_buffer):
                if buffer is not None:
                    acl.destroy_data_buffer(buffer)
            for dataset in (output_dataset, input_dataset):
                if dataset is not None:
                    acl.mdl.destroy_dataset(dataset)
            for ptr in (output_ptr, input_ptr):
                if ptr is not None:
                    acl.rt.free(ptr)

        return output_host

    def __del__(self):
        """Release the model, context, device and ACL runtime, if acquired."""
        # getattr defaults: __del__ also runs when __init__ raised early or
        # the object was created without __init__.
        if getattr(self, "model_id", None) is not None:
            acl.mdl.unload(self.model_id)
            self.model_id = None
        if getattr(self, "context", None) is not None:
            acl.rt.destroy_context(self.context)
            self.context = None
        if getattr(self, "_device_set", False):
            acl.rt.reset_device(self.device_id)
            self._device_set = False
        if getattr(self, "_acl_ready", False):
            acl.finalize()
            self._acl_ready = False
3. 构建 Flask 服务
python
编辑
# app.py
from flask import Flask, request, jsonify
import numpy as np
from inference_engine import CANNInference
app = Flask(__name__)
# The model is loaded once at import time and shared by all requests.
engine = CANNInference("resnet50.om")


@app.route('/predict', methods=['POST'])
def predict():
    """Classify one input tensor sent as JSON.

    Expects a JSON body of the form ``{"input": [...]}`` where the list holds
    3*224*224 numbers (any nesting that reshapes to (1, 3, 224, 224)).

    Returns:
        200 with ``{"class_id": int, "confidence": float}`` on success, or
        400 with ``{"error": str}`` when the body is missing, is not a JSON
        object with an ``input`` key, or cannot be reshaped.
    """
    # silent=True yields None instead of raising on a missing/invalid body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'input' not in data:
        return jsonify({'error': "JSON body with an 'input' field is required"}), 400

    try:
        input_array = np.asarray(data['input'], dtype=np.float32).reshape(1, 3, 224, 224)
    except (TypeError, ValueError):
        return jsonify({
            'error': "'input' must contain exactly 3*224*224 numeric values"
        }), 400

    output = engine.infer(input_array)
    pred_class = int(np.argmax(output))
    # NOTE(review): this is the largest raw output value; it is only a
    # probability if the model itself ends with a softmax — confirm.
    confidence = float(np.max(output))
    return jsonify({
        'class_id': pred_class,
        'confidence': confidence
    })


if __name__ == '__main__':
    # Binds to all interfaces on port 5000.
    app.run(host='0.0.0.0', port=5000)
4. 测试服务
bash
编辑
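# 先生成长度为 3*224*224 的测试输入(JSON 中不能用 "..." 表示省略)
python3 -c "import json; print(json.dumps({'input': [0.1] * (3 * 224 * 224)}))" > payload.json
curl -X POST http://localhost:5000/predict \
-H "Content-Type: application/json" \
-d @payload.json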
优势与适用场景
- 开发效率高:无需编译,Python 接口省去了 C++ 的大量样板代码(设备内存仍需像上文那样手动申请与释放);
- 适合微服务架构:可轻松集成到 Kubernetes 或 Docker 环境;
- 支持批量推理:调整输入 shape 即可启用 batch 处理(前提是模型转换时指定了相应的 batch 大小或动态 batch,且输出缓冲区大小需同步调整)。
- cann组织链接:https://atomgit.com/cann
- ops-nn仓库链接:https://atomgit.com/cann/ops-nn
注意:生产环境中建议使用异步框架(如 FastAPI + asyncio)提升并发能力。
昇腾计算产业是基于昇腾系列(HUAWEI Ascend)处理器和基础软件构建的全栈 AI 计算基础设施、行业应用及服务,包括昇腾系列处理器、系列硬件、CANN、AI 计算框架、应用使能、开发工具链、管理运维工具、行业应用及服务等全产业链。详见:https://devpress.csdn.net/organization/setting/general/146749
更多推荐

所有评论(0)