
鸿蒙端侧AI模型推理耗时统计方案设计与实现 原创
鸿蒙端侧AI模型推理耗时统计方案设计与实现
一、系统架构设计
基于HarmonyOS的AI框架,我们设计了一套端侧AI模型推理耗时统计系统,用于分析和优化跨设备场景下的AI推理性能。
![端侧AI推理耗时统计系统架构图](https://example.com/ai-inference-arch.png)
系统包含三大核心模块:
模型管理模块 - 负责AI模型的加载与配置
推理执行模块 - 执行模型推理并收集性能数据
统计分析模块 - 对耗时数据进行可视化展示
二、核心代码实现
AI模型管理服务(Java)
// AiModelManager.java
public class AiModelManager {
private static final String TAG = “AiModelManager”;
private AIDL aiEngine;
private Map<String, ModelInfo> loadedModels = new HashMap<>();
// 初始化AI引擎
public void init(Context context) {
aiEngine = AIDL.create(context);
aiEngine.setConfig(new AIDLConfig()
.setPerformanceMode(AIDLConfig.PERFORMANCE_HIGH)
.setPreference(AIDLConfig.PREFERENCE_FAST_SINGLE_ANSWER)
);
// 加载模型
public boolean loadModel(String modelName, String modelPath) {
try {
long startTime = System.currentTimeMillis();
ModelInfo modelInfo = new ModelInfo();
modelInfo.model = aiEngine.loadModel(modelPath);
modelInfo.loadTime = System.currentTimeMillis() - startTime;
loadedModels.put(modelName, modelInfo);
HiLog.info(TAG, "模型加载完成: " + modelName);
return true;
catch (AIDLException e) {
HiLog.error(TAG, "模型加载失败: " + e.getMessage());
return false;
}
// 执行推理
public InferenceResult runInference(String modelName, AIDLTensor input) {
ModelInfo modelInfo = loadedModels.get(modelName);
if (modelInfo == null) {
throw new IllegalStateException("模型未加载: " + modelName);
try {
long startTime = System.currentTimeMillis();
AIDLTensor output = aiEngine.run(modelInfo.model, input);
long inferenceTime = System.currentTimeMillis() - startTime;
modelInfo.addInferenceTime(inferenceTime);
return new InferenceResult(output, inferenceTime);
catch (AIDLException e) {
HiLog.error(TAG, "推理执行失败: " + e.getMessage());
throw new RuntimeException(e);
}
// 获取模型统计信息
public ModelStats getModelStats(String modelName) {
ModelInfo modelInfo = loadedModels.get(modelName);
if (modelInfo == null) {
return null;
return modelInfo.getStats();
// 模型信息封装类
private static class ModelInfo {
AIDLModel model;
long loadTime;
List<Long> inferenceTimes = new ArrayList<>();
void addInferenceTime(long time) {
inferenceTimes.add(time);
ModelStats getStats() {
ModelStats stats = new ModelStats();
stats.loadTime = loadTime;
if (!inferenceTimes.isEmpty()) {
stats.avgInferenceTime = (long) inferenceTimes.stream()
.mapToLong(Long::longValue)
.average()
.orElse(0);
stats.minInferenceTime = inferenceTimes.stream()
.mapToLong(Long::longValue)
.min()
.orElse(0);
stats.maxInferenceTime = inferenceTimes.stream()
.mapToLong(Long::longValue)
.max()
.orElse(0);
return stats;
}
// 模型统计信息
public static class ModelStats {
public long loadTime;
public long avgInferenceTime;
public long minInferenceTime;
public long maxInferenceTime;
// 推理结果
public static class InferenceResult {
public final AIDLTensor output;
public final long inferenceTime;
public InferenceResult(AIDLTensor output, long inferenceTime) {
this.output = output;
this.inferenceTime = inferenceTime;
}
推理耗时统计界面(ArkTS)
// AiInferenceStats.ets
// Dashboard page visualizing per-model inference timing statistics.
import ai from '@ohos.ai';
import modelManager from '../services/AiModelManager';

@Entry
@Component
struct AiInferenceStats {
  @State modelStats: ModelStats | null = null;
  @State inferenceHistory: InferenceRecord[] = [];
  @State currentModel: string = 'face_detection';

  aboutToAppear() {
    this.loadModelStats();
  }

  // Fetch aggregated stats for the current model, then its history.
  private loadModelStats() {
    modelManager.getModelStats(this.currentModel, (stats) => {
      this.modelStats = stats;
      this.loadInferenceHistory();
    });
  }

  // Fetch the 20 most recent inference records for the chart and list.
  private loadInferenceHistory() {
    modelManager.getInferenceHistory(this.currentModel, 20, (records) => {
      this.inferenceHistory = records;
    });
  }

  build() {
    Column() {
      // 模型选择器
      this.buildModelSelector()
      // 统计概览
      if (this.modelStats) {
        this.buildStatsOverview()
      }
      // 耗时历史图表
      this.buildInferenceChart()
      // 历史记录列表
      this.buildHistoryList()
    }
    .padding(10)
  }

  @Builder
  private buildModelSelector() {
    Row() {
      Text('选择模型:')
        .fontSize(16)
      Select([
        { value: 'face_detection', name: '人脸检测' },
        { value: 'object_detection', name: '目标检测' },
        { value: 'image_classification', name: '图像分类' }
      ], this.currentModel)
        .onSelect((value: string) => {
          this.currentModel = value;
          this.loadModelStats();
        })
    }
    .margin({ bottom: 20 })
  }

  @Builder
  private buildStatsOverview() {
    // Only rendered when modelStats is non-null (guarded in build()).
    Column() {
      StatItem({
        label: '模型加载耗时',
        value: `${this.modelStats!.loadTime}ms`,
        icon: 'resources/load.png'
      })
      StatItem({
        label: '平均推理耗时',
        value: `${this.modelStats!.avgInferenceTime}ms`,
        icon: 'resources/avg.png'
      })
      StatItem({
        label: '最快推理耗时',
        value: `${this.modelStats!.minInferenceTime}ms`,
        icon: 'resources/min.png'
      })
      StatItem({
        label: '最慢推理耗时',
        value: `${this.modelStats!.maxInferenceTime}ms`,
        icon: 'resources/max.png'
      })
    }
    .width('100%')
    .margin({ bottom: 20 })
  }

  @Builder
  private buildInferenceChart() {
    Column() {
      Text('推理耗时趋势')
        .fontSize(16)
        .margin({ bottom: 10 })
      LineChart({
        data: this.inferenceHistory.map(r => r.time),
        config: {
          height: 200,
          lineColor: '#409EFF',
          showXAxis: true,
          showYAxis: true
        }
      })
    }
    .width('100%')
    .margin({ bottom: 20 })
  }

  @Builder
  private buildHistoryList() {
    Column() {
      Text('最近推理记录')
        .fontSize(16)
        .margin({ bottom: 10 })
      List() {
        ForEach(this.inferenceHistory, (record: InferenceRecord) => {
          ListItem() {
            Row() {
              Text(`#${record.id}`)
                .width('15%')
              Text(`${record.time}ms`)
                .width('25%')
                .textAlign(TextAlign.End)
              Text(new Date(record.timestamp).toLocaleTimeString())
                .width('30%')
                .textAlign(TextAlign.End)
              PerformanceTag({
                time: record.time,
                threshold: this.modelStats?.avgInferenceTime || 0
              })
            }
            .padding(10)
          }
        })
      }
      .height(200)
    }
  }
}
@Component
struct StatItem {
@Prop label: string
@Prop value: string
@Prop icon: string
build() {
Row() {
Image(this.icon)
.width(24)
.height(24)
.margin({ right: 10 })
Text(this.label)
.fontSize(14)
.layoutWeight(1)
Text(this.value)
.fontSize(14)
.fontColor('#409EFF')
.height(40)
.borderRadius(8)
.backgroundColor('#F5F5F5')
.padding(10)
.margin({ bottom: 8 })
}
@Component
struct PerformanceTag {
@Prop time: number
@Prop threshold: number
build() {
const isGood = this.time <= this.threshold;
Text(isGood ? '优' : '良')
.fontSize(12)
.fontColor(isGood ? '#67C23A' : '#E6A23C')
.backgroundColor(isGood ? '#f0f9eb' : '#fdf6ec')
.borderRadius(4)
.padding(2)
.width('30%')
.textAlign(TextAlign.Center)
}
interface ModelStats {
loadTime: number;
avgInferenceTime: number;
minInferenceTime: number;
maxInferenceTime: number;
interface InferenceRecord {
id: number;
time: number;
timestamp: number;
模型推理测试组件(ArkTS)
// AiInferenceTest.ets
// Interactive page that benchmarks several models against one test image.
import ai from '@ohos.ai';
import image from '@ohos.multimedia.image';
import modelManager from '../services/AiModelManager';

@Entry
@Component
struct AiInferenceTest {
  @State testImage: PixelMap | null = null;
  @State testResults: TestResult[] = [];
  @State isTesting: boolean = false;

  aboutToAppear() {
    this.loadTestImage();
  }

  // 加载测试图片
  private loadTestImage() {
    image.createPixelMapFromFile('resources/test_image.jpg', (err, pixelMap) => {
      if (!err && pixelMap) {
        this.testImage = pixelMap;
      }
    });
  }

  build() {
    Column() {
      // 测试图片预览
      if (this.testImage) {
        Image(this.testImage)
          .width(200)
          .height(200)
          .margin(10)
      }
      // 测试按钮
      Button(this.isTesting ? '测试中...' : '开始性能测试')
        .onClick(() => this.runTests())
        .disabled(this.isTesting)
        .width('80%')
        .margin(10)
      // 测试结果
      if (this.testResults.length > 0) {
        this.buildTestResults()
      }
    }
  }

  @Builder
  private buildTestResults() {
    Column() {
      Text('测试结果')
        .fontSize(18)
        .margin(10)
      Grid() {
        ForEach(this.testResults, (result: TestResult) => {
          GridItem() {
            TestResultCard({ result: result })
          }
        })
      }
      .columnsTemplate('1fr 1fr')
      .columnsGap(10)
      .rowsGap(10)
    }
  }

  // Runs every model sequentially against the shared input tensor.
  private async runTests() {
    if (!this.testImage) return;
    this.isTesting = true;
    this.testResults = [];
    // 准备测试输入
    const inputTensor = await ai.createTensorFromPixelMap(this.testImage);
    // 测试不同模型
    const models = [
      'face_detection',
      'object_detection',
      'image_classification'
    ];
    for (const model of models) {
      const result = await this.runSingleTest(model, inputTensor);
      // Reassign the array so @State change detection fires.
      this.testResults = [...this.testResults, result];
    }
    this.isTesting = false;
  }

  private async runSingleTest(modelName: string, input: AIDLTensor): Promise<TestResult> {
    // 确保模型已加载
    if (!modelManager.isModelLoaded(modelName)) {
      await modelManager.loadModel(modelName);
    }
    // 预热运行 (excluded from the measured timings below)
    await modelManager.runInference(modelName, input);
    // 正式测试运行
    const startTime = Date.now();
    const result = await modelManager.runInference(modelName, input);
    const endTime = Date.now();
    return {
      modelName,
      inferenceTime: result.inferenceTime,
      totalTime: endTime - startTime,
      timestamp: Date.now()
    };
  }
}
// Card summarizing one model's benchmark run (inference vs. total time).
@Component
struct TestResultCard {
  @Prop result: TestResult

  build() {
    Column() {
      Text(this.getModelDisplayName())
        .fontSize(16)
        .fontWeight(FontWeight.Bold)
        .margin({ bottom: 5 })
      Divider()
      Row() {
        Text('推理耗时:')
          .fontSize(14)
        Text(`${this.result.inferenceTime}ms`)
          .fontSize(14)
          .fontColor('#409EFF')
      }
      .margin({ top: 5 })
      Row() {
        Text('总耗时:')
          .fontSize(14)
        Text(`${this.result.totalTime}ms`)
          .fontSize(14)
          .fontColor('#67C23A')
      }
      .margin({ top: 5 })
    }
    .padding(10)
    .borderRadius(8)
    .backgroundColor('#F5F5F5')
  }

  // Maps the internal model id to a human-readable display name;
  // falls back to the raw id for unknown models.
  private getModelDisplayName(): string {
    switch (this.result.modelName) {
      case 'face_detection': return '人脸检测';
      case 'object_detection': return '目标检测';
      case 'image_classification': return '图像分类';
      default: return this.result.modelName;
    }
  }
}
interface TestResult {
modelName: string;
inferenceTime: number;
totalTime: number;
timestamp: number;
三、关键技术实现
推理耗时统计流程
sequenceDiagram
participant 应用
participant AI框架
participant 模型
应用->>AI框架: 加载模型
AI框架->>模型: 初始化
模型-->>AI框架: 加载完成
AI框架-->>应用: 返回加载耗时
应用->>AI框架: 执行推理
AI框架->>模型: 前向计算
模型-->>AI框架: 推理结果
AI框架-->>应用: 返回推理耗时
应用->>应用: 记录性能数据
性能数据采集点
数据点 采集方式 说明
模型加载耗时 System.currentTimeMillis()差值 从开始加载到加载完成的时间
推理耗时 System.currentTimeMillis()差值 从输入数据到输出结果的时间
内存占用 Runtime.getRuntime() 推理前后的内存变化
CPU利用率 读取/proc/[pid]/stat采样 推理期间的CPU使用率
跨设备性能对比
// 跨设备性能对比服务
public class CrossDeviceCompare {
public static void comparePerformance(String modelName, List<DeviceInfo> devices) {
ExecutorService executor = Executors.newFixedThreadPool(devices.size());
List<Future<DevicePerf>> futures = new ArrayList<>();
// 在每个设备上运行测试
for (DeviceInfo device : devices) {
futures.add(executor.submit(() -> {
AiModelManager manager = connectToDevice(device);
return runPerformanceTest(manager, modelName);
}));
// 收集结果
List<DevicePerf> results = new ArrayList<>();
for (Future<DevicePerf> future : futures) {
results.add(future.get());
// 生成对比报告
generateReport(modelName, results);
}
四、测试方案
基准测试用例(Java)
public class AiInferenceBenchmark {
private AiModelManager modelManager;
private static final int WARMUP_ROUNDS = 5;
private static final int TEST_ROUNDS = 10;
@Before
public void setup() {
modelManager = new AiModelManager();
modelManager.init(getContext());
@Test
public void benchmarkFaceDetection() {
benchmarkModel("face_detection");
@Test
public void benchmarkObjectDetection() {
benchmarkModel("object_detection");
private void benchmarkModel(String modelName) {
// 加载模型
assertTrue(modelManager.loadModel(modelName, getModelPath(modelName)));
// 准备输入数据
AIDLTensor input = createTestInput();
// 预热运行
for (int i = 0; i < WARMUP_ROUNDS; i++) {
modelManager.runInference(modelName, input);
// 正式测试
long totalTime = 0;
for (int i = 0; i < TEST_ROUNDS; i++) {
long startTime = System.nanoTime();
modelManager.runInference(modelName, input);
long endTime = System.nanoTime();
totalTime += (endTime - startTime);
long avgTime = totalTime / TEST_ROUNDS / 1000000; // 转换为毫秒
HiLog.info("AiInferenceBenchmark",
"模型 %{public}s 平均推理耗时: %{public}dms", modelName, avgTime);
// 验证性能指标
assertTrue("推理耗时超过阈值", avgTime < getThresholdForModel(modelName));
}
性能回归测试(ArkTS)
// AiPerformanceTest.ets
// Regression-test page: runs all model benchmarks and compares the
// results against a locally-stored baseline.
@Entry
@Component
struct AiPerformanceTest {
  @State baselineResults: TestResult[] = [];
  @State currentResults: TestResult[] = [];
  @State regressionItems: RegressionItem[] = [];

  aboutToAppear() {
    this.loadBaselineData();
  }

  // 从本地存储加载基线数据
  private loadBaselineData() {
    storage.get('ai_perf_baseline', (err, data) => {
      if (!err && data) {
        this.baselineResults = JSON.parse(data);
      }
    });
  }

  // Persist the latest run as the new comparison baseline.
  private saveAsBaseline() {
    storage.set('ai_perf_baseline', JSON.stringify(this.currentResults));
  }

  private async runRegressionTest() {
    const tester = new AiInferenceTester();
    this.currentResults = await tester.runAllTests();
    // 对比基线数据
    this.regressionItems = this.compareWithBaseline();
  }

  // Classifies each model's change vs. baseline: a ±10% shift in
  // inference time counts as worse/better, otherwise 'same'; models
  // without a baseline entry are marked 'new'.
  private compareWithBaseline(): RegressionItem[] {
    if (this.baselineResults.length === 0) return [];
    return this.currentResults.map(current => {
      const baseline = this.baselineResults.find(b => b.modelName === current.modelName);
      if (!baseline) {
        return {
          modelName: current.modelName,
          change: 'new',
          current: current.inferenceTime,
          baseline: 0
        };
      }
      const ratio = current.inferenceTime / baseline.inferenceTime;
      return {
        modelName: current.modelName,
        change: ratio > 1.1 ? 'worse' :
                ratio < 0.9 ? 'better' : 'same',
        current: current.inferenceTime,
        baseline: baseline.inferenceTime
      };
    });
  }

  build() {
    Column() {
      Button('运行性能测试')
        .onClick(() => this.runRegressionTest())
        .width('80%')
        .margin(10)
      Button('设为基线')
        .onClick(() => this.saveAsBaseline())
        .width('80%')
        .margin(10)
      if (this.regressionItems.length > 0) {
        this.buildRegressionReport()
      }
    }
  }

  @Builder
  private buildRegressionReport() {
    Column() {
      Text('性能回归报告')
        .fontSize(18)
        .margin(10)
      List() {
        ForEach(this.regressionItems, (item: RegressionItem) => {
          ListItem() {
            RegressionItemCard({ item: item })
          }
        })
      }
      .height(300)
    }
  }
}
// Row showing one model's current vs. baseline time plus the trend tag.
@Component
struct RegressionItemCard {
  @Prop item: RegressionItem

  build() {
    Row() {
      Text(this.item.modelName)
        .fontSize(16)
        .layoutWeight(1)
      Column() {
        Text(`当前: ${this.item.current}ms`)
        Text(`基线: ${this.item.baseline}ms`)
      }
      .layoutWeight(1)
      Text(this.getChangeText())
        .fontColor(this.getChangeColor())
        .fontSize(16)
    }
    .padding(10)
  }

  private getChangeText(): string {
    switch (this.item.change) {
      case 'better': return '↑ 提升';
      case 'worse': return '↓ 下降';
      case 'same': return '→ 持平';
      default: return '新测试';
    }
  }

  private getChangeColor(): string {
    switch (this.item.change) {
      case 'better': return '#67C23A';
      case 'worse': return '#F56C6C';
      default: return '#909399';
    }
  }
}
interface RegressionItem {
modelName: string;
change: ‘better’ ‘worse’ ‘same’
‘new’;
current: number;
baseline: number;
五、总结与展望
本方案实现了以下核心功能:
精准耗时统计:精确测量模型加载和推理各阶段耗时
多维度分析:支持平均耗时、最大最小耗时等统计指标
可视化展示:直观的图表展示性能数据和趋势
跨设备对比:支持不同设备间的性能对比分析
未来优化方向:
增加GPU/NPU硬件加速统计
集成内存和功耗监控
支持自动化性能回归测试
增加AI模型优化建议功能
通过本方案,开发者可以全面了解AI模型在鸿蒙设备上的性能表现,为优化模型和提升用户体验提供数据支持。
