Design and Implementation of an On-Device AI Model Inference Latency Statistics Solution for HarmonyOS
1. System Architecture Design
Building on the HarmonyOS AI framework, we designed an on-device AI model inference latency statistics system for analyzing and optimizing AI inference performance in cross-device scenarios.
[Figure: system architecture diagram – https://example.com/ai-inference-arch.png]
The system consists of three core modules (a module-level sketch follows this list):
Model Management Module - loads and configures AI models
Inference Execution Module - runs model inference and collects performance data
Statistics & Analysis Module - aggregates latency data and presents it visually
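To make the module boundaries concrete, their responsibilities can be sketched as three small Java interfaces. This is an illustrative sketch only; the names below (ModelManagement, InferenceExecution, StatsAnalysis) are hypothetical and do not appear in the implementation that follows.

import java.util.List;

// Hypothetical interface sketch of the three modules; names are illustrative only.
interface ModelManagement {
    // Load a model from the given path and report whether loading succeeded
    boolean loadModel(String modelName, String modelPath);
}

interface InferenceExecution {
    // Run one inference and return the measured latency in milliseconds
    long runAndMeasure(String modelName, Object inputTensor);
}

interface StatsAnalysis {
    // Aggregate raw latency samples into min / max / average figures for display
    void report(String modelName, List<Long> latenciesMs);
}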
2. Core Code Implementation
AI Model Management Service (Java)
// AiModelManager.java
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class AiModelManager {
    private static final String TAG = "AiModelManager";
    private AIDL aiEngine;
    private Map<String, ModelInfo> loadedModels = new HashMap<>();

    // Initialize the AI engine
    public void init(Context context) {
        aiEngine = AIDL.create(context);
        aiEngine.setConfig(new AIDLConfig()
            .setPerformanceMode(AIDLConfig.PERFORMANCE_HIGH)
            .setPreference(AIDLConfig.PREFERENCE_FAST_SINGLE_ANSWER)
        );
    }

    // Load a model and record how long loading takes
    public boolean loadModel(String modelName, String modelPath) {
        try {
            long startTime = System.currentTimeMillis();

            ModelInfo modelInfo = new ModelInfo();
            modelInfo.model = aiEngine.loadModel(modelPath);
            modelInfo.loadTime = System.currentTimeMillis() - startTime;

            loadedModels.put(modelName, modelInfo);
            HiLog.info(TAG, "Model loaded: " + modelName);
            return true;
        } catch (AIDLException e) {
            HiLog.error(TAG, "Model loading failed: " + e.getMessage());
            return false;
        }
    }

    // Run inference and record the elapsed time
    public InferenceResult runInference(String modelName, AIDLTensor input) {
        ModelInfo modelInfo = loadedModels.get(modelName);
        if (modelInfo == null) {
            throw new IllegalStateException("Model not loaded: " + modelName);
        }
        try {
            long startTime = System.currentTimeMillis();

            AIDLTensor output = aiEngine.run(modelInfo.model, input);

            long inferenceTime = System.currentTimeMillis() - startTime;
            modelInfo.addInferenceTime(inferenceTime);

            return new InferenceResult(output, inferenceTime);
        } catch (AIDLException e) {
            HiLog.error(TAG, "Inference failed: " + e.getMessage());
            throw new RuntimeException(e);
        }
    }

    // Get aggregated statistics for a model
    public ModelStats getModelStats(String modelName) {
        ModelInfo modelInfo = loadedModels.get(modelName);
        if (modelInfo == null) {
            return null;
        }
        return modelInfo.getStats();
    }

    // Per-model bookkeeping: model handle, load time, and raw latency samples
    private static class ModelInfo {
        AIDLModel model;
        long loadTime;
        List<Long> inferenceTimes = new ArrayList<>();

        void addInferenceTime(long time) {
            inferenceTimes.add(time);
        }

        ModelStats getStats() {
            ModelStats stats = new ModelStats();
            stats.loadTime = loadTime;

            if (!inferenceTimes.isEmpty()) {
                stats.avgInferenceTime = (long) inferenceTimes.stream()
                    .mapToLong(Long::longValue)
                    .average()
                    .orElse(0);

                stats.minInferenceTime = inferenceTimes.stream()
                    .mapToLong(Long::longValue)
                    .min()
                    .orElse(0);

                stats.maxInferenceTime = inferenceTimes.stream()
                    .mapToLong(Long::longValue)
                    .max()
                    .orElse(0);
            }
            return stats;
        }
    }

    // Aggregated model statistics
    public static class ModelStats {
        public long loadTime;
        public long avgInferenceTime;
        public long minInferenceTime;
        public long maxInferenceTime;
    }

    // Inference output together with the measured latency
    public static class InferenceResult {
        public final AIDLTensor output;
        public final long inferenceTime;

        public InferenceResult(AIDLTensor output, long inferenceTime) {
            this.output = output;
            this.inferenceTime = inferenceTime;
        }
    }
}
Inference Latency Statistics UI (ArkTS)
// AiInferenceStats.ets
import ai from '@ohos.ai';
import modelManager from '../services/AiModelManager';

@Entry
@Component
struct AiInferenceStats {
  @State modelStats: ModelStats | null = null;
  @State inferenceHistory: InferenceRecord[] = [];
  @State currentModel: string = 'face_detection';

  aboutToAppear() {
    this.loadModelStats();
  }

  private loadModelStats() {
    modelManager.getModelStats(this.currentModel, (stats) => {
      this.modelStats = stats;
      this.loadInferenceHistory();
    });
  }

  private loadInferenceHistory() {
    modelManager.getInferenceHistory(this.currentModel, 20, (records) => {
      this.inferenceHistory = records;
    });
  }

  build() {
    Column() {
      // Model selector
      this.buildModelSelector()

      // Statistics overview
      if (this.modelStats) {
        this.buildStatsOverview()
      }

      // Latency history chart
      this.buildInferenceChart()

      // History list
      this.buildHistoryList()
    }
    .padding(10)
  }

  @Builder
  private buildModelSelector() {
    Row() {
      Text('Select model:')
        .fontSize(16)

      Select([
        { value: 'face_detection', name: 'Face detection' },
        { value: 'object_detection', name: 'Object detection' },
        { value: 'image_classification', name: 'Image classification' }
      ], this.currentModel)
        .onSelect((value: string) => {
          this.currentModel = value;
          this.loadModelStats();
        })
    }
    .margin({ bottom: 20 })
  }

  @Builder
  private buildStatsOverview() {
    Column() {
      StatItem({
        label: 'Model load time',
        value: `${this.modelStats.loadTime}ms`,
        icon: 'resources/load.png'
      })

      StatItem({
        label: 'Average inference time',
        value: `${this.modelStats.avgInferenceTime}ms`,
        icon: 'resources/avg.png'
      })

      StatItem({
        label: 'Fastest inference',
        value: `${this.modelStats.minInferenceTime}ms`,
        icon: 'resources/min.png'
      })

      StatItem({
        label: 'Slowest inference',
        value: `${this.modelStats.maxInferenceTime}ms`,
        icon: 'resources/max.png'
      })
    }
    .width('100%')
    .margin({ bottom: 20 })
  }

  @Builder
  private buildInferenceChart() {
    Column() {
      Text('Inference latency trend')
        .fontSize(16)
        .margin({ bottom: 10 })

      LineChart({
        data: this.inferenceHistory.map(r => r.time),
        config: {
          height: 200,
          lineColor: '#409EFF',
          showXAxis: true,
          showYAxis: true
        }
      })
    }
    .width('100%')
    .margin({ bottom: 20 })
  }

  @Builder
  private buildHistoryList() {
    Column() {
      Text('Recent inference records')
        .fontSize(16)
        .margin({ bottom: 10 })

      List() {
        ForEach(this.inferenceHistory, (record) => {
          ListItem() {
            Row() {
              Text(`#${record.id}`)
                .width('15%')

              Text(`${record.time}ms`)
                .width('25%')
                .textAlign(TextAlign.End)

              Text(new Date(record.timestamp).toLocaleTimeString())
                .width('30%')
                .textAlign(TextAlign.End)

              PerformanceTag({
                time: record.time,
                threshold: this.modelStats?.avgInferenceTime || 0
              })
            }
            .padding(10)
          }
        })
      }
      .height(200)
    }
  }
}

@Component
struct StatItem {
  @Prop label: string
  @Prop value: string
  @Prop icon: string

  build() {
    Row() {
      Image(this.icon)
        .width(24)
        .height(24)
        .margin({ right: 10 })

      Text(this.label)
        .fontSize(14)
        .layoutWeight(1)

      Text(this.value)
        .fontSize(14)
        .fontColor('#409EFF')
    }
    .height(40)
    .borderRadius(8)
    .backgroundColor('#F5F5F5')
    .padding(10)
    .margin({ bottom: 8 })
  }
}

@Component
struct PerformanceTag {
  @Prop time: number
  @Prop threshold: number

  build() {
    // Mark a run as "Good" when it is no slower than the average, otherwise "Fair"
    Text(this.time <= this.threshold ? 'Good' : 'Fair')
      .fontSize(12)
      .fontColor(this.time <= this.threshold ? '#67C23A' : '#E6A23C')
      .backgroundColor(this.time <= this.threshold ? '#f0f9eb' : '#fdf6ec')
      .borderRadius(4)
      .padding(2)
      .width('30%')
      .textAlign(TextAlign.Center)
  }
}

interface ModelStats {
  loadTime: number;
  avgInferenceTime: number;
  minInferenceTime: number;
  maxInferenceTime: number;
}

interface InferenceRecord {
  id: number;
  time: number;
  timestamp: number;
}
Model Inference Test Component (ArkTS)
// AiInferenceTest.ets
import ai from '@ohos.ai';
import image from '@ohos.multimedia.image';
import modelManager from '../services/AiModelManager';

@Entry
@Component
struct AiInferenceTest {
  @State testImage: PixelMap | null = null;
  @State testResults: TestResult[] = [];
  @State isTesting: boolean = false;

  aboutToAppear() {
    this.loadTestImage();
  }

  private loadTestImage() {
    // Load the test image used as inference input
    image.createPixelMapFromFile('resources/test_image.jpg', (err, pixelMap) => {
      if (!err && pixelMap) {
        this.testImage = pixelMap;
      }
    });
  }

  build() {
    Column() {
      // Test image preview
      if (this.testImage) {
        Image(this.testImage)
          .width(200)
          .height(200)
          .margin(10)
      }

      // Test button
      Button(this.isTesting ? 'Testing...' : 'Start performance test')
        .onClick(() => this.runTests())
        .disabled(this.isTesting)
        .width('80%')
        .margin(10)

      // Test results
      if (this.testResults.length > 0) {
        this.buildTestResults()
      }
    }
  }

  @Builder
  private buildTestResults() {
    Column() {
      Text('Test results')
        .fontSize(18)
        .margin(10)

      Grid() {
        ForEach(this.testResults, (result) => {
          GridItem() {
            TestResultCard({ result })
          }
        })
      }
      .columnsTemplate('1fr 1fr')
      .columnsGap(10)
      .rowsGap(10)
    }
  }

  private async runTests() {
    if (!this.testImage) return;

    this.isTesting = true;
    this.testResults = [];

    // Prepare the test input
    const inputTensor = await ai.createTensorFromPixelMap(this.testImage);

    // Test each model in turn
    const models = [
      'face_detection',
      'object_detection',
      'image_classification'
    ];

    for (const model of models) {
      const result = await this.runSingleTest(model, inputTensor);
      this.testResults = [...this.testResults, result];
    }

    this.isTesting = false;
  }

  private async runSingleTest(modelName: string, input: AIDLTensor): Promise<TestResult> {
    // Make sure the model is loaded
    if (!modelManager.isModelLoaded(modelName)) {
      await modelManager.loadModel(modelName);
    }

    // Warm-up run
    await modelManager.runInference(modelName, input);

    // Measured run
    const startTime = Date.now();
    const result = await modelManager.runInference(modelName, input);
    const endTime = Date.now();

    return {
      modelName,
      inferenceTime: result.inferenceTime,
      totalTime: endTime - startTime,
      timestamp: Date.now()
    };
  }
}

@Component
struct TestResultCard {
  @Prop result: TestResult

  build() {
    Column() {
      Text(this.getModelDisplayName())
        .fontSize(16)
        .fontWeight(FontWeight.Bold)
        .margin({ bottom: 5 })

      Divider()

      Row() {
        Text('Inference time:')
          .fontSize(14)

        Text(`${this.result.inferenceTime}ms`)
          .fontSize(14)
          .fontColor('#409EFF')
      }
      .margin({ top: 5 })

      Row() {
        Text('Total time:')
          .fontSize(14)

        Text(`${this.result.totalTime}ms`)
          .fontSize(14)
          .fontColor('#67C23A')
      }
      .margin({ top: 5 })
    }
    .padding(10)
    .borderRadius(8)
    .backgroundColor('#F5F5F5')
  }

  private getModelDisplayName(): string {
    switch (this.result.modelName) {
      case 'face_detection': return 'Face detection';
      case 'object_detection': return 'Object detection';
      case 'image_classification': return 'Image classification';
      default: return this.result.modelName;
    }
  }
}

interface TestResult {
  modelName: string;
  inferenceTime: number;
  totalTime: number;
  timestamp: number;
}
3. Key Technical Implementation
Inference Latency Measurement Flow
sequenceDiagram
    participant App
    participant Framework as AI Framework
    participant Model
    App->>Framework: Load model
    Framework->>Model: Initialize
    Model-->>Framework: Load complete
    Framework-->>App: Return load time
    App->>Framework: Run inference
    Framework->>Model: Forward pass
    Model-->>Framework: Inference result
    Framework-->>App: Return inference time
    App->>App: Record performance data
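Mapped onto the AiModelManager API from section 2, the two measurement points in this flow look roughly as follows. This is a minimal usage sketch: the model path is a hypothetical placeholder and the input tensor is assumed to be prepared elsewhere.

// Sketch: the load-time and inference-time measurement points from the sequence above.
public class InferenceTimingDemo {
    public void measure(Context context, AIDLTensor inputTensor) {
        AiModelManager manager = new AiModelManager();
        manager.init(context);

        // Load phase: loadModel() records the elapsed load time internally
        manager.loadModel("face_detection", "/data/models/face_detection.model"); // hypothetical path

        // Inference phase: each call appends one latency sample to the model's history
        AiModelManager.InferenceResult result = manager.runInference("face_detection", inputTensor);

        // Aggregated view across all runs so far
        AiModelManager.ModelStats stats = manager.getModelStats("face_detection");
        HiLog.info("InferenceTimingDemo",
            "last run: " + result.inferenceTime + "ms, average: " + stats.avgInferenceTime + "ms");
    }
}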
Performance Data Collection Points

| Data point | Collection method | Description |
| --- | --- | --- |
| Model load time | System.currentTimeMillis() delta | Time from the start of loading until the model is ready |
| Inference time | System.currentTimeMillis() delta | Time from submitting the input until the output is returned |
| Memory usage | Runtime.getRuntime() | Change in memory before vs. after inference |
| CPU utilization | Sampling /proc/<pid>/stat | CPU usage during the inference window |
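For the memory-usage row, a minimal sketch of the before/after sampling is shown below, assuming the AiModelManager from section 2. Runtime-based figures only approximate Java-heap usage and miss native allocations, so treat the delta as indicative rather than exact.

// Sketch: approximate the memory cost of one inference via Java-heap deltas.
public class MemorySampler {
    // Currently used Java-heap bytes in this process
    public static long usedHeapBytes() {
        Runtime rt = Runtime.getRuntime();
        return rt.totalMemory() - rt.freeMemory();
    }

    // Heap delta around a single inference call; may be negative if a GC runs in between
    public static long inferenceHeapDelta(AiModelManager manager, String modelName, AIDLTensor input) {
        long before = usedHeapBytes();
        manager.runInference(modelName, input);
        long after = usedHeapBytes();
        return after - before;
    }
}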
Cross-Device Performance Comparison
// Cross-device performance comparison service
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class CrossDeviceCompare {
    public static void comparePerformance(String modelName, List<DeviceInfo> devices)
            throws InterruptedException, ExecutionException {
        ExecutorService executor = Executors.newFixedThreadPool(devices.size());
        List<Future<DevicePerf>> futures = new ArrayList<>();

        // Run the test on every device in parallel
        for (DeviceInfo device : devices) {
            futures.add(executor.submit(() -> {
                AiModelManager manager = connectToDevice(device);
                return runPerformanceTest(manager, modelName);
            }));
        }

        // Collect the results from each device
        List<DevicePerf> results = new ArrayList<>();
        for (Future<DevicePerf> future : futures) {
            results.add(future.get());
        }
        executor.shutdown();

        // Generate the comparison report
        generateReport(modelName, results);
    }
}
4. Test Plan
Benchmark Test Cases (Java)
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.assertTrue;

public class AiInferenceBenchmark {
    private AiModelManager modelManager;
    private static final int WARMUP_ROUNDS = 5;
    private static final int TEST_ROUNDS = 10;

    @Before
    public void setup() {
        modelManager = new AiModelManager();
        modelManager.init(getContext());
    }

    @Test
    public void benchmarkFaceDetection() {
        benchmarkModel("face_detection");
    }

    @Test
    public void benchmarkObjectDetection() {
        benchmarkModel("object_detection");
    }

    private void benchmarkModel(String modelName) {
        // Load the model
        assertTrue(modelManager.loadModel(modelName, getModelPath(modelName)));

        // Prepare input data
        AIDLTensor input = createTestInput();

        // Warm-up runs
        for (int i = 0; i < WARMUP_ROUNDS; i++) {
            modelManager.runInference(modelName, input);
        }

        // Measured runs
        long totalTime = 0;
        for (int i = 0; i < TEST_ROUNDS; i++) {
            long startTime = System.nanoTime();
            modelManager.runInference(modelName, input);
            long endTime = System.nanoTime();
            totalTime += (endTime - startTime);
        }

        long avgTime = totalTime / TEST_ROUNDS / 1000000; // convert to milliseconds
        HiLog.info("AiInferenceBenchmark",
            "Model %{public}s average inference time: %{public}dms", modelName, avgTime);

        // Verify the performance threshold
        assertTrue("Inference time exceeds threshold", avgTime < getThresholdForModel(modelName));
    }
}
Performance Regression Test (ArkTS)
// AiPerformanceTest.ets
@Entry
@Component
struct AiPerformanceTest {
  @State baselineResults: TestResult[] = [];
  @State currentResults: TestResult[] = [];
  @State regressionItems: RegressionItem[] = [];

  aboutToAppear() {
    this.loadBaselineData();
  }

  private loadBaselineData() {
    // Load baseline data from local storage
    storage.get('ai_perf_baseline', (err, data) => {
      if (!err && data) {
        this.baselineResults = JSON.parse(data);
      }
    });
  }

  private saveAsBaseline() {
    storage.set('ai_perf_baseline', JSON.stringify(this.currentResults));
  }

  private async runRegressionTest() {
    const tester = new AiInferenceTester();
    this.currentResults = await tester.runAllTests();

    // Compare against the baseline
    this.regressionItems = this.compareWithBaseline();
  }

  private compareWithBaseline(): RegressionItem[] {
    if (this.baselineResults.length === 0) return [];

    return this.currentResults.map(current => {
      const baseline = this.baselineResults.find(b => b.modelName === current.modelName);
      if (!baseline) {
        return {
          modelName: current.modelName,
          change: 'new',
          current: current.inferenceTime,
          baseline: 0
        };
      }

      // More than 10% slower counts as a regression, more than 10% faster as an improvement
      const ratio = current.inferenceTime / baseline.inferenceTime;
      return {
        modelName: current.modelName,
        change: ratio > 1.1 ? 'worse' :
                ratio < 0.9 ? 'better' : 'same',
        current: current.inferenceTime,
        baseline: baseline.inferenceTime
      };
    });
  }

  build() {
    Column() {
      Button('Run performance test')
        .onClick(() => this.runRegressionTest())
        .width('80%')
        .margin(10)

      Button('Save as baseline')
        .onClick(() => this.saveAsBaseline())
        .width('80%')
        .margin(10)

      if (this.regressionItems.length > 0) {
        this.buildRegressionReport()
      }
    }
  }

  @Builder
  private buildRegressionReport() {
    Column() {
      Text('Performance regression report')
        .fontSize(18)
        .margin(10)

      List() {
        ForEach(this.regressionItems, (item) => {
          ListItem() {
            RegressionItemCard({ item })
          }
        })
      }
      .height(300)
    }
  }
}

@Component
struct RegressionItemCard {
  @Prop item: RegressionItem

  build() {
    Row() {
      Text(this.item.modelName)
        .fontSize(16)
        .layoutWeight(1)

      Column() {
        Text(`Current: ${this.item.current}ms`)
        Text(`Baseline: ${this.item.baseline}ms`)
      }
      .layoutWeight(1)

      Text(this.getChangeText())
        .fontColor(this.getChangeColor())
        .fontSize(16)
    }
    .padding(10)
  }

  private getChangeText(): string {
    switch (this.item.change) {
      case 'better': return '↑ Improved';
      case 'worse': return '↓ Regressed';
      case 'same': return '→ Unchanged';
      default: return 'New test';
    }
  }

  private getChangeColor(): string {
    switch (this.item.change) {
      case 'better': return '#67C23A';
      case 'worse': return '#F56C6C';
      default: return '#909399';
    }
  }
}

interface RegressionItem {
  modelName: string;
  change: 'better' | 'worse' | 'same' | 'new';
  current: number;
  baseline: number;
}
5. Summary and Outlook
This solution implements the following core capabilities:
Accurate latency measurement: precise measurement of model loading time and of each inference stage
Multi-dimensional analysis: average, minimum, and maximum latency statistics
Visualization: intuitive charts showing performance data and trends
Cross-device comparison: performance comparison and analysis across different devices
Future optimization directions:
Add GPU/NPU hardware acceleration statistics
Integrate memory and power consumption monitoring
Support automated performance regression testing
Add AI model optimization recommendations
With this solution, developers gain a complete picture of how AI models perform on HarmonyOS devices, and obtain the data needed to optimize models and improve the user experience.