鸿蒙端侧AI模型推理耗时统计方案设计与实现 原创

进修的泡芙
发布于 2025-6-17 20:48
浏览
0收藏

鸿蒙端侧AI模型推理耗时统计方案设计与实现

一、系统架构设计

基于HarmonyOS的AI框架,我们设计了一套端侧AI模型推理耗时统计系统,用于分析和优化跨设备场景下的AI推理性能。

![端侧AI推理耗时统计系统架构图](https://example.com/ai-inference-arch.png)

系统包含三大核心模块:
模型管理模块 - 负责AI模型的加载与配置

推理执行模块 - 执行模型推理并收集性能数据

统计分析模块 - 对耗时数据进行可视化展示

二、核心代码实现
AI模型管理服务(Java)

// AiModelManager.java
public class AiModelManager {
private static final String TAG = “AiModelManager”;
private AIDL aiEngine;
private Map<String, ModelInfo> loadedModels = new HashMap<>();

// 初始化AI引擎
public void init(Context context) {
    aiEngine = AIDL.create(context);
    aiEngine.setConfig(new AIDLConfig()
        .setPerformanceMode(AIDLConfig.PERFORMANCE_HIGH)
        .setPreference(AIDLConfig.PREFERENCE_FAST_SINGLE_ANSWER)
    );

// 加载模型

public boolean loadModel(String modelName, String modelPath) {
    try {
        long startTime = System.currentTimeMillis();
        
        ModelInfo modelInfo = new ModelInfo();
        modelInfo.model = aiEngine.loadModel(modelPath);
        modelInfo.loadTime = System.currentTimeMillis() - startTime;
        
        loadedModels.put(modelName, modelInfo);
        HiLog.info(TAG, "模型加载完成: " + modelName);
        return true;

catch (AIDLException e) {

        HiLog.error(TAG, "模型加载失败: " + e.getMessage());
        return false;

}

// 执行推理
public InferenceResult runInference(String modelName, AIDLTensor input) {
    ModelInfo modelInfo = loadedModels.get(modelName);
    if (modelInfo == null) {
        throw new IllegalStateException("模型未加载: " + modelName);

try {

        long startTime = System.currentTimeMillis();
        
        AIDLTensor output = aiEngine.run(modelInfo.model, input);
        
        long inferenceTime = System.currentTimeMillis() - startTime;
        modelInfo.addInferenceTime(inferenceTime);
        
        return new InferenceResult(output, inferenceTime);

catch (AIDLException e) {

        HiLog.error(TAG, "推理执行失败: " + e.getMessage());
        throw new RuntimeException(e);

}

// 获取模型统计信息
public ModelStats getModelStats(String modelName) {
    ModelInfo modelInfo = loadedModels.get(modelName);
    if (modelInfo == null) {
        return null;

return modelInfo.getStats();

// 模型信息封装类

private static class ModelInfo {
    AIDLModel model;
    long loadTime;
    List<Long> inferenceTimes = new ArrayList<>();
    
    void addInferenceTime(long time) {
        inferenceTimes.add(time);

ModelStats getStats() {

        ModelStats stats = new ModelStats();
        stats.loadTime = loadTime;
        
        if (!inferenceTimes.isEmpty()) {
            stats.avgInferenceTime = (long) inferenceTimes.stream()
                .mapToLong(Long::longValue)
                .average()
                .orElse(0);
            
            stats.minInferenceTime = inferenceTimes.stream()
                .mapToLong(Long::longValue)
                .min()
                .orElse(0);
            
            stats.maxInferenceTime = inferenceTimes.stream()
                .mapToLong(Long::longValue)
                .max()
                .orElse(0);

return stats;

}

// 模型统计信息
public static class ModelStats {
    public long loadTime;
    public long avgInferenceTime;
    public long minInferenceTime;
    public long maxInferenceTime;

// 推理结果

public static class InferenceResult {
    public final AIDLTensor output;
    public final long inferenceTime;
    
    public InferenceResult(AIDLTensor output, long inferenceTime) {
        this.output = output;
        this.inferenceTime = inferenceTime;

}

推理耗时统计界面(ArkTS)

// AiInferenceStats.ets
// Inference-time statistics page: model selector, stats overview cards,
// a latency trend chart and a recent-inference history list.
import ai from '@ohos.ai';
import modelManager from '../services/AiModelManager'; // fixed: path was mangled to '…/'

@Entry
@Component
struct AiInferenceStats {
  @State modelStats: ModelStats | null = null;
  @State inferenceHistory: InferenceRecord[] = [];
  @State currentModel: string = 'face_detection';

  aboutToAppear() {
    this.loadModelStats();
  }

  // Fetch aggregated stats for the current model, then refresh history.
  private loadModelStats() {
    modelManager.getModelStats(this.currentModel, (stats) => {
      this.modelStats = stats;
      this.loadInferenceHistory();
    });
  }

  // Load the 20 most recent inference records for the current model.
  private loadInferenceHistory() {
    modelManager.getInferenceHistory(this.currentModel, 20, (records) => {
      this.inferenceHistory = records;
    });
  }

  build() {
    Column() {
      // Model selector
      this.buildModelSelector()

      // Stats overview (only once stats have arrived)
      if (this.modelStats) {
        this.buildStatsOverview()
      }

      // Latency trend chart
      this.buildInferenceChart()

      // History list
      this.buildHistoryList()
    }
    .padding(10)
  }

  @Builder
  private buildModelSelector() {
    Row() {
      Text('选择模型:')
        .fontSize(16)

      Select([
        { value: 'face_detection', name: '人脸检测' },
        { value: 'object_detection', name: '目标检测' },
        { value: 'image_classification', name: '图像分类' }
      ], this.currentModel)
        .onSelect((value: string) => {
          this.currentModel = value;
          this.loadModelStats();
        })
    }
    .margin({ bottom: 20 })
  }

  @Builder
  private buildStatsOverview() {
    Column() {
      StatItem({
        label: '模型加载耗时',
        value: `${this.modelStats.loadTime}ms`, // fixed: backticks were stripped
        icon: 'resources/load.png'
      })

      StatItem({
        label: '平均推理耗时',
        value: `${this.modelStats.avgInferenceTime}ms`,
        icon: 'resources/avg.png'
      })

      StatItem({
        label: '最快推理耗时',
        value: `${this.modelStats.minInferenceTime}ms`,
        icon: 'resources/min.png'
      })

      StatItem({
        label: '最慢推理耗时',
        value: `${this.modelStats.maxInferenceTime}ms`,
        icon: 'resources/max.png'
      })
    }
    .width('100%')
    .margin({ bottom: 20 })
  }

  @Builder
  private buildInferenceChart() {
    Column() {
      Text('推理耗时趋势')
        .fontSize(16)
        .margin({ bottom: 10 })

      LineChart({
        data: this.inferenceHistory.map(r => r.time),
        config: {
          height: 200,
          lineColor: '#409EFF',
          showXAxis: true,
          showYAxis: true
        }
      })
    }
    .width('100%')
    .margin({ bottom: 20 })
  }

  @Builder
  private buildHistoryList() {
    Column() {
      Text('最近推理记录')
        .fontSize(16)
        .margin({ bottom: 10 })

      List() {
        ForEach(this.inferenceHistory, (record) => {
          ListItem() {
            Row() {
              Text(`#${record.id}`)
                .width('15%')

              Text(`${record.time}ms`)
                .width('25%')
                .textAlign(TextAlign.End)

              Text(new Date(record.timestamp).toLocaleTimeString())
                .width('30%')
                .textAlign(TextAlign.End)

              // Tag colored relative to the model's average latency
              PerformanceTag({
                time: record.time,
                threshold: this.modelStats?.avgInferenceTime || 0
              })
            }
            .padding(10)
          }
        })
      }
      .height(200)
    }
  }
}

// One row in the stats overview: icon + label + value.
@Component
struct StatItem {
  @Prop label: string
  @Prop value: string
  @Prop icon: string

  build() {
    Row() {
      Image(this.icon)
        .width(24)
        .height(24)
        .margin({ right: 10 })

      Text(this.label)
        .fontSize(14)
        .layoutWeight(1)

      Text(this.value)
        .fontSize(14)
        .fontColor('#409EFF')
    }
    .height(40)
    .borderRadius(8)
    .backgroundColor('#F5F5F5')
    .padding(10)
    .margin({ bottom: 8 })
  }
}

// Colored tag: '优' when at or below the threshold, '良' otherwise.
@Component
struct PerformanceTag {
  @Prop time: number
  @Prop threshold: number

  // fixed: ArkTS build() cannot contain local statements, so the original
  // `const isGood = ...` is replaced with inline comparisons.
  build() {
    Text(this.time <= this.threshold ? '优' : '良')
      .fontSize(12)
      .fontColor(this.time <= this.threshold ? '#67C23A' : '#E6A23C')
      .backgroundColor(this.time <= this.threshold ? '#f0f9eb' : '#fdf6ec')
      .borderRadius(4)
      .padding(2)
      .width('30%')
      .textAlign(TextAlign.Center)
  }
}

interface ModelStats {
  loadTime: number;
  avgInferenceTime: number;
  minInferenceTime: number;
  maxInferenceTime: number;
}

interface InferenceRecord {
  id: number;
  time: number;
  timestamp: number;
}

模型推理测试组件(ArkTS)

// AiInferenceTest.ets
// Performance-test page: runs a warm-up plus one timed inference per model
// against a fixed test image and renders the per-model result cards.
import ai from '@ohos.ai';
import modelManager from '../services/AiModelManager'; // fixed: path was mangled to '…/'

@Entry
@Component
struct AiInferenceTest {
  @State testImage: PixelMap | null = null;
  @State testResults: TestResult[] = [];
  @State isTesting: boolean = false;

  aboutToAppear() {
    this.loadTestImage();
  }

  // Load the bundled test image used as inference input.
  private loadTestImage() {
    image.createPixelMapFromFile('resources/test_image.jpg', (err, pixelMap) => {
      if (!err && pixelMap) {
        this.testImage = pixelMap;
      }
    });
  }

  build() {
    Column() {
      // Test-image preview
      if (this.testImage) {
        Image(this.testImage)
          .width(200)
          .height(200)
          .margin(10)
      }

      // Start button (disabled while a run is in flight)
      Button(this.isTesting ? '测试中...' : '开始性能测试')
        .onClick(() => this.runTests())
        .disabled(this.isTesting)
        .width('80%')
        .margin(10)

      // Result cards
      if (this.testResults.length > 0) {
        this.buildTestResults()
      }
    }
  }

  @Builder
  private buildTestResults() {
    Column() {
      Text('测试结果')
        .fontSize(18)
        .margin(10)

      Grid() {
        ForEach(this.testResults, (result) => {
          GridItem() {
            TestResultCard({ result })
          }
        })
      }
      .columnsTemplate('1fr 1fr')
      .columnsGap(10)
      .rowsGap(10)
    }
  }

  // Run the benchmark for every model in sequence.
  private async runTests() {
    if (!this.testImage) return;

    this.isTesting = true;
    this.testResults = [];

    // Prepare the input tensor once and reuse it for every model.
    const inputTensor = await ai.createTensorFromPixelMap(this.testImage);

    const models = [
      'face_detection',
      'object_detection',
      'image_classification'
    ];

    for (const model of models) {
      const result = await this.runSingleTest(model, inputTensor);
      this.testResults = [...this.testResults, result];
    }

    this.isTesting = false;
  }

  // One warm-up run followed by one timed run for a single model.
  private async runSingleTest(modelName: string, input: AIDLTensor): Promise<TestResult> {
    // Ensure the model is loaded
    if (!modelManager.isModelLoaded(modelName)) {
      await modelManager.loadModel(modelName);
    }

    // Warm-up run (excluded from the timing)
    await modelManager.runInference(modelName, input);

    // Timed run: totalTime includes call overhead, inferenceTime is the
    // engine-reported pure inference cost.
    const startTime = Date.now();
    const result = await modelManager.runInference(modelName, input);
    const endTime = Date.now();

    return {
      modelName,
      inferenceTime: result.inferenceTime,
      totalTime: endTime - startTime,
      timestamp: Date.now()
    };
  }
}

// Card showing one model's inference and total latency.
@Component
struct TestResultCard {
  @Prop result: TestResult

  build() {
    Column() {
      Text(this.getModelDisplayName())
        .fontSize(16)
        .fontWeight(FontWeight.Bold)
        .margin({ bottom: 5 })

      Divider()

      Row() {
        Text('推理耗时:')
          .fontSize(14)

        Text(`${this.result.inferenceTime}ms`) // fixed: backticks were stripped
          .fontSize(14)
          .fontColor('#409EFF')
      }
      .margin({ top: 5 })

      Row() {
        Text('总耗时:')
          .fontSize(14)

        Text(`${this.result.totalTime}ms`)
          .fontSize(14)
          .fontColor('#67C23A')
      }
      .margin({ top: 5 })
    }
    .padding(10)
    .borderRadius(8)
    .backgroundColor('#F5F5F5')
  }

  // Map model identifiers to display names; fall back to the raw id.
  private getModelDisplayName(): string {
    switch (this.result.modelName) {
      case 'face_detection': return '人脸检测';
      case 'object_detection': return '目标检测';
      case 'image_classification': return '图像分类';
      default: return this.result.modelName;
    }
  }
}

interface TestResult {
  modelName: string;
  inferenceTime: number;
  totalTime: number;
  timestamp: number;
}

三、关键技术实现
推理耗时统计流程

sequenceDiagram
participant 应用
participant AI框架
participant 模型

应用->>AI框架: 加载模型
AI框架->>模型: 初始化
模型-->>AI框架: 加载完成
AI框架-->>应用: 返回加载耗时

应用->>AI框架: 执行推理
AI框架->>模型: 前向计算
模型-->>AI框架: 推理结果
AI框架-->>应用: 返回推理耗时

应用->>应用: 记录性能数据

性能数据采集点

| 数据点 | 采集方式 | 说明 |
| --- | --- | --- |
| 模型加载耗时 | System.currentTimeMillis() 差值 | 从开始加载到加载完成的时间 |
| 推理耗时 | System.currentTimeMillis() 差值 | 从输入数据到输出结果的时间 |
| 内存占用 | Runtime.getRuntime() | 推理前后的内存变化 |
| CPU利用率 | ActivityManager.getProcessMemoryInfo() | 推理期间的CPU使用率(注:该API实际返回的是进程内存信息,CPU利用率需另用系统性能接口采集) |

跨设备性能对比

// 跨设备性能对比服务
public class CrossDeviceCompare {
public static void comparePerformance(String modelName, List<DeviceInfo> devices) {
ExecutorService executor = Executors.newFixedThreadPool(devices.size());
List<Future<DevicePerf>> futures = new ArrayList<>();

    // 在每个设备上运行测试
    for (DeviceInfo device : devices) {
        futures.add(executor.submit(() -> {
            AiModelManager manager = connectToDevice(device);
            return runPerformanceTest(manager, modelName);
        }));

// 收集结果

    List<DevicePerf> results = new ArrayList<>();
    for (Future<DevicePerf> future : futures) {
        results.add(future.get());

// 生成对比报告

    generateReport(modelName, results);

}

四、测试方案
基准测试用例(Java)

public class AiInferenceBenchmark {
private AiModelManager modelManager;
private static final int WARMUP_ROUNDS = 5;
private static final int TEST_ROUNDS = 10;

@Before
public void setup() {
    modelManager = new AiModelManager();
    modelManager.init(getContext());

@Test

public void benchmarkFaceDetection() {
    benchmarkModel("face_detection");

@Test

public void benchmarkObjectDetection() {
    benchmarkModel("object_detection");

private void benchmarkModel(String modelName) {

    // 加载模型
    assertTrue(modelManager.loadModel(modelName, getModelPath(modelName)));
    
    // 准备输入数据
    AIDLTensor input = createTestInput();
    
    // 预热运行
    for (int i = 0; i < WARMUP_ROUNDS; i++) {
        modelManager.runInference(modelName, input);

// 正式测试

    long totalTime = 0;
    for (int i = 0; i < TEST_ROUNDS; i++) {
        long startTime = System.nanoTime();
        modelManager.runInference(modelName, input);
        long endTime = System.nanoTime();
        totalTime += (endTime - startTime);

long avgTime = totalTime / TEST_ROUNDS / 1000000; // 转换为毫秒

    HiLog.info("AiInferenceBenchmark", 
        "模型 %{public}s 平均推理耗时: %{public}dms", modelName, avgTime);
    
    // 验证性能指标
    assertTrue("推理耗时超过阈值", avgTime < getThresholdForModel(modelName));

}

性能回归测试(ArkTS)

// AiPerformanceTest.ets
// Performance-regression page: runs the full test suite, compares results
// against a locally stored baseline and reports per-model changes.
@Entry
@Component
struct AiPerformanceTest {
  @State baselineResults: TestResult[] = [];
  @State currentResults: TestResult[] = [];
  @State regressionItems: RegressionItem[] = [];

  aboutToAppear() {
    this.loadBaselineData();
  }

  // Load baseline results from local storage (no-op on first run).
  private loadBaselineData() {
    storage.get('ai_perf_baseline', (err, data) => {
      if (!err && data) {
        this.baselineResults = JSON.parse(data);
      }
    });
  }

  // Persist the latest run as the new comparison baseline.
  private saveAsBaseline() {
    storage.set('ai_perf_baseline', JSON.stringify(this.currentResults));
  }

  // Run every test, then diff the results against the baseline.
  private async runRegressionTest() {
    const tester = new AiInferenceTester();
    this.currentResults = await tester.runAllTests();

    this.regressionItems = this.compareWithBaseline();
  }

  // Classify each model's latency vs. baseline with a ±10% tolerance band.
  private compareWithBaseline(): RegressionItem[] {
    if (this.baselineResults.length === 0) return [];

    return this.currentResults.map(current => {
      const baseline = this.baselineResults.find(b => b.modelName === current.modelName);
      if (!baseline) {
        // Model not present in the baseline: report as a new test.
        return {
          modelName: current.modelName,
          change: 'new',
          current: current.inferenceTime,
          baseline: 0
        };
      }

      const ratio = current.inferenceTime / baseline.inferenceTime;

      return {
        modelName: current.modelName,
        change: ratio > 1.1 ? 'worse' :
                ratio < 0.9 ? 'better' : 'same',
        current: current.inferenceTime,
        baseline: baseline.inferenceTime
      };
    });
  }

  build() {
    Column() {
      Button('运行性能测试')
        .onClick(() => this.runRegressionTest())
        .width('80%')
        .margin(10)

      Button('设为基线')
        .onClick(() => this.saveAsBaseline())
        .width('80%')
        .margin(10)

      if (this.regressionItems.length > 0) {
        this.buildRegressionReport()
      }
    }
  }

  @Builder
  private buildRegressionReport() {
    Column() {
      Text('性能回归报告')
        .fontSize(18)
        .margin(10)

      List() {
        ForEach(this.regressionItems, (item) => {
          ListItem() {
            RegressionItemCard({ item })
          }
        })
      }
      .height(300)
    }
  }
}

// One report row: model name, current/baseline latency and the change verdict.
@Component
struct RegressionItemCard {
  @Prop item: RegressionItem

  build() {
    Row() {
      Text(this.item.modelName)
        .fontSize(16)
        .layoutWeight(1)

      Column() {
        Text(`当前: ${this.item.current}ms`) // fixed: backticks were stripped
        Text(`基线: ${this.item.baseline}ms`)
      }
      .layoutWeight(1)

      Text(this.getChangeText())
        .fontColor(this.getChangeColor())
        .fontSize(16)
    }
    .padding(10)
  }

  private getChangeText(): string {
    switch (this.item.change) {
      case 'better': return '↑ 提升';
      case 'worse': return '↓ 下降';
      case 'same': return '→ 持平';
      default: return '新测试';
    }
  }

  private getChangeColor(): string {
    switch (this.item.change) {
      case 'better': return '#67C23A';
      case 'worse': return '#F56C6C';
      default: return '#909399';
    }
  }
}

interface RegressionItem {
  modelName: string;
  // fixed: union separators '|' were stripped by the extraction
  change: 'better' | 'worse' | 'same' | 'new';
  current: number;
  baseline: number;
}

五、总结与展望

本方案实现了以下核心功能:
精准耗时统计:精确测量模型加载和推理各阶段耗时

多维度分析:支持平均耗时、最大最小耗时等统计指标

可视化展示:直观的图表展示性能数据和趋势

跨设备对比:支持不同设备间的性能对比分析

未来优化方向:
增加GPU/NPU硬件加速统计

集成内存和功耗监控

支持自动化性能回归测试

增加AI模型优化建议功能

通过本方案,开发者可以全面了解AI模型在鸿蒙设备上的性能表现,为优化模型和提升用户体验提供数据支持。

©著作权归作者所有,如需转载,请注明出处,否则将追究法律责任
收藏
回复
举报
回复
    相关推荐