Ubuntu 20.02 & Mali Drivers

@icecream95 are the CPU calcs for the plain CPU and not NEON? We can test that as well.
GpuAcc = GPU, CpuAcc = really NEON, CpuRef = CPU

I slept and had a look at the code and thought it would be easy.
It processes all the audio into an array first, so you may think it has frozen, and then it just runs the model.

python3 run_audio_file.py --audio_file_path samples/hp0.wav --model_file_path tflite_int8/wav2letter_int8.tflite --preferred_backends CpuAcc CpuRef

CPU seems to take approx 5.517623s at approx 45% load
GPU seems to take approx 6.291094s at approx 5% CPU load and 75% GPU load

That is with a hacky Python script feeding it, but there is now very little in the inference loop.
There is some overhead from ArmNN and OpenCL, but this is exactly what I wanted to check.
The RK3588 really is a powerhouse for ML, as its Mali MP4 is almost a perfect match for the CPU.
So we can run 2x models that are approx 2x the load of the current one, and it would be interesting to see how the Mali reacts to bigger models as well.

I had a go with TensorFlow TTS as it's a much heavier load, but working out how to slot in ArmNN and the quantisation specifics is going to be a bigger dev chore than what the above can demo.

# Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT

"""Automatic speech recognition with PyArmNN demo for processing audio clips to text."""

import sys
import os
import numpy as np
import psutil
import soundfile as sf
script_dir = os.path.dirname(__file__)
sys.path.insert(1, os.path.join(script_dir, '..', 'common'))

from argparse import ArgumentParser
from network_executor import ArmnnNetworkExecutor
from utils import prepare_input_data
from audio_capture import AudioCaptureParams, capture_audio
from audio_utils import decode_text, display_text
from wav2letter_mfcc import Wav2LetterMFCC, W2LAudioPreprocessor
from mfcc import MFCCParams
from datetime import datetime, timedelta

# Model specific labels: output index -> character. Indices 0-25 map to the
# lowercase letters, followed by apostrophe (26), space (27) and '$' (28).
labels = dict(enumerate("abcdefghijklmnopqrstuvwxyz' $"))


def time_float(result):
    """Convert a duration given as a number of seconds into a ``timedelta``.

    The integer part becomes the seconds field and the fractional part is
    truncated (not rounded) to whole microseconds.
    """
    micros_per_second = 1000000
    whole_seconds = int(result)
    leftover_micros = int((result * micros_per_second) % micros_per_second)
    return timedelta(seconds=whole_seconds, microseconds=leftover_micros)

def parse_args():
    """Build the command line interface and parse ``sys.argv``.

    Returns:
        The parsed namespace with ``audio_file_path``, ``model_file_path``
        (both required) and ``preferred_backends`` (one or more backend
        names, defaulting to GpuAcc, CpuAcc, CpuRef).
    """
    cli = ArgumentParser(description="ASR with PyArmNN")

    # The two mandatory file paths
    cli.add_argument("--audio_file_path", type=str, required=True,
                     help="Path to the audio file to perform ASR")
    cli.add_argument("--model_file_path", type=str, required=True,
                     help="Path to ASR model to use")

    # Optional ordered backend list
    backends_help = """List of backends in order of preference for optimizing
        subgraphs, falling back to the next backend in the list on unsupported
        layers. Defaults to [GpuAcc, CpuAcc, CpuRef]"""
    cli.add_argument("--preferred_backends", type=str, nargs="+",
                     default=["GpuAcc", "CpuAcc", "CpuRef"],
                     help=backends_help)

    return cli.parse_args()


def main(args, network, input_data):
    """Run inference over prepared audio windows and report timing and CPU load.

    Every chunk is passed to ``network.run``, the result is decoded with
    ``decode_text`` and printed with ``display_text``, followed by the time
    elapsed since this function was entered. After the last chunk the final
    right context and the average of the sampled CPU percentages are printed.

    Args:
        args: Parsed command line arguments. Not used by this function; kept
            so existing callers keep working.
        network: Object exposing ``run(inputs)`` where ``inputs`` is a list
            holding one prepared input tensor.
        input_data: Sized sequence of prepared input tensors, one per window.

    Returns:
        datetime.timedelta: Time from entry to this function until the last
        chunk was processed and displayed; ``timedelta(0)`` when
        ``input_data`` is empty.
    """
    num_chunks = len(input_data)
    if num_chunks == 0:
        # Nothing to run: avoid dividing by zero for the CPU average and
        # returning an unbound runtime.
        print("Inference End: no input chunks to process")
        return timedelta(0)

    current_r_context = ""
    is_first_window = True
    cpu_total = 0.0

    # Discard one sample so that the first reading inside the loop covers the
    # inference work rather than whatever ran before this call.
    psutil.cpu_percent()

    # Time from a local start point instead of relying on a module-level
    # variable that only exists when the file is run as a script.
    start = datetime.now()
    runtime = timedelta(0)

    for input_chunk in input_data:
        # Run inference
        output_result = network.run([input_chunk])

        # Slice and decode the text, and store the right context
        current_r_context, text = decode_text(is_first_window, labels, output_result)
        is_first_window = False

        display_text(text)
        runtime = datetime.now() - start
        print(" " + str(runtime))
        cpu_total += psutil.cpu_percent()

    print(current_r_context, flush=True)
    print("Inference End: Avg CPU%=" + str(cpu_total / num_chunks))
    return runtime

if __name__ == "__main__":
    # Command line arguments supply the model, the backends and the audio clip
    args = parse_args()

    # Build the ArmNN executor for the requested model and backends
    network = ArmnnNetworkExecutor(args.model_file_path, args.preferred_backends)

    # Clip duration = number of samples / sample rate, kept as a timedelta so
    # it can be divided by the measured runtime below
    audio_file = args.audio_file_path
    sf_data, samplerate = sf.read(audio_file)
    sf_secs = time_float(len(sf_data) / samplerate)

    # Model specific audio requirements handed to the capture helper
    audio_capture_params = AudioCaptureParams(
        dtype=np.float32,
        overlap=31712,
        min_samples=47712,
        sampling_freq=16000,
        mono=True,
    )
    buffer = capture_audio(audio_file, audio_capture_params)

    # Feature extraction settings and the preprocessor built on top of them
    mfcc_params = MFCCParams(
        sampling_freq=16000,
        num_fbank_bins=128,
        mel_lo_freq=0,
        mel_hi_freq=8000,
        num_mfcc_feats=13,
        frame_len=512,
        use_htk_method=False,
        n_fft=512,
    )
    preprocessor = W2LAudioPreprocessor(Wav2LetterMFCC(mfcc_params), model_input_size=296, stride=160)

    # All input tensors are prepared up front so that the passes below spend
    # their time in the inference loop only
    print("Processing Audio Frames...")
    input_data = [
        prepare_input_data(
            audio_data,
            network.get_data_type(),
            network.get_input_quantization_scale(0),
            network.get_input_quantization_offset(0),
            preprocessor,
        )
        for audio_data in buffer
    ]

    # Two identical passes over the same prepared input (presumably the first
    # doubles as a warm-up - confirm); starttime is reset before each pass
    for _ in range(2):
        starttime = datetime.now()
        runtime = main(args, network, input_data)
        print("Runtime=" + str(runtime))
        print("Realtime=x" + str(sf_secs / runtime))
rock@rock-5b:~/workspace/armnn/python/pyarmnn/examples/speech_recognition$ python3 run_audio_file.py --audio_file_path samples/hp0.wav --model_file_path tflite_int8/wav2letter_int8.tflite --preferred_backends CpuAcc CpuRef
Your ArmNN library instance does not support Onnx models parser functionality.  Skipped IOnnxParser import.
Can't load libOpenCL.so: libOpenCL.so: cannot open shared object file: No such file or directory
Can't load libGLES_mali.so: libGLES_mali.so: cannot open shared object file: No such file or directory
arm_release_ver of this libmali is 'g6p0-01eac0', rk_so_ver is '5'.
Preferred backends: ['CpuAcc', 'CpuRef']
IDeviceSpec { supportedBackends: [CpuAcc, CpuRef, GpuAcc]}
Optimization warnings: ()
Processing Audio Frames...
 henry f 0:00:00.039640
 phillips 0:00:00.081647
 from wic 0:00:00.111952
epedeia 0:00:00.134524
 the free ind cy 0:00:00.153441
ycliopedioa 0:00:00.172588
 at e 0:00:00.191404
 and d 0:00:00.210939
ot we cud 0:00:00.230029
pedia tha 0:00:00.248594
t org 0:00:00.266827
 0:00:00.285864
 henry 0:00:00.305864
 f philip 0:00:00.324896
s 0:00:00.344135
 from  0:00:00.363412
 wickepedia  0:00:00.381937
 the freie andcyclo 0:00:00.401530
opedia 0:00:00.420985
 0:00:00.439934
 henry afh 0:00:00.458467
e philips 0:00:00.477728
 eighteen ni 0:00:00.495774
ney to nin 0:00:00.513933
neteen fifty e 0:00:00.532492
ight 0:00:00.551160
 a youas  0:00:00.570730
 businessman 0:00:00.589955
 from portland 0:00:00.608142
d or again 0:00:00.627386
 a 0:00:00.645696
s the honor 0:00:00.664017
 of having the phi 0:00:00.683096
lip's head scre 0:00:00.702685
w and sc 0:00:00.730444
crew driver  0:00:00.752651
 named ater hi 0:00:00.774348
m 0:00:00.793570
 0:00:00.812513
 the importance  0:00:00.831017
 of the cross had i 0:00:00.849545
 screw design 0:00:00.868613
 lies 0:00:00.887060
 in its t self ce 0:00:00.908571
entering property 0:00:00.927869
y u 0:00:00.946793
useful on ad 0:00:00.964844
imated production 0:00:00.984788
n lines t 0:00:01.003689
he use powered 0:00:01.022886
d screw drivers 0:00:01.044580
 0:00:01.065240
 philip 0:00:01.085195
's major contrib 0:00:01.105548
bution was i 0:00:01.124491
n driving the cro 0:00:01.142652
oss head concept 0:00:01.163555
 forward 0:00:01.182046
 to the point 0:00:01.220041
 where it was a 0:00:01.256361
dopted by scre 0:00:01.278730
ew makers an 0:00:01.298773
nd automabiele c 0:00:01.317458
companiese 0:00:01.336092
 0:00:01.354528
 although he recei 0:00:01.373344
ved patents for the 0:00:01.391744
 design in 0:00:01.410014
 nineten thirty  0:00:01.428450
six 0:00:01.449285
 you as pate 0:00:01.467786
nt number  0:00:01.486576
 to million 0:00:01.505696
 forty six 0:00:01.535174
x thousand 0:00:01.563460
 three hundred f 0:00:01.586442
forty three 0:00:01.606334
 you es 0:00:01.625209
 patents t 0:00:01.643606
oo million 0:00:01.661801
 forty six thou 0:00:01.681349
usand aig 0:00:01.701606
he hundred thirty se 0:00:01.734885
ven to t 0:00:01.758276
two million  0:00:01.778433
 forty six thou 0:00:01.797458
sand eight  0:00:01.815965
hundered forty 0:00:01.835066
 0:00:01.853834
 it was so wi 0:00:01.871975
dely copied 0:00:01.890931
 that by nin 0:00:01.909609
neteen forty ni 0:00:01.928052
ne phili 0:00:01.946818
ips lost his p 0:00:01.965393
partent 0:00:01.983982
 0:00:02.005255
 the american  0:00:02.024593
 screw company 0:00:02.045068
 was respon 0:00:02.076243
nsible for devis 0:00:02.105305
sing a means  0:00:02.126591
 of manufactu 0:00:02.145923
ring the screw 0:00:02.164145
 and  0:00:02.182812
successfully pa 0:00:02.204005
tented and  0:00:02.222751
licence that the are 0:00:02.241392
 method 0:00:02.260188
 o 0:00:02.278339
others screw makers 0:00:02.298520
 of the nineteen h th 0:00:02.316845
hirties dis 0:00:02.335092
missed the phillip's c 0:00:02.353498
concept  0:00:02.371793
since it calls for  0:00:02.389805
 relatively co 0:00:02.411886
mplex r 0:00:02.430947
resisets ocke 0:00:02.450992
et sheepe i 0:00:02.470813
in the head of the scr 0:00:02.489585
rew 0:00:02.507690
 as disti 0:00:02.526146
inct from the simple 0:00:02.544312
e milled slaught 0:00:02.562836
 of a slaugh 0:00:02.583057
hted type scre 0:00:02.612682
w 0:00:02.642730
 the philip' 0:00:02.666383
s  screw compa 0:00:02.686970
any and  0:00:02.705801
 the american sc 0:00:02.723954
rew company 0:00:02.743269
 went on  0:00:02.761700
 to devise  0:00:02.779779
the posi drive 0:00:02.799883
e screw 0:00:02.818531
 which  0:00:02.837064
 differs from the ph 0:00:02.855034
hilips  0:00:02.873806
 in that it is de 0:00:02.897096
signed to accomo 0:00:02.917923
dae greaterd t 0:00:02.938485
ork than the phi 0:00:02.957239
lips 0:00:02.976965
 0:00:02.995992
 and image a  0:00:03.014475
companied this arti 0:00:03.033712
icol  0:00:03.071588
 caption 0:00:03.109826
 philips  0:00:03.139520
 screw head 0:00:03.161752
 0:00:03.180733
 0:00:03.199879
 the followin 0:00:03.218120
g is an infu bo 0:00:03.236750
x which ac 0:00:03.256835
ccompanies this ar 0:00:03.276173
rticle 0:00:03.294907
 in  0:00:03.313449
fu box 0:00:03.332069
 part o thes  0:00:03.350309
 series un 0:00:03.369606
screw drive 0:00:03.388178
 types 0:00:03.406281
 0:00:03.425665
 slaughted 0:00:03.444362
 commonl 0:00:03.462749
y eroneou 0:00:03.481221
usly fla 0:00:03.500575
athead 0:00:03.519235
 0:00:03.537542
 phylips 0:00:03.560982
 cross he 0:00:03.583342
ad 0:00:03.603654
 0:00:03.623570
 pasierive 0:00:03.643562
 super  0:00:03.662293
drive 0:00:03.680999
 0:00:03.700240
 tokgs 0:00:03.718740
 0:00:03.737211
 ha 0:00:03.756135
cx a 0:00:03.776041
len 0:00:03.794814
 0:00:03.814861
 roberts son 0:00:03.836249
 0:00:03.856562
  0:00:03.875218
try wing 0:00:03.893283
 0:00:03.915533
 tark 0:00:03.936371
 set 0:00:03.954823
 0:00:03.973410
 span er head 0:00:03.991776
 0:00:04.012391
 0:00:04.032344
 triple square 0:00:04.051670
e ex 0:00:04.070651
sy nd 0:00:04.093896
 0:00:04.116099
 ot 0:00:04.137063
hers 0:00:04.155997
 polly drives 0:00:04.174308
 sp 0:00:04.193733
linmde drive 0:00:04.212975
 double 0:00:04.231921
e hacks 0:00:04.250232
 0:00:04.268701
 many images ac 0:00:04.287337
ccompanyed this in 0:00:04.305945
pu box 0:00:04.326572
 this 0:00:04.345428
 page was last 0:00:04.363623
t modified 0:00:04.382075
 on the ninth of va 0:00:04.400191
april two  0:00:04.418365
 thousand aeight 0:00:04.437033
 at s 0:00:04.455600
seventeen o 0:00:04.474167
 for 0:00:04.492340
 0:00:04.510894
 all te 0:00:04.530033
xt as avaivlable 0:00:04.549138
 under the term 0:00:04.595112
ms of thei ganew 0:00:04.630920
 free document 0:00:04.650296
tation licens 0:00:04.668415
 sea 0:00:04.686839
 copyrites  0:00:04.705349
 for details 0:00:04.725088
 0:00:04.743654
 wichpedia 0:00:04.766718
 is aregister 0:00:04.786224
n trade mark  0:00:04.804714
 of the wikie mmede 0:00:04.822999
ea foundation 0:00:04.841733
 incorporated 0:00:04.859885
 a  0:00:04.878413
 eu as registrud 0:00:04.897591
d fival 0:00:04.921186
 one sea  0:00:04.942560
 three  0:00:04.961235
 tax theductable 0:00:04.979391
 non profhet c 0:00:04.997482
harity 0:00:05.016022
  0:00:05.034412
this sound fi 0:00:05.054727
le and all 0:00:05.076378
 text in the artic 0:00:05.097072
cle or li 0:00:05.115830
cense under  0:00:05.134143
 the thenew fre 0:00:05.152953
e documentation 0:00:05.171724
n license 0:00:05.190068
 availabl 0:00:05.212038
le at  0:00:05.231690
oubl you doubleyou  0:00:05.250553
 dw do 0:00:05.270274
t g 0:00:05.290077
 and you 0:00:05.308365
u dot 0:00:05.327549
 horg 0:00:05.345873
 slash  0:00:05.364095
 cope left 0:00:05.382992
 slash 0:00:05.401753
 f dee 0:00:05.420065
d el 0:00:05.439199
 dout each t 0:00:05.457707
ea m l 0:00:05.476005
 0:00:05.494820
 0:00:05.512901

Inference End: Avg CPU% 44.152573529411804
 henry f 0:00:00.029676
 phillips 0:00:00.054872
 from wic 0:00:00.076020
epedeia 0:00:00.094366
 the free ind cy 0:00:00.114368
ycliopedioa 0:00:00.133422
 at e 0:00:00.152289
 and d 0:00:00.183105
ot we cud 0:00:00.207808
pedia tha 0:00:00.229333
t org 0:00:00.250968
 0:00:00.270178
 henry 0:00:00.288826
 f philip 0:00:00.308294
s 0:00:00.326518
 from  0:00:00.347068
 wickepedia  0:00:00.371150
 the freie andcyclo 0:00:00.393486
opedia 0:00:00.414678
 0:00:00.433286
 henry afh 0:00:00.452419
e philips 0:00:00.479387
 eighteen ni 0:00:00.505646
ney to nin 0:00:00.528157
neteen fifty e 0:00:00.549082
ight 0:00:00.568212
 a youas  0:00:00.587044
 businessman 0:00:00.610898
 from portland 0:00:00.632699
d or again 0:00:00.652812
 a 0:00:00.672068
s the honor 0:00:00.692313
 of having the phi 0:00:00.711468
lip's head scre 0:00:00.730892
w and sc 0:00:00.749864
crew driver  0:00:00.768192
 named ater hi 0:00:00.786391
m 0:00:00.804390
 0:00:00.822623
 the importance  0:00:00.840970
 of the cross had i 0:00:00.860126
 screw design 0:00:00.879214
 lies 0:00:00.897920
 in its t self ce 0:00:00.917412
entering property 0:00:00.935788
y u 0:00:00.954098
useful on ad 0:00:00.973072
imated production 0:00:00.992050
n lines t 0:00:01.010948
he use powered 0:00:01.030059
d screw drivers 0:00:01.048675
 0:00:01.067221
 philip 0:00:01.085260
's major contrib 0:00:01.104387
bution was i 0:00:01.122887
n driving the cro 0:00:01.141004
oss head concept 0:00:01.159672
 forward 0:00:01.177971
 to the point 0:00:01.196020
 where it was a 0:00:01.214700
dopted by scre 0:00:01.233585
ew makers an 0:00:01.254226
nd automabiele c 0:00:01.273182
companiese 0:00:01.291665
 0:00:01.309885
 although he recei 0:00:01.328732
ved patents for the 0:00:01.347082
 design in 0:00:01.365269
 nineten thirty  0:00:01.384728
six 0:00:01.403558
 you as pate 0:00:01.422254
nt number  0:00:01.440639
 to million 0:00:01.458886
 forty six 0:00:01.477751
x thousand 0:00:01.496483
 three hundred f 0:00:01.515026
forty three 0:00:01.533806
 you es 0:00:01.552109
 patents t 0:00:01.570447
oo million 0:00:01.589292
 forty six thou 0:00:01.608702
usand aig 0:00:01.628919
he hundred thirty se 0:00:01.647763
ven to t 0:00:01.666474
two million  0:00:01.685872
 forty six thou 0:00:01.718653
sand eight  0:00:01.746268
hundered forty 0:00:01.765286
 0:00:01.784792
 it was so wi 0:00:01.803680
dely copied 0:00:01.827649
 that by nin 0:00:01.848720
neteen forty ni 0:00:01.867413
ne phili 0:00:01.888695
ips lost his p 0:00:01.907449
partent 0:00:01.926010
 0:00:01.945489
 the american  0:00:01.963833
 screw company 0:00:01.982087
 was respon 0:00:02.003408
nsible for devis 0:00:02.023105
sing a means  0:00:02.042951
 of manufactu 0:00:02.062428
ring the screw 0:00:02.083601
 and  0:00:02.104128
successfully pa 0:00:02.123177
tented and  0:00:02.141869
licence that the are 0:00:02.160193
 method 0:00:02.181144
 o 0:00:02.200624
others screw makers 0:00:02.219218
 of the nineteen h th 0:00:02.237288
hirties dis 0:00:02.255883
missed the phillip's c 0:00:02.274358
concept  0:00:02.294018
since it calls for  0:00:02.312455
 relatively co 0:00:02.330471
mplex r 0:00:02.349534
resisets ocke 0:00:02.367540
et sheepe i 0:00:02.387564
in the head of the scr 0:00:02.405687
rew 0:00:02.423716
 as disti 0:00:02.442485
inct from the simple 0:00:02.461059
e milled slaught 0:00:02.482157
 of a slaugh 0:00:02.500962
hted type scre 0:00:02.521853
w 0:00:02.542050
 the philip' 0:00:02.561338
s  screw compa 0:00:02.580732
any and  0:00:02.607533
 the american sc 0:00:02.630970
rew company 0:00:02.649956
 went on  0:00:02.668940
 to devise  0:00:02.687399
the posi drive 0:00:02.706255
e screw 0:00:02.737116
 which  0:00:02.761253
 differs from the ph 0:00:02.783470
hilips  0:00:02.801990
 in that it is de 0:00:02.820875
signed to accomo 0:00:02.841162
dae greaterd t 0:00:02.860114
ork than the phi 0:00:02.878703
lips 0:00:02.897341
 0:00:02.915621
 and image a  0:00:02.934287
companied this arti 0:00:02.952481
icol  0:00:02.971039
 caption 0:00:02.990690
 philips  0:00:03.010994
 screw head 0:00:03.029708
 0:00:03.049006
 0:00:03.067719
 the followin 0:00:03.086644
g is an infu bo 0:00:03.105915
x which ac 0:00:03.124921
ccompanies this ar 0:00:03.143613
rticle 0:00:03.162389
 in  0:00:03.180519
fu box 0:00:03.199188
 part o thes  0:00:03.217988
 series un 0:00:03.236804
screw drive 0:00:03.255463
 types 0:00:03.273704
 0:00:03.292290
 slaughted 0:00:03.310921
 commonl 0:00:03.329144
y eroneou 0:00:03.347206
usly fla 0:00:03.366471
athead 0:00:03.386305
 0:00:03.405026
 phylips 0:00:03.423253
 cross he 0:00:03.445383
ad 0:00:03.466505
 0:00:03.484779
 pasierive 0:00:03.504122
 super  0:00:03.522533
drive 0:00:03.541154
 0:00:03.560328
 tokgs 0:00:03.578647
 0:00:03.599285
 ha 0:00:03.618935
cx a 0:00:03.637462
len 0:00:03.656392
 0:00:03.675186
 roberts son 0:00:03.694608
 0:00:03.713174
  0:00:03.738853
try wing 0:00:03.761281
 0:00:03.783589
 tark 0:00:03.802532
 set 0:00:03.821357
 0:00:03.840611
 span er head 0:00:03.859893
 0:00:03.878397
 0:00:03.896426
 triple square 0:00:03.915424
e ex 0:00:03.934438
sy nd 0:00:03.952991
 0:00:03.971123
 ot 0:00:03.991752
hers 0:00:04.011104
 polly drives 0:00:04.030115
 sp 0:00:04.048891
linmde drive 0:00:04.067864
 double 0:00:04.086988
e hacks 0:00:04.106561
 0:00:04.125026
 many images ac 0:00:04.143887
ccompanyed this in 0:00:04.162591
pu box 0:00:04.181225
 this 0:00:04.200282
 page was last 0:00:04.219203
t modified 0:00:04.237694
 on the ninth of va 0:00:04.258145
april two  0:00:04.276569
 thousand aeight 0:00:04.294845
 at s 0:00:04.313113
seventeen o 0:00:04.332516
 for 0:00:04.380070
 0:00:04.416976
 all te 0:00:04.438166
xt as avaivlable 0:00:04.457145
 under the term 0:00:04.475252
ms of thei ganew 0:00:04.494399
 free document 0:00:04.512919
tation licens 0:00:04.531204
 sea 0:00:04.552007
 copyrites  0:00:04.570451
 for details 0:00:04.588813
 0:00:04.608612
 wichpedia 0:00:04.627385
 is aregister 0:00:04.646055
n trade mark  0:00:04.682055
 of the wikie mmede 0:00:04.709029
ea foundation 0:00:04.730148
 incorporated 0:00:04.749153
 a  0:00:04.767656
 eu as registrud 0:00:04.786193
d fival 0:00:04.806515
 one sea  0:00:04.839423
 three  0:00:04.877059
 tax theductable 0:00:04.903783
 non profhet c 0:00:04.928306
harity 0:00:04.951226
  0:00:04.971586
this sound fi 0:00:04.990772
le and all 0:00:05.009120
 text in the artic 0:00:05.027202
cle or li 0:00:05.046101
cense under  0:00:05.065126
 the thenew fre 0:00:05.084226
e documentation 0:00:05.102388
n license 0:00:05.122325
 availabl 0:00:05.146357
le at  0:00:05.168419
oubl you doubleyou  0:00:05.188388
 dw do 0:00:05.206907
t g 0:00:05.225644
 and you 0:00:05.244077
u dot 0:00:05.262366
 horg 0:00:05.280884
 slash  0:00:05.326040
 cope left 0:00:05.370482
 slash 0:00:05.397159
 f dee 0:00:05.419872
d el 0:00:05.441224
 dout each t 0:00:05.460983
ea m l 0:00:05.479195
 0:00:05.497677
 0:00:05.516693

Inference End: Avg CPU% 44.149632352941204