Zero3 NCNN Bench

The max clock seems to be 1416000, as the board does tend to get a bit warm with a tiny Pi 3-style heatsink.
I just stacked two 20mm copper shims so I could fit a bigger heatsink on top, so there is no throttling.
The mat/mul vector instructions of the ARMv8.2 Cortex-A55 give big improvements over the Raspberry Pi Zero 2, and over quite a few of the Pi 4 benchmarks where the newer ML-based vector math is optimised.

Radxa Zero 3W 2GB

rock@radxa-zero3:~/ncnn/benchmark$ ../build/benchmark/benchncnn 10 4 0 -1 1
loop_count = 10
num_threads = 4
powersave = 0
gpu_device = -1
cooling_down = 1
          squeezenet  min =   32.62  max =   33.27  avg =   32.86
     squeezenet_int8  min =   33.06  max =   34.19  avg =   33.71
           mobilenet  min =   44.09  max =   44.98  avg =   44.41
      mobilenet_int8  min =   35.01  max =   35.75  avg =   35.38
        mobilenet_v2  min =   41.27  max =   45.87  avg =   42.16
        mobilenet_v3  min =   33.76  max =   34.48  avg =   34.08
          shufflenet  min =   27.86  max =   28.53  avg =   28.23
       shufflenet_v2  min =   22.50  max =   23.32  avg =   22.78
             mnasnet  min =   36.62  max =   37.61  avg =   37.28
     proxylessnasnet  min =   42.51  max =   43.19  avg =   42.86
     efficientnet_b0  min =   52.00  max =   53.04  avg =   52.40
   efficientnetv2_b0  min =   66.69  max =   67.73  avg =   67.19
        regnety_400m  min =   60.18  max =   60.94  avg =   60.62
           blazeface  min =    8.14  max =    8.60  avg =    8.32
           googlenet  min =  102.56  max =  105.10  avg =  103.64
      googlenet_int8  min =  102.81  max =  106.86  avg =  104.40
            resnet18  min =   80.75  max =   81.84  avg =   81.32
       resnet18_int8  min =   88.82  max =   90.43  avg =   89.62
             alexnet  min =   67.60  max =   68.85  avg =   68.02
               vgg16  min =  451.53  max =  459.05  avg =  454.25
          vgg16_int8  min =  597.27  max =  699.47  avg =  613.00
            resnet50  min =  192.75  max =  249.05  avg =  216.01
       resnet50_int8  min =  179.77  max =  180.89  avg =  180.46
      squeezenet_ssd  min =  102.21  max =  104.11  avg =  103.23
 squeezenet_ssd_int8  min =  102.31  max =  105.50  avg =  103.73
       mobilenet_ssd  min =   97.58  max =  100.01  avg =   98.70
  mobilenet_ssd_int8  min =   81.87  max =   82.82  avg =   82.37
      mobilenet_yolo  min =  214.53  max =  227.54  avg =  218.71
  mobilenetv2_yolov3  min =  142.61  max =  152.79  avg =  143.95
         yolov4-tiny  min =  179.44  max =  190.21  avg =  184.88
           nanodet_m  min =   66.26  max =   67.77  avg =   66.83
    yolo-fastest-1.1  min =   33.51  max =   34.10  avg =   33.84
      yolo-fastestv2  min =   27.33  max =   29.16  avg =   27.90
  vision_transformer  min = 3088.27  max = 3234.97  avg = 3169.80
          FastestDet  min =   26.97  max =   27.94  avg =   27.43
rock@radxa-zero3:~/ncnn/benchmark$ ../build/benchmark/benchncnn 10 1 0 -1 1
loop_count = 10
num_threads = 1
powersave = 0
gpu_device = -1
cooling_down = 1
          squeezenet  min =   62.38  max =   63.35  avg =   62.85
     squeezenet_int8  min =   63.26  max =   64.41  avg =   63.91
           mobilenet  min =   87.19  max =   87.65  avg =   87.40
      mobilenet_int8  min =   77.29  max =   78.28  avg =   77.83
        mobilenet_v2  min =   67.45  max =   68.05  avg =   67.71
        mobilenet_v3  min =   60.53  max =   61.38  avg =   60.90
          shufflenet  min =   44.95  max =   45.45  avg =   45.22
       shufflenet_v2  min =   43.29  max =   44.48  avg =   43.91
             mnasnet  min =   69.96  max =   70.33  avg =   70.14
     proxylessnasnet  min =   91.81  max =   92.70  avg =   92.11
     efficientnet_b0  min =  106.36  max =  107.38  avg =  106.82
   efficientnetv2_b0  min =  134.24  max =  135.78  avg =  135.15
        regnety_400m  min =   81.71  max =   81.91  avg =   81.81
           blazeface  min =   17.85  max =   18.10  avg =   17.95
           googlenet  min =  229.90  max =  231.25  avg =  230.52
      googlenet_int8  min =  233.97  max =  236.69  avg =  235.35
            resnet18  min =  168.95  max =  170.39  avg =  169.71
       resnet18_int8  min =  187.12  max =  187.94  avg =  187.47
             alexnet  min =  150.03  max =  150.72  avg =  150.46
               vgg16  min =  971.49  max = 1112.57  avg = 1018.23
          vgg16_int8  min = 1416.14  max = 1424.10  avg = 1420.30
            resnet50  min =  417.94  max =  419.63  avg =  418.71
       resnet50_int8  min =  409.01  max =  410.70  avg =  409.87
      squeezenet_ssd  min =  200.16  max =  201.57  avg =  200.92
 squeezenet_ssd_int8  min =  192.35  max =  195.82  avg =  194.22
       mobilenet_ssd  min =  192.56  max =  194.34  avg =  193.47
  mobilenet_ssd_int8  min =  156.52  max =  159.55  avg =  158.48
      mobilenet_yolo  min =  428.72  max =  433.26  avg =  430.08
  mobilenetv2_yolov3  min =  227.89  max =  261.09  avg =  244.74
         yolov4-tiny  min =  376.14  max =  379.97  avg =  378.31
           nanodet_m  min =  119.54  max =  120.77  avg =  120.15
    yolo-fastest-1.1  min =   54.06  max =   56.77  avg =   54.53
      yolo-fastestv2  min =   47.48  max =   48.34  avg =   47.95
  vision_transformer  min = 8531.09  max = 9531.40  avg = 9159.31
          FastestDet  min =   47.06  max =   48.65  avg =   47.85

Raspberry Pi Zero 2 W Broadcom BCM2710A1, Cortex-A53 (ARMv8) (1.0GHz x 4)

loop_count = 8
num_threads = 4
powersave = 0
gpu_device = -1
cooling_down = 1
          squeezenet  min =  119.52  max =  120.29  avg =  119.93
     squeezenet_int8  min =   96.32  max =   96.96  avg =   96.55
           mobilenet  min =  162.60  max =  165.49  avg =  163.19
      mobilenet_int8  min =   90.78  max =   91.39  avg =   91.03
        mobilenet_v2  min =  145.71  max =  148.83  avg =  147.39
        mobilenet_v3  min =  113.89  max =  151.95  avg =  119.04
          shufflenet  min =   72.72  max =   73.27  avg =   72.96
       shufflenet_v2  min =   63.64  max =   64.50  avg =   64.13
             mnasnet  min =  126.07  max =  126.93  avg =  126.53
     proxylessnasnet  min =  139.90  max =  140.84  avg =  140.35
     efficientnet_b0  min =  201.88  max =  202.55  avg =  202.14
   efficientnetv2_b0  min =  227.22  max =  228.84  avg =  228.09
        regnety_400m  min =  156.49  max =  157.47  avg =  156.96
           blazeface  min =   22.79  max =   23.28  avg =   23.10
           googlenet  min =  323.74  max =  324.90  avg =  324.45
      googlenet_int8  min =  250.86  max =  252.82  avg =  251.63
            resnet18  min =  351.37  max =  355.67  avg =  353.45
       resnet18_int8  min =  194.83  max =  196.68  avg =  195.51
             alexnet  min =  271.18  max =  273.53  avg =  272.18
            resnet50  min =  777.44  max =  797.47  avg =  782.63
       resnet50_int8  min =  496.78  max =  498.86  avg =  497.57
      squeezenet_ssd  min =  376.10  max =  382.41  avg =  379.13
 squeezenet_ssd_int8  min =  255.99  max =  257.57  avg =  256.78
       mobilenet_ssd  min =  338.64  max =  339.93  avg =  339.50
  mobilenet_ssd_int8  min =  190.24  max =  190.68  avg =  190.48
      mobilenet_yolo  min =  746.83  max =  748.14  avg =  747.53
  mobilenetv2_yolov3  min =  487.99  max =  491.18  avg =  489.37
         yolov4-tiny  min =  644.73  max =  652.24  avg =  646.64
           nanodet_m  min =  165.27  max =  167.12  avg =  166.27
    yolo-fastest-1.1  min =   98.74  max =  100.02  avg =   99.17
      yolo-fastestv2  min =   80.52  max =   81.86  avg =   81.29

loop_count = 8
num_threads = 1
powersave = 0
gpu_device = -1
cooling_down = 1
          squeezenet  min =  240.53  max =  241.07  avg =  240.77
     squeezenet_int8  min =  212.63  max =  213.23  avg =  212.94
           mobilenet  min =  393.79  max =  394.04  avg =  393.94
      mobilenet_int8  min =  286.58  max =  286.95  avg =  286.75
        mobilenet_v2  min =  273.97  max =  274.51  avg =  274.23
        mobilenet_v3  min =  233.77  max =  234.59  avg =  234.20
          shufflenet  min =  133.05  max =  133.36  avg =  133.23
       shufflenet_v2  min =  128.86  max =  129.47  avg =  129.18
             mnasnet  min =  265.70  max =  266.17  avg =  265.93
     proxylessnasnet  min =  329.78  max =  330.54  avg =  330.13
     efficientnet_b0  min =  518.42  max =  519.38  avg =  519.00
   efficientnetv2_b0  min =  594.37  max =  595.17  avg =  594.74
        regnety_400m  min =  329.53  max =  330.44  avg =  329.87
           blazeface  min =   42.24  max =   45.56  avg =   43.96
           googlenet  min =  780.05  max =  780.63  avg =  780.39
      googlenet_int8  min =  663.83  max =  664.43  avg =  664.15
            resnet18  min =  653.62  max =  657.59  avg =  654.69
       resnet18_int8  min =  479.03  max =  479.72  avg =  479.40
             alexnet  min =  687.99  max =  690.34  avg =  689.15
            resnet50  min = 1800.97  max = 1806.11  avg = 1802.79
       resnet50_int8  min = 1311.68  max = 1314.56  avg = 1313.15
      squeezenet_ssd  min =  563.63  max =  565.57  avg =  564.44
 squeezenet_ssd_int8  min =  481.24  max =  483.97  avg =  482.20
       mobilenet_ssd  min =  799.21  max =  829.10  avg =  803.56
  mobilenet_ssd_int8  min =  568.11  max =  568.88  avg =  568.42
      mobilenet_yolo  min = 1815.60  max = 1816.44  avg = 1815.93
  mobilenetv2_yolov3  min =  951.34  max =  952.15  avg =  951.72
         yolov4-tiny  min = 1258.21  max = 1259.49  avg = 1258.66
           nanodet_m  min =  301.04  max =  304.09  avg =  301.70
    yolo-fastest-1.1  min =  155.04  max =  155.98  avg =  155.53
      yolo-fastestv2  min =  126.77  max =  127.40  avg =  127.05

Raspberry Pi 4 Model B Broadcom BCM2711B0, Cortex-A72 (ARMv8) (1.8GHz x 4)

pi@raspberrypi:~/ncnn/build/benchmark $ ./benchncnn 10 4 0 -1 1
loop_count = 10
num_threads = 4
powersave = 0
gpu_device = -1
cooling_down = 1
          squeezenet  min =   46.28  max =   46.91  avg =   46.65
     squeezenet_int8  min =   42.18  max =   44.98  avg =   42.59
           mobilenet  min =   60.74  max =   61.79  avg =   61.17
      mobilenet_int8  min =   34.19  max =   34.55  avg =   34.37
        mobilenet_v2  min =   61.63  max =   62.02  avg =   61.88
        mobilenet_v3  min =   47.08  max =   48.40  avg =   47.53
          shufflenet  min =   32.91  max =   33.30  avg =   33.09
       shufflenet_v2  min =   24.37  max =   24.73  avg =   24.56
             mnasnet  min =   51.80  max =   52.14  avg =   51.98
     proxylessnasnet  min =   53.02  max =   53.58  avg =   53.32
     efficientnet_b0  min =   73.92  max =   74.44  avg =   74.19
   efficientnetv2_b0  min =   79.10  max =   79.60  avg =   79.34
        regnety_400m  min =   65.27  max =   66.12  avg =   65.70
           blazeface  min =    8.62  max =    8.75  avg =    8.69
           googlenet  min =  113.74  max =  115.14  avg =  114.35
      googlenet_int8  min =  100.87  max =  101.71  avg =  101.25
            resnet18  min =  122.27  max =  125.39  avg =  123.12
       resnet18_int8  min =   82.19  max =   94.12  avg =   83.92
             alexnet  min =   75.75  max =   78.08  avg =   76.40
               vgg16  min =  541.66  max =  552.56  avg =  547.09
          vgg16_int8  min =  391.44  max =  395.73  avg =  394.23
            resnet50  min =  261.90  max =  263.91  avg =  262.83
       resnet50_int8  min =  195.60  max =  198.08  avg =  196.65
      squeezenet_ssd  min =  127.01  max =  129.85  avg =  127.61
 squeezenet_ssd_int8  min =  104.98  max =  107.67  avg =  105.47
       mobilenet_ssd  min =  120.43  max =  123.28  avg =  121.46
  mobilenet_ssd_int8  min =   70.70  max =   72.85  avg =   71.14
      mobilenet_yolo  min =  270.89  max =  273.42  avg =  272.33
  mobilenetv2_yolov3  min =  183.85  max =  185.73  avg =  184.88
         yolov4-tiny  min =  205.95  max =  209.90  avg =  207.22
           nanodet_m  min =   68.08  max =   68.69  avg =   68.38
    yolo-fastest-1.1  min =   47.97  max =   48.20  avg =   48.06
      yolo-fastestv2  min =   37.17  max =   37.69  avg =   37.47
  vision_transformer  min = 1872.31  max = 1964.95  avg = 1909.21
          FastestDet  min =   38.39  max =   39.17  avg =   38.69

pi@raspberrypi:~/ncnn/build/benchmark $ ./benchncnn 10 1 0 -1 1
loop_count = 10
num_threads = 1
powersave = 0
gpu_device = -1
cooling_down = 1
          squeezenet  min =   73.35  max =   75.10  avg =   73.96
     squeezenet_int8  min =   69.17  max =   69.66  avg =   69.42
           mobilenet  min =  123.76  max =  125.35  avg =  124.32
      mobilenet_int8  min =   84.66  max =   85.24  avg =   84.82
        mobilenet_v2  min =   92.98  max =   94.05  avg =   93.48
        mobilenet_v3  min =   72.48  max =   73.14  avg =   72.81
          shufflenet  min =   47.17  max =   47.83  avg =   47.51
       shufflenet_v2  min =   41.62  max =   42.60  avg =   42.12
             mnasnet  min =   83.60  max =   84.35  avg =   83.98
     proxylessnasnet  min =   98.48  max =   99.33  avg =   98.78
     efficientnet_b0  min =  129.45  max =  130.02  avg =  129.73
   efficientnetv2_b0  min =  155.06  max =  156.70  avg =  155.76
        regnety_400m  min =  105.39  max =  106.03  avg =  105.70
           blazeface  min =   12.54  max =   12.84  avg =   12.65
           googlenet  min =  235.38  max =  236.34  avg =  235.94
      googlenet_int8  min =  209.63  max =  210.39  avg =  210.00
            resnet18  min =  190.80  max =  191.43  avg =  191.10
       resnet18_int8  min =  157.92  max =  158.97  avg =  158.50
             alexnet  min =  139.34  max =  139.44  avg =  139.40
               vgg16  min = 1066.58  max = 1079.30  avg = 1071.85
          vgg16_int8  min =  866.15  max =  873.75  avg =  869.84
            resnet50  min =  533.15  max =  535.12  avg =  534.11
       resnet50_int8  min =  423.72  max =  424.24  avg =  423.96
      squeezenet_ssd  min =  178.90  max =  179.53  avg =  179.30
 squeezenet_ssd_int8  min =  157.05  max =  159.06  avg =  157.89
       mobilenet_ssd  min =  250.71  max =  251.26  avg =  251.00
  mobilenet_ssd_int8  min =  170.21  max =  170.96  avg =  170.56
      mobilenet_yolo  min =  557.48  max =  560.08  avg =  558.80
  mobilenetv2_yolov3  min =  301.60  max =  307.98  avg =  306.52
         yolov4-tiny  min =  370.55  max =  375.69  avg =  372.99
           nanodet_m  min =  103.05  max =  103.74  avg =  103.45
    yolo-fastest-1.1  min =   56.58  max =   57.44  avg =   57.01
      yolo-fastestv2  min =   46.69  max =   47.34  avg =   47.03
  vision_transformer  min = 6605.19  max = 6606.66  avg = 6605.73
          FastestDet  min =   52.11  max =   52.97  avg =   52.61