Skip to content

Instantly share code, notes, and snippets.

@sarjsheff
Last active November 9, 2022 06:26
Show Gist options
  • Save sarjsheff/8b36bc8dafe2d2887bbf328ba39164b7 to your computer and use it in GitHub Desktop.
Save sarjsheff/8b36bc8dafe2d2887bbf328ba39164b7 to your computer and use it in GitHub Desktop.
ncnn

Apple m1 pro

4 threads GPU

# cd ../benchmark
# ../build/benchmark/benchncnn 10 4 0 0
[0 Apple M1 Pro]  queueC=0[1]  queueG=0[1]  queueT=0[1]
[0 Apple M1 Pro]  bugsbn1=0  bugbilz=0  bugcopc=0  bugihfa=0
[0 Apple M1 Pro]  fp16-p/s/a=1/1/1  int8-p/s/a=1/1/1
[0 Apple M1 Pro]  subgroup=32  basic=1  vote=1  ballot=1  shuffle=1
loop_count = 10
num_threads = 4
powersave = 0
gpu_device = 0
cooling_down = 1
          squeezenet  min =    1.59  max =    1.88  avg =    1.74
     squeezenet_int8  min =    3.36  max =    3.92  avg =    3.57
           mobilenet  min =    3.12  max =    3.30  avg =    3.23
      mobilenet_int8  min =    4.13  max =    4.60  avg =    4.27
        mobilenet_v2  min =    2.76  max =    5.22  avg =    3.72
        mobilenet_v3  min =    6.68  max =    8.25  avg =    7.65
          shufflenet  min =    3.43  max =    6.32  avg =    4.21
       shufflenet_v2  min =    1.92  max =    2.29  avg =    2.10
             mnasnet  min =    6.59  max =   10.15  avg =    8.15
     proxylessnasnet  min =    2.72  max =    3.55  avg =    3.16
     efficientnet_b0  min =    4.33  max =    6.37  avg =    5.34
   efficientnetv2_b0  min =   10.24  max =   10.63  avg =   10.47
        regnety_400m  min =    7.71  max =   10.89  avg =    9.77
           blazeface  min =    2.20  max =    3.06  avg =    2.39
           googlenet  min =    4.51  max =    6.63  avg =    5.46
      googlenet_int8  min =   14.34  max =   15.30  avg =   14.83
            resnet18  min =    3.15  max =    4.55  avg =    3.77
       resnet18_int8  min =   14.25  max =   15.19  avg =   14.50
             alexnet  min =    2.50  max =    3.03  avg =    2.79
               vgg16  min =    6.65  max =    6.84  avg =    6.73
          vgg16_int8  min =   97.48  max =  115.55  avg =  105.97
            resnet50  min =    5.70  max =    7.36  avg =    6.52
       resnet50_int8  min =   28.04  max =   31.20  avg =   30.28
      squeezenet_ssd  min =    7.26  max =   10.83  avg =    8.67
 squeezenet_ssd_int8  min =   14.31  max =   16.67  avg =   15.46
       mobilenet_ssd  min =    4.40  max =    5.79  avg =    5.09
  mobilenet_ssd_int8  min =    9.84  max =   10.32  avg =   10.08
      mobilenet_yolo  min =    4.38  max =    7.41  avg =    6.25
  mobilenetv2_yolov3  min =    6.48  max =   11.89  avg =    9.48
         yolov4-tiny  min =    6.87  max =   13.31  avg =    9.38
           nanodet_m  min =    5.64  max =    7.08  avg =    6.31
    yolo-fastest-1.1  min =    4.74  max =    6.06  avg =    5.16
      yolo-fastestv2  min =    3.86  max =    4.09  avg =    3.97
  vision_transformer  min =  458.92  max =  482.61  avg =  470.53
          FastestDet  min =    3.86  max =    4.35  avg =    4.02

10 threads GPU

cd ../benchmark
../build/benchmark/benchncnn 10 10 0 0
[0 Apple M1 Pro]  queueC=0[1]  queueG=0[1]  queueT=0[1]
[0 Apple M1 Pro]  bugsbn1=0  bugbilz=114  bugcopc=0  bugihfa=0
[0 Apple M1 Pro]  fp16-p/s/a=1/1/1  int8-p/s/a=1/1/1
[0 Apple M1 Pro]  subgroup=32  basic=1  vote=1  ballot=1  shuffle=1
loop_count = 10
num_threads = 10
powersave = 0
gpu_device = 0
cooling_down = 1
          squeezenet  min =    3.43  max =    3.51  avg =    3.46
     squeezenet_int8  min =    3.36  max =    4.05  avg =    3.56
           mobilenet  min =    4.07  max =    4.70  avg =    4.39
      mobilenet_int8  min =    4.13  max =    4.57  avg =    4.21
        mobilenet_v2  min =    6.56  max =    7.91  avg =    7.04
        mobilenet_v3  min =    5.33  max =    7.55  avg =    6.21
          shufflenet  min =    2.88  max =    3.07  avg =    2.95
       shufflenet_v2  min =    3.72  max =    4.00  avg =    3.86
             mnasnet  min =    6.36  max =   11.14  avg =    7.67
     proxylessnasnet  min =    3.15  max =    4.63  avg =    3.76
     efficientnet_b0  min =    5.91  max =    7.90  avg =    6.84
   efficientnetv2_b0  min =   10.32  max =   11.59  avg =   10.56
        regnety_400m  min =    3.75  max =    5.77  avg =    4.49
           blazeface  min =    1.81  max =    2.00  avg =    1.92
           googlenet  min =    6.13  max =   12.52  avg =    9.48
      googlenet_int8  min =   14.23  max =   14.35  avg =   14.28
            resnet18  min =    3.07  max =    3.72  avg =    3.34
       resnet18_int8  min =   14.27  max =   14.36  avg =   14.31
             alexnet  min =    2.47  max =    2.98  avg =    2.81
               vgg16  min =    6.70  max =    6.81  avg =    6.76
          vgg16_int8  min =   97.64  max =  115.06  avg =  104.77
            resnet50  min =    5.11  max =    6.81  avg =    5.52
       resnet50_int8  min =   27.88  max =   28.07  avg =   27.93
      squeezenet_ssd  min =    6.44  max =    8.23  avg =    7.54
 squeezenet_ssd_int8  min =   15.50  max =   16.66  avg =   16.27
       mobilenet_ssd  min =    4.13  max =    6.75  avg =    5.08
  mobilenet_ssd_int8  min =    9.84  max =   10.36  avg =   10.16
      mobilenet_yolo  min =    5.68  max =    7.41  avg =    6.64
  mobilenetv2_yolov3  min =    7.95  max =   10.78  avg =   10.24
         yolov4-tiny  min =    6.86  max =    9.19  avg =    7.96
           nanodet_m  min =    5.38  max =    6.93  avg =    6.34
    yolo-fastest-1.1  min =    7.09  max =    7.98  avg =    7.33
      yolo-fastestv2  min =    5.03  max =    6.41  avg =    5.68
  vision_transformer  min =  464.24  max =  476.59  avg =  469.72
          FastestDet  min =    4.14  max =   12.50  avg =    5.49

10 threads CPU

cd ../benchmark
../build/benchmark/benchncnn 10 10 0 -1
loop_count = 10
num_threads = 10
powersave = 0
gpu_device = -1
cooling_down = 1
          squeezenet  min =    4.44  max =    5.35  avg =    4.67
     squeezenet_int8  min =    3.36  max =    4.58  avg =    3.81
           mobilenet  min =    8.56  max =    8.91  avg =    8.73
      mobilenet_int8  min =    4.17  max =    5.33  avg =    4.51
        mobilenet_v2  min =    5.09  max =    5.86  avg =    5.26
        mobilenet_v3  min =    4.11  max =    5.90  avg =    4.54
          shufflenet  min =    3.01  max =    4.46  avg =    3.63
       shufflenet_v2  min =    3.23  max =    4.65  avg =    3.80
             mnasnet  min =    5.47  max =    6.33  avg =    5.62
     proxylessnasnet  min =    6.69  max =    6.89  avg =    6.76
     efficientnet_b0  min =    8.21  max =    9.25  avg =    8.51
   efficientnetv2_b0  min =   16.99  max =   17.55  avg =   17.11
        regnety_400m  min =    7.40  max =    7.54  avg =    7.49
           blazeface  min =    2.27  max =    3.29  avg =    2.70
           googlenet  min =   23.31  max =   24.90  avg =   24.03
      googlenet_int8  min =   15.27  max =   15.45  avg =   15.36
            resnet18  min =   15.55  max =   15.92  avg =   15.61
       resnet18_int8  min =   14.25  max =   14.30  avg =   14.27
             alexnet  min =   28.84  max =   33.18  avg =   29.52
               vgg16  min =   70.42  max =   71.35  avg =   70.88
          vgg16_int8  min =   97.09  max =  102.97  avg =   98.20
            resnet50  min =   43.64  max =   46.31  avg =   45.95
       resnet50_int8  min =   27.77  max =   31.11  avg =   29.61
      squeezenet_ssd  min =   20.17  max =   20.80  avg =   20.29
 squeezenet_ssd_int8  min =   14.15  max =   14.54  avg =   14.31
       mobilenet_ssd  min =   19.86  max =   20.23  avg =   19.93
  mobilenet_ssd_int8  min =   10.34  max =   10.46  avg =   10.39
      mobilenet_yolo  min =   39.08  max =   39.58  avg =   39.28
  mobilenetv2_yolov3  min =   19.67  max =   19.73  avg =   19.68
         yolov4-tiny  min =   31.53  max =   32.09  avg =   31.72
           nanodet_m  min =    8.19  max =    9.54  avg =    8.50
    yolo-fastest-1.1  min =    2.80  max =    4.28  avg =    3.48
      yolo-fastestv2  min =    2.87  max =    4.19  avg =    3.38
  vision_transformer  min = 1611.28  max = 1613.80  avg = 1612.02
          FastestDet  min =    2.77  max =    3.30  avg =    3.01
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment