Last active
May 27, 2016 13:33
-
-
Save ogrisel/7fa242ea267028828d03dac887cf8e71 to your computer and use it in GitHub Desktop.
Utility script to time the effective memory bandwidth of CPUs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
model name : Intel(R) Xeon(R) CPU @ 2.30GHz | |
Loading arrays to memory | |
Loading speed: 0.670GB/s | |
timing bandwidth for sequential memory access | |
bandwidth: 9.0 GB/s | |
n_workers=2 (threads) | |
bandwidth: 17.4 GB/s (1.9x) | |
n_workers=4 (threads) | |
bandwidth: 29.5 GB/s (3.3x) | |
n_workers=8 (threads) | |
bandwidth: 36.6 GB/s (4.1x) | |
n_workers=16 (threads) | |
bandwidth: 44.8 GB/s (5.0x) | |
n_workers=32 (threads) | |
bandwidth: 60.4 GB/s (6.7x) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
model name : Intel(R) Core(TM) i7-6560U CPU @ 2.20GHz | |
Loading arrays to memory | |
Loading speed: 0.530GB/s | |
timing bandwidth for sequential memory access | |
bandwidth: 5.8 GB/s | |
n_workers=2 (threads) | |
bandwidth: 8.5 GB/s (1.5x) | |
n_workers=4 (threads) | |
bandwidth: 8.3 GB/s (1.4x) | |
n_workers=8 (threads) | |
bandwidth: 7.9 GB/s (1.4x) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from time import time | |
import os | |
import os.path as op | |
import numpy as np | |
from concurrent.futures import ThreadPoolExecutor | |
def prepare_data_files(n=8):
    """Make sure ``n`` random data files exist and return their names.

    Files are named ``random_data_<i>.npy`` (``i`` in ``range(n)``) and live
    in the current working directory. A file that already exists is reused
    untouched; a missing one is filled with ``int(1e9 / 8)`` samples drawn
    from ``np.random.randn`` (about 1 GB of data) and saved in ``.npy``
    format.

    Parameters
    ----------
    n : int
        Number of data files to prepare.

    Returns
    -------
    list of str
        The file names, in index order.
    """
    fnames = []
    for i in range(n):
        fname = 'random_data_%d.npy' % i
        if not op.exists(fname):
            print('generating %s' % fname)
            a = np.random.randn(int(1e9 / 8))  # 1GB
            # Write under a temporary name and rename once complete, so an
            # interrupted run never leaves a truncated file that the
            # existence check above would later accept as valid data.
            tmp_fname = fname + '.tmp'
            try:
                # Pass an open file object: given a path, np.save would
                # append another '.npy' suffix to the temporary name.
                with open(tmp_fname, 'wb') as f:
                    np.save(f, a)
                os.replace(tmp_fname, fname)
            finally:
                # Only still present if saving or renaming failed.
                if op.exists(tmp_fname):
                    os.remove(tmp_fname)
        fnames.append(fname)
    return fnames
def _timed_max(map_func, arrays):
    """Return the seconds taken to apply ``np.max`` to every array.

    ``map_func`` is a ``map``-compatible callable (the builtin ``map`` or an
    executor's ``map`` method); its results are consumed and discarded.
    """
    # perf_counter is monotonic and high resolution, unlike time.time(),
    # so the measured interval cannot be skewed by system clock changes.
    from time import perf_counter

    t0 = perf_counter()
    list(map_func(np.max, arrays))
    return perf_counter() - t0


def run_bench_bandwidth(data_size_gb=8):
    """Print memory bandwidth figures for 1 and more worker threads.

    Steps:

    1. Print the CPU model name by grepping ``/proc/cpuinfo`` through the
       shell.
    2. Load ``data_size_gb`` data files (from ``prepare_data_files``) into
       memory and print the loading speed.
    3. Time ``np.max`` over all arrays sequentially, then again through a
       ``ThreadPoolExecutor`` with 2, 4, 8, ... workers while the worker
       count does not exceed ``data_size_gb``; print each bandwidth and
       its ratio to the sequential one.

    Bandwidths are computed as ``data_size_gb / elapsed_seconds``, i.e. on
    the assumption that each data file holds 1 GB.

    Parameters
    ----------
    data_size_gb : int
        Number of data files to load and scan.
    """
    from time import perf_counter

    os.system("cat /proc/cpuinfo | grep 'model name' | uniq")
    fnames = prepare_data_files(n=data_size_gb)

    print('Loading arrays to memory')
    t0 = perf_counter()
    arrays = [np.load(fname) for fname in fnames]
    duration = perf_counter() - t0
    print("Loading speed: %0.3fGB/s\n" % (data_size_gb / duration))

    # sequential access
    print("timing bandwidth for sequential memory access")
    sequential_access_time = _timed_max(map, arrays)
    sequential_bandwidth = data_size_gb / sequential_access_time
    print("bandwidth: %0.1f GB/s" % sequential_bandwidth)
    print()

    n_workers = 2
    while n_workers <= data_size_gb:
        print('n_workers=%d (threads)' % n_workers)
        with ThreadPoolExecutor(n_workers) as e:
            # Pool start-up and shutdown stay outside the timed region;
            # only the mapped np.max calls are measured.
            access_time = _timed_max(e.map, arrays)
        bandwidth = data_size_gb / access_time
        print("bandwidth: %0.1f GB/s (%0.1fx)" %
              (bandwidth, bandwidth / sequential_bandwidth))
        n_workers *= 2
        print()
if __name__ == "__main__":
    import sys

    # Exactly one command-line argument is read as the data size in GB;
    # any other argument count falls back to the default of 8.
    cli_args = sys.argv[1:]
    n_gb = int(cli_args[0]) if len(cli_args) == 1 else 8
    run_bench_bandwidth(n_gb)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment