Created
January 31, 2014 00:57
-
-
Save schakrava/8724673 to your computer and use it in GitHub Desktop.
Small file and write intensive workload
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os | |
import sys | |
import subprocess | |
from os.path import join | |
import time | |
# External tools driven by this workload.
DD = '/usr/bin/dd'
RCLI = '/opt/rock-dep/bin/rcli'

# Number of small (sf), medium (mf) and large (lf) files created per stride.
# The second number in each trailing comment is presumably a reduced value
# for quick trial runs -- confirm before relying on it.
sf_per_stride = 20000  # prod = 20000 # 100
mf_per_stride = 125  # prod = 125 # 10
lf_per_stride = 25  # prod = 25 # 2

# File sizes, in KB.
sf_size = 64
mf_size = 1048576  # prod = 1048576 (1GB) # 10240
lf_size = 15728640  # prod = 15728640 (15GB) # 40960

# Block sizes, in KB.
sf_bsize = sf_size
mf_bsize = 1024
lf_bsize = 1024

# Block counts (file size / block size). Floor division keeps these as
# integers on Python 3 as well as Python 2.
sf_bcount = 1
mf_bcount = mf_size // mf_bsize  # prod = 1024
lf_bcount = lf_size // lf_bsize  # prod = 15360

num_strides = 24  # prod = 24 (totalling 12TB) # 15

# Derived totals: size (KB) and file count, per stride and for the whole run.
tsize_per_stride = ((sf_size * sf_per_stride) + (mf_size * mf_per_stride) +
                    (lf_size * lf_per_stride))
total_size = tsize_per_stride * num_strides
tcount_per_stride = sf_per_stride + mf_per_stride + lf_per_stride
total_count = tcount_per_stride * num_strides
def run_command(cmd, shell=False, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE, throw=True):
    """
    Run 'cmd' to completion and return (out, err, rc).

    cmd    -- argument list (or a string when shell=True) handed to Popen
    shell  -- passed through to subprocess.Popen
    stdout -- passed through to subprocess.Popen
    stderr -- passed through to subprocess.Popen
    throw  -- accepted but not acted on: a non-zero exit status is reported
              through 'rc' only, no exception is raised

    'out' and 'err' are the captured streams split on newlines. A stream
    that was not captured (e.g. stdout=None) yields an empty list.
    """
    # universal_newlines=True makes communicate() return text rather than
    # bytes on Python 3, so the split on '\n' below works on both 2 and 3.
    p = subprocess.Popen(cmd, shell=shell, stdout=stdout, stderr=stderr,
                         universal_newlines=True)
    out, err = p.communicate()
    # communicate() returns None for any stream that is not a pipe.
    out = out.split('\n') if out is not None else []
    err = err.split('\n') if err is not None else []
    return (out, err, p.returncode)
def create_stride(count, basedir):
    """
    Write one stride of files into 'basedir' using dd from /dev/zero.

    Files are named by consecutive numbers beginning at 'count': first the
    small files (.sf), then the medium files (.mf), then the large files
    (.lf). Returns the number following the last file written, i.e. the
    starting number for the next stride.
    """
    # One row per file class:
    # (progress message, output path template, file count, bs in KB, count)
    plan = (
        ('creating small files [%d, %d)', 'of=%s/%d.sf',
         sf_per_stride, sf_bsize, sf_bcount),
        ('creating medium files [%d, %d)', 'of=%s/%d.mf',
         mf_per_stride, mf_bsize, mf_bcount),
        ('creating large files [%d, %d)', 'of=%s/%d.lf',
         lf_per_stride, lf_bsize, lf_bcount),
    )
    for banner, of_template, nfiles, bsize, bcount in plan:
        end = count + nfiles
        print(banner % (count, end))
        for fileno in range(count, end):
            run_command([DD, 'if=/dev/zero',
                         of_template % (basedir, fileno),
                         'bs=%sk' % bsize, 'count=%d' % bcount])
        count = end
    return count
def delete_stride(count, basedir):
    """
    Delete part of the stride whose first file is numbered 'count'.

    Removes the first half of the stride's small files (.sf) and the first
    quarter of its medium files (.mf) from 'basedir'. Large files are left
    alone. Nothing is returned.
    """
    # Floor division: range() needs integers on Python 3.
    sf_doomed = sf_per_stride // 2
    mf_doomed = mf_per_stride // 4

    print('deleting small files [%d, %d)' % (count, count + sf_doomed))
    for fileno in range(count, count + sf_doomed):
        run_command(['/bin/rm', '-f', '%s/%d.sf' % (basedir, fileno)])

    # create_stride numbers the medium files after ALL of the stride's small
    # files, so they start at count + sf_per_stride -- not where the small
    # file deletion happened to stop. Starting anywhere else makes
    # 'rm -f' quietly target names that do not exist.
    mf_start = count + sf_per_stride
    print('deleting medium files [%d, %d)' % (mf_start, mf_start + mf_doomed))
    for fileno in range(mf_start, mf_start + mf_doomed):
        run_command(['/bin/rm', '-f', '%s/%d.mf' % (basedir, fileno)])
def create_files(num_strides, basedir, share_name):
    """
    Create 'num_strides' strides of files, one stride at a time.

    For every stride after the first, part of the previous stride is deleted
    before the new one is written. After each stride a snapshot named
    'stride-<i>' is requested for share 'share_name' through the rcli tool.
    Progress and timing are printed along the way.

    num_strides -- number of strides to create
    basedir     -- existing directory under which stride-0 is created
    share_name  -- share name passed to rcli when adding snapshots
    """
    count = 0
    prev_count = 0
    t0 = time.time()
    for i in range(num_strides):
        bt = time.time()
        if i > 0:
            # basedir still names the previous stride's directory here, which
            # is where the files being deleted live.
            delete_stride(prev_count, basedir)
        # NOTE: basedir is extended on every iteration, so each stride
        # directory is created inside the previous one
        # (stride-0/stride-1/...). delete_stride above relies on this.
        basedir = join(basedir, 'stride-%d' % i)
        os.mkdir(basedir)
        prev_count = count
        count = create_stride(count, basedir)
        at = time.time()
        print('total size = %dKB stride size = %dKB total count = %d '
              'stride_count = %d stride dir = %s. duration = %f' %
              ((tsize_per_stride * (i + 1)), tsize_per_stride, count,
               tcount_per_stride, basedir, at - bt))
        print('taking snapshot -- stride-%d' % i)
        run_command([RCLI, 'shares', 'share', share_name, 'snapshot', 'add',
                     'stride-%d' % i])
    t1 = time.time()
    # Derive the total from the num_strides argument; the module-level
    # total_size is fixed to the module-level stride count and would be
    # wrong whenever a different value is passed in.
    print('total size = %dKB number of files = %d. strides = %d '
          'count per stride = %d. total_time = %f' %
          (tsize_per_stride * num_strides, count, num_strides,
           sf_per_stride + mf_per_stride + lf_per_stride, t1 - t0))
def main(basedir, share_name):
    """
    Run the whole workload: create the module-level 'num_strides' strides
    under 'basedir', snapshotting share 'share_name' after each one.

    The number and size of the files in a stride are set by the
    module-level *_per_stride and *_size constants.
    """
    create_files(num_strides=num_strides, basedir=basedir,
                 share_name=share_name)
if __name__ == '__main__':
    # Print a summary of what the run will create and delete, then run it.
    # Usage: <script> <basedir> <share_name>
    files_per_stride = sf_per_stride + mf_per_stride + lf_per_stride
    print('Number of files per stride: %d(sf) %d(mf) %d(lf) %d(total)' %
          (sf_per_stride, mf_per_stride, lf_per_stride, files_per_stride))
    print('Size of files in KB: %d(sf) %d(mf) %d(lf) %d(total)' %
          (sf_size, mf_size, lf_size, sf_size + mf_size + lf_size))
    print('Number of strides: %d' % num_strides)
    print('Total number of files created: %d(sf) %d(mf) %d(lf) %d(total)' %
          (sf_per_stride * num_strides, mf_per_stride * num_strides,
           lf_per_stride * num_strides, files_per_stride * num_strides))

    sf_size_per_stride = sf_size * sf_per_stride
    mf_size_per_stride = mf_size * mf_per_stride
    lf_size_per_stride = lf_size * lf_per_stride
    total_size_per_stride = (sf_size_per_stride + mf_size_per_stride +
                             lf_size_per_stride)
    print('Size per stride: %d(sf) %d(mf) %d(lf) %d(total)' %
          (sf_size_per_stride, mf_size_per_stride, lf_size_per_stride,
           total_size_per_stride))
    print('Total size of files created: %d(sf) %d(mf) %d(lf) %d(total)' %
          (sf_size_per_stride * num_strides,
           mf_size_per_stride * num_strides,
           lf_size_per_stride * num_strides,
           total_size_per_stride * num_strides))

    # Floor division: only whole files are deleted. With '/' on Python 3
    # these become fractions and the size totals below come out too large.
    sf_deleted = sf_per_stride // 2
    mf_deleted = mf_per_stride // 4
    print('Number of files deleted from previous stride: %d(sf) %d(mf) '
          '0(lf) %d(total)' % (sf_deleted, mf_deleted,
                               sf_deleted + mf_deleted))
    sf_size_deleted = sf_deleted * sf_size
    mf_size_deleted = mf_deleted * mf_size
    # Deletion happens before every stride except the first.
    total_sf_size_deleted = sf_size_deleted * (num_strides - 1)
    total_mf_size_deleted = mf_size_deleted * (num_strides - 1)
    print('Deleted size from previous stride: %d(sf) %d(mf) 0(lf) %d(total) '
          % (sf_size_deleted, mf_size_deleted,
             sf_size_deleted + mf_size_deleted))
    print('Total size of files deleted: %d(sf) %d(mf) 0(lf) %d(total)' %
          (total_sf_size_deleted, total_mf_size_deleted,
           total_sf_size_deleted + total_mf_size_deleted))

    # Fail with a usage message rather than a bare IndexError when the
    # two required arguments are missing.
    if len(sys.argv) < 3:
        sys.exit('Usage: %s <basedir> <share_name>' % sys.argv[0])
    main(sys.argv[1], sys.argv[2])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment