Created
November 9, 2019 06:46
-
-
Save jaysoncena/592ad5b12b745178f25a317e0a84971d to your computer and use it in GitHub Desktop.
Resize numpy array to a fixed size and pad with np.nan
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def load_npy_files(path, range_val=(), row_col=(0, 0)):
    """Load numbered ``.npy`` files and NaN-pad each one up to a fixed shape.

    Files are read from ``{path}/{n}.npy`` for every ``n`` in
    ``range(*range_val)``. Each array must be 2-D; arrays smaller than the
    target are padded at the bottom/right with ``np.nan``.

    Args:
        path: Directory containing the numbered ``.npy`` files.
        range_val: Arguments forwarded to ``range`` (``(stop,)``,
            ``(start, stop)`` or ``(start, stop, step)``). An empty value
            selects no files.
        row_col: Target ``(rows, cols)``. A target of 0 leaves that
            dimension unpadded and unchecked.

    Returns:
        A list of the loaded (and padded) arrays, in file order. Loading
        stops at the first array whose shape does not match ``row_col``
        after padding (e.g. it is larger than the target); the arrays
        loaded before it are still returned.

    Raises:
        ValueError: If a loaded array is not 2-D.
    """
    file_numbers = range(*range_val) if range_val else range(0)
    if not file_numbers:
        return []
    # Index the range itself so 1-element and stepped range_val values work.
    last_fnum = file_numbers[-1]
    target_rows, target_cols = row_col

    max_rows = 0
    max_cols = 0
    total_rows = 0
    dataset_list = []

    for fnum in file_numbers:
        npload = np.load(f"{path}/{fnum}.npy")
        rows, cols = npload.shape
        total_rows += rows
        max_rows = max(max_rows, rows)
        max_cols = max(max_cols, cols)

        # Clamp at zero: np.pad rejects negative widths, which would occur
        # when one dimension needs padding and the other is oversized.
        pad_row = max(target_rows - rows, 0)
        pad_col = max(target_cols - cols, 0)
        if pad_row or pad_col:
            # NOTE(review): NaN padding presumes a floating dtype -- confirm
            # behavior for integer inputs.
            npload = np.pad(
                npload,
                ((0, pad_row), (0, pad_col)),
                "constant",
                constant_values=np.nan,
            )

        # Safety net for padding bugs and for arrays larger than the target.
        shape_mismatch = any(
            want > 0 and got != want
            for got, want in zip(npload.shape, row_col)
        )
        if shape_mismatch:
            logging.error("Unexpected size of array: %s", npload.shape)
            break

        dataset_list.append(npload)

        # Progress report every 1000 files and once more on the last file.
        if fnum % 1000 == 0 or fnum == last_fnum:
            avg_rows = round(total_rows / len(dataset_list), 2)
            logging.info(
                "%s: Dataset size: %s, row.mean=%s",
                fnum,
                (max_rows, max_cols),
                avg_rows,
            )

    return dataset_list
# Load training files 0..30335, padding each array up to 336 rows x 40 columns.
train_arrays = load_npy_files("/kaggle/input/train/train", range_val=(0, 30336), row_col=(336, 40))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment