# Recovery script.  A home server that duplicated and balanced data across
# 4 disk drives crashed; 3 drives were still readable and their contents were
# copied to separate directories on a NAS, named HPSERVER1, HPSERVER2 and
# HPSERVER3.  The directory structure on each disk mirrored the others,
# except that empty folders were missing.
# This script consolidates the files and folders from all three copies into
# one folder (HPSERVER1), keeping the largest version of each file in case
# some copies were truncated or corrupted.  The other two trees were deleted
# manually after checking the result.
import os, shutil
from os.path import isdir, isfile, join, getsize
import logging

STEM = '/share/homes/admin/'
MAIN = 'HPSERVER1'
OTHERS = ('HPSERVER2', 'HPSERVER3')

MAIN_DIR = STEM + MAIN
OTHER_DIRS = [STEM + OTHER for OTHER in OTHERS]


def get_files(directory):
    """ Return the names of all regular files directly inside directory (str). """
    return [f for f in os.listdir(directory) if isfile(join(directory, f))]


def get_dirs(directory):
    """ Return the names of all subdirectories directly inside directory (str). """
    return [f for f in os.listdir(directory) if isdir(join(directory, f))]


def get_size(directory, filename):
    """ Return the size in bytes of directory/filename, or -1 if it cannot
    be stat'ed (e.g. this copy of the tree does not contain the file). """
    try:
        return getsize(join(directory, filename))
    except OSError:
        return -1


def consolidate_files(main_dir, other_dirs):
    """ Identify the unique files across directories, and ensure the largest
    version of each file ends up in the main directory (the largest copy wins
    in case smaller ones were truncated by the crash).  Then recurse into
    every subdirectory seen at this level, first creating it in any tree
    where it is missing so the recursive call can list all counterparts.

    main_dir: a string representing the directory we want to move files to
    other_dirs: a list with the main_dir's counterparts
    """
    # Union of the file names present at this level in any of the trees.
    all_files = set()
    for directory in other_dirs + [main_dir]:
        all_files.update(get_files(directory))
    logging.debug(all_files)

    for f in all_files:
        # Track the largest copy seen so far.  Start below zero so that even
        # a zero-byte copy in another tree is picked up when main_dir lacks
        # the file entirely (get_size returns -1 for a missing file).
        largest_f = None
        largest_size = -1
        main_size = get_size(main_dir, f)
        if main_size > largest_size:
            largest_f = join(main_dir, f)
            largest_size = main_size
        for other in other_dirs:
            size = get_size(other, f)
            if size > largest_size:
                largest_f = join(other, f)
                largest_size = size
        # Only replace the main_dir version if a strictly bigger copy exists.
        if largest_f and largest_size > main_size:
            logging.debug("moving %s to %s", largest_f, main_dir)
            shutil.move(largest_f, join(main_dir, f))

    # Union of the subdirectory names at this level across all trees.
    main_folders = get_dirs(main_dir)
    all_folders = set(main_folders)
    for other in other_dirs:
        all_folders.update(get_dirs(other))
    logging.debug(all_folders)

    for folder in all_folders:
        if folder not in main_folders:
            logging.debug("making dir: %s", join(main_dir, folder))
            os.mkdir(join(main_dir, folder))
        # Create the folder in every other tree that lacks it, so the
        # recursive call can os.listdir() each counterpart safely.
        for other in other_dirs:
            if folder not in get_dirs(other):
                os.mkdir(join(other, folder))
        consolidate_files(join(main_dir, folder),
                          [join(other, folder) for other in other_dirs])


if __name__ == "__main__":
    logging.basicConfig(
        filename=join(os.path.dirname(__file__), 'consolidate.log'),
        level=logging.DEBUG)
    consolidate_files(MAIN_DIR, OTHER_DIRS)