# The scenario for this script is that I have a home server that crashed.
# The server used some duplication and balancing of data across 4 disk drives,
# 3 of which were still working.  The files from all three were copied
# to separate directories on a NAS, named HPSERVER1, HPSERVER2, and HPSERVER3.
# The directory structure on each disk mirrored each other, except empty folders
# were missing.
# This script was run to move the files and folders from all three disk drives
# into one consolidated folder (HPSERVER1).  I manually deleted the other two
# after checking the result

import os, shutil
from os.path import isdir, isfile, join, getsize
import logging

# Everything the script does is logged to consolidate.log next to the
# script itself, so the consolidation can be audited afterwards.
# (The old print referenced 'example.log', which was never the file
# logging actually wrote to.)
LOG_PATH = join(os.path.dirname(__file__), 'consolidate.log')
print(LOG_PATH)  # tell the operator where the log really goes
logging.basicConfig(filename=LOG_PATH, level=logging.DEBUG)

# Root of the NAS share that holds the copies of the surviving disks.
STEM = '/share/homes/admin/'

# Files and folders are consolidated INTO MAIN; the OTHERS are drained
# and were deleted manually after checking the result.
MAIN = 'HPSERVER1'
OTHERS = ('HPSERVER2', 'HPSERVER3')
MAIN_DIR = STEM + MAIN
OTHER_DIRS = [STEM + OTHER for OTHER in OTHERS]


def get_files(directory):
    """ Return the names of the regular files directly inside directory (str). """
    names = []
    for entry in os.listdir(directory):
        if isfile(join(directory, entry)):
            names.append(entry)
    return names

def get_dirs(directory):
    """ Return the names of the subdirectories directly inside directory (str). """
    subdirs = []
    for entry in os.listdir(directory):
        if isdir(join(directory, entry)):
            subdirs.append(entry)
    return subdirs

def get_size(directory, filename):
    """ Size in bytes of directory/filename, or -1 when the file cannot be
    stat'ed (typically because it does not exist in this directory). """
    path = join(directory, filename)
    try:
        return getsize(path)
    except OSError:
        return -1

def consolidate_files(main_dir, other_dirs):
    """ Merge one directory level of the disk copies into main_dir.

    For every file name present at this level in any of the directories,
    keep the largest version (some copies may be truncated/corrupted) in
    main_dir, moving it there from one of other_dirs when necessary.
    Then recurse into every subdirectory seen at this level, creating the
    subdirectory first in every copy where it is missing.

    main_dir: str, the directory the files are consolidated into
    other_dirs: list of str, main_dir's counterparts on the other copies
    """
    # Union of the file names that exist at this level in any copy.
    all_files = set()
    for d in other_dirs + [main_dir]:
        all_files.update(get_files(d))
    logging.debug(all_files)
    for f in all_files:
        # get_size returns -1 for a missing file, so seeding the search with
        # main's size means any existing copy — including a 0-byte one, which
        # the previous `largest_size = 0` start silently left behind to be
        # deleted — wins when main_dir has no copy, while ties keep the
        # main_dir copy in place.
        main_size = get_size(main_dir, f)
        largest_f = None
        largest_size = main_size
        for other in other_dirs:
            size = get_size(other, f)
            if size > largest_size:
                largest_f = join(other, f)
                largest_size = size
        # largest_f is only set when some other copy is strictly larger than
        # what main_dir currently holds (or main_dir lacks the file entirely).
        if largest_f is not None:
            logging.debug("moving " + largest_f + " to " + join(main_dir, f))
            shutil.move(largest_f, join(main_dir, f))
    # Union of the subdirectory names at this level across all copies.
    main_folders = set(get_dirs(main_dir))
    all_folders = set(main_folders)
    for other in other_dirs:
        all_folders.update(get_dirs(other))
    logging.debug(all_folders)
    for folder in all_folders:
        if folder not in main_folders:
            logging.debug("making dir: " + join(main_dir, folder))
            os.mkdir(join(main_dir, folder))
        # Create the folder on the other copies too, so the recursive call can
        # list every counterpart without special-casing missing paths.
        for other in other_dirs:
            if not isdir(join(other, folder)):
                os.mkdir(join(other, folder))
        consolidate_files(join(main_dir, folder),
                          [join(other, folder) for other in other_dirs])


 
if __name__ == '__main__':
    # Guard the destructive run: importing this module (e.g. to reuse the
    # helpers) must not start moving files around on the NAS.
    consolidate_files(MAIN_DIR, OTHER_DIRS)