-
-
Save cmsj/6015f0508365493a73d1892476c9062c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from datetime import datetime | |
import subprocess | |
import getopt | |
import sys | |
import os | |
import re | |
# Maps a file/directory path to a short status code; rendered by show_results().
results = {}
# Directories to scan, collected from every -d option.
sources = []
# Only process the actual images inside Photos or Aperture libraries:
# any directory entry whose name appears here is skipped.
excluded_dirs = [
    "Thumbnails", "Previews", "Database", "resources", "Attachments",
    "ProjectCache", "iLifeShared", "iPod Photo Cache", "Caches", "Backup",
    "iMovie Cache", "iMovie Movie Cache", "iMovie Stabilization",
    "iMovie Thumbnails", "database", "private", "Data.noindex", "Data",
    "Contents", "Modified", "Apple TV Photo Cache", "Aperture.aplib",
]
# Verbosity flag for this script ("" means quiet) and the flag handed to par2.
be_verbose = ""
be_verbose_par2 = "-q"
# Redundancy percentage passed to `par2 create -r`.
redundancy_level = "10"
do_force = False
do_repair = False
do_update = False
do_recursive = True
do_missing = False
do_fast = False
# Number of files (or directories, in fast mode) examined so far.
files_checked = 0
usage = """Usage: protect [options]
Options:
-d directory Directory to scan, can be specified multiple times
-e,--exclude stem Directory element (no path) to ignore, can be specified multiple times
-F,--force Update all checksums and parity files
-f,--fast Only check that directories containing files have a checksum file
-v,--verbose Log everything
-u,--update Update the checksums and parity files for any changed files
-r,--repair Repair any bitrot detected
-1 Do not recurse into subdirectories
-R percentage Level of Redundancy (%). Default: 10
-m,--missing Add missing checksums and parity files
"""
# Get the command line options.
try:
    options, filenames = getopt.getopt(
        sys.argv[1:], '1ruvd:e:R:Ffm',
        ["force", "verbose", "update", "repair", "exclude=", "fast", "missing"])
except getopt.GetoptError as err:
    print(str(err))
    print(usage)
    sys.exit(2)

for o, a in options:
    if o == '-d':
        sources.append(a)
    elif o in ('-F', '--force'):
        do_force = True
    elif o in ('-v', '--verbose'):
        # Always store the short form: the rest of the script compares
        # be_verbose against "-v", and be_verbose_par2 is pasted verbatim
        # into the par2 command line.
        be_verbose = "-v"
        be_verbose_par2 = "-v"
    elif o in ('-u', '--update'):
        do_update = True
    elif o in ('-m', '--missing'):
        do_missing = True
    elif o in ('-f', '--fast'):
        do_fast = True
    elif o in ('-r', '--repair'):
        do_repair = True
    elif o in ('-e', '--exclude'):
        excluded_dirs.append(a)
    elif o == '-R':
        redundancy_level = a
    elif o == '-1':
        do_recursive = False
    else:
        print("Unknown option: {0}".format(o))
        print(usage)
        # Usage errors exit non-zero, like the GetoptError path above.
        sys.exit(2)

# At least one -d directory is required.
if not sources:
    print(usage)
    sys.exit(2)
def file_with_path(path, filename):
    """Join *path* and *filename* into one path string.

    Backticks in *filename* are prefixed with a backslash (the result is
    later embedded in double-quoted shell command strings elsewhere in this
    script). If *path* is empty or None the escaped filename is used alone.
    A "* Not found" notice is printed when the joined path is neither an
    existing file nor an existing directory.

    Returns the joined path with doubled separators collapsed.
    """
    # Plain string replacement; equivalent to the former regex but without
    # relying on unrecognised escape sequences in string literals.
    entry = filename.replace("`", "\\`")
    if path:
        entry_with_path = path + os.sep + entry
    else:
        entry_with_path = entry
    if not os.path.isfile(entry_with_path) and not os.path.isdir(entry_with_path):
        print("* Not found: " + entry_with_path)
    return entry_with_path.replace(os.sep + os.sep, os.sep)
def parity_file(path, filename):
    """Return the path of the par2 parity file that protects *filename*.

    The parity file is named ".protect.<name>.par2" and is placed in *path*.
    When *path* is empty or None, *filename* is treated as a full path and
    is split into its directory and final component first.
    """
    target = filename
    if not path:
        # Split into directory *string* and basename. The directory must stay
        # a string because file_with_path() concatenates it with os.sep; a
        # list of path components would raise TypeError there.
        path, target = os.path.split(filename)
        print("* Reparsed {0} {1} {2}".format(filename, path, target))
    return file_with_path(path, ".protect.{0}.par2".format(target))
def delete_par2(path):
    """Recursively delete ".protect*" and ".chkbit*" files below *path*.

    Does nothing when *path* is not a directory. Each removed file is
    announced with a "Deleting" line. Sub-directories are visited after the
    current directory has been fully listed.
    """
    if not os.path.isdir(path):
        return
    subdirs = []
    for name in os.listdir(path):
        full = file_with_path(path, name)
        if os.path.isdir(full):
            subdirs.append(full)
            continue
        if name.startswith((".protect", ".chkbit")):
            print("Deleting " + full)
            os.remove(full)
    for sub in subdirs:
        delete_par2(sub)
def process_directory(path):
    """List *path* and sort its entries into sub-directories and files.

    Entries named in excluded_dirs are skipped: their protection files are
    deleted via delete_par2() and they are recorded in results as "--".
    Directories are collected before the hidden-name test, so directories
    whose name starts with "." are still returned. Other names starting
    with "." are ignored. Anything that is neither a file nor a directory
    is recorded as '?' and reported as unhandled.

    Returns a tuple (directories, files): directories are full paths,
    files are bare names.
    """
    subdirs = []
    plain_files = []
    for name in os.listdir(path):
        full = file_with_path(path, name)
        if name in excluded_dirs:
            if be_verbose:
                print("Skipping " + full)
            delete_par2(full)
            results[full] = "--"
            continue
        if os.path.isdir(full):
            subdirs.append(full)
        elif name[0] == '.':
            # Hidden non-directory entry: nothing to do.
            pass
        elif os.path.isfile(full):
            plain_files.append(name)
        else:
            results[full] = '?'
            print("Unhandled " + full)
    return (subdirs, plain_files)
def repair_with_parity(path, filename):
    """Try to repair *filename* from its par2 parity file.

    Does nothing and returns False unless repairing was enabled
    (do_repair). Otherwise runs `par2 repair` on the parity file and the
    target.

    Returns True when par2 exits successfully. Returns False when par2
    exits with an error; its output is then printed line by line.
    """
    parity = parity_file(path, filename)
    target = file_with_path(path, filename)
    if not do_repair:
        return False
    print("\n + Repairing '{0}'".format(target))
    try:
        # One placeholder per argument: verbosity flag, parity file, target.
        # (The previous "{1}"/"{2}" indices did not match the two arguments
        # supplied and raised IndexError before par2 was ever started.)
        subprocess.check_output(
            'par2 repair {0} "{1}" "{2}"'.format(be_verbose_par2, parity, target),
            stderr=subprocess.STDOUT, shell=True)
        return True
    except subprocess.CalledProcessError as e:
        print(" ! Parity repair failed: {0}".format(e))
        for line in e.output.split('\n'):
            print("Failed: " + line)
        return False
def update_parity(path, filename):
    """Create the par2 parity file for *filename* in *path*.

    Prints the parity file name in verbose mode, otherwise a single
    progress dot. When par2 exits with an error, the failure is announced
    and only the relevant lines of its output are echoed.
    """
    parity = parity_file(path, filename)
    target = file_with_path(path, filename)
    if be_verbose != "-v":
        sys.stdout.write('.')
        sys.stdout.flush()
    else:
        print(" * Updating {0}".format(parity))

    # -n1 : create only one recovery file.
    # -r<N> : redundancy percentage, taken from redundancy_level.
    command = 'par2 create -n1 -r{0} {1} "{2}" "{3}"'.format(
        redundancy_level, be_verbose_par2, parity, target)
    try:
        subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True).strip()
    except subprocess.CalledProcessError as e:
        print(" ! Parity update failed: {0}".format(e))
        for line in e.output.split('\n'):
            unfound_target = line.startswith("Target:") and not line.endswith("- found.")
            if unfound_target or line.startswith("Repair"):
                print(line)
# Return codes of check_parity():
#   par2_missing (0) - no parity file exists for the target
#   par2_invalid (1) - verification failed without a usable verdict
#   par2_repair  (2) - verification failed, repair is possible
#   par2_valid   (3) - verification succeeded
par2_missing, par2_invalid, par2_repair, par2_valid = range(4)
def check_parity(path, filename):
    """Verify *filename* in *path* against its par2 parity file.

    Returns one of the par2_* codes:
      par2_missing - no parity file exists; the file is recorded as "??"
                     unless it is already recorded as "?".
      par2_valid   - `par2 verify` succeeded; files_checked is incremented.
      par2_repair  - par2 reported "Repair is possible"; recorded as "R".
      par2_invalid - par2 failed without a repair verdict; recorded as "M"
                     when par2 named a target that was not found, otherwise
                     as "P" so the failure shows up in the final summary.

    Raises ValueError for any other "Repair is ..." verdict from par2.
    """
    global files_checked
    parity = parity_file(path, filename)
    entry_with_path = file_with_path(path, filename)
    if not os.path.isfile(parity):
        if be_verbose == "-v":
            print(" * No parity file for '{0}'".format(entry_with_path))
        if results.get(entry_with_path) != "?":
            results[entry_with_path] = "??"
        return par2_missing
    try:
        subprocess.check_output(
            'par2 verify {0} "{1}"'.format(be_verbose_par2, parity),
            stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as e:
        print(" ! Parity check failed: %s" % e)
        target_missing = False
        for line in e.output.split('\n'):
            if line.startswith("Target:") and not line.endswith("- found."):
                results[entry_with_path] = "M"
                target_missing = True
                print(line)
            elif line.startswith("Repair is required"):
                # Informational only; the verdict follows on a later line.
                pass
            elif line.startswith("Repair is possible"):
                results[entry_with_path] = "R"
                return par2_repair
            elif line.startswith("Repair is"):
                raise ValueError("{0} for '{1}'".format(line, entry_with_path))
            elif be_verbose == "-v":
                print("Debug: " + line)
        if not target_missing:
            # Without this the failed verification would leave no trace in
            # results and the run could still finish with exit status 0.
            results[entry_with_path] = "P"
        return par2_invalid
    files_checked = files_checked + 1
    return par2_valid
def process_chkbit(path, output):
    """Interpret the output of a chkbit run over *path*.

    Each line longer than two characters is read as a one-character status
    code followed, from index 2, by a file path. Handling per code:
      'u', 'a' - recorded in results and in the returned list; the parity
                 file is refreshed when updating is enabled (do_update).
      'r', 'E' - a problem. If the line mentions ".chkbit", the checksum
                 file itself is affected: it is rebuilt with `chkbit -force`
                 when repairing is enabled, otherwise recorded as 'E'; in
                 both cases processing stops and [] is returned. Otherwise a
                 parity repair is attempted: success is recorded as "F",
                 failure keeps the code and marks the directory invalid.
      '?'      - recorded as is.
    Lines containing "with bitrot" are ignored. Paths with a component
    listed in excluded_dirs are recorded as "--" and skipped.

    Returns the list of paths reported as 'u' or 'a'.
    Raises ValueError when a line could not be interpreted or when a
    reported problem could not be repaired.
    """
    changed = []
    invalid = False
    updated = False
    unhandled = False
    for line in output.split('\n'):
        if len(line) <= 2:
            continue
        entry_with_path = line[2:]
        code = line[0]

        skip = False
        for entry in entry_with_path.split(os.sep):
            if entry in excluded_dirs:
                skip = True
                # TODO: Delete from the point at which the match was made
                delete_par2(entry_with_path)
                results[entry_with_path] = "--"
                if be_verbose:
                    print("Skipping {0}: {1}".format(entry, entry_with_path))
                break
        if skip:
            continue

        if code == 'u' or code == 'a':
            results[entry_with_path] = code
            changed.append(entry_with_path)
            updated = True
            if do_update:
                update_parity(None, entry_with_path)
        elif code == 'r' or code == 'E':
            if line.find(".chkbit") > 0:
                if do_repair:
                    print(" + Repairing {0}/.chkbit".format(path))
                    subprocess.check_output(
                        'chkbit -force "{0}"'.format(path),
                        stderr=subprocess.STDOUT, shell=True)
                else:
                    print(" ! Corrupted checksum file '{0}'".format(entry_with_path))
                    results[entry_with_path] = 'E'
                return []
            elif repair_with_parity(None, entry_with_path):
                # Record the repair as fixed ("F", as verify_directory does)
                # rather than leaving the error code in place.
                results[entry_with_path] = "F"
            else:
                results[entry_with_path] = code
                invalid = True
        elif code == '?':
            results[entry_with_path] = code
        elif line.find("with bitrot") > 0:
            # Summary line; nothing to record.
            pass
        else:
            print("Unhandled chkbit result: " + line)
            unhandled = True
    if updated:
        # Terminate the row of progress dots written by update_parity().
        print(" ")
    if unhandled:
        raise ValueError("Unhandled output from chkbit in '{0}'".format(path))
    if invalid:
        raise ValueError("Invalid hashes detected in '{0}'".format(path))
    return changed
def verify_directory(path):
    """Check (and optionally update or repair) the protection data of *path*.

    Fast mode (do_fast): only records whether a directory that contains
    files has a ".chkbit" file ("^" tracked, "??" missing), then descends
    into sub-directories.

    Normal mode:
      1. With both update and force enabled, the existing ".chkbit" file is
         removed; otherwise, if the directory has files, `chkbit -verify` is
         run and its output handed to process_chkbit().
      2. Every file not reported as changed by chkbit is checked against its
         parity file; missing parity files are created when do_missing or
         do_update is set, and repairable files are repaired when do_repair
         is set.
      3. A missing ".chkbit" file is created when do_missing is set,
         otherwise recorded as "??".
      4. Sub-directories are processed recursively.

    Recursion is skipped in both modes when do_recursive is False.
    ValueError raised by process_chkbit() or check_parity() propagates.
    """
    global files_checked
    changed = []
    (directories, files) = process_directory(path)
    chkbit_file = path + os.sep + ".chkbit"

    if do_fast:
        sys.stdout.write('.')
        sys.stdout.flush()
        files_checked = files_checked + 1
        if files:
            if os.path.isfile(chkbit_file):
                results[path + os.sep] = "^"
            else:
                results[path + os.sep] = "??"
        # Honour -1 here too, as the normal mode does.
        if do_recursive:
            for entry in directories:
                verify_directory(entry)
        return

    print("[{0}] Protecting {1}...".format(
        datetime.strftime(datetime.now(), '%a %H:%M:%S'), path))
    if do_update and do_force:
        # Start again. The file may not exist (directory never indexed or
        # without files), so only remove it when it is there.
        if os.path.isfile(chkbit_file):
            os.remove(chkbit_file)
    elif files:
        # Look for changes to known files
        try:
            if be_verbose == "-v":
                print(" * Checking hashes")
            lines = subprocess.check_output(
                'chkbit -verify "{0}"'.format(path),
                stderr=subprocess.STDOUT, shell=True)
            changed = process_chkbit(path, lines)
        except subprocess.CalledProcessError as e:
            print("Error: %s" % e)
            changed = process_chkbit(path, e.output)

    if files:
        # Check existing parity files
        updated = False
        for f in files:
            entry_with_path = file_with_path(path, f)
            if entry_with_path in changed:
                print("Avoided duplicate scan for " + entry_with_path)
                continue
            rc = check_parity(path, f)
            if rc == par2_missing:
                if do_missing or do_update:
                    update_parity(path, f)
                    updated = True
                    results[entry_with_path] = "A"
            elif do_repair and rc == par2_repair:
                if repair_with_parity(path, f):
                    results[entry_with_path] = "F"
        if updated:
            # Terminate the row of progress dots written by update_parity().
            print(" ")

    if files and not os.path.isfile(chkbit_file):
        if do_missing:
            # Create the chkbit file if it was missing
            # But do it after we validated any existing parity files
            lines = subprocess.check_output(
                'chkbit "{0}"'.format(path),
                stderr=subprocess.STDOUT, shell=True)
            changed = process_chkbit(path, lines)
        else:
            results[chkbit_file] = "??"

    if do_recursive:
        for entry in directories:
            verify_directory(entry)
def show_results():
    """Print a summary of everything recorded in results.

    Informational entries are printed in sorted path order. Entries whose
    code denotes a problem are collected and printed afterwards under a
    "Detected ... errors" heading, after which the process exits with
    status 1. Codes that are not recognised are reported as unknown keys.
    """
    info_formats = {
        'u': " * '{0}' updated",
        '--': " '{0}' skipped",
        '^': " '{0}' tracked",
        'a': " + '{0}' added",
        'A': " ++ '{0}' created missing parity file",
        'F': " ! '{0}' fixed",
        '?': " ? '{0}' not tracked",
    }
    error_formats = {
        'E': " !! '{0}' corrupted checksum",
        'P': " !! '{0}' corrupted",
        'M': " !! '{0}' missing",
        'r': " !! '{0}' requires repair (hash)",
        'R': " !! '{0}' requires repair (parity)",
        '??': " ?? '{0}' missing parity file",
    }

    failures = []
    if results:
        if do_fast:
            heading = "\n\nChecked {0} directories:"
        else:
            heading = "\n\nChecked {0} files:"
        print(heading.format(files_checked))
        for name in sorted(results):
            code = results[name]
            if code in info_formats:
                print(info_formats[code].format(name))
            elif code in error_formats:
                failures.append(name)
            else:
                print(" !! '{0}' unknown key {1}".format(name, code))

    if not failures:
        return
    print("Detected '{0}' errors".format(len(failures)))
    for name in failures:
        print(error_formats[results[name]].format(name))
    sys.exit(1)
# Process every requested directory, then print the summary.
halted = False
try:
    for s in sources:
        verify_directory(s)
except ValueError as e:
    print("Processing halted: {0}".format(e))
    halted = True
# show_results() exits with status 1 itself when it lists errors.
show_results()
if halted:
    # Processing stopped early; do not report success even when no
    # individual error entry was recorded.
    sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment