Last active
September 28, 2018 18:47
-
-
Save esaborit4code/f90fab9b2fe079639ea91a7c1cc75f67 to your computer and use it in GitHub Desktop.
Ignore or rename files with duplicated name
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'fileutils' | |
require 'byebug' | |
# Passed as the `verbose:` option to FileUtils.copy by the top-level `copy` helper.
VERBOSE = false | |
# Passed as the `noop:` option to FileUtils.copy by the top-level `copy` helper;
# when true, FileUtils does not actually perform the copy.
DRY_RUN = false | |
# Resolves a file-name collision between an input file and a file that
# already exists under the same name in the output directory.
#
# The outcome of #resolve is one of:
#   :copied_to_conflicts  - both files share an EXIF original date but differ
#                           in size; the input is copied into the conflicts
#                           directory under its original name.
#   :duplicated           - the files are considered the same (same EXIF date
#                           and size, or same MD5); nothing is copied.
#   :copied_with_new_name - the input is copied next to the existing file
#                           with a numeric suffix ("name-1.ext", "name-2.ext", ...).
class Conflict
  SUFFIX_SEPARATOR = '-'

  require 'digest'
  # https://github.com/tonytonyjan/exif
  # gem install exif
  require 'exif'

  # input_file_path           - path of the file being imported.
  # original_output_file_path - path of the already existing, un-suffixed output file.
  # conflicts_output_path     - directory that receives "conflicted" files.
  # suffix                    - numeric suffix of the output file to compare
  #                             against (nil means the un-suffixed file).
  def initialize(input_file_path, original_output_file_path, conflicts_output_path, suffix = nil)
    @suffix = suffix
    @input_file_path = input_file_path
    @file_name = File.basename(@input_file_path)
    @original_output_file_path = original_output_file_path
    @output_dir = File.dirname(@original_output_file_path)
    @output_file_path = suffixed_output_file_path(@suffix)
    @conflicts_output_path = conflicts_output_path
  end

  # Decides what to do with the input file and performs the copy if needed.
  # Returns :copied_to_conflicts, :duplicated or :copied_with_new_name.
  def resolve
    if conflicted?
      copy_to File.join(@conflicts_output_path, @file_name)
      return :copied_to_conflicts
    end
    return :duplicated if duplicated?

    resolve_with_new_name
  end

  private

  # Same EXIF date but different size.
  def conflicted?
    same_date? && !same_size?
  end

  def duplicated?
    (same_date? && same_size?) || same_sha?
  end

  # Tries the next numeric suffix. If a file with that suffixed name already
  # exists, the whole resolution is repeated against that file.
  def resolve_with_new_name
    new_suffix = @suffix.to_i + 1
    new_output_file_path = suffixed_output_file_path(new_suffix)
    if File.exist?(new_output_file_path)
      self.class.new(@input_file_path, @original_output_file_path, @conflicts_output_path, new_suffix).resolve
    else
      copy_to new_output_file_path
      :copied_with_new_name
    end
  end

  # Memoized: #resolve may ask for this more than once.
  def same_size?
    return @same_size if defined?(@same_size)

    @same_size = File.size(@input_file_path) == File.size(@output_file_path)
  end

  # True only when both files expose an EXIF original date and the dates match.
  # Memoized so the EXIF data of both files is parsed at most once per instance.
  def same_date?
    return @same_date if defined?(@same_date)

    input_date = exif_file_date(@input_file_path)
    @same_date = !input_date.nil? && input_date == exif_file_date(@output_file_path)
  end

  def same_sha?
    # NOTE: MD5 seems to change each time a file is imported
    Digest::MD5.file(@input_file_path) == Digest::MD5.file(@output_file_path)
  end

  # Returns the EXIF "date time original" of the file, or nil when the EXIF
  # data cannot be read. The block form of File.open guarantees the handle is
  # closed even if parsing raises.
  def exif_file_date(file_path)
    File.open(file_path) { |file| Exif::Data.new(file).date_time_original }
  rescue Exif::NotReadable
    nil
  end

  # Inserts "-<suffix>" between the base name and the extension.
  # Returns file_name unchanged when suffix is nil.
  def suffixed_file_name(file_name, suffix)
    return file_name unless suffix

    file_extension = File.extname(file_name)
    file_name_without_extension = File.basename(file_name, file_extension)
    "#{file_name_without_extension}#{SUFFIX_SEPARATOR}#{suffix}#{file_extension}"
  end

  def suffixed_output_file_path(suffix)
    File.join(@output_dir, suffixed_file_name(@file_name, suffix))
  end

  def copy_to(output_file_path)
    copy(@input_file_path, output_file_path)
  end
end
# Copies the file at +from+ to +to+ through FileUtils.
# The DRY_RUN and VERBOSE constants are forwarded as the +noop+ and
# +verbose+ options respectively.
def copy(from, to)
  options = { noop: DRY_RUN, verbose: VERBOSE }
  FileUtils.cp(from, to, **options)
end
# Returns +path+ as an absolute directory path ending in exactly one '/',
# whether or not the caller supplied a trailing slash.
def normalized_dir(path)
  File.join(File.expand_path(path), '')
end

# Returns the sorted names of the non-hidden regular files directly inside
# +dir+. Uses Dir.entries rather than a glob so that directory names
# containing glob metacharacters ('[', '{', '*', ...) are handled correctly.
def visible_file_names(dir)
  Dir.entries(dir)
     .reject { |name| name.start_with?('.') }
     .select { |name| File.file?(File.join(dir, name)) }
     .sort
end

# Script entry point.
#
# Reads the input directory from ARGV[0] and the output directory from
# ARGV[1], copies every input file whose name does not exist in the output
# directory, and hands each name collision to Conflict#resolve. A summary of
# the resolutions is printed at the end.
#
# Aborts with a usage message when fewer than two arguments are given.
# Raises Errno::ENOENT if either directory does not exist.
def run
  abort "Usage: #{File.basename($PROGRAM_NAME)} INPUT_DIR OUTPUT_DIR" if ARGV.size < 2

  input_path = normalized_dir(ARGV[0])
  output_path = normalized_dir(ARGV[1])
  input_file_names = visible_file_names(input_path)
  output_file_names = visible_file_names(output_path)

  conflicts_output_path = "#{output_path}conflicts/"
  Dir.mkdir(conflicts_output_path) unless File.exist?(conflicts_output_path)

  conflicting_file_names = input_file_names & output_file_names
  non_conflicting_file_names = input_file_names - conflicting_file_names

  puts "Copying #{non_conflicting_file_names.size} files without conflicts"
  non_conflicting_file_names.each do |file_name|
    copy("#{input_path}#{file_name}", "#{output_path}#{file_name}")
  end

  puts "Resolving #{conflicting_file_names.size} file name conflicts"
  results = {
    duplicated: [],
    copied_with_new_name: [],
    copied_to_conflicts: []
  }
  conflicting_file_names.each do |file_name|
    input_file_path = "#{input_path}#{file_name}"
    output_file_path = "#{output_path}#{file_name}"
    resolution = Conflict.new(input_file_path, output_file_path, conflicts_output_path).resolve
    results[resolution] << file_name
  end

  puts "\t#{results[:duplicated].size} duplicates were ignored\n"\
       "\t#{results[:copied_with_new_name].size} were copied with new name\n"\
       "\t#{results[:copied_to_conflicts].size} were copied to conflicts"
end
# Entry point: invokes `run` when the file is loaded; `run` reads the input
# and output directories from ARGV[0] and ARGV[1].
run |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment