require 'fileutils'
require 'json'
class GooglePhotosFixer
  METADATA_JSON = "supplemental-metadata.json"
  SUPPORTED_IMAGE_EXT = %w(.jpg .jpeg .png .gif .webp .heic .mov .mp4 .3gp .avi .mkv .webm)

  attr_reader :fixes, :errors, :takeout_dir

  def initialize(takeout_dir)
    @takeout_dir = takeout_dir
    reset!
  end

  def reset!
    @fixes = []
    @errors = []
  end

  def filename(fullpath_filename)
    File.basename(fullpath_filename)
  end
  # Returns the basename without its extension (e.g. "20210529_155539.jpg" => "20210529_155539")
  def filename_without_ext(filename)
    File.basename(filename, File.extname(filename))
  end
  def copy_file(origin, destination)
    FileUtils.cp(origin, destination)
    fixes << "#{filename(origin)} copied to #{filename(destination)}"
  end

  def move_file(origin, destination)
    FileUtils.mv(origin, destination)
    fixes << "#{filename(origin)} moved to #{filename(destination)}"
  end

  def delete_file(origin)
    FileUtils.rm(origin)
  end

  def write_file(name, content)
    File.open(name, 'w') do |f|
      f.write(content)
    end
    fixes << "#{filename(name)} written"
  end

  # Returns the default expected metadata filename
  # image_file: 20210529_155539.jpg
  # return: 20210529_155539.jpg.supplemental-metadata.json
  def metadata_file_for(image_file)
    "#{image_file}.#{METADATA_JSON}"
  end
  # Try to detect the timestamp from the file name pattern
  def infer_time_from_image_file(image_file)
    # for 20210529_155539 patterns
    name = filename_without_ext(image_file)
    tokens = name.scan(/(\d{4})(\d{2})(\d{2})\_(\d{2})(\d{2})(\d{2})/).flatten
    if tokens.size == 6
      return Time.new(*tokens.map(&:to_i))
    end

    # for CameraZOOM-20131224200623261 patterns
    # (year month day hour min sec millis)
    tokens = name.scan(/(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(\d{3})/).flatten
    if tokens.size == 7
      # Time.new takes at most six date/time parts; drop the milliseconds token
      return Time.new(*tokens.first(6).map(&:to_i))
    end

    # for DJI_20250308180700_0070_D patterns
    tokens = name.scan(/\_(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})\_/).flatten
    if tokens.size == 6
      return Time.new(*tokens.map(&:to_i))
    end

    # for Photos from 2024/P01020304.jpg or 2024/IMG_123123.jpg patterns
    tokens = image_file.scan(/Photos\ from\ (\d{4})\//).flatten
    if tokens.size == 1
      return Time.new(tokens.first.to_i)
    end

    nil
  end
  # Fallback to generate a metadata file based on the filename pattern
  # image file: 20210529_155539.jpg
  # generated metadata: 20210529_155539.jpg.supplemental-metadata.json
  # time on metadata: 2021-05-29 15:55:39
  def generate_metadata_for_image_file(image_file)
    metadata_filename = metadata_file_for(image_file)
    return if File.exist?(metadata_filename)

    name = filename_without_ext(image_file)
    if (time = infer_time_from_image_file(image_file))
      json_content = {
        "title" => filename(image_file),
        "description" => "Metadata inferred from #{name}",
        "imageViews" => "1",
        "creationTime" => {
          "timestamp" => time.to_i.to_s,
          "formatted" => time.to_s
        },
        "photoTakenTime" => {
          "timestamp" => time.to_i.to_s,
          "formatted" => time.to_s
        }
      }
      write_file(metadata_filename, json_content.to_json)
    else
      errors << "Unable to infer metadata for #{image_file}"
    end
  end
  # normalize truncated json metadata filenames
  # original: e471949f-d0b7-4f22-be33-225f556a92a4.jpg.suppl.json
  # fixed: e471949f-d0b7-4f22-be33-225f556a92a4.jpg.supplemental-metadata.json
  def fix_divergent_metadata_filename(json_file)
    unless json_file.end_with?(METADATA_JSON)
      meta_ext, meta_filename, *_rest = json_file.split('.').reverse
      fixed_json_file = json_file.gsub("#{meta_filename}.#{meta_ext}", METADATA_JSON)
      move_file(json_file, fixed_json_file)
      json_file = fixed_json_file
    end
    json_file
  end
  # for cases like:
  # 20210529_155539.jpg
  # 20210529_155539(1).jpg
  # 20210529_155539-editada.jpg
  # 20210529_155539.jpg.supplemental-metadata.json
  # 20210529_155539.jpg.supplemental-metadata(1).json
  def fix_metadata_file_for_image(image_file)
    # Create a metadata json for the "-editada" (edited) version of an image
    # image file: 20210529_155539-editada.jpg
    # metadata file: 20210529_155539-editada.jpg.supplemental-metadata.json
    if image_file.index("-editada")
      original_file = image_file.gsub("-editada", "")
      original_meta = "#{original_file}.#{METADATA_JSON}"
      if File.exist?(original_meta)
        edited_meta = "#{image_file}.#{METADATA_JSON}"
        copy_file(original_meta, edited_meta)
      end
    end

    # fix metadata filenames for sequential image filenames
    # image file: 20210529_155539(1).jpg
    # wrong metadata: 20210529_155539.jpg.supplemental-metadata(1).json
    # fixed metadata: 20210529_155539(1).jpg.supplemental-metadata.json
    matched = filename_without_ext(image_file).match(/(?<num>\(\d+\)$)/)
    if matched
      num = matched[:num]
      filename_without_num = filename(image_file).gsub(num, "")
      dir = File.dirname(image_file)
      wrong_json_file = File.join(dir, "#{filename_without_num}.supplemental-metadata#{num}.json")
      fixed_json_file = File.join(dir, "#{filename(image_file)}.#{METADATA_JSON}")
      if File.exist?(wrong_json_file)
        if File.exist?(fixed_json_file)
          errors << "Metadata file already exists: #{fixed_json_file}"
        else
          move_file(wrong_json_file, fixed_json_file)
        end
      else
        errors << "Metadata file #{wrong_json_file} does not exist for image: #{image_file}"
      end
    end
    image_file
  end
  def execute
    reset!
    all_files = Dir.glob(File.join(takeout_dir, "/**/*"))
    puts "Total files found on #{takeout_dir}: #{all_files.size}"

    years_files = all_files.select { |f| File.dirname(f).match?(/Photos\ from\ (\d+)$/) }
    puts "Total photos from YYYY dirs found: #{years_files.size}"

    image_files = years_files.select { |f| SUPPORTED_IMAGE_EXT.include?(File.extname(f).downcase) }
    puts "Total supported photo formats found: #{image_files.size}"

    json_files = years_files.select { |f| File.extname(f).downcase == '.json' }
    puts "Total metadata files found: #{json_files.size}"

    json_files = json_files.map do |json_file|
      fix_divergent_metadata_filename(json_file)
    end

    image_files = image_files.map do |image_file|
      fixed_metadata = fix_metadata_file_for_image(image_file)
      generate_metadata_for_image_file(image_file)
      fixed_metadata
    end

    if errors.size > 0
      puts "\nProcess finished with #{errors.size} errors:"
      errors.each_with_index do |error, index|
        puts "[#{index + 1}/#{errors.size}] #{error}"
      end
    end

    if fixes.size > 0
      puts "\nProcess finished with #{fixes.size} fixes:"
      fixes.each_with_index do |fix, index|
        puts "[#{index + 1}/#{fixes.size}] #{fix}"
      end
    end

    not_found = image_files.select do |img|
      !File.exist?(metadata_file_for(img))
    end
    if not_found.size > 0
      puts "\nMetadata not found for #{not_found.size} files:"
      not_found.each_with_index do |file, index|
        puts "[#{index + 1}/#{not_found.size}] #{file}"
      end
    end
  end
end
takeout_dir = ARGV[0] || raise("Usage: ruby fix_metadata.rb path/to/takeout/dir/")
fixer = GooglePhotosFixer.new(takeout_dir)
fixer.execute |
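For reference, a minimal sketch of driving the class from another Ruby script or an IRB session, assuming the class body above has been loaded (e.g. saved to its own file without the two CLI lines at the bottom); the path is an example:

# minimal usage sketch; the Takeout path below is an example
fixer = GooglePhotosFixer.new("/path/to/Takeout/Google Photos")
fixer.execute
puts "fixes: #{fixer.fixes.size}, errors: #{fixer.errors.size}"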
Hi LeCollevillais,
I worked on this a bit more after my posting. Because we do not iterate over the json files again, I needed to re-scan the directory and reload the json list to make sure it was accurate.
After the listing of the file counts, I did the following:
image_files = image_files.map do |image_file|
  fix_metadata_file_for_image(image_file)
end
# repopulate all_files and json_files after fix_metadata_file_for_image renames
all_files = Dir.glob(File.join(takeout_dir, "/**/*"))
json_files = all_files.select { |f| File.extname(f).downcase == '.json' }

json_files = json_files.map do |json_file|
  fix_divergent_metadata_filename(json_file)
end
image_files = image_files.map do |image_file|
  generate_metadata_for_image_file(image_file)
end
This avoided some errors where it tried to work on json files that no longer existed; I'm not sure if it fixes the same issue you described by using each instead.
To the end of the fix_metadata_file_for_image method, I added the following code as well.
# Attempt fix for image file names too long for the GD export
# Identify if the json exists but was not matched because of name truncation
# 00100lrPORTRAIT_00100_BURST20200414105542847_CO.jpg
# 00100lrPORTRAIT_00100_BURST20200414105542847_C.json
# 00000IMG_00000_BURST20200407202134858_COVER.jpg
# 00000IMG_00000_BURST20200407202134858_COVER.jp.json
# 00000PORTRAIT_00000_BURST20200427120733370.jpg
# 00000PORTRAIT_00000_BURST20200427120733370.jpg.json
image_file_name = filename(image_file)
if image_file_name.size.between?(45, 51)
  dir = File.dirname(image_file)
  common_file_name = image_file_name[0, 46]
  wrong_json_file = File.join(dir, "#{common_file_name}.json")
  fixed_json_file = File.join(dir, "#{image_file_name}.#{METADATA_JSON}")
  unless File.exist?(fixed_json_file)
    if File.exist?(wrong_json_file)
      move_file(wrong_json_file, fixed_json_file)
    else
      errors << "Metadata file: neither expected file #{fixed_json_file} nor #{wrong_json_file} exists for image: #{image_file}"
    end
  end
end
I found that the Google export truncated the names of some files, so the json file doesn't match the image name exactly; this attempts to find the json file in that scenario. I don't know how well this example applies to others' exports.
I was having issues with the not_found logic at the end printing the fixes array seemingly at random, so I removed it, as it wasn't helping me anyway.
Hi Tetin-cph, your contribution helped me a lot too.
I had some files with (1) that were not previously handled correctly, and your code fixes them.
I have another correction for the "Metadata not found" issue.
In your modified code, the use of map on image_files replaces every value in the array with the block's return value, which is not the image path. We can either add image_file at the end of the last map block or, better, replace the map with an each block.
I suggest the latter:
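Keeping everything else the same, the last block becomes (reconstructed from the description above, since the original snippet is in the attached file):

image_files.each do |image_file|
  generate_metadata_for_image_file(image_file)
end

With each, the image_files array is left intact for the not_found check at the end, while the metadata generation still runs for every file.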
I hope there are no side effects from this.
I've added the complete file here: