-
-
Save rpanachi/aa8a18bf090b580d6c1c2d4e9c6f51c6 to your computer and use it in GitHub Desktop.
require 'FileUtils' | |
require 'json' | |
class GooglePhotosFixer | |
METADATA_JSON = "supplemental-metadata.json" | |
SUPPORTED_IMAGE_EXT = %w(.jpg .jpeg .png .gif .webp .heic .mov .mp4 .3gp .avi .mkv .webm) | |
attr_reader :fixes, :errors, :takeout_dir | |
def initialize(takeout_dir) | |
@takeout_dir = takeout_dir | |
reset! | |
end | |
def reset! | |
@fixes = [] | |
@errors = [] | |
end | |
def filename(fullpath_filename) | |
File.basename(fullpath_filename) | |
end | |
def filename_without_ext(filename) | |
File.basename(filename).gsub(File.extname(filename), '') | |
end | |
def copy_file(origin, destination) | |
FileUtils.cp(origin, destination) | |
fixes << "#{filename(origin)} copied to #{filename(destination)}" | |
end | |
def move_file(origin, destination) | |
FileUtils.mv(origin, destination) | |
fixes << "#{filename(origin)} moved to #{filename(destination)}" | |
end | |
def delete_file(origin) | |
FileUtils.rm(origin) | |
end | |
def write_file(name, content) | |
File.open(name, 'w') do |f| | |
f.write(content) | |
end | |
fixes << "#{filename(name)} written" | |
end | |
# Returns the default expected metadata filename | |
# image_file: 20210529_155539.jpg | |
# return: 20210529_155539.jpg.supplemental-metadata.json | |
def metadata_file_for(image_file) | |
"#{image_file}.#{METADATA_JSON}" | |
end | |
# Try detect the timestamp from file name pattern | |
def infer_time_from_image_file(image_file) | |
# for 20210529_155539 patterns | |
filename = filename_without_ext(image_file) | |
tokens = filename.scan(/(\d{4})(\d{2})(\d{2})\_(\d{2})(\d{2})(\d{2})/).flatten | |
if tokens.compact == 6 | |
return Time.new(*tokens) | |
end | |
# for CameraZOOM-20131224200623261 patterns | |
# for CameraZOOM-2013 12 24 20 06 23 261 patterns | |
tokens = filename.scan(/(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(\d{3})/).flatten | |
if tokens.compact == 7 | |
return Time.new(*tokens) | |
end | |
# for DJI_20250308180700_0070_D patterns | |
tokens = filename.scan(/\_(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})\_/).flatten | |
if tokens.compact == 6 | |
return Time.new(*tokens) | |
end | |
# for Photos from 2024/P01020304.jpg or 2024/IMG_123123.jpg pattern | |
tokens = image_file.scan(/Photos\ from\ (\d{4})\//).flatten | |
if tokens.compact == 1 | |
return Time.new(*tokens) | |
end | |
return nil | |
end | |
# Fallback to generate a metadata filename based on filename pattern | |
# image file: 20210529_155539.jpg | |
# generated metadata: 20210529_155539.jpg.supplemental-metadata.json | |
# time on metadata: 2021-05-29 15:55:39 | |
def generate_metadata_for_image_file(image_file) | |
metadata_filename = metadata_file_for(image_file) | |
return if File.exist?(metadata_filename) | |
filename = filename_without_ext(image_file) | |
if time = infer_time_from_image_file(image_file) | |
json_content = { | |
"title" => filename(image_file), | |
"description": "Metadata inferred from #{filename}", | |
"imageViews": "1", | |
"creationTime": { | |
"timestamp": time.to_i.to_s, | |
"formatted": time.to_s | |
}, | |
"photoTakenTime": { | |
"timestamp": time.to_i.to_s, | |
"formatted": time.to_s | |
} | |
} | |
write_file(metadata_filename, content.to_json) | |
else | |
errors << "Unable to infer metadata for #{image_file}" | |
end | |
end | |
# normalize truncated json metadata filenames | |
# original: e471949f-d0b7-4f22-be33-225f556a92a4.jpg.suppl.json | |
# fixed: e471949f-d0b7-4f22-be33-225f556a92a4.jpg.supplemental-metadata.json | |
def fix_divergent_metadata_filename(json_file) | |
unless json_file.end_with?(METADATA_JSON) | |
meta_ext, meta_filename, img_ext, img_file, others = json_file.split('.').reverse | |
fixed_json_file = json_file.gsub("#{meta_filename}.#{meta_ext}", METADATA_JSON) | |
move_file(json_file, fixed_json_file) | |
json_file = fixed_json_file | |
end | |
json_file | |
end | |
# for cases like: | |
# 20210529_155539.jpg | |
# 20210529_155539(1).jpg | |
# 20210529_155539-editada.jpg | |
# 20210529_155539.jpg.supplemental-metadata.json | |
# 20210529_155539.jpg.supplemental-metadata(1).json | |
def fix_metadata_file_for_image(image_file) | |
# Create a metadata json for image "-editada" version | |
# image file: 20210529_155539-editada.jpg | |
# metadata file: 20210529_155539-editada.jpg.supplemental-metadata.json | |
if image_file.index("-editada") | |
original_file = image_file.gsub("-editada", "") | |
original_meta = "#{original_file}.#{METADATA_JSON}" | |
if File.exist?(original_meta) | |
edited_meta = "#{image_file}.#{METADATA_JSON}" | |
copy_file(original_meta, edited_meta) | |
end | |
end | |
# fix metadata filenames for sequencial images filenames | |
# image file: 20210529_155539(1).jpg | |
# wrong metadata: 20210529_155539.jpg.supplemental-metadata(1).json | |
# fixed metadata: 20210529_155539(1).jpg.supplemental-metadata.json | |
matched = filename_without_ext(image_file).match(/(?<num>\(\d+\)$)/) | |
if matched | |
num = matched[:num] | |
filename_without_num = filename(image_file).gsub(num, "") | |
dir = File.dirname(image_file) | |
wrong_json_file = File.join(dir, "#{filename_without_num}.supplemental-metadata#{num}.json") | |
fixed_json_file = File.join(dir, "#{filename(image_file)}.#{METADATA_JSON}") | |
if File.exist?(wrong_json_file) | |
if File.exist?(fixed_json_file) | |
errors << "Metadata file already exist: #{fixed_json_file}" | |
else | |
move_file(wrong_json_file, fixed_json_file) | |
end | |
else | |
errors << "Metadata file: #{wrong_json_file} not exist for image: #{image_file}" | |
end | |
end | |
image_file | |
end | |
def execute | |
reset! | |
all_files = Dir.glob(File.join(takeout_dir, "/**/*")) | |
puts "Total files found on #{takeout_dir}: #{all_files.size}" | |
years_files = all_files.select { |f| File.dirname(f).match?(/Photos\ from\ (\d+)$/) } | |
puts "Total photos from YYYY dirs found: #{years_files.size}" | |
image_files = years_files.select { |f| SUPPORTED_IMAGE_EXT.include?(File.extname(f).downcase) } | |
puts "Total supported photos formats found: #{image_files.size}" | |
json_files = years_files.select { |f| File.extname(f).downcase == '.json' } | |
puts "Total metadata files found: #{json_files.size}" | |
json_files = json_files.map do |json_file| | |
fix_divergent_metadata_filename(json_file) | |
end | |
image_files = image_files.map do |image_file| | |
fixed_metadata = fix_metadata_file_for_image(image_file) | |
generate_metadata_for_image_file(image_file) | |
fixed_metadata | |
end | |
if errors.size > 0 | |
puts "\nProcess finalized with #{errors.size} errors:" | |
errors.each_with_index do |error, index| | |
puts "[#{index+1}/#{errors.size}] #{error}" | |
end | |
end | |
if fixes.size > 0 | |
puts "\nProcess finalized with #{fixes.size} fixes:" | |
fixes.each_with_index do |fix, index| | |
puts "[#{index+1}/#{fixes.size}] #{fix}" | |
end | |
end | |
not_found = image_files.select do |img| | |
!File.exist?(metadata_file_for(img)) | |
end | |
if not_found.size > 0 | |
puts "\nMetadata not found for #{not_found.size} files:" | |
not_found.each_with_index do |file, index| | |
puts "[#{index+1}/#{not_found.size}] #{file}" | |
end | |
end | |
end | |
end | |
takeout_dir = ARGV[0] || raise("Usage: ruby fix_metadata.rb path/to/takeout/dir/") | |
fixer = GooglePhotosFixer.new(takeout_dir) | |
fixer.execute |
As well as Rodrigo's catch on it throwing an error when not required; I have found a situation where the wrong json is associated with the image.
This resulted in the 0710(1) jpg being associated with the 0710 jpg and removing the other file.
It appears that this is done in the "fix_divergent_metadata_filename" method stage before it can be caught by the "fix_metadata_file_for_image" method.
I am just playing with it at the moment, but I have switched the order around and it has fixed my scenario, I have not checked if it breaks anything else yet.
image_files = image_files.map do |image_file|
fixed_metadata = fix_metadata_file_for_image(image_file)
fixed_metadata
end
json_files = json_files.map do |json_file|
fix_divergent_metadata_filename(json_file)
end
image_files = image_files.map do |image_file|
generate_metadata_for_image_file(image_file)
end
This is the output with the correct association intended by the original scripts.
From the output, some troubleshooting still to do, but thought I would share what I have so far.
Hi Tetin-cph, your contribution helped me a lot too.
I had some files with (1) not previously correctly handled, and your code fix them.
I have another correction for the "Metadata not found " issue.
In your modified code, the use of map on image_files replaces all values in this array by an empty value. We can either add "image_file" at the end of the last map block or better replace it by a .each block code.
I suggest the latter :
image_files = image_files.map do |image_file|
fixed_metadata = fix_metadata_file_for_image(image_file)
fixed_metadata
end
json_files = json_files.map do |json_file|
fix_divergent_metadata_filename(json_file)
end
image_files.each do |image_file|
generate_metadata_for_image_file(image_file)
end
I hope there is not any side effects of this.
I add the complete file here :
require 'FileUtils'
require 'json'
class GooglePhotosFixer
METADATA_JSON = "supplemental-metadata.json"
SUPPORTED_IMAGE_EXT = %w(.jpg .jpeg .png .gif .webp .heic .mov .mp4 .3gp .avi .mkv .webm)
attr_reader :fixes, :errors, :takeout_dir
def initialize(takeout_dir)
@takeout_dir = takeout_dir
reset!
end
def reset!
@fixes = []
@errors = []
end
def filename(fullpath_filename)
File.basename(fullpath_filename)
end
def filename_without_ext(filename)
File.basename(filename).gsub(File.extname(filename), '')
end
def copy_file(origin, destination)
FileUtils.cp(origin, destination)
fixes << "#{filename(origin)} copied to #{filename(destination)}"
end
def move_file(origin, destination)
FileUtils.mv(origin, destination)
fixes << "#{filename(origin)} moved to #{filename(destination)}"
end
def delete_file(origin)
FileUtils.rm(origin)
end
def write_file(name, content)
File.open(name, 'w') do |f|
f.write(content)
end
fixes << "#{filename(name)} written"
end
# Returns the default expected metadata filename
# image_file: 20210529_155539.jpg
# return: 20210529_155539.jpg.supplemental-metadata.json
def metadata_file_for(image_file)
"#{image_file}.#{METADATA_JSON}"
end
# Try detect the timestamp from file name pattern
def infer_time_from_image_file(image_file)
# for 20210529_155539 patterns
filename = filename_without_ext(image_file)
tokens = filename.scan(/(\d{4})(\d{2})(\d{2})\_(\d{2})(\d{2})(\d{2})/).flatten
if tokens.compact == 6
return Time.new(*tokens)
end
# for CameraZOOM-20131224200623261 patterns
# for CameraZOOM-2013 12 24 20 06 23 261 patterns
tokens = filename.scan(/(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(\d{3})/).flatten
if tokens.compact == 7
return Time.new(*tokens)
end
# for DJI_20250308180700_0070_D patterns
tokens = filename.scan(/\_(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})\_/).flatten
if tokens.compact == 6
return Time.new(*tokens)
end
# for Photos from 2024/P01020304.jpg or 2024/IMG_123123.jpg pattern
tokens = image_file.scan(/Photos\ from\ (\d{4})\//).flatten
if tokens.compact == 1
return Time.new(*tokens)
end
return nil
end
# Fallback to generate a metadata filename based on filename pattern
# image file: 20210529_155539.jpg
# generated metadata: 20210529_155539.jpg.supplemental-metadata.json
# time on metadata: 2021-05-29 15:55:39
def generate_metadata_for_image_file(image_file)
metadata_filename = metadata_file_for(image_file)
return if File.exist?(metadata_filename)
filename = filename_without_ext(image_file)
if time = infer_time_from_image_file(image_file)
json_content = {
"title" => filename(image_file),
"description": "Metadata inferred from #{filename}",
"imageViews": "1",
"creationTime": {
"timestamp": time.to_i.to_s,
"formatted": time.to_s
},
"photoTakenTime": {
"timestamp": time.to_i.to_s,
"formatted": time.to_s
}
}
write_file(metadata_filename, content.to_json)
else
errors << "Unable to infer metadata for #{image_file}"
end
end
# normalize truncated json metadata filenames
# original: e471949f-d0b7-4f22-be33-225f556a92a4.jpg.suppl.json
# fixed: e471949f-d0b7-4f22-be33-225f556a92a4.jpg.supplemental-metadata.json
def fix_divergent_metadata_filename(json_file)
unless json_file.end_with?(METADATA_JSON)
meta_ext, meta_filename, img_ext, img_file, others = json_file.split('.').reverse
fixed_json_file = json_file.gsub("#{meta_filename}.#{meta_ext}", METADATA_JSON)
move_file(json_file, fixed_json_file)
json_file = fixed_json_file
end
json_file
end
# for cases like:
# 20210529_155539.jpg
# 20210529_155539(1).jpg
# 20210529_155539-editada.jpg
# 20210529_155539.jpg.supplemental-metadata.json
# 20210529_155539.jpg.supplemental-metadata(1).json
def fix_metadata_file_for_image(image_file)
# Create a metadata json for image "-editada" version
# image file: 20210529_155539-editada.jpg
# metadata file: 20210529_155539-editada.jpg.supplemental-metadata.json
if image_file.index("-editada")
original_file = image_file.gsub("-editada", "")
original_meta = "#{original_file}.#{METADATA_JSON}"
if File.exist?(original_meta)
edited_meta = "#{image_file}.#{METADATA_JSON}"
copy_file(original_meta, edited_meta)
end
end
# fix metadata filenames for sequencial images filenames
# image file: 20210529_155539(1).jpg
# wrong metadata: 20210529_155539.jpg.supplemental-metadata(1).json
# fixed metadata: 20210529_155539(1).jpg.supplemental-metadata.json
matched = filename_without_ext(image_file).match(/(?<num>\(\d+\)$)/)
if matched
num = matched[:num]
filename_without_num = filename(image_file).gsub(num, "")
dir = File.dirname(image_file)
wrong_json_file = File.join(dir, "#{filename_without_num}.supplemental-metadata#{num}.json")
fixed_json_file = File.join(dir, "#{filename(image_file)}.#{METADATA_JSON}")
# Contrib LeCollevillais : the script does no longer raise an error when the correct supplemental-metadata.json is present
# Example :
# image file: 20210529_155539(0).jpg
# The json file : 20210529_155539(0).jpgsupplemental-metadata.json is present
# => an error "Metadata file: #{wrong_json_file} not exist for image: #{image_file}" was raised
if !(File.exist?(fixed_json_file))
if File.exist?(wrong_json_file)
move_file(wrong_json_file, fixed_json_file)
else
errors << "Metadata file: neither expected file #{fixed_json_file} nor #{wrong_json_file} does exist for image: #{image_file}"
end
end
end
image_file
end
def execute
reset!
all_files = Dir.glob(File.join(takeout_dir, "/**/*"))
puts "Total files found on #{takeout_dir}: #{all_files.size}"
years_files = all_files.select { |f| File.dirname(f).match?(/Photos\ from\ (\d+)$/) }
puts "Total photos from YYYY dirs found: #{years_files.size}"
image_files = years_files.select { |f| SUPPORTED_IMAGE_EXT.include?(File.extname(f).downcase) }
puts "Total supported photos formats found: #{image_files.size}"
json_files = years_files.select { |f| File.extname(f).downcase == '.json' }
puts "Total metadata files found: #{json_files.size}"
# Contrib of tetin-cph on https://gist.github.com/rpanachi/aa8a18bf090b580d6c1c2d4e9c6f51c6
image_files = image_files.map do |image_file|
fixed_metadata = fix_metadata_file_for_image(image_file)
fixed_metadata
end
json_files = json_files.map do |json_file|
fix_divergent_metadata_filename(json_file)
end
# Contrib LeCollevillais : replace .map by .each
image_files.each do |image_file|
generate_metadata_for_image_file(image_file)
end
if errors.size > 0
puts "\nProcess finalized with #{errors.size} errors:"
errors.each_with_index do |error, index|
puts "[#{index+1}/#{errors.size}] #{error}"
end
end
if fixes.size > 0
puts "\nProcess finalized with #{fixes.size} fixes:"
fixes.each_with_index do |fix, index|
puts "[#{index+1}/#{fixes.size}] #{fix}"
end
end
not_found = image_files.select do |img|
!File.exist?(metadata_file_for(img))
end
if not_found.size > 0
puts "\nMetadata not found for #{not_found.size} files:"
not_found.each_with_index do |file, index|
puts "[#{index+1}/#{not_found.size}] #{file}"
end
end
end
end
takeout_dir = ARGV[0] || raise("Usage: ruby fix_metadata.rb path/to/takeout/dir/")
fixer = GooglePhotosFixer.new(takeout_dir)
fixer.execute
Hi LeCollevillais,
I worked on this a bit more after my posting, I found that because we would not iterate over the json files again, I needed to re-search the directory and reload the json list to ensure it was accurate.
After the listing of the file counts I did the following:
# reset all_files and json_files
all_files
json_files
image_files = image_files.map do |image_file|
fix_metadata_file_for_image(image_file)
end
# repopulate all_files and json_files after fix_metadata_file_for_image renames.
all_files = Dir.glob(File.join(takeout_dir, "/**/*"))
json_files = all_files.select { |f| File.extname(f).downcase == '.json' }
json_files = json_files.map do |json_file|
fix_divergent_metadata_filename(json_file)
end
image_files = image_files.map do |image_file|
generate_metadata_for_image_file(image_file)
end
This avoided some errors of it trying to work on json files that no longer existed, not sure if this fixed the same issue you described for using each instead.
To the end of the fix_metadata_file_for_image method I added the following code as well.
# Attempt fix for image file length too long for GD export
# Identify if the json exists but not already matched due to name truncation
# 00100lrPORTRAIT_00100_BURST20200414105542847_CO.jpg
# 00100lrPORTRAIT_00100_BURST20200414105542847_C.json
# 00000IMG_00000_BURST20200407202134858_COVER.jpg
# 00000IMG_00000_BURST20200407202134858_COVER.jp.json
# 00000PORTRAIT_00000_BURST20200427120733370.jpg
# 00000PORTRAIT_00000_BURST20200427120733370.jpg.json
image_file_name = filename(image_file)
if (image_file_name.size.between?(45,51))
dir = File.dirname(image_file)
common_file_name = image_file_name[0,46]
wrong_json_file = File.join(dir,"#{common_file_name}.json")
fixed_json_file = File.join(dir, "#{image_file_name}.#{METADATA_JSON}")
if !(File.exist?(fixed_json_file))
if File.exist?(wrong_json_file)
move_file(wrong_json_file, fixed_json_file)
else
errors << "Metadata file: neither expected file #{fixed_json_file} nor #{wrong_json_file} exists for image: #{image_file}"
end
end
end
I found that the google export has truncated the names of the files so the json file doesn't match exactly, this attempts to find a json file in this scenario. I don't know how accurate this example is for other's.
I was having issues with the not_found logic at the end with it printing the fixes array randomly, I removed it as it wasn't helping me further anyway.
Hi Rodrigo,
Many thanks for this work. Very very useful to me !!
I suggest an improvement for the sequence beginning at line 150. The code raises an unwanted error if the json file is present with the correct and expected format (x).jpg.supplemental-metadata.json.
I'm not familiar with github so I could not find how to pull a revision (no button for that?) or send you my modified file :/
Here is my suggestion beginning from line 162 :