Skip to content

Instantly share code, notes, and snippets.

@rpanachi
Created March 17, 2025 00:17
Show Gist options
  • Save rpanachi/aa8a18bf090b580d6c1c2d4e9c6f51c6 to your computer and use it in GitHub Desktop.
Save rpanachi/aa8a18bf090b580d6c1c2d4e9c6f51c6 to your computer and use it in GitHub Desktop.
Google Photos metadata fixer script
require 'fileutils'
require 'json'
# Repairs the JSON sidecar metadata of a Google Photos Takeout export so that
# every supported media file inside a "Photos from YYYY" directory ends up with
# a sidecar named "<media file>.supplemental-metadata.json".
#
# Every action taken is appended to #fixes and every problem to #errors; both
# are printed by #execute.
class GooglePhotosFixer
  # Suffix appended to a media file name to obtain its sidecar file name.
  METADATA_JSON = "supplemental-metadata.json".freeze
  # METADATA_JSON without its ".json" extension.
  METADATA_STEM = File.basename(METADATA_JSON, ".json").freeze
  # Lower-case extensions (with leading dot) of the media files to process.
  SUPPORTED_IMAGE_EXT = %w(.jpg .jpeg .png .gif .webp .heic .mov .mp4 .3gp .avi .mkv .webm).freeze
  # Markers found in the file name of an edited copy of a photo.
  EDITED_MARKERS = %w(-editada -edited).freeze
  # Timestamp layouts looked for in a file name, tried in this order:
  #   20210529_155539               (date_time)
  #   CameraZOOM-20131224200623261  (date, time and milliseconds)
  #   DJI_20250308180700_0070_D     (date and time between underscores)
  FILENAME_TIME_PATTERNS = [
    /(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})/,
    /(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(\d{3})/,
    /_(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})_/
  ].freeze
  # Last resort: the year of the "Photos from YYYY/" directory in the path.
  YEAR_DIR_PATTERN = %r{Photos from (\d{4})/}

  attr_reader :fixes, :errors, :takeout_dir

  # takeout_dir: root directory of the extracted Takeout archive.
  def initialize(takeout_dir)
    @takeout_dir = takeout_dir
    reset!
  end

  # Clears the recorded fixes and errors.
  def reset!
    @fixes = []
    @errors = []
  end

  # Returns the last path component of fullpath_filename.
  def filename(fullpath_filename)
    File.basename(fullpath_filename)
  end

  # Returns the base name with only its final extension removed
  # ("dir/a.jpg.jpg" => "a.jpg").
  def filename_without_ext(filename)
    File.basename(filename, File.extname(filename))
  end

  # Copies origin to destination and records the action in #fixes.
  def copy_file(origin, destination)
    FileUtils.cp(origin, destination)
    fixes << "#{filename(origin)} copied to #{filename(destination)}"
  end

  # Moves origin to destination and records the action in #fixes.
  def move_file(origin, destination)
    FileUtils.mv(origin, destination)
    fixes << "#{filename(origin)} moved to #{filename(destination)}"
  end

  # Removes the file at origin.
  def delete_file(origin)
    FileUtils.rm(origin)
  end

  # Writes content to the file called name and records the action in #fixes.
  def write_file(name, content)
    File.write(name, content)
    fixes << "#{filename(name)} written"
  end

  # Returns the default expected metadata filename
  # image_file: 20210529_155539.jpg
  # return: 20210529_155539.jpg.supplemental-metadata.json
  def metadata_file_for(image_file)
    "#{image_file}.#{METADATA_JSON}"
  end

  # Tries to detect when the media was taken from its file name and, failing
  # that, from the "Photos from YYYY" directory in its path.
  # Returns a Time in the local time zone, or nil when nothing usable is found.
  def infer_time_from_image_file(image_file)
    name = filename_without_ext(image_file)
    FILENAME_TIME_PATTERNS.each do |pattern|
      time = build_time(name.match(pattern))
      return time if time
    end
    build_time(image_file.match(YEAR_DIR_PATTERN))
  end

  # Fallback that generates a sidecar from the file name pattern when the
  # image has none.
  # image file: 20210529_155539.jpg
  # generated metadata: 20210529_155539.jpg.supplemental-metadata.json
  # time on metadata: 2021-05-29 15:55:39
  # Records an error when no time can be inferred.
  def generate_metadata_for_image_file(image_file)
    metadata_filename = metadata_file_for(image_file)
    return if File.exist?(metadata_filename)

    if (time = infer_time_from_image_file(image_file))
      taken_at = { "timestamp" => time.to_i.to_s, "formatted" => time.to_s }
      json_content = {
        "title" => filename(image_file),
        "description" => "Metadata inferred from #{filename_without_ext(image_file)}",
        "imageViews" => "1",
        "creationTime" => taken_at,
        "photoTakenTime" => taken_at
      }
      write_file(metadata_filename, json_content.to_json)
    else
      errors << "Unable to infer metadata for #{image_file}"
    end
  end

  # Normalizes truncated json metadata filenames.
  # original: e471949f-d0b7-4f22-be33-225f556a92a4.jpg.suppl.json
  # fixed: e471949f-d0b7-4f22-be33-225f556a92a4.jpg.supplemental-metadata.json
  #
  # A trailing "(n)" on the metadata part is kept, so a numbered sidecar is
  # never renamed on top of the sidecar of the un-numbered image. Files that
  # do not look like a (truncated) sidecar are left untouched, and an existing
  # target is never overwritten.
  # Returns the path the sidecar has after the call.
  def fix_divergent_metadata_filename(json_file)
    return json_file if json_file.end_with?(METADATA_JSON)

    base = filename(json_file)
    return json_file unless File.extname(base).downcase == ".json"

    # Work on the base name only, so dots in directory names cannot interfere.
    parts = base.split(".")
    return json_file if parts.size < 3

    meta_part = parts[-2]
    if SUPPORTED_IMAGE_EXT.include?(".#{meta_part.downcase}")
      # "<image>.<ext>.json": the metadata part is missing entirely.
      image_name = parts[0..-2].join(".")
      num = ""
    else
      num = meta_part[/\(\d+\)\z/].to_s
      truncated = meta_part[0, meta_part.size - num.size]
      # A truncated name is always a prefix of the full one.
      return json_file unless METADATA_STEM.start_with?(truncated)

      image_name = parts[0..-3].join(".")
    end

    fixed_json_file = json_file[0...-base.size] + "#{image_name}.#{numbered_metadata_name(num)}"
    return json_file if fixed_json_file == json_file

    if File.exist?(fixed_json_file)
      errors << "Metadata file already exist: #{fixed_json_file}"
      return json_file
    end

    move_file(json_file, fixed_json_file)
    fixed_json_file
  end

  # Makes sure edited and numbered copies of an image get their own sidecar.
  # for cases like:
  # 20210529_155539.jpg
  # 20210529_155539(1).jpg
  # 20210529_155539-editada.jpg
  # 20210529_155539.jpg.supplemental-metadata.json
  # 20210529_155539.jpg.supplemental-metadata(1).json
  # Returns image_file.
  def fix_metadata_file_for_image(image_file)
    dir = File.dirname(image_file)
    base = filename(image_file)

    # Create a metadata json for the edited version of an image
    # image file: 20210529_155539-editada.jpg
    # metadata file: 20210529_155539-editada.jpg.supplemental-metadata.json
    marker = EDITED_MARKERS.find { |candidate| base.include?(candidate) }
    if marker
      original_meta = File.join(dir, metadata_file_for(base.sub(marker, "")))
      edited_meta = metadata_file_for(image_file)
      # Never clobber a sidecar the edited copy already has.
      copy_file(original_meta, edited_meta) if File.exist?(original_meta) && !File.exist?(edited_meta)
    end

    # Fix metadata filenames for sequential image filenames
    # image file: 20210529_155539(1).jpg
    # wrong metadata: 20210529_155539.jpg.supplemental-metadata(1).json
    # fixed metadata: 20210529_155539(1).jpg.supplemental-metadata.json
    stem = filename_without_ext(image_file)
    num = stem[/\(\d+\)\z/]
    if num
      # Strip only the trailing "(n)", not every occurrence in the name.
      filename_without_num = "#{stem[0...-num.size]}#{File.extname(image_file)}"
      wrong_json_file = File.join(dir, "#{filename_without_num}.#{numbered_metadata_name(num)}")
      fixed_json_file = File.join(dir, metadata_file_for(base))
      if File.exist?(wrong_json_file)
        if File.exist?(fixed_json_file)
          errors << "Metadata file already exist: #{fixed_json_file}"
        else
          move_file(wrong_json_file, fixed_json_file)
        end
      elsif !File.exist?(fixed_json_file)
        # Only a problem when the image has no sidecar at all.
        errors << "Metadata file: #{wrong_json_file} not exist for image: #{image_file}"
      end
    end

    image_file
  end

  # Scans takeout_dir, repairs the sidecars of every supported media file in
  # the "Photos from YYYY" directories and prints a report of the errors, the
  # fixes and the files still lacking metadata.
  def execute
    reset!

    all_files = Dir.glob(File.join(takeout_dir, "/**/*"))
    puts "Total files found on #{takeout_dir}: #{all_files.size}"

    years_files = all_files.select { |f| File.dirname(f).match?(/Photos\ from\ (\d+)$/) }
    puts "Total photos from YYYY dirs found: #{years_files.size}"

    image_files = years_files.select { |f| SUPPORTED_IMAGE_EXT.include?(File.extname(f).downcase) }
    puts "Total supported photos formats found: #{image_files.size}"

    json_files = years_files.select { |f| File.extname(f).downcase == '.json' }
    puts "Total metadata files found: #{json_files.size}"

    json_files.each { |json_file| fix_divergent_metadata_filename(json_file) }

    # Sidecars are matched/moved first; generation is only the fallback.
    image_files.each do |image_file|
      fix_metadata_file_for_image(image_file)
      generate_metadata_for_image_file(image_file)
    end

    print_report("\nProcess finalized with #{errors.size} errors:", errors)
    print_report("\nProcess finalized with #{fixes.size} fixes:", fixes)

    not_found = image_files.reject { |img| File.exist?(metadata_file_for(img)) }
    print_report("\nMetadata not found for #{not_found.size} files:", not_found)
  end

  private

  # Builds a local Time from the captures of match (year, then optionally
  # month, day, hour, minute, second and milliseconds).
  # Returns nil when match is nil or the captures are not a valid date.
  def build_time(match)
    return nil unless match

    year, month, day, hour, min, sec, millis = match.captures.map(&:to_i)
    # The 7th positional argument of Time.new is the zone, so milliseconds
    # have to be folded into the seconds instead.
    sec += Rational(millis, 1000) if millis
    Time.new(*[year, month, day, hour, min, sec].compact)
  rescue ArgumentError
    nil
  end

  # Returns the sidecar suffix carrying the sequence marker num, e.g.
  # "(1)" => "supplemental-metadata(1).json" and "" => METADATA_JSON.
  def numbered_metadata_name(num)
    "#{METADATA_STEM}#{num}.json"
  end

  # Prints header followed by the numbered items; prints nothing when items
  # is empty.
  def print_report(header, items)
    return if items.empty?

    puts header
    items.each_with_index do |item, index|
      puts "[#{index + 1}/#{items.size}] #{item}"
    end
  end
end
# Command-line entry point. Runs only when this file is executed directly, so
# loading the file (for example from a test) does not start a run.
# A missing argument prints the usage line and exits with a failure status.
if $PROGRAM_NAME == __FILE__
  takeout_dir = ARGV[0] || abort("Usage: ruby fix_metadata.rb path/to/takeout/dir/")
  fixer = GooglePhotosFixer.new(takeout_dir)
  fixer.execute
end
@h0st1le
Copy link

h0st1le commented Apr 27, 2025

Yo, thank you so much! You saved me a load of time. Cheers

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment