Skip to content

Instantly share code, notes, and snippets.

@rpanachi
Created March 17, 2025 00:17
Show Gist options
  • Save rpanachi/aa8a18bf090b580d6c1c2d4e9c6f51c6 to your computer and use it in GitHub Desktop.
Save rpanachi/aa8a18bf090b580d6c1c2d4e9c6f51c6 to your computer and use it in GitHub Desktop.
Google Photos metadata fixer script
require 'FileUtils'
require 'json'
# Repairs the JSON sidecar files of a Google Photos Takeout export so that
# every supported media file inside a "Photos from YYYY" directory ends up with
# a sidecar named "<media file>.supplemental-metadata.json".
#
# Relies on the `fileutils` and `json` standard libraries being loaded.
#
# Three kinds of repair are performed (see #execute):
#   1. sidecars whose name was truncated by the export are renamed;
#   2. sidecars of edited / numbered duplicates are copied or renamed so they
#      sit next to the media file they describe;
#   3. media files that still have no sidecar get one generated from the
#      timestamp embedded in their file name (or from the year directory).
#
# Every successful action is appended to #fixes and every problem to #errors.
class GooglePhotosFixer
  METADATA_JSON = "supplemental-metadata.json".freeze
  SUPPORTED_IMAGE_EXT = %w(.jpg .jpeg .png .gif .webp .heic .mov .mp4 .3gp .avi .mkv .webm).freeze

  # Sidecar name without the ".json" extension ("supplemental-metadata").
  METADATA_STEM = File.basename(METADATA_JSON, ".json").freeze

  # Suffixes appended to the file name of an edited copy of a photo
  # (Portuguese and English exports).
  EDITED_SUFFIXES = %w(-editada -edited).freeze

  # File name patterns carrying year, month, day, hour, minute and second, in
  # the order they are tried.
  TIME_PATTERNS = [
    # 20210529_155539
    /(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})/,
    # CameraZOOM-20131224200623261 (the trailing milliseconds are not captured)
    /(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})\d{3}/,
    # DJI_20250308180700_0070_D
    /_(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})_/
  ].freeze

  # Year taken from the containing directory, e.g. "Photos from 2024/IMG_1.jpg".
  YEAR_DIR_PATTERN = %r{Photos from (\d{4})/}

  # Shape of a truncated sidecar name: "<media>.<ext>.<stem>.json".
  TRUNCATED_METADATA_PATTERN = /\A(?<media>.+\.[^.]+)\.(?<stem>[^.]+)\.json\z/

  attr_reader :fixes, :errors, :takeout_dir

  # takeout_dir: root directory of the extracted Takeout export.
  def initialize(takeout_dir)
    @takeout_dir = takeout_dir
    reset!
  end

  # Clears the lists of fixes and errors collected so far.
  def reset!
    @fixes = []
    @errors = []
  end

  # Returns the last path component: "/a/b/c.jpg" -> "c.jpg".
  def filename(fullpath_filename)
    File.basename(fullpath_filename)
  end

  # Returns the last path component without its (final) extension:
  # "/a/b/c.jpg" -> "c", "c.jpg.jpg" -> "c.jpg".
  def filename_without_ext(filename)
    # Only the trailing extension is removed; occurrences of the same text
    # elsewhere in the name are left alone.
    File.basename(filename, File.extname(filename))
  end

  # Copies origin to destination and records the action in #fixes.
  def copy_file(origin, destination)
    FileUtils.cp(origin, destination)
    fixes << "#{filename(origin)} copied to #{filename(destination)}"
  end

  # Moves origin to destination and records the action in #fixes.
  def move_file(origin, destination)
    FileUtils.mv(origin, destination)
    fixes << "#{filename(origin)} moved to #{filename(destination)}"
  end

  # Removes the given file.
  def delete_file(origin)
    FileUtils.rm(origin)
  end

  # Writes content to the file called name and records the action in #fixes.
  def write_file(name, content)
    File.write(name, content)
    fixes << "#{filename(name)} written"
  end

  # Returns the default expected metadata filename
  # image_file: 20210529_155539.jpg
  # return: 20210529_155539.jpg.supplemental-metadata.json
  def metadata_file_for(image_file)
    "#{image_file}.#{METADATA_JSON}"
  end

  # Tries to detect the timestamp from the file name pattern, falling back to
  # the year of the enclosing "Photos from YYYY" directory.
  #
  # Returns a Time in the local time zone, or nil when nothing usable is found.
  def infer_time_from_image_file(image_file)
    stem = filename_without_ext(image_file)

    TIME_PATTERNS.each do |pattern|
      matched = stem.match(pattern)
      next unless matched

      # Digits that look like a timestamp but are not a valid date (month 99,
      # hour 25, ...) yield nil here, so the next pattern gets a chance.
      time = build_time(matched.captures)
      return time if time
    end

    year = image_file[YEAR_DIR_PATTERN, 1]
    year ? build_time([year]) : nil
  end

  # Fallback to generate a metadata file based on the file name pattern
  # image file: 20210529_155539.jpg
  # generated metadata: 20210529_155539.jpg.supplemental-metadata.json
  # time on metadata: 2021-05-29 15:55:39
  #
  # Does nothing when the metadata file already exists; records an error when
  # no timestamp can be inferred.
  def generate_metadata_for_image_file(image_file)
    metadata_filename = metadata_file_for(image_file)
    return if File.exist?(metadata_filename)

    time = infer_time_from_image_file(image_file)
    if time
      stamp = { "timestamp" => time.to_i.to_s, "formatted" => time.to_s }
      json_content = {
        "title" => filename(image_file),
        "description" => "Metadata inferred from #{filename_without_ext(image_file)}",
        "imageViews" => "1",
        "creationTime" => stamp,
        "photoTakenTime" => stamp
      }
      write_file(metadata_filename, json_content.to_json)
    else
      errors << "Unable to infer metadata for #{image_file}"
    end
  end

  # Normalizes truncated json metadata filenames
  # original: e471949f-d0b7-4f22-be33-225f556a92a4.jpg.suppl.json
  # fixed: e471949f-d0b7-4f22-be33-225f556a92a4.jpg.supplemental-metadata.json
  #
  # Only names of the form "<media>.<ext>.<prefix of supplemental-metadata>.json"
  # are renamed. Anything else (album-level "metadata.json", numbered sidecars
  # such as "x.jpg.supplemental-metadata(1).json", ...) is left untouched.
  #
  # Returns the path the metadata file has after the call.
  def fix_divergent_metadata_filename(json_file)
    return json_file if json_file.end_with?(METADATA_JSON)

    matched = filename(json_file).match(TRUNCATED_METADATA_PATTERN)
    return json_file unless matched && METADATA_STEM.start_with?(matched[:stem])

    fixed_json_file = sibling_path(json_file, "#{matched[:media]}.#{METADATA_JSON}")
    if File.exist?(fixed_json_file)
      # Never overwrite a sidecar that is already in place.
      errors << "Metadata file already exist: #{fixed_json_file}"
      return json_file
    end

    move_file(json_file, fixed_json_file)
    fixed_json_file
  end

  # for cases like:
  # 20210529_155539.jpg
  # 20210529_155539(1).jpg
  # 20210529_155539-editada.jpg
  # 20210529_155539.jpg.supplemental-metadata.json
  # 20210529_155539.jpg.supplemental-metadata(1).json
  #
  # Returns image_file unchanged.
  def fix_metadata_file_for_image(image_file)
    copy_metadata_for_edited_image(image_file)
    rename_metadata_for_numbered_image(image_file)
    image_file
  end

  # Scans takeout_dir, applies every repair to the files found inside
  # "Photos from YYYY" directories and prints a report to standard output.
  def execute
    reset!

    all_files = Dir.glob(File.join(takeout_dir, "/**/*"))
    puts "Total files found on #{takeout_dir}: #{all_files.size}"

    years_files = all_files.select { |f| File.dirname(f).match?(/Photos\ from\ (\d+)$/) }
    puts "Total photos from YYYY dirs found: #{years_files.size}"

    image_files = years_files.select { |f| SUPPORTED_IMAGE_EXT.include?(File.extname(f).downcase) }
    puts "Total supported photos formats found: #{image_files.size}"

    json_files = years_files.select { |f| File.extname(f).downcase == '.json' }
    puts "Total metadata files found: #{json_files.size}"

    # Truncated sidecars are normalized first so that the per-image steps below
    # can rely on the canonical names.
    json_files.each { |json_file| fix_divergent_metadata_filename(json_file) }

    image_files.each do |image_file|
      fix_metadata_file_for_image(image_file)
      generate_metadata_for_image_file(image_file)
    end

    print_report("Process finalized with #{errors.size} errors:", errors)
    print_report("Process finalized with #{fixes.size} fixes:", fixes)

    not_found = image_files.reject { |img| File.exist?(metadata_file_for(img)) }
    print_report("Metadata not found for #{not_found.size} files:", not_found)
  end

  private

  # Builds a local Time from an array of digit strings (year first).
  # Returns nil when the values do not form a valid date/time.
  def build_time(parts)
    Time.new(*parts.map { |part| Integer(part, 10) })
  rescue ArgumentError
    nil
  end

  # Returns path with its last component replaced by new_basename, keeping the
  # directory part exactly as it was given.
  def sibling_path(path, new_basename)
    path[0, path.length - File.basename(path).length] + new_basename
  end

  # Creates a metadata json for the edited version of an image by copying the
  # sidecar of the original
  # image file: 20210529_155539-editada.jpg
  # metadata file: 20210529_155539-editada.jpg.supplemental-metadata.json
  def copy_metadata_for_edited_image(image_file)
    base = filename(image_file)
    suffix = EDITED_SUFFIXES.find { |candidate| base.include?(candidate) }
    return unless suffix

    # The suffix is removed from the file name only, never from the directory.
    original_meta = metadata_file_for(sibling_path(image_file, base.sub(suffix, "")))
    edited_meta = metadata_file_for(image_file)
    return unless File.exist?(original_meta)
    return if File.exist?(edited_meta)

    copy_file(original_meta, edited_meta)
  end

  # Fixes metadata filenames for sequential image filenames
  # image file: 20210529_155539(1).jpg
  # wrong metadata: 20210529_155539.jpg.supplemental-metadata(1).json
  # fixed metadata: 20210529_155539(1).jpg.supplemental-metadata.json
  def rename_metadata_for_numbered_image(image_file)
    stem = filename_without_ext(image_file)
    num = stem[/\(\d+\)\z/]
    return unless num

    plain_name = "#{stem[0, stem.length - num.length]}#{File.extname(image_file)}"
    wrong_json_file = sibling_path(image_file, "#{plain_name}.#{METADATA_STEM}#{num}.json")
    fixed_json_file = metadata_file_for(image_file)

    if File.exist?(wrong_json_file)
      if File.exist?(fixed_json_file)
        errors << "Metadata file already exist: #{fixed_json_file}"
      else
        move_file(wrong_json_file, fixed_json_file)
      end
    elsif !File.exist?(fixed_json_file)
      # A sidecar that is already in place (e.g. on a second run) is not an error.
      errors << "Metadata file: #{wrong_json_file} not exist for image: #{image_file}"
    end
  end

  # Prints header followed by a numbered list of entries; prints nothing when
  # entries is empty.
  def print_report(header, entries)
    return if entries.empty?

    puts "\n#{header}"
    entries.each_with_index do |entry, index|
      puts "[#{index + 1}/#{entries.size}] #{entry}"
    end
  end
end
# Command-line entry point: the first argument is the Takeout directory to fix.
takeout_dir = ARGV.first
raise("Usage: ruby fix_metadata.rb path/to/takeout/dir/") unless takeout_dir

GooglePhotosFixer.new(takeout_dir).execute
@Tetin-cph
Copy link

Hi LeCollevillais,
I worked on this a bit more after my post. Because the script does not iterate over the JSON files a second time, I found that I needed to re-scan the directory and reload the JSON list to make sure it was accurate.

After the listing of the file counts I did the following:

# reset all_files and json_files
all_files
json_files

image_files = image_files.map do |image_file|
  fix_metadata_file_for_image(image_file)
end

# repopulate all_files and json_files after fix_metadata_file_for_image renames.
all_files = Dir.glob(File.join(takeout_dir, "/**/*"))
json_files = all_files.select { |f| File.extname(f).downcase == '.json' }

json_files = json_files.map do |json_file|
  fix_divergent_metadata_filename(json_file)
end

image_files = image_files.map do |image_file|
  generate_metadata_for_image_file(image_file)
end

This avoided some errors caused by the script trying to work on JSON files that no longer existed. I'm not sure whether it fixes the same issue you described when you suggested using each instead.

To the end of the fix_metadata_file_for_image method I added the following code as well.

# Attempt fix for image file length too long for GD export
# Identify if the json exists but not already matched due to name truncation
# 00100lrPORTRAIT_00100_BURST20200414105542847_CO.jpg
# 00100lrPORTRAIT_00100_BURST20200414105542847_C.json
# 00000IMG_00000_BURST20200407202134858_COVER.jpg
# 00000IMG_00000_BURST20200407202134858_COVER.jp.json
# 00000PORTRAIT_00000_BURST20200427120733370.jpg
# 00000PORTRAIT_00000_BURST20200427120733370.jpg.json

image_file_name = filename(image_file)

if (image_file_name.size.between?(45,51))

  dir = File.dirname(image_file)
  common_file_name = image_file_name[0,46]
  wrong_json_file = File.join(dir,"#{common_file_name}.json")
  fixed_json_file = File.join(dir, "#{image_file_name}.#{METADATA_JSON}")

  if !(File.exist?(fixed_json_file))
   if File.exist?(wrong_json_file)
     move_file(wrong_json_file, fixed_json_file)
   else
     errors << "Metadata file: neither expected file #{fixed_json_file} nor #{wrong_json_file} exists for image: #{image_file}"
   end
 end

end

I found that the Google export truncates the file names, so the JSON file name doesn't match the image name exactly; this code attempts to find the JSON file in that scenario. I don't know how well this example applies to other people's exports.

I was having issues with the not_found logic at the end: it seemed to print the fixes array at random. I removed it, as it wasn't helping me any further anyway.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment