Skip to content

Instantly share code, notes, and snippets.

@rpanachi
Created March 17, 2025 00:17
Show Gist options
  • Save rpanachi/aa8a18bf090b580d6c1c2d4e9c6f51c6 to your computer and use it in GitHub Desktop.
Save rpanachi/aa8a18bf090b580d6c1c2d4e9c6f51c6 to your computer and use it in GitHub Desktop.
Google Photos metadata fixer script
require 'fileutils'
require 'json'

# Repairs the metadata sidecar files of a Google Photos Takeout export so that
# every supported media file inside a "Photos from YYYY" directory ends up with
# a "<media file>.supplemental-metadata.json" file next to it.
#
# Three repairs are applied by #execute, in this order:
#   1. truncated sidecar names are expanded (x.jpg.suppl.json),
#   2. sidecars of edited ("-editada") and numbered ("(1)") images are
#      copied / renamed,
#   3. missing sidecars are generated from a timestamp found in the file name.
#
# Human-readable descriptions of what happened are collected in #fixes and
# #errors.
class GooglePhotosFixer
  METADATA_JSON = "supplemental-metadata.json"
  SUPPORTED_IMAGE_EXT = %w(.jpg .jpeg .png .gif .webp .heic .mov .mp4 .3gp .avi .mkv .webm).freeze

  # Marker found in the name of the edited copy of an image
  # (e.g. 20210529_155539-editada.jpg).
  EDITED_SUFFIX = "-editada"

  # Timestamp layouts looked for in a file name, tried in this order.
  FILENAME_TIME_PATTERNS = [
    /(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})/,        # 20210529_155539
    /(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(\d{3})/,  # CameraZOOM-20131224200623261
    /_(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})_/        # DJI_20250308180700_0070_D
  ].freeze

  # Year taken from the enclosing directory, e.g. "Photos from 2024/IMG_123123.jpg".
  YEAR_DIR_PATTERN = %r{Photos from (\d{4})/}

  attr_reader :fixes, :errors, :takeout_dir

  # takeout_dir: root directory of the extracted Takeout archive.
  def initialize(takeout_dir)
    @takeout_dir = takeout_dir
    reset!
  end

  # Clears the accumulated fix and error logs.
  def reset!
    @fixes = []
    @errors = []
  end

  # Returns the last path component: "/a/b/c.jpg" => "c.jpg".
  def filename(fullpath_filename)
    File.basename(fullpath_filename)
  end

  # Returns the last path component without its extension: "/a/b/c.jpg" => "c".
  # Only the trailing extension is removed ("a.jpg.jpg" => "a.jpg").
  def filename_without_ext(filename)
    File.basename(filename, File.extname(filename))
  end

  # Copies origin to destination and logs the fix.
  def copy_file(origin, destination)
    FileUtils.cp(origin, destination)
    fixes << "#{filename(origin)} copied to #{filename(destination)}"
  end

  # Moves origin to destination and logs the fix.
  def move_file(origin, destination)
    FileUtils.mv(origin, destination)
    fixes << "#{filename(origin)} moved to #{filename(destination)}"
  end

  # Removes the given file.
  def delete_file(origin)
    FileUtils.rm(origin)
  end

  # Writes content to the file called name (replacing it) and logs the fix.
  def write_file(name, content)
    File.open(name, 'w') do |f|
      f.write(content)
    end
    fixes << "#{filename(name)} written"
  end

  # Returns the default expected metadata filename
  # image_file: 20210529_155539.jpg
  # return: 20210529_155539.jpg.supplemental-metadata.json
  def metadata_file_for(image_file)
    "#{image_file}.#{METADATA_JSON}"
  end

  # Tries to detect when the media was taken from its file name, falling back
  # to the year of the enclosing "Photos from YYYY" directory.
  # Returns a Time in the local time zone, or nil when nothing usable is found.
  def infer_time_from_image_file(image_file)
    stem = filename_without_ext(image_file)

    FILENAME_TIME_PATTERNS.each do |pattern|
      # A name can hold several digit runs; use the first one that is a real date.
      stem.scan(pattern).each do |tokens|
        time = build_time(tokens)
        return time if time
      end
    end

    year = image_file[YEAR_DIR_PATTERN, 1]
    year ? build_time([year]) : nil
  end

  # Fallback to generate a metadata file based on the filename pattern
  # image file: 20210529_155539.jpg
  # generated metadata: 20210529_155539.jpg.supplemental-metadata.json
  # time on metadata: 2021-05-29 15:55:39
  # Does nothing when the metadata file already exists; logs an error when no
  # time can be inferred.
  def generate_metadata_for_image_file(image_file)
    metadata_filename = metadata_file_for(image_file)
    return if File.exist?(metadata_filename)

    time = infer_time_from_image_file(image_file)
    if time
      stamp = { "timestamp" => time.to_i.to_s, "formatted" => time.to_s }
      json_content = {
        "title" => filename(image_file),
        "description" => "Metadata inferred from #{filename_without_ext(image_file)}",
        "imageViews" => "1",
        "creationTime" => stamp,
        "photoTakenTime" => stamp
      }
      write_file(metadata_filename, json_content.to_json)
    else
      errors << "Unable to infer metadata for #{image_file}"
    end
  end

  # Normalizes truncated json metadata filenames
  # original: e471949f-d0b7-4f22-be33-225f556a92a4.jpg.suppl.json
  # fixed: e471949f-d0b7-4f22-be33-225f556a92a4.jpg.supplemental-metadata.json
  #
  # Returns the (possibly new) path. The file is left untouched when it no
  # longer exists, when its name is not a recognizable truncation (this includes
  # numbered "...supplemental-metadata(1).json" files, which belong to a numbered
  # image and are handled by #fix_metadata_file_for_image), or when the target
  # name is already taken.
  def fix_divergent_metadata_filename(json_file)
    return json_file if json_file.end_with?(METADATA_JSON)
    return json_file unless File.file?(json_file)

    image_name = image_name_from_divergent_metadata(filename(json_file))
    return json_file unless image_name

    fixed_json_file = File.join(File.dirname(json_file), metadata_file_for(image_name))
    if File.exist?(fixed_json_file)
      # FileUtils.mv would silently replace the existing metadata.
      errors << "Metadata file already exist: #{fixed_json_file}"
      return json_file
    end

    move_file(json_file, fixed_json_file)
    fixed_json_file
  end

  # for cases like:
  # 20210529_155539.jpg
  # 20210529_155539(1).jpg
  # 20210529_155539-editada.jpg
  # 20210529_155539.jpg.supplemental-metadata.json
  # 20210529_155539.jpg.supplemental-metadata(1).json
  #
  # Always returns image_file.
  def fix_metadata_file_for_image(image_file)
    copy_metadata_for_edited_image(image_file)
    rename_metadata_for_numbered_image(image_file)
    image_file
  end

  # Scans takeout_dir, applies every repair and prints a report to stdout.
  def execute
    reset!

    all_files = Dir.glob(File.join(takeout_dir, "**", "*"))
    puts "Total files found on #{takeout_dir}: #{all_files.size}"

    years_files = all_files.select { |f| File.file?(f) && File.dirname(f).match?(/Photos\ from\ (\d+)$/) }
    puts "Total photos from YYYY dirs found: #{years_files.size}"

    image_files = years_files.select { |f| SUPPORTED_IMAGE_EXT.include?(File.extname(f).downcase) }
    puts "Total supported photos formats found: #{image_files.size}"

    json_files = years_files.select { |f| File.extname(f).downcase == '.json' }
    puts "Total metadata files found: #{json_files.size}"

    json_files.each { |json_file| fix_divergent_metadata_filename(json_file) }

    image_files.each do |image_file|
      fix_metadata_file_for_image(image_file)
      generate_metadata_for_image_file(image_file)
    end

    print_list("Process finalized with #{errors.size} errors:", errors)
    print_list("Process finalized with #{fixes.size} fixes:", fixes)

    not_found = image_files.reject { |img| File.exist?(metadata_file_for(img)) }
    print_list("Metadata not found for #{not_found.size} files:", not_found)
  end

  private

  # Builds a local Time from [year, month, day, hour, min, sec, msec] strings
  # (trailing elements may be missing). Returns nil for impossible values.
  def build_time(tokens)
    parts = tokens.first(6).map(&:to_i)
    # The 7th positional argument of Time.new is the UTC offset, so the
    # milliseconds are folded into the seconds instead of being passed along.
    parts[5] += Rational(tokens[6].to_i, 1000) if tokens[6]
    Time.new(*parts)
  rescue ArgumentError
    nil
  end

  # Given the base name of a divergent metadata file, returns the image file
  # name it describes, or nil when the name is not a recognizable truncation.
  #   "x.jpg.suppl.json" => "x.jpg"
  #   "x.jpg.json"       => "x.jpg"
  #   "metadata.json"    => nil
  def image_name_from_divergent_metadata(json_name)
    stem = json_name.sub(/\.json\z/i, '')
    return nil if stem == json_name
    return stem if SUPPORTED_IMAGE_EXT.include?(File.extname(stem).downcase)

    token = File.extname(stem)
    truncated = token.size > 1 && ".#{File.basename(METADATA_JSON, '.json')}".start_with?(token)
    return nil unless truncated

    image_name = File.basename(stem, token)
    File.extname(image_name).empty? ? nil : image_name
  end

  # Create a metadata json for image "-editada" version
  # image file: 20210529_155539-editada.jpg
  # metadata file: 20210529_155539-editada.jpg.supplemental-metadata.json
  # An already existing metadata file is never overwritten.
  def copy_metadata_for_edited_image(image_file)
    name = filename(image_file)
    return unless name.include?(EDITED_SUFFIX)

    edited_meta = metadata_file_for(image_file)
    return if File.exist?(edited_meta)

    original_name = name.sub(EDITED_SUFFIX, "")
    original_meta = File.join(File.dirname(image_file), metadata_file_for(original_name))
    copy_file(original_meta, edited_meta) if File.exist?(original_meta)
  end

  # fix metadata filenames for sequential images filenames
  # image file: 20210529_155539(1).jpg
  # wrong metadata: 20210529_155539.jpg.supplemental-metadata(1).json
  # fixed metadata: 20210529_155539(1).jpg.supplemental-metadata.json
  def rename_metadata_for_numbered_image(image_file)
    matched = filename_without_ext(image_file).match(/(?<num>\(\d+\))\z/)
    return unless matched

    num = matched[:num]
    dir = File.dirname(image_file)
    filename_without_num = "#{matched.pre_match}#{File.extname(image_file)}"

    wrong_json_file = File.join(dir, "#{filename_without_num}.supplemental-metadata#{num}.json")
    fixed_json_file = File.join(dir, metadata_file_for(filename(image_file)))

    if File.exist?(wrong_json_file)
      if File.exist?(fixed_json_file)
        errors << "Metadata file already exist: #{fixed_json_file}"
      else
        move_file(wrong_json_file, fixed_json_file)
      end
    elsif !File.exist?(fixed_json_file)
      # Only an error when the correctly named metadata is missing as well.
      errors << "Metadata file: #{wrong_json_file} not exist for image: #{image_file}"
    end
  end

  # Prints header followed by the numbered items; prints nothing for an empty list.
  def print_list(header, items)
    return if items.empty?

    puts "\n#{header}"
    items.each_with_index do |item, index|
      puts "[#{index + 1}/#{items.size}] #{item}"
    end
  end
end
# Script entry point: the first command-line argument is the Takeout root.
takeout_dir = ARGV.first
raise("Usage: ruby fix_metadata.rb path/to/takeout/dir/") unless takeout_dir

GooglePhotosFixer.new(takeout_dir).execute
@h0st1le
Copy link

h0st1le commented Apr 27, 2025

Yo, thank you so much! You saved me a load of time. Cheers

@LeCollevillais
Copy link

Hi Rodrigo,
Many thanks for this work. Very very useful to me !!
I suggest an improvement for the sequence beginning at line 150. The code raises an unwanted error if the json file is present with the correct and expected format (x).jpg.supplemental-metadata.json.

I'm not familiar with github so I could not find how to pull a revision (no button for that?) or send you my modified file :/

Here is my suggestion beginning from line 162 :

# Fixed: the script no longer records an error when the correctly named
# supplemental-metadata.json is already present.
# Example:
#   image file: 20210529_155539(0).jpg
#   the json file 20210529_155539(0).jpg.supplemental-metadata.json is present
#   => the error "Metadata file: #{wrong_json_file} not exist for image: #{image_file}"
#      used to be recorded anyway.
# Fragment only: the final `end` closes a conditional opened before this snippet.
  if !(File.exist?(fixed_json_file))
      if File.exist?(wrong_json_file)
        move_file(wrong_json_file, fixed_json_file)
      else
        errors << "Metadata file: neither expected file #{fixed_json_file} nor #{wrong_json_file} does exist for image: #{image_file}"
      end
    end
  end

@Tetin-cph
Copy link

As well as Rodrigo's catch about it throwing an error when not required, I have found a situation where the wrong JSON file is associated with the image.

Original Takeout
image

This is the command output
image

This resulted in the 0710(1) jpg being associated with the 0710 jpg and removing the other file.
It appears that this is done in the "fix_divergent_metadata_filename" method stage before it can be caught by the "fix_metadata_file_for_image" method.

I am just playing with it at the moment, but I have switched the order around and it has fixed my scenario, I have not checked if it breaks anything else yet.

# 1) handle the metadata of edited / numbered images first
image_files = image_files.map { |path| fix_metadata_file_for_image(path) }

# 2) then normalize the divergent metadata file names
json_files = json_files.map { |path| fix_divergent_metadata_filename(path) }

# 3) finally generate metadata for images
# NOTE: after this line image_files holds the return values of
# generate_metadata_for_image_file, not the image paths.
image_files = image_files.map { |path| generate_metadata_for_image_file(path) }

This is the output with the correct association intended by the original scripts.

image

From the output, some troubleshooting still to do, but thought I would share what I have so far.

image

@LeCollevillais
Copy link

Hi Tetin-cph, your contribution helped me a lot too.
I had some files with (1) not previously correctly handled, and your code fix them.
I have another correction for the "Metadata not found " issue.
In your modified code, the use of map on image_files replaces all values in this array by an empty value. We can either add "image_file" at the end of the last map block or better replace it by a .each block code.

I suggest the latter :

    # 1) handle the metadata of edited / numbered images first
    image_files = image_files.map { |path| fix_metadata_file_for_image(path) }

    # 2) then normalize the divergent metadata file names
    json_files = json_files.map { |path| fix_divergent_metadata_filename(path) }

    # 3) finally generate metadata; `each` leaves image_files unchanged
    image_files.each { |path| generate_metadata_for_image_file(path) }

I hope there are no side effects from this.

I add the complete file here :

require 'fileutils'
require 'json'

# Repairs the metadata sidecar files of a Google Photos Takeout export so that
# every supported media file inside a "Photos from YYYY" directory ends up with
# a "<media file>.supplemental-metadata.json" file next to it.
#
# Three repairs are applied by #execute, in this order:
#   1. truncated sidecar names are expanded (x.jpg.suppl.json),
#   2. sidecars of edited ("-editada") and numbered ("(1)") images are
#      copied / renamed,
#   3. missing sidecars are generated from a timestamp found in the file name.
#
# Human-readable descriptions of what happened are collected in #fixes and
# #errors.
class GooglePhotosFixer
  METADATA_JSON = "supplemental-metadata.json"
  SUPPORTED_IMAGE_EXT = %w(.jpg .jpeg .png .gif .webp .heic .mov .mp4 .3gp .avi .mkv .webm).freeze

  # Marker found in the name of the edited copy of an image
  # (e.g. 20210529_155539-editada.jpg).
  EDITED_SUFFIX = "-editada"

  # Timestamp layouts looked for in a file name, tried in this order.
  FILENAME_TIME_PATTERNS = [
    /(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})/,        # 20210529_155539
    /(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(\d{3})/,  # CameraZOOM-20131224200623261
    /_(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})_/        # DJI_20250308180700_0070_D
  ].freeze

  # Year taken from the enclosing directory, e.g. "Photos from 2024/IMG_123123.jpg".
  YEAR_DIR_PATTERN = %r{Photos from (\d{4})/}

  attr_reader :fixes, :errors, :takeout_dir

  # takeout_dir: root directory of the extracted Takeout archive.
  def initialize(takeout_dir)
    @takeout_dir = takeout_dir
    reset!
  end

  # Clears the accumulated fix and error logs.
  def reset!
    @fixes = []
    @errors = []
  end

  # Returns the last path component: "/a/b/c.jpg" => "c.jpg".
  def filename(fullpath_filename)
    File.basename(fullpath_filename)
  end

  # Returns the last path component without its extension: "/a/b/c.jpg" => "c".
  # Only the trailing extension is removed ("a.jpg.jpg" => "a.jpg").
  def filename_without_ext(filename)
    File.basename(filename, File.extname(filename))
  end

  # Copies origin to destination and logs the fix.
  def copy_file(origin, destination)
    FileUtils.cp(origin, destination)
    fixes << "#{filename(origin)} copied to #{filename(destination)}"
  end

  # Moves origin to destination and logs the fix.
  def move_file(origin, destination)
    FileUtils.mv(origin, destination)
    fixes << "#{filename(origin)} moved to #{filename(destination)}"
  end

  # Removes the given file.
  def delete_file(origin)
    FileUtils.rm(origin)
  end

  # Writes content to the file called name (replacing it) and logs the fix.
  def write_file(name, content)
    File.open(name, 'w') do |f|
      f.write(content)
    end
    fixes << "#{filename(name)} written"
  end

  # Returns the default expected metadata filename
  # image_file: 20210529_155539.jpg
  # return: 20210529_155539.jpg.supplemental-metadata.json
  def metadata_file_for(image_file)
    "#{image_file}.#{METADATA_JSON}"
  end

  # Tries to detect when the media was taken from its file name, falling back
  # to the year of the enclosing "Photos from YYYY" directory.
  # Returns a Time in the local time zone, or nil when nothing usable is found.
  def infer_time_from_image_file(image_file)
    stem = filename_without_ext(image_file)

    FILENAME_TIME_PATTERNS.each do |pattern|
      # A name can hold several digit runs; use the first one that is a real date.
      stem.scan(pattern).each do |tokens|
        time = build_time(tokens)
        return time if time
      end
    end

    year = image_file[YEAR_DIR_PATTERN, 1]
    year ? build_time([year]) : nil
  end

  # Fallback to generate a metadata file based on the filename pattern
  # image file: 20210529_155539.jpg
  # generated metadata: 20210529_155539.jpg.supplemental-metadata.json
  # time on metadata: 2021-05-29 15:55:39
  # Does nothing when the metadata file already exists; logs an error when no
  # time can be inferred.
  def generate_metadata_for_image_file(image_file)
    metadata_filename = metadata_file_for(image_file)
    return if File.exist?(metadata_filename)

    time = infer_time_from_image_file(image_file)
    if time
      stamp = { "timestamp" => time.to_i.to_s, "formatted" => time.to_s }
      json_content = {
        "title" => filename(image_file),
        "description" => "Metadata inferred from #{filename_without_ext(image_file)}",
        "imageViews" => "1",
        "creationTime" => stamp,
        "photoTakenTime" => stamp
      }
      write_file(metadata_filename, json_content.to_json)
    else
      errors << "Unable to infer metadata for #{image_file}"
    end
  end

  # Normalizes truncated json metadata filenames
  # original: e471949f-d0b7-4f22-be33-225f556a92a4.jpg.suppl.json
  # fixed: e471949f-d0b7-4f22-be33-225f556a92a4.jpg.supplemental-metadata.json
  #
  # Returns the (possibly new) path. The file is left untouched when it no
  # longer exists (e.g. it was already renamed by another step), when its name
  # is not a recognizable truncation (this includes numbered
  # "...supplemental-metadata(1).json" files, which belong to a numbered image
  # and are handled by #fix_metadata_file_for_image), or when the target name is
  # already taken.
  def fix_divergent_metadata_filename(json_file)
    return json_file if json_file.end_with?(METADATA_JSON)
    return json_file unless File.file?(json_file)

    image_name = image_name_from_divergent_metadata(filename(json_file))
    return json_file unless image_name

    fixed_json_file = File.join(File.dirname(json_file), metadata_file_for(image_name))
    if File.exist?(fixed_json_file)
      # FileUtils.mv would silently replace the existing metadata.
      errors << "Metadata file already exist: #{fixed_json_file}"
      return json_file
    end

    move_file(json_file, fixed_json_file)
    fixed_json_file
  end

  # for cases like:
  # 20210529_155539.jpg
  # 20210529_155539(1).jpg
  # 20210529_155539-editada.jpg
  # 20210529_155539.jpg.supplemental-metadata.json
  # 20210529_155539.jpg.supplemental-metadata(1).json
  #
  # Always returns image_file.
  def fix_metadata_file_for_image(image_file)
    copy_metadata_for_edited_image(image_file)
    rename_metadata_for_numbered_image(image_file)
    image_file
  end

  # Scans takeout_dir, applies every repair and prints a report to stdout.
  def execute
    reset!

    all_files = Dir.glob(File.join(takeout_dir, "**", "*"))
    puts "Total files found on #{takeout_dir}: #{all_files.size}"

    years_files = all_files.select { |f| File.file?(f) && File.dirname(f).match?(/Photos\ from\ (\d+)$/) }
    puts "Total photos from YYYY dirs found: #{years_files.size}"

    image_files = years_files.select { |f| SUPPORTED_IMAGE_EXT.include?(File.extname(f).downcase) }
    puts "Total supported photos formats found: #{image_files.size}"

    json_files = years_files.select { |f| File.extname(f).downcase == '.json' }
    puts "Total metadata files found: #{json_files.size}"

    # Truncated names are expanded first. This is safe because
    # fix_divergent_metadata_filename ignores numbered "(N).json" metadata and
    # never overwrites, so numbered images keep their own metadata for the
    # next step (the wrong association reported by Tetin-cph on the gist).
    json_files.each { |json_file| fix_divergent_metadata_filename(json_file) }

    image_files.each { |image_file| fix_metadata_file_for_image(image_file) }

    # Contrib LeCollevillais: iterate with each so image_files keeps the paths.
    image_files.each { |image_file| generate_metadata_for_image_file(image_file) }

    print_list("Process finalized with #{errors.size} errors:", errors)
    print_list("Process finalized with #{fixes.size} fixes:", fixes)

    not_found = image_files.reject { |img| File.exist?(metadata_file_for(img)) }
    print_list("Metadata not found for #{not_found.size} files:", not_found)
  end

  private

  # Builds a local Time from [year, month, day, hour, min, sec, msec] strings
  # (trailing elements may be missing). Returns nil for impossible values.
  def build_time(tokens)
    parts = tokens.first(6).map(&:to_i)
    # The 7th positional argument of Time.new is the UTC offset, so the
    # milliseconds are folded into the seconds instead of being passed along.
    parts[5] += Rational(tokens[6].to_i, 1000) if tokens[6]
    Time.new(*parts)
  rescue ArgumentError
    nil
  end

  # Given the base name of a divergent metadata file, returns the image file
  # name it describes, or nil when the name is not a recognizable truncation.
  #   "x.jpg.suppl.json" => "x.jpg"
  #   "x.jpg.json"       => "x.jpg"
  #   "metadata.json"    => nil
  def image_name_from_divergent_metadata(json_name)
    stem = json_name.sub(/\.json\z/i, '')
    return nil if stem == json_name
    return stem if SUPPORTED_IMAGE_EXT.include?(File.extname(stem).downcase)

    token = File.extname(stem)
    truncated = token.size > 1 && ".#{File.basename(METADATA_JSON, '.json')}".start_with?(token)
    return nil unless truncated

    image_name = File.basename(stem, token)
    File.extname(image_name).empty? ? nil : image_name
  end

  # Create a metadata json for image "-editada" version
  # image file: 20210529_155539-editada.jpg
  # metadata file: 20210529_155539-editada.jpg.supplemental-metadata.json
  # An already existing metadata file is never overwritten.
  def copy_metadata_for_edited_image(image_file)
    name = filename(image_file)
    return unless name.include?(EDITED_SUFFIX)

    edited_meta = metadata_file_for(image_file)
    return if File.exist?(edited_meta)

    original_name = name.sub(EDITED_SUFFIX, "")
    original_meta = File.join(File.dirname(image_file), metadata_file_for(original_name))
    copy_file(original_meta, edited_meta) if File.exist?(original_meta)
  end

  # fix metadata filenames for sequential images filenames
  # image file: 20210529_155539(1).jpg
  # wrong metadata: 20210529_155539.jpg.supplemental-metadata(1).json
  # fixed metadata: 20210529_155539(1).jpg.supplemental-metadata.json
  #
  # Contrib LeCollevillais: nothing is reported when the correctly named
  # metadata file is already present.
  def rename_metadata_for_numbered_image(image_file)
    matched = filename_without_ext(image_file).match(/(?<num>\(\d+\))\z/)
    return unless matched

    num = matched[:num]
    dir = File.dirname(image_file)
    filename_without_num = "#{matched.pre_match}#{File.extname(image_file)}"

    wrong_json_file = File.join(dir, "#{filename_without_num}.supplemental-metadata#{num}.json")
    fixed_json_file = File.join(dir, metadata_file_for(filename(image_file)))
    return if File.exist?(fixed_json_file)

    if File.exist?(wrong_json_file)
      move_file(wrong_json_file, fixed_json_file)
    else
      errors << "Metadata file: neither expected file #{fixed_json_file} nor #{wrong_json_file} does exist for image: #{image_file}"
    end
  end

  # Prints header followed by the numbered items; prints nothing for an empty list.
  def print_list(header, items)
    return if items.empty?

    puts "\n#{header}"
    items.each_with_index do |item, index|
      puts "[#{index + 1}/#{items.size}] #{item}"
    end
  end
end

# Script entry point: the first command-line argument is the Takeout root.
takeout_dir = ARGV.first
raise("Usage: ruby fix_metadata.rb path/to/takeout/dir/") unless takeout_dir

GooglePhotosFixer.new(takeout_dir).execute

@Tetin-cph
Copy link

Hi LeCollevillais,
I worked on this a bit more after my posting, I found that because we would not iterate over the json files again, I needed to re-search the directory and reload the json list to ensure it was accurate.

After the listing of the file counts I did the following:

# reset all_files and json_files
all_files
json_files

# First pass: handle the metadata of edited / numbered images.
image_files = image_files.map { |path| fix_metadata_file_for_image(path) }

# The pass above may have renamed json files, so list the directory again
# instead of working on stale paths.
all_files = Dir.glob(File.join(takeout_dir, "/**/*"))
json_files = all_files.select { |path| File.extname(path).downcase == '.json' }

# Second pass: normalize the divergent metadata file names.
json_files = json_files.map { |path| fix_divergent_metadata_filename(path) }

# Third pass: generate metadata for images.
# NOTE: image_files now holds the return values of
# generate_metadata_for_image_file, not the image paths.
image_files = image_files.map { |path| generate_metadata_for_image_file(path) }

This avoided some errors caused by it trying to work on JSON files that no longer existed. I am not sure whether this fixes the same issue you described when using each instead.

To the end of the fix_metadata_file_for_image method I added the following code as well.

# Attempted fix for image file names that were too long for the export:
# the json exists but was not matched because its name was truncated, e.g.
#   00100lrPORTRAIT_00100_BURST20200414105542847_CO.jpg
#   00100lrPORTRAIT_00100_BURST20200414105542847_C.json
#   00000IMG_00000_BURST20200407202134858_COVER.jpg
#   00000IMG_00000_BURST20200407202134858_COVER.jp.json
#   00000PORTRAIT_00000_BURST20200427120733370.jpg
#   00000PORTRAIT_00000_BURST20200427120733370.jpg.json

image_file_name = filename(image_file)

# Only image names of 45 to 51 characters are considered.
if image_file_name.size.between?(45, 51)
  dir = File.dirname(image_file)
  # The candidate json name is the first 46 characters of the image name.
  common_file_name = image_file_name[0, 46]
  wrong_json_file = File.join(dir, "#{common_file_name}.json")
  fixed_json_file = File.join(dir, "#{image_file_name}.#{METADATA_JSON}")

  unless File.exist?(fixed_json_file)
    if File.exist?(wrong_json_file)
      move_file(wrong_json_file, fixed_json_file)
    else
      errors << "Metadata file: neither expected file #{fixed_json_file} nor #{wrong_json_file} exists for image: #{image_file}"
    end
  end
end

I found that the Google export truncates the file names, so the JSON file name doesn't match exactly; this attempts to find the JSON file in that scenario. I don't know how well this example applies to others.

I was having issues with the not_found logic at the end with it printing the fixes array randomly, I removed it as it wasn't helping me further anyway.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment