rpanachi · March 17, 2025 00:17 · h0st1le · Apr 27, 2025
diff --git a/fix_metadata.rb b/fix_metadata.rb
 require 'fileutils'
 require 'json'

 # Repairs the JSON sidecar ("supplemental metadata") files found in the
 # "Photos from YYYY" directories of a Google Photos Takeout export so that
 # every media file ends up with a sidecar named
 # "<media file>.supplemental-metadata.json".
 #
 # Every change made is recorded as a message in #fixes and every problem in
 # #errors; #execute prints both lists.
 class GooglePhotosFixer
   METADATA_STEM = "supplemental-metadata".freeze
   METADATA_JSON = "#{METADATA_STEM}.json".freeze
   SUPPORTED_IMAGE_EXT = %w(.jpg .jpeg .png .gif .webp .heic .mov .mp4 .3gp .avi .mkv .webm).freeze

   # File name patterns a timestamp can be read from, tried in order.
   TIMESTAMP_PATTERNS = [
     # 20210529_155539
     /(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})/,
     # CameraZOOM-20131224200623261 (the last three digits are milliseconds)
     /(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(\d{3})/,
     # DJI_20250308180700_0070_D
     /_(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})_/
   ].freeze

   attr_reader :fixes, :errors, :takeout_dir

   # takeout_dir: root directory that is searched recursively by #execute.
   def initialize(takeout_dir)
     @takeout_dir = takeout_dir
     reset!
   end

   # Clears the recorded fixes and errors.
   def reset!
     @fixes = []
     @errors = []
   end

   # Returns the last path component of fullpath_filename.
   def filename(fullpath_filename)
     File.basename(fullpath_filename)
   end

   # Returns the base name without its final extension only
   # ("photo.jpg.jpg" => "photo.jpg").
   def filename_without_ext(filename)
     File.basename(filename, ".*")
   end

   # Copies origin to destination and records the fix.
   def copy_file(origin, destination)
     FileUtils.cp(origin, destination)
     fixes << "#{filename(origin)} copied to #{filename(destination)}"
   end

   # Moves origin to destination and records the fix.
   def move_file(origin, destination)
     FileUtils.mv(origin, destination)
     fixes << "#{filename(origin)} moved to #{filename(destination)}"
   end

   # Removes the file at origin.
   def delete_file(origin)
     FileUtils.rm(origin)
   end

   # Writes content to the file called name and records the fix.
   def write_file(name, content)
     File.write(name, content)
     fixes << "#{filename(name)} written"
   end

   # Returns the default expected metadata filename
   # image_file: 20210529_155539.jpg
   # return: 20210529_155539.jpg.supplemental-metadata.json
   def metadata_file_for(image_file)
     "#{image_file}.#{METADATA_JSON}"
   end

   # Tries to detect a timestamp from the file name (TIMESTAMP_PATTERNS) and,
   # failing that, from a "Photos from YYYY/" directory in the path.
   # Returns a Time in the local zone, or nil when nothing usable is found.
   def infer_time_from_image_file(image_file)
     name = filename_without_ext(image_file)

     TIMESTAMP_PATTERNS.each do |pattern|
       captures = name.match(pattern)&.captures
       time = captures && build_time(captures)
       return time if time
     end

     # for Photos from 2024/P01020304.jpg or 2024/IMG_123123.jpg pattern
     year = image_file[/Photos\ from\ (\d{4})\//, 1]
     year && build_time([year])
   end

   # Fallback to generate a metadata file based on the filename pattern
   # image file: 20210529_155539.jpg
   # generated metadata: 20210529_155539.jpg.supplemental-metadata.json
   # time on metadata: 2021-05-29 15:55:39
   # Does nothing when the metadata file already exists; records an error
   # when no time can be inferred.
   def generate_metadata_for_image_file(image_file)
     metadata_filename = metadata_file_for(image_file)
     return if File.exist?(metadata_filename)

     time = infer_time_from_image_file(image_file)
     if time
       stamp = { "timestamp" => time.to_i.to_s, "formatted" => time.to_s }
       json_content = {
         "title" => filename(image_file),
         "description" => "Metadata inferred from #{filename_without_ext(image_file)}",
         "imageViews" => "1",
         "creationTime" => stamp,
         "photoTakenTime" => stamp
       }
       write_file(metadata_filename, json_content.to_json)
     else
       errors << "Unable to infer metadata for #{image_file}"
     end
   end

   # Normalizes truncated json metadata filenames
   # original: e471949f-d0b7-4f22-be33-225f556a92a4.jpg.suppl.json
   # fixed: e471949f-d0b7-4f22-be33-225f556a92a4.jpg.supplemental-metadata.json
   #
   # Only the base name is rewritten, and only when the part before ".json"
   # is a truncation (prefix) of "supplemental-metadata"; a trailing "(n)"
   # counter is kept so #fix_metadata_file_for_image can pair it later.
   # Any other json file is left untouched, and an existing file is never
   # overwritten. Returns the path the metadata file has after the call.
   def fix_divergent_metadata_filename(json_file)
     return json_file if json_file.end_with?(METADATA_JSON)

     parts = filename(json_file).match(/\A(?<image>.+)\.(?<stem>[^.()]+)(?<num>\(\d+\))?\.json\z/i)
     return json_file unless parts && METADATA_STEM.start_with?(parts[:stem])

     fixed_json_file = replace_basename(json_file, "#{parts[:image]}.#{METADATA_STEM}#{parts[:num]}.json")
     return json_file if fixed_json_file == json_file

     if File.exist?(fixed_json_file)
       errors << "Metadata file already exist: #{fixed_json_file}"
       return json_file
     end

     move_file(json_file, fixed_json_file)
     fixed_json_file
   end

   # for cases like:
   # 20210529_155539.jpg
   # 20210529_155539(1).jpg
   # 20210529_155539-editada.jpg
   # 20210529_155539.jpg.supplemental-metadata.json
   # 20210529_155539.jpg.supplemental-metadata(1).json
   # Returns image_file unchanged.
   def fix_metadata_file_for_image(image_file)
     base = filename(image_file)

     # Create a metadata json for image "-editada" version
     # image file: 20210529_155539-editada.jpg
     # metadata file: 20210529_155539-editada.jpg.supplemental-metadata.json
     if base.include?("-editada")
       original_meta = replace_basename(image_file, "#{base.gsub("-editada", "")}.#{METADATA_JSON}")
       edited_meta = metadata_file_for(image_file)
       # never clobber a sidecar the edited image already has
       copy_file(original_meta, edited_meta) if File.exist?(original_meta) && !File.exist?(edited_meta)
     end

     # fix metadata filenames for sequential image filenames
     # image file: 20210529_155539(1).jpg
     # wrong metadata: 20210529_155539.jpg.supplemental-metadata(1).json
     # fixed metadata: 20210529_155539(1).jpg.supplemental-metadata.json
     matched = filename_without_ext(image_file).match(/(?<num>\(\d+\))\z/)
     if matched
       num = matched[:num]
       # drop only the trailing counter, not every "(n)" in the name
       filename_without_num = "#{matched.pre_match}#{File.extname(base)}"
       dir = File.dirname(image_file)

       wrong_json_file = File.join(dir, "#{filename_without_num}.#{METADATA_STEM}#{num}.json")
       fixed_json_file = File.join(dir, "#{base}.#{METADATA_JSON}")
       if File.exist?(wrong_json_file)
         if File.exist?(fixed_json_file)
           errors << "Metadata file already exist: #{fixed_json_file}"
         else
           move_file(wrong_json_file, fixed_json_file)
         end
       elsif !File.exist?(fixed_json_file)
         # only an error when the image has no sidecar at all (keeps re-runs quiet)
         errors << "Metadata file: #{wrong_json_file} not exist for image: #{image_file}"
       end
     end

     image_file
   end

   # Scans takeout_dir recursively, repairs the metadata of every supported
   # media file located in a "Photos from YYYY" directory and prints a report
   # of the errors, the fixes and the files still lacking metadata.
   def execute
     reset!

     all_files = Dir.glob(File.join(takeout_dir, "/**/*"))
     puts "Total files found on #{takeout_dir}: #{all_files.size}"

     years_files = all_files.select { |f| File.dirname(f).match?(/Photos\ from\ (\d+)$/) }
     puts "Total photos from YYYY dirs found: #{years_files.size}"

     image_files = years_files.select { |f| SUPPORTED_IMAGE_EXT.include?(File.extname(f).downcase) }
     puts "Total supported photos formats found: #{image_files.size}"

     json_files = years_files.select { |f| File.extname(f).downcase == '.json' }
     puts "Total metadata files found: #{json_files.size}"

     # Truncated sidecar names must be expanded before images are paired.
     json_files.each { |json_file| fix_divergent_metadata_filename(json_file) }

     image_files.each do |image_file|
       fix_metadata_file_for_image(image_file)
       generate_metadata_for_image_file(image_file)
     end

     print_list("Process finalized with #{errors.size} errors:", errors)
     print_list("Process finalized with #{fixes.size} fixes:", fixes)

     not_found = image_files.reject { |img| File.exist?(metadata_file_for(img)) }
     print_list("Metadata not found for #{not_found.size} files:", not_found)
   end

   private

   # Builds a local Time from digit strings ordered year, month, day, hour,
   # minute, second and optionally milliseconds. Returns nil when the digits
   # do not form a valid date (e.g. month 13).
   def build_time(tokens)
     parts = tokens.map(&:to_i)
     msec = parts[6]
     parts = parts.first(6)
     # a 7th positional argument of Time.new is the zone, so fold the
     # milliseconds into the seconds instead
     parts[5] += Rational(msec, 1000) if msec
     Time.new(*parts)
   rescue ArgumentError
     nil
   end

   # Returns path with its last component replaced by new_basename, leaving
   # the directory prefix exactly as given.
   def replace_basename(path, new_basename)
     path[0, path.length - File.basename(path).length] + new_basename
   end

   # Prints header followed by the numbered items; prints nothing when items
   # is empty.
   def print_list(header, items)
     return if items.empty?

     puts "\n#{header}"
     items.each_with_index do |item, index|
       puts "[#{index + 1}/#{items.size}] #{item}"
     end
   end
 end

 # Command-line entry point. Runs only when this file is executed directly,
 # so the class can be loaded from other code without side effects.
 # Prints the usage line and exits with a failure status when no directory
 # is given.
 if __FILE__ == $PROGRAM_NAME
   takeout_dir = ARGV[0] || abort("Usage: ruby fix_metadata.rb path/to/takeout/dir/")
   fixer = GooglePhotosFixer.new(takeout_dir)
   fixer.execute
 end
	require 'FileUtils'
	require 'json'

	# Repairs the JSON sidecar ("supplemental metadata") files found in the
	# "Photos from YYYY" directories of a Google Photos Takeout export so that
	# every media file ends up with a sidecar named
	# "<media file>.supplemental-metadata.json".
	#
	# Every change made is recorded as a message in #fixes and every problem in
	# #errors; #execute prints both lists.
	class GooglePhotosFixer
	  METADATA_STEM = "supplemental-metadata".freeze
	  METADATA_JSON = "#{METADATA_STEM}.json".freeze
	  SUPPORTED_IMAGE_EXT = %w(.jpg .jpeg .png .gif .webp .heic .mov .mp4 .3gp .avi .mkv .webm).freeze

	  # File name patterns a timestamp can be read from, tried in order.
	  TIMESTAMP_PATTERNS = [
	    # 20210529_155539
	    /(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})/,
	    # CameraZOOM-20131224200623261 (the last three digits are milliseconds)
	    /(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(\d{3})/,
	    # DJI_20250308180700_0070_D
	    /_(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})_/
	  ].freeze

	  attr_reader :fixes, :errors, :takeout_dir

	  # takeout_dir: root directory that is searched recursively by #execute.
	  def initialize(takeout_dir)
	    @takeout_dir = takeout_dir
	    reset!
	  end

	  # Clears the recorded fixes and errors.
	  def reset!
	    @fixes = []
	    @errors = []
	  end

	  # Returns the last path component of fullpath_filename.
	  def filename(fullpath_filename)
	    File.basename(fullpath_filename)
	  end

	  # Returns the base name without its final extension only
	  # ("photo.jpg.jpg" => "photo.jpg").
	  def filename_without_ext(filename)
	    File.basename(filename, ".*")
	  end

	  # Copies origin to destination and records the fix.
	  def copy_file(origin, destination)
	    FileUtils.cp(origin, destination)
	    fixes << "#{filename(origin)} copied to #{filename(destination)}"
	  end

	  # Moves origin to destination and records the fix.
	  def move_file(origin, destination)
	    FileUtils.mv(origin, destination)
	    fixes << "#{filename(origin)} moved to #{filename(destination)}"
	  end

	  # Removes the file at origin.
	  def delete_file(origin)
	    FileUtils.rm(origin)
	  end

	  # Writes content to the file called name and records the fix.
	  def write_file(name, content)
	    File.write(name, content)
	    fixes << "#{filename(name)} written"
	  end

	  # Returns the default expected metadata filename
	  # image_file: 20210529_155539.jpg
	  # return: 20210529_155539.jpg.supplemental-metadata.json
	  def metadata_file_for(image_file)
	    "#{image_file}.#{METADATA_JSON}"
	  end

	  # Tries to detect a timestamp from the file name (TIMESTAMP_PATTERNS) and,
	  # failing that, from a "Photos from YYYY/" directory in the path.
	  # Returns a Time in the local zone, or nil when nothing usable is found.
	  def infer_time_from_image_file(image_file)
	    name = filename_without_ext(image_file)

	    TIMESTAMP_PATTERNS.each do |pattern|
	      captures = name.match(pattern)&.captures
	      time = captures && build_time(captures)
	      return time if time
	    end

	    # for Photos from 2024/P01020304.jpg or 2024/IMG_123123.jpg pattern
	    year = image_file[/Photos\ from\ (\d{4})\//, 1]
	    year && build_time([year])
	  end

	  # Fallback to generate a metadata file based on the filename pattern
	  # image file: 20210529_155539.jpg
	  # generated metadata: 20210529_155539.jpg.supplemental-metadata.json
	  # time on metadata: 2021-05-29 15:55:39
	  # Does nothing when the metadata file already exists; records an error
	  # when no time can be inferred.
	  def generate_metadata_for_image_file(image_file)
	    metadata_filename = metadata_file_for(image_file)
	    return if File.exist?(metadata_filename)

	    time = infer_time_from_image_file(image_file)
	    if time
	      stamp = { "timestamp" => time.to_i.to_s, "formatted" => time.to_s }
	      json_content = {
	        "title" => filename(image_file),
	        "description" => "Metadata inferred from #{filename_without_ext(image_file)}",
	        "imageViews" => "1",
	        "creationTime" => stamp,
	        "photoTakenTime" => stamp
	      }
	      write_file(metadata_filename, json_content.to_json)
	    else
	      errors << "Unable to infer metadata for #{image_file}"
	    end
	  end

	  # Normalizes truncated json metadata filenames
	  # original: e471949f-d0b7-4f22-be33-225f556a92a4.jpg.suppl.json
	  # fixed: e471949f-d0b7-4f22-be33-225f556a92a4.jpg.supplemental-metadata.json
	  #
	  # Only the base name is rewritten, and only when the part before ".json"
	  # is a truncation (prefix) of "supplemental-metadata"; a trailing "(n)"
	  # counter is kept so #fix_metadata_file_for_image can pair it later.
	  # Any other json file is left untouched, and an existing file is never
	  # overwritten. Returns the path the metadata file has after the call.
	  def fix_divergent_metadata_filename(json_file)
	    return json_file if json_file.end_with?(METADATA_JSON)

	    parts = filename(json_file).match(/\A(?<image>.+)\.(?<stem>[^.()]+)(?<num>\(\d+\))?\.json\z/i)
	    return json_file unless parts && METADATA_STEM.start_with?(parts[:stem])

	    fixed_json_file = replace_basename(json_file, "#{parts[:image]}.#{METADATA_STEM}#{parts[:num]}.json")
	    return json_file if fixed_json_file == json_file

	    if File.exist?(fixed_json_file)
	      errors << "Metadata file already exist: #{fixed_json_file}"
	      return json_file
	    end

	    move_file(json_file, fixed_json_file)
	    fixed_json_file
	  end

	  # for cases like:
	  # 20210529_155539.jpg
	  # 20210529_155539(1).jpg
	  # 20210529_155539-editada.jpg
	  # 20210529_155539.jpg.supplemental-metadata.json
	  # 20210529_155539.jpg.supplemental-metadata(1).json
	  # Returns image_file unchanged.
	  def fix_metadata_file_for_image(image_file)
	    base = filename(image_file)

	    # Create a metadata json for image "-editada" version
	    # image file: 20210529_155539-editada.jpg
	    # metadata file: 20210529_155539-editada.jpg.supplemental-metadata.json
	    if base.include?("-editada")
	      original_meta = replace_basename(image_file, "#{base.gsub("-editada", "")}.#{METADATA_JSON}")
	      edited_meta = metadata_file_for(image_file)
	      # never clobber a sidecar the edited image already has
	      copy_file(original_meta, edited_meta) if File.exist?(original_meta) && !File.exist?(edited_meta)
	    end

	    # fix metadata filenames for sequential image filenames
	    # image file: 20210529_155539(1).jpg
	    # wrong metadata: 20210529_155539.jpg.supplemental-metadata(1).json
	    # fixed metadata: 20210529_155539(1).jpg.supplemental-metadata.json
	    matched = filename_without_ext(image_file).match(/(?<num>\(\d+\))\z/)
	    if matched
	      num = matched[:num]
	      # drop only the trailing counter, not every "(n)" in the name
	      filename_without_num = "#{matched.pre_match}#{File.extname(base)}"
	      dir = File.dirname(image_file)

	      wrong_json_file = File.join(dir, "#{filename_without_num}.#{METADATA_STEM}#{num}.json")
	      fixed_json_file = File.join(dir, "#{base}.#{METADATA_JSON}")
	      if File.exist?(wrong_json_file)
	        if File.exist?(fixed_json_file)
	          errors << "Metadata file already exist: #{fixed_json_file}"
	        else
	          move_file(wrong_json_file, fixed_json_file)
	        end
	      elsif !File.exist?(fixed_json_file)
	        # only an error when the image has no sidecar at all (keeps re-runs quiet)
	        errors << "Metadata file: #{wrong_json_file} not exist for image: #{image_file}"
	      end
	    end

	    image_file
	  end

	  # Scans takeout_dir recursively, repairs the metadata of every supported
	  # media file located in a "Photos from YYYY" directory and prints a report
	  # of the errors, the fixes and the files still lacking metadata.
	  def execute
	    reset!

	    all_files = Dir.glob(File.join(takeout_dir, "/**/*"))
	    puts "Total files found on #{takeout_dir}: #{all_files.size}"

	    years_files = all_files.select { |f| File.dirname(f).match?(/Photos\ from\ (\d+)$/) }
	    puts "Total photos from YYYY dirs found: #{years_files.size}"

	    image_files = years_files.select { |f| SUPPORTED_IMAGE_EXT.include?(File.extname(f).downcase) }
	    puts "Total supported photos formats found: #{image_files.size}"

	    json_files = years_files.select { |f| File.extname(f).downcase == '.json' }
	    puts "Total metadata files found: #{json_files.size}"

	    # Truncated sidecar names must be expanded before images are paired.
	    json_files.each { |json_file| fix_divergent_metadata_filename(json_file) }

	    image_files.each do |image_file|
	      fix_metadata_file_for_image(image_file)
	      generate_metadata_for_image_file(image_file)
	    end

	    print_list("Process finalized with #{errors.size} errors:", errors)
	    print_list("Process finalized with #{fixes.size} fixes:", fixes)

	    not_found = image_files.reject { |img| File.exist?(metadata_file_for(img)) }
	    print_list("Metadata not found for #{not_found.size} files:", not_found)
	  end

	  private

	  # Builds a local Time from digit strings ordered year, month, day, hour,
	  # minute, second and optionally milliseconds. Returns nil when the digits
	  # do not form a valid date (e.g. month 13).
	  def build_time(tokens)
	    parts = tokens.map(&:to_i)
	    msec = parts[6]
	    parts = parts.first(6)
	    # a 7th positional argument of Time.new is the zone, so fold the
	    # milliseconds into the seconds instead
	    parts[5] += Rational(msec, 1000) if msec
	    Time.new(*parts)
	  rescue ArgumentError
	    nil
	  end

	  # Returns path with its last component replaced by new_basename, leaving
	  # the directory prefix exactly as given.
	  def replace_basename(path, new_basename)
	    path[0, path.length - File.basename(path).length] + new_basename
	  end

	  # Prints header followed by the numbered items; prints nothing when items
	  # is empty.
	  def print_list(header, items)
	    return if items.empty?

	    puts "\n#{header}"
	    items.each_with_index do |item, index|
	      puts "[#{index + 1}/#{items.size}] #{item}"
	    end
	  end
	end

	# Command-line entry point. Runs only when this file is executed directly,
	# so the class can be loaded from other code without side effects.
	# Prints the usage line and exits with a failure status when no directory
	# is given.
	if __FILE__ == $PROGRAM_NAME
	  takeout_dir = ARGV[0] || abort("Usage: ruby fix_metadata.rb path/to/takeout/dir/")
	  fixer = GooglePhotosFixer.new(takeout_dir)
	  fixer.execute
	end