Skip to content

Instantly share code, notes, and snippets.

@shakir915
Created June 14, 2025 12:50
Show Gist options
  • Save shakir915/7c2d06e43afddea1a720daa7e884d286 to your computer and use it in GitHub Desktop.
Save shakir915/7c2d06e43afddea1a720daa7e884d286 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import os
import zipfile
import sqlite3
import tempfile
from pathlib import Path
import sys
def is_valid_sqlite(file_path):
"""Check if a file is a valid SQLite database"""
try:
# Test 1: Try to connect to the database
conn = sqlite3.connect(file_path)
cursor = conn.cursor()
# Test 2: Check if it has the SQLite file signature
# SQLite files start with "SQLite format 3\000"
with open(file_path, 'rb') as f:
header = f.read(16)
if not header.startswith(b'SQLite format 3'):
conn.close()
return False
# Test 3: Try to read the schema (tables structure)
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
tables = cursor.fetchall()
# Test 4: Try to get database info
cursor.execute("PRAGMA integrity_check;")
integrity_result = cursor.fetchone()
# Test 5: Check if database is not corrupted
if integrity_result and integrity_result[0] != 'ok':
conn.close()
return False
conn.close()
return True
except sqlite3.DatabaseError:
return False
except Exception:
return False
def validate_zip_file(zip_path):
"""Validate a single zip file and its contents"""
zip_name = Path(zip_path).stem # Get filename without extension
results = {
'zip_path': zip_path,
'zip_name': zip_name,
'is_valid_zip': False,
'contains_sqlite': False,
'sqlite_name_matches': False,
'is_valid_sqlite': False,
'errors': []
}
try:
# Check if zip file is valid
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
# Test the zip file integrity
zip_ref.testzip()
results['is_valid_zip'] = True
# Get list of files in zip, excluding macOS and system files
all_files = zip_ref.namelist()
file_list = [f for f in all_files if not should_exclude_file(os.path.basename(f)) and not f.startswith('__MACOSX/')]
# Look for SQLite file with matching name
# Handle cases where zip name already includes .sqlite extension
if zip_name.endswith('.sqlite'):
expected_sqlite_name = zip_name # zip name already has .sqlite
base_name = zip_name[:-7] # Remove .sqlite extension for comparison
else:
expected_sqlite_name = f"{zip_name}.sqlite"
base_name = zip_name
# Check if the expected SQLite file exists
if expected_sqlite_name in file_list:
results['contains_sqlite'] = True
results['sqlite_name_matches'] = True
# Extract SQLite file to temporary location and validate
with tempfile.TemporaryDirectory() as temp_dir:
sqlite_path = zip_ref.extract(expected_sqlite_name, temp_dir)
if is_valid_sqlite(sqlite_path):
results['is_valid_sqlite'] = True
else:
results['errors'].append(f"SQLite file {expected_sqlite_name} is not a valid database")
else:
# Check if there's any .sqlite file (excluding system files)
sqlite_files = [f for f in file_list if f.endswith('.sqlite')]
if sqlite_files:
results['contains_sqlite'] = True
# Check if any sqlite file matches the zip name pattern
matching_sqlite = None
for sqlite_file in sqlite_files:
sqlite_basename = os.path.splitext(sqlite_file)[0] # Remove .sqlite extension
# Check both patterns: exact match or base name match
if sqlite_basename == base_name or sqlite_file == expected_sqlite_name:
matching_sqlite = sqlite_file
results['sqlite_name_matches'] = True
break
if matching_sqlite:
# Extract and validate the matching SQLite file
with tempfile.TemporaryDirectory() as temp_dir:
sqlite_path = zip_ref.extract(matching_sqlite, temp_dir)
if is_valid_sqlite(sqlite_path):
results['is_valid_sqlite'] = True
else:
results['errors'].append(f"SQLite file {matching_sqlite} is not a valid database")
else:
results['errors'].append(f"Found SQLite file(s) {sqlite_files} but expected {expected_sqlite_name}")
else:
results['errors'].append("No SQLite file found in zip")
except zipfile.BadZipFile:
results['errors'].append("Invalid or corrupted zip file")
except Exception as e:
results['errors'].append(f"Error processing zip: {str(e)}")
return results
def should_exclude_file(file_name):
"""Check if a file should be excluded (hidden files, OS files, etc.)"""
# Hidden files (starting with .)
if file_name.startswith('.'):
return True
# Common OS and system files to exclude
excluded_files = {
'Thumbs.db', # Windows thumbnail cache
'desktop.ini', # Windows folder settings
'.DS_Store', # macOS folder settings
'__MACOSX', # macOS archive metadata
'System Volume Information', # Windows system folder
'$RECYCLE.BIN', # Windows recycle bin
'pagefile.sys', # Windows page file
'hiberfil.sys', # Windows hibernation file
'swapfile.sys', # Windows swap file
}
# Case-insensitive check for excluded files
if file_name in excluded_files or file_name.lower() in [f.lower() for f in excluded_files]:
return True
# Temporary files
if file_name.startswith('~') or file_name.endswith('.tmp') or file_name.endswith('.temp'):
return True
# macOS resource forks and metadata
if file_name.startswith('._'):
return True
return False
def validate_directory(directory_path):
"""Validate all zip files in a directory"""
if not os.path.exists(directory_path):
print(f"Error: Directory '{directory_path}' does not exist!")
return
if not os.path.isdir(directory_path):
print(f"Error: '{directory_path}' is not a directory!")
return
# Find all zip files, excluding hidden and OS files
zip_files = []
excluded_count = 0
for file_name in os.listdir(directory_path):
# Skip hidden files and OS files
if should_exclude_file(file_name):
excluded_count += 1
continue
if file_name.lower().endswith('.zip'):
zip_files.append(os.path.join(directory_path, file_name))
if not zip_files:
print(f"No zip files found in '{directory_path}'")
if excluded_count > 0:
print(f"(Excluded {excluded_count} hidden/system file(s))")
return
print(f"Found {len(zip_files)} zip file(s) to validate...")
if excluded_count > 0:
print(f"(Excluded {excluded_count} hidden/system file(s))")
print("=" * 80)
valid_count = 0
invalid_count = 0
invalid_files = []
# Scan all files with progress indicator
for i, zip_path in enumerate(sorted(zip_files), 1):
# Update progress on same line
print(f"\rπŸ” Processing: {i}/{len(zip_files)} files...", end='', flush=True)
result = validate_zip_file(zip_path)
# Check if file is completely valid
is_completely_valid = (result['is_valid_zip'] and
result['contains_sqlite'] and
result['sqlite_name_matches'] and
result['is_valid_sqlite'])
if is_completely_valid:
valid_count += 1
else:
invalid_count += 1
invalid_files.append(result)
# Clear progress line and show results
print(f"\rπŸ” Completed: {len(zip_files)} files processed" + " " * 20)
# Show invalid files if any
if invalid_files:
print("\n" + "=" * 40)
for result in invalid_files:
print(f"❌ INVALID | {result['zip_name']}.zip")
if not result['is_valid_zip']:
print(f" βœ— Invalid zip file")
if not result['contains_sqlite']:
print(f" βœ— No SQLite file found")
if not result['sqlite_name_matches']:
print(f" βœ— SQLite filename doesn't match")
if not result['is_valid_sqlite']:
print(f" βœ— Invalid SQLite database")
if result['errors']:
for error in result['errors']:
print(f" ! {error}")
print("-" * 40)
# Final summary
if invalid_count == 0:
print(f"πŸŽ‰ ALL {len(zip_files)} FILES ARE VALID!")
else:
print(f"\nπŸ“Š SUMMARY:")
print(f"βœ… Valid: {valid_count}")
print(f"❌ Invalid: {invalid_count}")
print(f"πŸ“ Total: {len(zip_files)}")
return valid_count
def main():
"""Main function"""
print("ZIP and SQLite Validator")
print("=" * 40)
directory_path = None
# Method 1: Command line argument
if len(sys.argv) > 1:
directory_path = sys.argv[1]
print(f"Using command line path: {directory_path}")
# Method 2: Interactive input with drag & drop support
if not directory_path:
print("\nπŸ’‘ TIP: You can drag and drop a folder here, then press Enter!")
print("Or type/paste the folder path manually.")
directory_path = input("\nEnter directory path: ").strip()
# Handle quoted paths (common with drag & drop)
# Remove outer quotes - handle multiple quote layers
directory_path = directory_path.strip()
while ((directory_path.startswith('"') and directory_path.endswith('"')) or
(directory_path.startswith("'") and directory_path.endswith("'"))):
directory_path = directory_path[1:-1].strip()
# Handle escaped spaces
directory_path = directory_path.replace('\\ ', ' ')
if not directory_path:
print("❌ No directory specified!")
input("Press Enter to exit...")
return
print(f"\nπŸ” Validating directory: {directory_path}")
validate_directory(directory_path)
if __name__ == "__main__":
main()
@shakir915
Copy link
Author

ZIP SQLite Validator

A Python script to validate directories containing ZIP files with SQLite databases. Ensures proper naming conventions and verifies SQLite file integrity.

File Name

Save the script as: zip_sqlite_validator.py

Features

βœ… Batch Validation - Processes entire directories of ZIP files
βœ… Naming Convention Check - Verifies ZIP and SQLite file names match
βœ… SQLite Integrity Check - Validates database file format and structure
βœ… Progress Indicator - Real-time processing counter
βœ… Clean Output - Only shows problems, silent success
βœ… Auto-Exclude - Ignores system files (.DS_Store, Thumbs.db, etc.)
βœ… Flexible Naming - Supports both name.zip and name.sqlite.zip patterns

Requirements

  • Python 3.6 or higher
  • No additional packages required (uses standard library only)

Usage

Method 1: Command Line

python zip_sqlite_validator.py "/path/to/zip/directory"

Method 2: Interactive Mode

python zip_sqlite_validator.py

Then enter or drag & drop your directory path when prompted.

Method 3: VS Code

  1. Open the script in VS Code
  2. Press F5 or click the ▢️ Run button
  3. Enter directory path when prompted

Supported File Patterns

The script validates two naming patterns:

Pattern 1: Standard naming

  • filename.zip should contain filename.sqlite
  • Example: database1.zip β†’ database1.sqlite

Pattern 2: Extended naming

  • filename.sqlite.zip should contain filename.sqlite
  • Example: database1.sqlite.zip β†’ database1.sqlite

Validation Checks

Each ZIP file undergoes these checks:

  1. ZIP Integrity - File is a valid, non-corrupted ZIP archive
  2. Content Check - Contains exactly one SQLite file with matching name
  3. SQLite Header - File starts with valid SQLite signature
  4. Database Connection - SQLite file can be opened and queried
  5. Integrity Verification - Database passes internal consistency checks

Sample Output

All Files Valid

Found 150 zip file(s) to validate...
================================================================================
πŸ” Completed: 150 files processed                    
πŸŽ‰ ALL 150 FILES ARE VALID!

Files with Issues

Found 150 zip file(s) to validate...
================================================================================
πŸ” Completed: 150 files processed                    

========================================
❌ INVALID | broken_file.zip
βœ— Invalid zip file
! Invalid or corrupted zip file

❌ INVALID | wrong_name.zip
βœ— SQLite filename doesn't match
! Found SQLite file(s) ['other.sqlite'] but expected wrong_name.sqlite

πŸ“Š SUMMARY:
βœ… Valid: 148
❌ Invalid: 2
πŸ“ Total: 150

Error Types

Error Description Solution
Invalid zip file Corrupted or fake ZIP Re-download/recreate the ZIP file
No SQLite file found ZIP is empty or contains wrong files Add the correct SQLite file
SQLite filename doesn't match Wrong file name inside ZIP Rename the SQLite file to match ZIP name
Invalid SQLite database Corrupted or fake SQLite file Repair or recreate the database

Excluded Files

The script automatically ignores these system files:

  • .DS_Store (macOS)
  • Thumbs.db (Windows)
  • desktop.ini (Windows)
  • __MACOSX/ folders
  • Hidden files (starting with .)
  • Temporary files (~, .tmp, .temp)

Performance

  • Speed: ~100-500 files per minute (depends on file sizes)
  • Memory: Low memory usage (processes one file at a time)
  • Progress: Real-time counter shows current progress

Troubleshooting

Common Issues

"Directory does not exist"

  • Check the path is correct
  • Remove extra quotes if copy-pasting

"Permission denied"

  • Run with appropriate permissions
  • Check file/folder access rights

Script hangs on large files

  • Large SQLite files take longer to validate
  • Progress counter shows it's still working

Getting Help

  1. Verify Python version: python --version
  2. Test with a small directory first
  3. Check file permissions on the target directory

License

This script is provided as-is for educational and utility purposes.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment