Created
June 14, 2025 12:50
-
-
Save shakir915/7c2d06e43afddea1a720daa7e884d286 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os | |
import zipfile | |
import sqlite3 | |
import tempfile | |
from pathlib import Path | |
import sys | |
def is_valid_sqlite(file_path): | |
"""Check if a file is a valid SQLite database""" | |
try: | |
# Test 1: Try to connect to the database | |
conn = sqlite3.connect(file_path) | |
cursor = conn.cursor() | |
# Test 2: Check if it has the SQLite file signature | |
# SQLite files start with "SQLite format 3\000" | |
with open(file_path, 'rb') as f: | |
header = f.read(16) | |
if not header.startswith(b'SQLite format 3'): | |
conn.close() | |
return False | |
# Test 3: Try to read the schema (tables structure) | |
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") | |
tables = cursor.fetchall() | |
# Test 4: Try to get database info | |
cursor.execute("PRAGMA integrity_check;") | |
integrity_result = cursor.fetchone() | |
# Test 5: Check if database is not corrupted | |
if integrity_result and integrity_result[0] != 'ok': | |
conn.close() | |
return False | |
conn.close() | |
return True | |
except sqlite3.DatabaseError: | |
return False | |
except Exception: | |
return False | |
def validate_zip_file(zip_path): | |
"""Validate a single zip file and its contents""" | |
zip_name = Path(zip_path).stem # Get filename without extension | |
results = { | |
'zip_path': zip_path, | |
'zip_name': zip_name, | |
'is_valid_zip': False, | |
'contains_sqlite': False, | |
'sqlite_name_matches': False, | |
'is_valid_sqlite': False, | |
'errors': [] | |
} | |
try: | |
# Check if zip file is valid | |
with zipfile.ZipFile(zip_path, 'r') as zip_ref: | |
# Test the zip file integrity | |
zip_ref.testzip() | |
results['is_valid_zip'] = True | |
# Get list of files in zip, excluding macOS and system files | |
all_files = zip_ref.namelist() | |
file_list = [f for f in all_files if not should_exclude_file(os.path.basename(f)) and not f.startswith('__MACOSX/')] | |
# Look for SQLite file with matching name | |
# Handle cases where zip name already includes .sqlite extension | |
if zip_name.endswith('.sqlite'): | |
expected_sqlite_name = zip_name # zip name already has .sqlite | |
base_name = zip_name[:-7] # Remove .sqlite extension for comparison | |
else: | |
expected_sqlite_name = f"{zip_name}.sqlite" | |
base_name = zip_name | |
# Check if the expected SQLite file exists | |
if expected_sqlite_name in file_list: | |
results['contains_sqlite'] = True | |
results['sqlite_name_matches'] = True | |
# Extract SQLite file to temporary location and validate | |
with tempfile.TemporaryDirectory() as temp_dir: | |
sqlite_path = zip_ref.extract(expected_sqlite_name, temp_dir) | |
if is_valid_sqlite(sqlite_path): | |
results['is_valid_sqlite'] = True | |
else: | |
results['errors'].append(f"SQLite file {expected_sqlite_name} is not a valid database") | |
else: | |
# Check if there's any .sqlite file (excluding system files) | |
sqlite_files = [f for f in file_list if f.endswith('.sqlite')] | |
if sqlite_files: | |
results['contains_sqlite'] = True | |
# Check if any sqlite file matches the zip name pattern | |
matching_sqlite = None | |
for sqlite_file in sqlite_files: | |
sqlite_basename = os.path.splitext(sqlite_file)[0] # Remove .sqlite extension | |
# Check both patterns: exact match or base name match | |
if sqlite_basename == base_name or sqlite_file == expected_sqlite_name: | |
matching_sqlite = sqlite_file | |
results['sqlite_name_matches'] = True | |
break | |
if matching_sqlite: | |
# Extract and validate the matching SQLite file | |
with tempfile.TemporaryDirectory() as temp_dir: | |
sqlite_path = zip_ref.extract(matching_sqlite, temp_dir) | |
if is_valid_sqlite(sqlite_path): | |
results['is_valid_sqlite'] = True | |
else: | |
results['errors'].append(f"SQLite file {matching_sqlite} is not a valid database") | |
else: | |
results['errors'].append(f"Found SQLite file(s) {sqlite_files} but expected {expected_sqlite_name}") | |
else: | |
results['errors'].append("No SQLite file found in zip") | |
except zipfile.BadZipFile: | |
results['errors'].append("Invalid or corrupted zip file") | |
except Exception as e: | |
results['errors'].append(f"Error processing zip: {str(e)}") | |
return results | |
def should_exclude_file(file_name): | |
"""Check if a file should be excluded (hidden files, OS files, etc.)""" | |
# Hidden files (starting with .) | |
if file_name.startswith('.'): | |
return True | |
# Common OS and system files to exclude | |
excluded_files = { | |
'Thumbs.db', # Windows thumbnail cache | |
'desktop.ini', # Windows folder settings | |
'.DS_Store', # macOS folder settings | |
'__MACOSX', # macOS archive metadata | |
'System Volume Information', # Windows system folder | |
'$RECYCLE.BIN', # Windows recycle bin | |
'pagefile.sys', # Windows page file | |
'hiberfil.sys', # Windows hibernation file | |
'swapfile.sys', # Windows swap file | |
} | |
# Case-insensitive check for excluded files | |
if file_name in excluded_files or file_name.lower() in [f.lower() for f in excluded_files]: | |
return True | |
# Temporary files | |
if file_name.startswith('~') or file_name.endswith('.tmp') or file_name.endswith('.temp'): | |
return True | |
# macOS resource forks and metadata | |
if file_name.startswith('._'): | |
return True | |
return False | |
def validate_directory(directory_path): | |
"""Validate all zip files in a directory""" | |
if not os.path.exists(directory_path): | |
print(f"Error: Directory '{directory_path}' does not exist!") | |
return | |
if not os.path.isdir(directory_path): | |
print(f"Error: '{directory_path}' is not a directory!") | |
return | |
# Find all zip files, excluding hidden and OS files | |
zip_files = [] | |
excluded_count = 0 | |
for file_name in os.listdir(directory_path): | |
# Skip hidden files and OS files | |
if should_exclude_file(file_name): | |
excluded_count += 1 | |
continue | |
if file_name.lower().endswith('.zip'): | |
zip_files.append(os.path.join(directory_path, file_name)) | |
if not zip_files: | |
print(f"No zip files found in '{directory_path}'") | |
if excluded_count > 0: | |
print(f"(Excluded {excluded_count} hidden/system file(s))") | |
return | |
print(f"Found {len(zip_files)} zip file(s) to validate...") | |
if excluded_count > 0: | |
print(f"(Excluded {excluded_count} hidden/system file(s))") | |
print("=" * 80) | |
valid_count = 0 | |
invalid_count = 0 | |
invalid_files = [] | |
# Scan all files with progress indicator | |
for i, zip_path in enumerate(sorted(zip_files), 1): | |
# Update progress on same line | |
print(f"\rπ Processing: {i}/{len(zip_files)} files...", end='', flush=True) | |
result = validate_zip_file(zip_path) | |
# Check if file is completely valid | |
is_completely_valid = (result['is_valid_zip'] and | |
result['contains_sqlite'] and | |
result['sqlite_name_matches'] and | |
result['is_valid_sqlite']) | |
if is_completely_valid: | |
valid_count += 1 | |
else: | |
invalid_count += 1 | |
invalid_files.append(result) | |
# Clear progress line and show results | |
print(f"\rπ Completed: {len(zip_files)} files processed" + " " * 20) | |
# Show invalid files if any | |
if invalid_files: | |
print("\n" + "=" * 40) | |
for result in invalid_files: | |
print(f"β INVALID | {result['zip_name']}.zip") | |
if not result['is_valid_zip']: | |
print(f" β Invalid zip file") | |
if not result['contains_sqlite']: | |
print(f" β No SQLite file found") | |
if not result['sqlite_name_matches']: | |
print(f" β SQLite filename doesn't match") | |
if not result['is_valid_sqlite']: | |
print(f" β Invalid SQLite database") | |
if result['errors']: | |
for error in result['errors']: | |
print(f" ! {error}") | |
print("-" * 40) | |
# Final summary | |
if invalid_count == 0: | |
print(f"π ALL {len(zip_files)} FILES ARE VALID!") | |
else: | |
print(f"\nπ SUMMARY:") | |
print(f"β Valid: {valid_count}") | |
print(f"β Invalid: {invalid_count}") | |
print(f"π Total: {len(zip_files)}") | |
return valid_count | |
def main(): | |
"""Main function""" | |
print("ZIP and SQLite Validator") | |
print("=" * 40) | |
directory_path = None | |
# Method 1: Command line argument | |
if len(sys.argv) > 1: | |
directory_path = sys.argv[1] | |
print(f"Using command line path: {directory_path}") | |
# Method 2: Interactive input with drag & drop support | |
if not directory_path: | |
print("\nπ‘ TIP: You can drag and drop a folder here, then press Enter!") | |
print("Or type/paste the folder path manually.") | |
directory_path = input("\nEnter directory path: ").strip() | |
# Handle quoted paths (common with drag & drop) | |
# Remove outer quotes - handle multiple quote layers | |
directory_path = directory_path.strip() | |
while ((directory_path.startswith('"') and directory_path.endswith('"')) or | |
(directory_path.startswith("'") and directory_path.endswith("'"))): | |
directory_path = directory_path[1:-1].strip() | |
# Handle escaped spaces | |
directory_path = directory_path.replace('\\ ', ' ') | |
if not directory_path: | |
print("β No directory specified!") | |
input("Press Enter to exit...") | |
return | |
print(f"\nπ Validating directory: {directory_path}") | |
validate_directory(directory_path) | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
ZIP SQLite Validator
A Python script to validate directories containing ZIP files with SQLite databases. Ensures proper naming conventions and verifies SQLite file integrity.
File Name
Save the script as:
zip_sqlite_validator.py
Features
β Batch Validation - Processes entire directories of ZIP files
β Naming Convention Check - Verifies ZIP and SQLite file names match
β SQLite Integrity Check - Validates database file format and structure
β Progress Indicator - Real-time processing counter
β Clean Output - Only shows problems, silent success
β Auto-Exclude - Ignores system files (.DS_Store, Thumbs.db, etc.)
β Flexible Naming - Supports both
name.zip
andname.sqlite.zip
patternsRequirements
Usage
Method 1: Command Line
Method 2: Interactive Mode
Then enter or drag & drop your directory path when prompted.
Method 3: VS Code
Supported File Patterns
The script validates two naming patterns:
Pattern 1: Standard naming
filename.zip
should containfilename.sqlite
database1.zip
βdatabase1.sqlite
Pattern 2: Extended naming
filename.sqlite.zip
should containfilename.sqlite
database1.sqlite.zip
βdatabase1.sqlite
Validation Checks
Each ZIP file undergoes these checks:
Sample Output
All Files Valid
Files with Issues
Error Types
Excluded Files
The script automatically ignores these system files:
.DS_Store
(macOS)Thumbs.db
(Windows)desktop.ini
(Windows)__MACOSX/
folders.
)~
,.tmp
,.temp
)Performance
Troubleshooting
Common Issues
"Directory does not exist"
"Permission denied"
Script hangs on large files
Getting Help
python --version
License
This script is provided as-is for educational and utility purposes.