MediaWiki Delete Duplicate Files Script
#!/usr/bin/python3
import requests
import json
import mwclient

################### CONFIGURE ME ###################
site = mwclient.Site('YOUR_WEBSITE.com', path='/wiki/')
site.login(username='USERNAME_HERE', password='PASSWORD_HERE')
URL = "https://YOUR_WEBSITE.com/wiki/api.php"
####################################################

S = requests.Session()

# Query the pages listed on Special:ListDuplicatedFiles
PARAMS = {
    "action": "query",
    "list": "querypage",
    "qppage": "ListDuplicatedFiles",
    "qplimit": "100",  # 500 max
    "format": "json"
}

R = S.get(url=URL, params=PARAMS)
pages_with_duplicates_json = R.json()

title_list = ""
for result in pages_with_duplicates_json['query']['querypage']['results']:
    title = result['title']
    ### Remove the following two lines (the prompt and the input check) to proceed without asking
    print(f"Replace {title}? [Y/n]")
    if input("") in ('', 'y', 'Y'):  # Enter or y/Y accepts; anything else skips this file
        title_list += result['title'] + "|"

# Drop the trailing "|" separator
title_list = title_list[:-1]

if title_list == "":
    print("No pages with duplicates found.")
    quit()

print("\n" + title_list + "\n")
# Ask for the duplicates of every selected title in one request
PARAMS = {
    "action": "query",
    "titles": title_list,
    "prop": "duplicatefiles",
    "format": "json"
}

R = S.get(url=URL, params=PARAMS)
data = R.json()

# Uncomment to see pages that will be replaced beforehand
#print(data)
#input("Press Enter to continue...")
# Loop through the JSON data and process duplicate files
for page_id, page_data in data['query']['pages'].items():
    original_file = page_data['title']
    try:
        duplicate_files = page_data['duplicatefiles']
    except KeyError:
        print("No more duplicate files found")
        quit()
    for duplicate_file in duplicate_files:
        duplicate_name = "File:" + duplicate_file['name']
        # Get information about the duplicate file
        duplicate_page = site.pages[duplicate_name]
        # Check if the duplicate file exists
        if duplicate_page.exists:
            # Delete the duplicate file
            duplicate_page.delete(reason='Duplicate file - redirecting to original')
            # Edit the deleted file's page content to redirect to the original file
            redirect_text = f"#REDIRECT [[{original_file}]]"
            duplicate_page.save(redirect_text, summary='Redirecting duplicate file to original')
            print(f"Redirected {duplicate_name} to {original_file} and deleted the duplicate.")
        else:
            print(f"{duplicate_name} doesn't exist.")
This script locates duplicate files (pages in the File: namespace), deletes them, and then replaces each deleted page with a redirect to the original file.
By default it asks for confirmation before each file is replaced; remove the two prompt lines marked in the script (the print/input pair under the "Remove the following two lines" comment) to delete all duplicates without asking.
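If you want to preview what would be touched before running the script, the same two API calls can be made read-only. The sketch below is a minimal dry-run built from those calls; it assumes the same URL placeholder as above, needs no login, and only prints original/duplicate pairs without deleting or editing anything.

#!/usr/bin/python3
# Dry-run sketch: list duplicate files without changing the wiki.
import requests

URL = "https://YOUR_WEBSITE.com/wiki/api.php"
S = requests.Session()

# Same ListDuplicatedFiles query the main script uses
R = S.get(url=URL, params={
    "action": "query",
    "list": "querypage",
    "qppage": "ListDuplicatedFiles",
    "qplimit": "100",
    "format": "json"
})

for result in R.json()['query']['querypage']['results']:
    title = result['title']
    # Same duplicatefiles prop query, one title at a time
    R2 = S.get(url=URL, params={
        "action": "query",
        "titles": title,
        "prop": "duplicatefiles",
        "format": "json"
    })
    for page_data in R2.json()['query']['pages'].values():
        for duplicate_file in page_data.get('duplicatefiles', []):
            print(f"File:{duplicate_file['name']} -> {title}")

Once the printed pairs look right, run the main script with the same configuration values.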