Forked from powpingdone/flatpak-ostree-dedup-stats.py
Created
February 16, 2022 02:47
-
-
Save JayDoubleu/ac363d214869bc7f76cfa054b2c88a2b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This python script will find flatpak deduplication size stats. | |
Of course, this checks the regular flatpak installation at /var/lib/flatpak . | |
If you want to use an alternative path, execute the program with said path. | |
Made with :heart: by powpingdone#3611, or just powpingdone on github. | |
Explaination for output: | |
'no dedupe': The size that the ostree repository would take up if files were not deduplicated. | |
'dedupe': The actual size of the ostree repository. | |
'deduplicated ratio': The ratio of space taken to how much space would be taken if deduplication wasn't in use. | |
'singlelet ratio': The ratio of files that are only shared between applications/runtimes once (ie, only used in one). | |
'orphan file ratio': The ratio of files that are somehow not even referenced by flatpak. | |
""" | |
from glob import iglob as glob | |
from os import path, scandir | |
from sys import argv, exit | |
# Root of the flatpak installation to inspect. Defaults to the system-wide
# location; when command-line arguments are given they are joined with single
# spaces and used as the path instead.
PATH_ROOT = " ".join(argv[1:]) if len(argv) > 1 else "/var/lib/flatpak"

# Bail out with exit status 1 when the chosen root cannot be found at all.
if not path.exists(PATH_ROOT):
    print(f"{PATH_ROOT} does not exist or is not able to be seen, exiting...")
    exit(1)

# Bail out with exit status 2 unless both the "app" and "runtime"
# sub-directories are present under the root.
if not all(path.exists(f"{PATH_ROOT}/{subdir}") for subdir in ("app", "runtime")):
    print(f'{PATH_ROOT} does not point to a "valid" flatpak repo, exiting...')
    exit(2)
# Running totals, updated in place by collect_data(). Every counter is
# maintained per unique file (inode), never per path, so that the ratios
# printed at the end share one consistent denominator.
not_deduped_size = 0  # bytes needed if every hard link were its own copy
deduped_size = 0  # bytes occupied when each inode is counted once
singlelet_files = 0  # unique files whose hard-link count is exactly 2
orphan_files = 0  # unique files whose hard-link count is exactly 1
all_files = 0  # unique files seen so far
inodes = set()  # (st_dev, st_ino) pairs that have already been accounted for


def collect_data(globbed):
    """Recursively add the files under directory *globbed* to the global totals.

    Symbolic links are skipped (neither followed nor counted) and
    sub-directories are descended into. Every other entry is stat()ed, and the
    first time its inode is seen it contributes to all of the module-level
    counters:

    * ``all_files`` grows by one and ``deduped_size`` by the file size;
    * ``not_deduped_size`` grows by ``size * (st_nlink - 1)``, or by ``size``
      when the link count is 1. Presumably one of the links is the copy kept
      in the ostree object store, hence the ``- 1`` -- confirm against the
      flatpak on-disk layout;
    * ``singlelet_files`` grows by one when the link count is exactly 2;
    * ``orphan_files`` grows by one when the link count is exactly 1.

    Further paths that resolve to an already recorded inode are ignored.
    Charging ``not_deduped_size`` on every path (as an earlier revision did)
    multiplied the cost of a file shared by N checkouts by N a second time.

    Args:
        globbed: Path of the directory to scan.

    Raises:
        OSError: Propagated from ``os.scandir`` / ``DirEntry.stat`` when a
            directory or file cannot be read.
    """
    global not_deduped_size, deduped_size, singlelet_files, orphan_files, all_files
    # The context manager closes the directory handle promptly, even if an
    # exception interrupts the walk.
    with scandir(globbed) as entries:
        for entry in entries:
            if entry.is_symlink():
                continue
            if entry.is_dir():
                collect_data(entry.path)
                continue
            statout = entry.stat(follow_symlinks=False)
            # Inode numbers are only unique within one device, so the device
            # id is part of the identity.
            identity = (statout.st_dev, statout.st_ino)
            if identity in inodes:
                continue
            inodes.add(identity)
            links = statout.st_nlink
            all_files += 1
            deduped_size += statout.st_size
            not_deduped_size += (links - 1 if links > 1 else 1) * statout.st_size
            if links == 2:
                singlelet_files += 1
            elif links == 1:
                orphan_files += 1
# Walk every checkout directly below <root>/app first, then every one below
# <root>/runtime, announcing each by the last "/"-separated component of its
# path before feeding it to collect_data().
for kind in ("app", "runtime"):
    for checkout in glob(f"{PATH_ROOT}/{kind}/*"):
        label = checkout.rpartition("/")[2]
        print(f"collecting {kind} {label}")
        collect_data(checkout)
def to_human_readable_size(num):
    """Format a byte count as a string with one decimal place and a unit.

    The value is divided by 1024 until its magnitude drops below 1024 or the
    largest unit is reached. The units, in order, are B, KB, MB, GB and TB;
    anything still at or above 1024 after four divisions is reported in TB.

    Args:
        num: Size in bytes (may be negative).

    Returns:
        A string such as ``"1.5 KB"``.
    """
    value = num
    for unit in ("B", "KB", "MB", "GB"):
        if abs(value) < 1024:
            break
        value = value / 1024
    else:
        # Ran out of smaller units without dropping below 1024.
        unit = "TB"
    return f"{value:.1f} {unit}"
def _percent(part, whole):
    """Return ``100 * part / whole`` with two decimals, or "n/a" if *whole* is 0.

    Guarding the division keeps the report from crashing with
    ZeroDivisionError when nothing was collected (for example an installation
    with empty app/ and runtime/ directories) or when every file is empty.
    """
    if not whole:
        return "n/a"
    return f"{100 * (part / whole):0.2f}"


# Final report: absolute sizes first (human readable plus exact byte count),
# then the three ratios expressed as percentages.
print(f"no dedupe: {to_human_readable_size(not_deduped_size)} ({not_deduped_size} B)")
print(f"dedupe: {to_human_readable_size(deduped_size)} ({deduped_size} B)")
print(f"deduplicated ratio: {_percent(deduped_size, not_deduped_size)}")
print(f"singlelet file ratio: {_percent(singlelet_files, all_files)}")
print(f"orphan file ratio: {_percent(orphan_files, all_files)}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment