Created
September 21, 2019 17:35
-
-
Save jimallman/3a0196299b846958793aa8ba98e3bf69 to your computer and use it in GitHub Desktop.
Gather all unique OpenTree curator names from phylesystem
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Gather all unique OpenTree curator names from phylesystem studies.

Run this from a directory that contains a ``study`` folder laid out as
``study/<bundle>/<study_id>/<study_id>.json``. Every Nexson (JSON) file found
is scanned for its '^ot:curatorName' value(s); the distinct names are printed
once the scan is complete.
"""
import json
import os

# Placeholder reported (and collected as a name) when a property is absent.
NOT_FOUND = "NOT FOUND"

# Failures that mean "this one Nexson file is missing or malformed". Anything
# else (notably KeyboardInterrupt) is deliberately allowed to propagate.
_NEXSON_ERRORS = (OSError, ValueError, KeyError, TypeError, AttributeError)


def read_study_metadata(nexson_path):
    """Return ``(version, curators)`` read from a single Nexson file.

    ``curators`` is always a list: a lone '^ot:curatorName' value is wrapped
    in one, and a missing property yields ``["NOT FOUND"]``.

    Raises:
        OSError: the file is missing or cannot be read.
        ValueError: the file is not valid UTF-8 encoded JSON.
        KeyError, TypeError, AttributeError: the JSON document has no usable
            top-level 'nexml' object.
    """
    # JSON is UTF-8; don't let the platform's locale encoding mangle names.
    with open(nexson_path, encoding="utf-8") as nexson_file:
        data = json.load(nexson_file)
    nexml = data['nexml']
    study_version = nexml.get('@version', NOT_FOUND)
    study_curators = nexml.get('^ot:curatorName', NOT_FOUND)
    if not isinstance(study_curators, list):
        study_curators = [study_curators]
    return study_version, study_curators


def scan_studies(study_root='study'):
    """Scan every study under *study_root* and collect curator names.

    Progress is printed for each bundle and study as it is visited. Directory
    entries are visited in sorted order so the output is reproducible.

    Args:
        study_root: directory holding the "bundle" subdirectories.

    Returns:
        ``(studies_found, unique_names)``: the number of Nexson files that
        were read successfully, and the distinct curator names in first-seen
        order.

    A missing or unreadable Nexson file is reported and skipped. If
    *study_root* or one of its bundles cannot be listed, a hint is printed,
    the scan stops, and whatever was gathered so far is returned.
    """
    studies_found = 0
    # A list rather than a set: values come straight from JSON and are not
    # guaranteed to be hashable, and the number of distinct names is small.
    unique_names = []
    try:
        # look for "bundles" of studies with IDs ending in (e.g.) "99"
        for bundle_dirname in sorted(os.listdir(study_root)):
            print(" ", bundle_dirname)
            bundle_path = os.path.join(study_root, bundle_dirname)
            # look for individual study subdirectories in this bundle,
            # e.g. 'ot_2199'
            for study_dirname in sorted(os.listdir(bundle_path)):
                print(" ", study_dirname, end=", ")
                # each study should have one Nexson (JSON) file with the
                # same name
                full_path_to_nexson = os.path.join(
                    bundle_path, study_dirname, study_dirname + '.json')
                try:
                    study_version, study_curators = read_study_metadata(
                        full_path_to_nexson)
                except _NEXSON_ERRORS as err:
                    print("Nexson file broken or not found: ",
                          full_path_to_nexson,
                          f"({type(err).__name__}: {err})")
                    continue
                print("version: ", study_version, end=", ")
                print("^ot:curatorName: ", study_curators)
                for curator_name in study_curators:
                    if curator_name not in unique_names:
                        unique_names.append(curator_name)
                studies_found += 1
    except OSError:
        print(f"Expected to find local dir {study_root!r} with nested "
              "subdirectories! Are we in the right place?")
    return studies_found, unique_names


def main():
    """Scan the local 'study' directory and print a summary of the results."""
    studies_found, unique_names = scan_studies()
    print("====")
    print(studies_found, " studies found and scanned. ")
    print(len(unique_names), " unique names found. ")
    for found_name in unique_names:
        print(" ", found_name)
    # TODO: Write these to a file, consolidate all found names to Github
    # userids and current name


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment