Last active
August 29, 2015 14:15
-
-
Save lukecampbell/d65b33604fad44d3117a to your computer and use it in GitHub Desktop.
Glider Days
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
''' | |
glider_days.py | |
Determines the glider days for a thredds catalog | |
''' | |
from dateutil.parser import parse | |
from datetime import datetime | |
from thredds_crawler.crawl import Crawl | |
from netCDF4 import Dataset | |
from collections import defaultdict | |
import sys | |
import pytz | |
import csv | |
utc = pytz.utc | |
class GliderDays:
    '''
    Accumulates per-institution data-coverage seconds for every dataset in a
    THREDDS catalog.
    '''
    def __init__(self, url):
        '''
        :param str url: THREDDS catalog URL to crawl
        '''
        # institution name -> total seconds of coverage (summed across datasets)
        self.institutions = defaultdict(int)
        self.url = url

    def parse_catalog(self):
        '''
        Crawl the catalog and return CSV-ready rows, header row first.

        Also accumulates each dataset's seconds into ``self.institutions``.
        Datasets that fail to open or parse are reported and skipped so one
        bad endpoint does not abort the whole crawl.
        '''
        rows = [['institution', 'url', 'start', 'end', 'seconds']]
        self.crawler = Crawl(self.url)
        for dataset in self.crawler.datasets:
            try:
                with DatasetRecord(dataset) as dataset_record:
                    institution, url, start, end, seconds = dataset_record.parse_dataset()
                    self.institutions[institution] += seconds
                    rows.append([institution, url, start, end, seconds])
            except Exception as e:
                # str(e) instead of the deprecated/removed e.message; the
                # single-argument print(...) form works on Python 2 and 3.
                print('Failed to parse dataset: %s' % e)
        return rows
class DatasetRecord:
    '''
    Context manager for one THREDDS dataset: opens the dataset's OPeNDAP
    endpoint on entry and closes the netCDF handle on exit.
    '''
    def __init__(self, dataset):
        '''
        :param dataset: a thredds_crawler dataset with a ``services`` list
        '''
        # service name -> service description dict, for O(1) lookup
        self.services = {s['name']: s for s in dataset.services}
        self.nc = None

    def __enter__(self):
        self.url = self.services['odap']['url']
        # Single-argument print(...) works on Python 2 and 3
        print('Parsing %s' % self.url)
        self.nc = Dataset(self.url)
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # Always release the remote dataset handle
        self.nc.close()
        self.nc = None

    def get_seconds(self, start, end, window_start=None, window_end=None):
        '''
        Return the number of seconds of [start, end] that overlap the
        reporting window. The window defaults to calendar year 2014 (UTC),
        preserving the original hard-coded behavior; pass explicit bounds
        to report on a different period.

        :param datetime start: timezone-aware coverage start
        :param datetime end: timezone-aware coverage end
        :param datetime window_start: inclusive window start (default 2014-01-01 UTC)
        :param datetime window_end: exclusive window end (default 2015-01-01 UTC)
        :return: overlap in seconds (0 if the interval misses the window)
        '''
        if window_start is None:
            window_start = datetime(2014, 1, 1, tzinfo=utc)
        if window_end is None:
            window_end = datetime(2015, 1, 1, tzinfo=utc)
        # Interval entirely outside the window
        if start > window_end or end < window_start:
            return 0
        # Clamp the interval to the window bounds
        if start < window_start:
            start = window_start
        if end > window_end:
            end = window_end
        return (end - start).total_seconds()

    def parse_dataset(self):
        '''
        Return (institution, url, start, end, seconds) for the open dataset,
        derived from its time_coverage_* and institution global attributes.
        '''
        end = parse(self.nc.time_coverage_end)
        start = parse(self.nc.time_coverage_start)
        seconds = self.get_seconds(start, end)
        return self.nc.institution, self.url, start, end, seconds
def main(args):
    '''
    Determines the breakdown of glider days per institution
    '''
    glider_days = GliderDays(args.url)
    rows = glider_days.parse_catalog()
    if args.output:
        # csv requires binary mode on Python 2 but text mode with
        # newline='' on Python 3 (to avoid doubled row terminators);
        # the original 'wb' breaks csv.writer under Python 3.
        if sys.version_info[0] >= 3:
            csvfile = open(args.output, 'w', newline='')
        else:
            csvfile = open(args.output, 'wb')
        with csvfile:
            csv.writer(csvfile).writerows(rows)
    for institution, seconds in glider_days.institutions.items():
        # NOTE(review): accumulated seconds come from total_seconds() and are
        # floats, so both branches perform true division; the --float flag
        # only matters if a dataset contributes integer seconds.
        if args.float:
            print('%s: %s' % (institution, seconds / (3600 * 24.)))
        else:
            print('%s: %s' % (institution, seconds / (3600 * 24)))
    return 0
if __name__ == '__main__':
    from argparse import ArgumentParser
    parser = ArgumentParser(description=main.__doc__)
    parser.add_argument('url', help='Catalog Url')
    parser.add_argument('-f', '--float', action='store_true', help='Using Floating Point')
    parser.add_argument('-o', '--output', help='CSV Output')
    args = parser.parse_args()
    # Propagate main's return code; the original computed retval and then
    # unconditionally called sys.exit(0), discarding it.
    sys.exit(main(args))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment