#!/usr/bin/env python
'''
glider_days.py

Determines the glider days for a thredds catalog
'''

import csv
import sys
from collections import defaultdict
from datetime import datetime

import pytz
from dateutil.parser import parse
from netCDF4 import Dataset
from thredds_crawler.crawl import Crawl

utc = pytz.utc

# Only coverage that falls between these two instants is counted.
WINDOW_START = datetime(2014, 1, 1, tzinfo=utc)
WINDOW_END = datetime(2015, 1, 1, tzinfo=utc)

SECONDS_PER_DAY = 3600 * 24

# NOTE: every print() below takes exactly one argument so that it emits the
# same text whether it is executed as a Python 2 print statement or as the
# Python 3 print function.


def _assume_utc(moment):
    '''
    Return ``moment`` unchanged if it is timezone-aware, otherwise attach UTC.

    dateutil yields a naive datetime when the parsed string carries no offset,
    and comparing a naive datetime with the aware window bounds raises
    TypeError.
    NOTE(review): treating offset-less timestamps as UTC is an assumption --
    confirm it holds for the catalogs this script is pointed at.
    '''
    if moment.tzinfo is None:
        return moment.replace(tzinfo=utc)
    return moment


def _open_csv_for_write(path):
    '''
    Open ``path`` the way the csv module expects on the running interpreter:
    binary mode on Python 2, text mode with newline='' on Python 3.
    '''
    if sys.version_info[0] < 3:
        return open(path, 'wb')
    return open(path, 'w', newline='')


class GliderDays(object):
    '''
    Accumulates, per institution, the number of seconds of dataset coverage
    found while crawling a catalog.
    '''

    def __init__(self, url):
        # institution name -> accumulated seconds inside the counting window
        self.institutions = defaultdict(int)
        self.url = url

    def parse_catalog(self):
        '''
        Crawl ``self.url`` and inspect every dataset the crawler reports.

        Updates ``self.institutions`` as a side effect and returns a list of
        rows (the first one is the header) suitable for csv.writer.
        Datasets that fail for any reason are reported on stdout and skipped.
        '''
        rows = [['institution', 'url', 'start', 'end', 'seconds']]
        self.crawler = Crawl(self.url)
        for dataset in self.crawler.datasets:
            try:
                with DatasetRecord(dataset) as dataset_record:
                    record = dataset_record.parse_dataset()
            except Exception as e:
                # Best effort: one unreadable dataset must not abort the whole
                # crawl. Print the exception itself; ``e.message`` is empty
                # for multi-argument exceptions and missing on Python 3.
                print(e)
                continue
            institution, url, start, end, seconds = record
            self.institutions[institution] += seconds
            rows.append([institution, url, start, end, seconds])
        return rows


class DatasetRecord(object):
    '''
    Context manager around a single crawled dataset. Entering it opens the
    dataset's 'odap' service URL with netCDF4; leaving it closes the handle.
    '''

    def __init__(self, dataset):
        # Index the dataset's services by their 'name' entry.
        self.services = {s['name']: s for s in dataset.services}
        self.nc = None

    def __enter__(self):
        # A missing 'odap' service raises KeyError, which the caller treats
        # like any other per-dataset failure.
        self.url = self.services['odap']['url']
        print("Parsing %s" % self.url)
        self.nc = Dataset(self.url)
        return self

    def __exit__(self, exc_type, value, traceback):
        if self.nc is not None:
            self.nc.close()
            self.nc = None

    def get_seconds(self, start, end):
        '''
        Return how many seconds of the interval [start, end] fall between
        WINDOW_START and WINDOW_END.

        Returns 0 when the interval lies entirely outside the window, and
        also when ``end`` precedes ``start`` so that inverted metadata cannot
        subtract time from an institution's total.
        '''
        start = _assume_utc(start)
        end = _assume_utc(end)

        if start > WINDOW_END or end < WINDOW_START:
            return 0

        clipped_start = max(start, WINDOW_START)
        clipped_end = min(end, WINDOW_END)
        return max((clipped_end - clipped_start).total_seconds(), 0)

    def parse_dataset(self):
        '''
        Read the ``time_coverage_start``, ``time_coverage_end`` and
        ``institution`` global attributes of the open dataset.

        Returns the tuple (institution, url, start, end, seconds), where
        start and end are the parsed datetimes and seconds comes from
        get_seconds(). Must be called inside the ``with`` block.
        '''
        end = parse(self.nc.time_coverage_end)
        start = parse(self.nc.time_coverage_start)
        seconds = self.get_seconds(start, end)
        return self.nc.institution, self.url, start, end, seconds


def main(args):
    '''
    Determines the breakdown of glider days per institution
    '''
    # The docstring above doubles as the argparse description, so extra
    # notes live in comments instead.
    glider_days = GliderDays(args.url)
    rows = glider_days.parse_catalog()

    if args.output:
        with _open_csv_for_write(args.output) as csvfile:
            csv.writer(csvfile).writerows(rows)

    for institution, total_seconds in glider_days.institutions.items():
        if args.float:
            days = total_seconds / float(SECONDS_PER_DAY)
        else:
            # The totals are floats (timedelta.total_seconds()), so plain
            # division never truncated; floor explicitly to get whole days.
            days = int(total_seconds // SECONDS_PER_DAY)
        print("%s: %s" % (institution, days))
    return 0


if __name__ == '__main__':
    from argparse import ArgumentParser
    parser = ArgumentParser(description=main.__doc__)
    parser.add_argument('url', help='Catalog Url')
    parser.add_argument('-f', '--float', action='store_true', help='Using Floating Point')
    parser.add_argument('-o', '--output', help='CSV Output')
    args = parser.parse_args()
    retval = main(args)
    sys.exit(retval)