Last active
June 27, 2024 01:12
-
-
Save JotaRata/4c6f50695602808bb0837579a2ded006 to your computer and use it in GitHub Desktop.
ZTF Forced Photometry service utilities
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import re | |
from typing import Literal | |
import numpy as np | |
import pandas as pd | |
import requests | |
import multiprocessing | |
from multiprocessing.pool import ThreadPool | |
import os | |
'''
# ZTF Forced photometry utilities
'''
# Public API of this module: request, download and post-process ZFPS light curves.
__all__ = ['request_fp', 'download_lightcurves', 'filter_lightcurve', 'read_lc', 'get_airmass', 'color_correction']
_JulianDay = float | int | |
ZTF_AUTH = ('ztffps', 'dontgocrazy!') | |
# Function to request forced photometry from ZTF | |
def request_fp(ztf_user : str, ztf_passkey : str, coordinates : np.ndarray, | |
start_epoch : _JulianDay | None = None, end_epoch : _JulianDay | None = None) -> None: | |
''' | |
## Perform a request to ZTF Forced Photometry service (ZFPS) | |
### Parameters: | |
* `ztf_user`, `ztf_passkey` (str): ZTF Forced photometry service authentication email and userpass | |
* `coordinates` (ndarray): 2D array containing right ascension and declination in degrees | |
* `start_epoch` (JulianDay): Start epoch for the requested light curves. If None then ZTF will compute it from the start of the survey (JD= 2458194.5) | |
* `end_epoch` (JulianDay): End epoch for the requested light curves. If None, ZTF will take the recentmost epoch from the current day. | |
## Further instructions: | |
https://irsa.ipac.caltech.edu/data/ZTF/docs/ztf_zfps_userguide.pdf | |
''' | |
url = 'https://ztfweb.ipac.caltech.edu/cgi-bin/batchfp.py/submit' | |
assert bool(start_epoch) == bool(end_epoch), 'Both or None start and end epochs should be specified.' | |
if start_epoch: | |
assert start_epoch < end_epoch, 'start_epoch should always be smaller than end_epoch' | |
if len(coordinates) >= 1500: | |
raise ValueError('Max request batch size is 1500 elements') | |
print('Attempting to send a request to ztfweb.ipac.caltech.edu ...') | |
parameters = { | |
'ra': str(coordinates[:, 0].tolist()), | |
'dec': str(coordinates[:, 1].tolist()), | |
'email': ztf_user, | |
'userpass': ztf_passkey | |
} | |
if start_epoch: | |
parameters['jdstart'] = str(start_epoch) | |
parameters['jdend'] = str(end_epoch) | |
response = requests.post(url, auth= ZTF_AUTH, data = parameters) | |
if response.status_code == 200: | |
print('Done. Check your email tomorrow 👍') | |
print(f'Process exited with code {response.status_code}') | |
# The three ZTF photometric bands as they appear in the `filter` column.
_ZTFBand = Literal['ZTF_g', 'ZTF_r', 'ZTF_i']

def filter_lightcurve(lc : pd.DataFrame, *, max_seeing : float = 4,
                      band : _ZTFBand = 'ZTF_g', strict_calibration : bool = True) -> pd.DataFrame:
    '''
    Performs a quality filtering process as described in:
    https://irsa.ipac.caltech.edu/data/ZTF/docs/ztf_zfps_userguide.pdf (Section 6.1)

    ### Parameters:
    * `lc` (DataFrame): The input light curve to filter.
    * `max_seeing` (float, arcsec): Maximum PSF width allowed. Defaults to 4".
    * `band` (_ZTFBand): ZTF band to keep. Defaults to 'ZTF_g'.
    * `strict_calibration` (bool): If True keep only perfectly clean epochs
      (INFOBITSSCI == 0); if False merely discard catastrophically
      calibrated/contaminated epochs (INFOBITSSCI < 2**25).

    Returns the filtered view of `lc`.
    '''
    # BUGFIX: the original expression was inverted — it applied the *loose*
    # threshold (33554432 = 2**25) when strict_calibration was True.
    infobits_limit = 0 if strict_calibration else 33554432
    # Renamed from `filter` to avoid shadowing the builtin.
    mask = (
        (lc['infobitssci'] <= infobits_limit) &     # calibration/contamination flags
        (lc['sciinpseeing'] < max_seeing) &         # seeing cut
        (lc['filter'] == band) &                    # requested band only
        (lc['scisigpix'] <= 25) &                   # robust per-pixel noise cut
        (lc['diffmaglim'] > 20.5) &                 # sufficiently deep epochs only
        (lc['ccdid'] == lc['ccdid'].mode()[0]) &    # keep the dominant CCD quadrant
        (lc['forcediffimflux'] > -9999)             # drop sentinel / failed fits
    )
    return lc[mask]
def download_lightcurves(ztf_user : str, ztf_passkey : str, download_dir : str,
                         threads : int = multiprocessing.cpu_count() - 1) -> None:
    '''
    ## Download forced photometry processed lightcurves from ZTF
    Requires to have sent a request to ZTF prior to download (see `request_fp`).
    ### Parameters:
    * `ztf_user`, `ztf_passkey` (str): ZTF Forced photometry service authentication email and userpass
    * `download_dir` (str): Output directory where the lightcurves will be saved. Created (with parents) if it does not exist.
    * `threads` (int): Number of worker threads, defaults to the number of cores installed in the system minus one (clamped to at least 1).
    ### Raises:
    * `ConnectionError`: if the ZFPS job-status query does not return HTTP 200.
    The lightcurves will be saved in `download_dir` without further processing in an ASCII txt format,
    named `<ra>_<dec>__fplc<reqId>.txt` (the format `read_lc` expects).
    '''
    response_url = 'https://ztfweb.ipac.caltech.edu/cgi-bin/getBatchForcedPhotometryRequests.cgi'
    ztf_url = 'https://ztfweb.ipac.caltech.edu'

    # makedirs also creates missing parent directories, unlike os.mkdir.
    os.makedirs(download_dir, exist_ok=True)

    response = requests.get(response_url, auth= ZTF_AUTH,
        params= {
            'email': ztf_user,
            'userpass': ztf_passkey,
            'option': 'All recent jobs',
            'action': 'Query Database'})
    if response.status_code != 200:
        raise ConnectionError(f'Something went wrong, status code: {response.status_code}')

    # The job table is returned as an HTML page; take its first table.
    response_table = pd.read_html(response.text)[0]

    def do_download(row : pd.Series) -> None:
        # Fetch a single lightcurve and write it to download_dir; best-effort,
        # a failed download is reported but does not abort the batch.
        resp = session.get(ztf_url + row['lightcurve'], auth= ZTF_AUTH)
        if resp.status_code != 200:
            print(f'ERROR fetching lightcurve {row["reqId"]}, status code: {resp.status_code}')
            return
        # os.path.join is robust to a missing trailing separator in download_dir
        # (the original raw concatenation silently mangled the file name).
        out_path = os.path.join(download_dir, f'{row["ra"]}_{row["dec"]}__fplc{row["reqId"]}.txt')
        with open(out_path, 'w') as out:
            out.write(resp.text)
        print(f'Successfully downloaded light curve {row["reqId"]} at {out_path}')

    # Context managers guarantee the pool and session are closed even on error.
    with requests.Session() as session, ThreadPool(max(1, threads)) as pool:
        pool.map(do_download, response_table.iloc)
    print('Done.')
FPCOORD_PATTERN = re.compile(r'([\d|\.]*)_?([\d|\.|\-]*)__.*') | |
def read_lc(path : str, include_coords= True, **read_kws)-> pd.DataFrame | tuple[pd.DataFrame, tuple[float, float]]: | |
''' | |
## Reads a forced photometry lightcurve into a DataFrame. | |
By default, the output lightcurves from ZTF cannot be parsed as .csv files by pandas, this function setups many parameters of the read_csv function to read such lightcurves, including chaning the names for the columns and computing useful columns. | |
### Parameters: | |
* `path` (str): The input path to the lightcurve ASCII file (should end in .txt) | |
* `include_coords` (bool): Extract and include the RA, DEC coordinates from the file name?. Note that this only applies if the lighturves were downloaded using `download_lightcurves` | |
* `read_kws` (Any): Additional parameters passed to pandas.read_csv | |
Note that passing `sep` , `comment`, `index_col`, `skipinitialspace` in `read_kws` will raise an error since they are already supplied by this function. | |
''' | |
lc = pd.read_csv(path, sep= ' ', comment= '#', index_col= 0, skipinitialspace= True, **read_kws)\ | |
.rename(columns= lambda col: col.replace(',', '')) | |
# Useful columns | |
lc['mjd'] = lc['jd'] - 2400000.5 | |
lc['nearestrefflux'] = 10 ** ( 0.4 * (lc['zpdiff'] - lc['nearestrefmag']) ) | |
lc['flux_tot'] = lc['forcediffimflux'] + lc['nearestrefflux'] | |
lc['mag_tot'] = lc['zpdiff'] - 2.5 * np.log10(lc['flux_tot']) | |
lc['magerr'] = (2.5 / math.log(10)) * (lc['forcediffimfluxunc'] / lc['flux_tot']) | |
lc['snr_pixnoise_mag'] = lc['forcediffimsnr'] / lc['scisigpix'] | |
lc['snr_pixnoise_flux'] = (lc['forcediffimflux'] / lc['scisigpix']) | |
lc['snt'] = lc['forcediffimflux']/lc['forcediffimfluxunc'] | |
if include_coords: | |
matches = FPCOORD_PATTERN.match(os.path.basename(path)) | |
assert len(groups:=matches.groups()) == 2 | |
ra, dec = float(groups[0]), float(groups[1]) | |
return lc, (ra, dec) | |
return lc | |
def get_airmass(lc : pd.DataFrame, coords : tuple[float, float]):
    '''Return the airmass (sec z) at Palomar for every epoch (`jd` column) of `lc`,
    for a target at `coords` = (ra, dec) in degrees.'''
    # Astropy is imported lazily so the rest of the module works without it.
    import astropy.units as u
    from astropy.coordinates.builtin_frames import AltAz
    from astropy.coordinates.earth import EarthLocation
    from astropy.coordinates.sky_coordinate import SkyCoord
    from astropy.time.core import Time

    right_ascension, declination = coords

    # Geodetic position of Palomar Observatory (the ZTF site).
    observatory = EarthLocation(lat= 33.35805555555555 * u.deg,
                                lon= -116.86194444444445 * u.deg,
                                height = 1712 * u.m)

    # Horizontal (alt/az) frame at Palomar for every observation epoch.
    local_frame = AltAz(obstime= Time(lc['jd'], format='jd'),
                        location= observatory)
    target = SkyCoord(right_ascension, declination, unit='deg')
    return target.transform_to(local_frame).secz
def color_correction(lc : pd.DataFrame, gps1 : float, rps1 : float, gps1_err : float = 0, rps1_err : float = 0,
                     clr_name = 'clrcoeff', clru_name= 'clrcoeffunc', mag_name= 'mag_tot', magerr_name = 'magerr'):
    '''
    Apply the ZTF color correction using a PS1 (g - r) reference color:
    `mag_corr = mag + (g - r) * clrcoeff`, and propagate its uncertainty.
    ### Parameters:
    * `lc` (DataFrame): light curve; modified in place and also returned.
    * `gps1`, `rps1` (float): PS1 g and r reference magnitudes.
    * `gps1_err`, `rps1_err` (float): their uncertainties (default 0).
    * `clr_name`, `clru_name`, `mag_name`, `magerr_name` (str): column names for the
      color coefficient, its uncertainty, the magnitude and its error.
    '''
    color = gps1 - rps1
    lc['mag_corr'] = lc[mag_name] + color * lc[clr_name]
    # BUGFIX: standard error propagation — the three independent terms add in
    # quadrature. The original added the PS1-color term *outside* the sqrt
    # (dimensionally inconsistent, flagged "Not verified") and combined the
    # g/r errors linearly instead of in quadrature.
    lc['mag_corr_err'] = np.sqrt(
        lc[magerr_name]**2
        + (color * lc[clru_name])**2
        + (gps1_err**2 + rps1_err**2) * lc[clr_name]**2
    )
    return lc
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
ZTF Forced Photometry service utilities
This script simplifies the process of requesting, downloading and processing light curves from the Zwicky Transient Facility Forced Photometry Service (ZFPS).
It contains several functions with detailed documentation for each one of the aforementioned cases:
request_fp
download_lightcurves
filter_lightcurve
read_lc
For further details, see:
https://irsa.ipac.caltech.edu/data/ZTF/docs/ztf_zfps_userguide.pdf
Description of the functions
Perform a request to ZTF Forced Photometry service (ZFPS)
request_fp
Parameters:
ztf_user
,ztf_passkey
(str): ZTF Forced photometry service authentication email and userpasscoordinates
(ndarray): 2D array containing right ascension and declination in degreesstart_epoch
(JulianDay): Start epoch for the requested light curves. If None then ZTF will compute it from the start of the survey (JD= 2458194.5)end_epoch
(JulianDay): End epoch for the requested light curves. If None, ZTF will take the recentmost epoch from the current day.Download forced photometry processed lightcurves from ZTF
download_lightcurves
Requires to have sent a request to ZTF prior to download.
Parameters:
ztf_user
,ztf_passkey
(str): ZTF Forced photometry service authentication email and userpassdownload_dir
(str): Output directory where the lightcurves will be saved. In case the directory doesn't exist, a new one will be created.threads
(int): Number of CPU cores to accelerate the download process, defaults to the number of cores installed in the system minus one.The lightcurves will be saved in
download_dir
without further processing in an ASCII txt format.Filter light curve data
filter_lightcurve
Performs a quality filtering process as described in: https://irsa.ipac.caltech.edu/data/ZTF/docs/ztf_zfps_userguide.pdf (Section 6.1)
Parameters:
lc
(DataFrame): The input light curve to operate.max_seeing
(arcseconds): Maximum width of the PSF allowed during the observation. Defaults to 4".band
(Literal['ZTF_g', 'ZTF_r', 'ZTF_i']): The ZTF band to filter the data from the light curve file. Defaults to ZTF_gstrict_calibration
(bool): Whether to discard all bad data points (True) or just poorly calibrated/contaminated ones(False). Defaults to True.Read a forced photometry lightcurve into a DataFrame.
read_lc
By default, the output lightcurves from ZTF cannot be parsed as .csv files by pandas; this function sets up many parameters of the read_csv function to read such lightcurves, including changing the names of the columns.
Parameters:
path
(str): The input path to the lightcurve ASCII file (should end in .txt)read_kws
(Any): Additional parameters passed to pandas.read_csvNote that passing
sep
,comment
,index_col
,skipinitialspace
inread_kws
will raise an error since they are already supplied by this function.Note that this script was tested with Python 3.11; other versions (especially the ones that don't have annotations yet) may break.
Usage