import os, glob
import numpy as np
from multiprocessing import Pool
from functools import partial
from astropy.stats import sigma_clip
from astropy.convolution import convolve, Box1DKernel
from astropy.wcs import WCS
from astropy.wcs.utils import skycoord_to_pixel
from astropy.coordinates import SkyCoord
from astropy.io import fits
import scipy.interpolate
from scipy.stats import binned_statistic
import warnings
warnings.filterwarnings('ignore')
# Module-level data cube of 32x32 frames, shared with multiprocessing workers
# (filled in by prepare_images; read and modified in place by bgsubtract)
image_stack = np.zeros((0,32,32))
def create_folder(fullname, auto=False, overwrite=False):
    """Create a folder unless it exists.

    Args:
        fullname (string): Full path to the folder to be created.
        auto (bool, optional): If the folder already exists, skip it and return
            None (True), or ask the user whether to overwrite the folder or
            change the folder name (False, Default).
        overwrite (bool, optional): Whether you want to overwrite the folder if
            it already exists.

    Returns:
        string: The final name used for the folder (None when auto-skipped).
    """
    # Note: the '[docs]' Sphinx-scrape artifact was removed from the def line,
    # and the 'solved' string flag was replaced by direct returns.
    while True:
        if not os.path.exists(fullname):
            # Folder doesn't exist yet and can be safely written to
            os.makedirs(fullname)
            return fullname
        if len(os.listdir(fullname)) == 0:
            # Folder exists but is empty and can be safely written to
            return fullname
        if overwrite:
            return fullname
        if auto:
            # Caller asked for non-interactive behavior: signal a skip
            return None
        # Interactive fallback: ask the user what to do
        folder = fullname.split('/')[-1]
        print('Warning:', folder, 'already exists! Are you sure you want to overwrite this folder? (y/n)')
        answer = input()
        if answer == 'y':
            return fullname
        print('What would you like the new folder name to be?')
        folder = input()
        fullname = '/'.join(fullname.split('/')[0:-1])+'/'+folder
def get_fnames(directory, AOR_snip):
    """Find the paths to all the bcd.fits files.

    Args:
        directory (string): Path to the directory containing all the Spitzer
            data; the last path component is taken as the channel (e.g. 'ch1').
        AOR_snip (string): Common first characters of data directory eg. 'r579'.

    Returns:
        tuple: fnames, lens (list, list).
            List of paths to all bcd.fits files, number of files for each AOR
            (needed for adding correction stacks).
    """
    # Strip trailing slashes so the channel name can be read off the path
    directory = directory.rstrip('/')
    ch = directory.split('/')[-1]
    # Keep only the AOR folders whose names start with the requested snippet
    AOR_list = [folder for folder in os.listdir(directory)
                if folder.startswith(AOR_snip)]
    fnames = []
    lens = []
    for aor in AOR_list:
        path = directory + '/' + aor + '/' + ch + '/bcd'
        files = np.sort(glob.glob(os.path.join(path, '*bcd.fits')))
        if len(files) != 0:
            fnames.extend(files)
            lens.append(len(files))
    return fnames, lens
def get_stacks(calDir, dataDir, AOR_snip):
    """Find the paths to all the background subtraction correction stacks FITS files.

    Args:
        calDir (string): Path to the directory containing the correction stacks.
        dataDir (string): Path to the directory containing the Spitzer data to
            be corrected; the last path component is taken as the channel.
        AOR_snip (string): Common first characters of data directory eg. 'r579'.

    Returns:
        ndarray: Array of paths to the relevant correction stacks
            (an empty list if an AOR is missing its cal folder).
    """
    dataDir = dataDir.rstrip('/')
    ch = dataDir.split('/')[-1]
    stacks = np.array(os.listdir(calDir))
    locs = np.array([stack.find('SPITZER_I') for stack in stacks])
    # Filter out all files that don't fit the correct naming convention for correction stacks
    good = np.where(locs != -1)[0]
    offset = 11  # length of the string "SPITZER_I#_"
    # Pull out just the key that says which sdark each stack is for
    keys = np.array([stacks[i][locs[i]+offset:].split('_')[0] for i in good])
    AOR_list = [folder for folder in os.listdir(dataDir)
                if folder.startswith(AOR_snip)]
    calFiles = []
    for aor in AOR_list:
        path = dataDir + '/' + aor + '/' + ch + '/cal/'
        if not os.path.isdir(path):
            print('Error: Folder \''+path+'\' does not exist, so automatic correction stack selection cannot be performed')
            return []
        # Match this AOR's sdark key against the available correction stacks
        fname = glob.glob(path+'*sdark.fits')[0]
        loc = fname.find('SPITZER_I')+offset
        key = fname[loc:].split('_')[0]
        calFiles.append(os.path.join(calDir, stacks[list(good)][np.where(keys == key)[0][0]]))
    return np.array(calFiles)
def get_time(header, ignoreFrames):
    """Gets the time stamp for each image.

    Args:
        header (astropy.io.fits.header.Header): Header of fits file (any
            mapping providing NAXIS/NAXIS3, FRAMTIME in seconds, BMJD_OBS).
        ignoreFrames (ndarray): Array of frames to ignore (consistently bad frames).

    Returns:
        numpy.ma.MaskedArray: Time stamp array in BMJD with ignored frames masked.
    """
    if header['NAXIS'] == 2:
        # Single full-frame image rather than a 64-frame data cube
        h = 1
    else:
        h = header['NAXIS3']
    sec2day = 1.0/(3600.0*24.0)
    step = header['FRAMTIME']*sec2day
    # NOTE(review): the final sample lands at (h-1)*step rather than
    # (h-1/2)*step, so the spacing is slightly uneven — kept to preserve behavior.
    t = np.linspace(header['BMJD_OBS'] + step/2, header['BMJD_OBS'] + (h-1)*step, h, endpoint=True)
    t = np.ma.masked_invalid(t)
    if len(ignoreFrames) != 0:
        # Bugfix: `t[ignoreFrames].mask = True` set the mask on a fancy-indexed
        # *copy* and silently did nothing; assigning np.ma.masked masks in place.
        # (len() also avoids the ambiguous `ndarray != []` comparison.)
        t[ignoreFrames] = np.ma.masked
    return t
def oversampling(image_data, scale=2):
    """First, substitutes all invalid/sigma-clipped pixels by interpolating the value, then linearly oversamples the image.

    Args:
        image_data (ndarray): Data cube of images (2D arrays of pixel values).
        scale (int, optional): Sampling factor, e.g. if scale = 2, there will be
            twice as many data points in the x and y axis. Default is 2.
            (Do not recommend larger than 2)

    Returns:
        ndarray: Data cube of oversampled images (2D arrays of pixel values),
            divided by scale**2 to conserve total flux.
    """
    l, h, w = image_data.shape
    # Bugfix: the original referenced an undefined name `a` instead of the
    # `scale` parameter, raising NameError on every call.
    gridy, gridx = np.mgrid[0:h:1/scale, 0:w:1/scale]
    image_over = np.zeros((l, h*scale, w*scale))
    for i in range(l):
        image_masked = np.ma.masked_invalid(image_data[i,:,:])
        if np.all(image_masked.mask):
            # Bugfix: image_over is a plain ndarray with no .mask attribute;
            # fill the frame with NaN so it is fully masked by masked_invalid below.
            image_over[i,:,:] = np.nan
            continue
        points = np.where(image_masked.mask == False)
        image_compre = np.ma.compressed(image_masked)
        # NOTE(review): `points` is (row, col) while the grid is passed as
        # (gridx, gridy); kept exactly as the original — confirm axis ordering.
        image_over[i,:,:] = scipy.interpolate.griddata(points, image_compre,
                                                       (gridx, gridy),
                                                       method='linear')
    # Mask any bad values (NaNs outside the interpolation hull, empty frames)
    image_masked = np.ma.masked_invalid(image_over)
    # conserve flux
    return image_masked/(scale**2)
def sigma_clipping(image_stack, bounds = (13, 18, 13, 18), sigma=5, maxiters=3):
    """Sigma clip bad pixels, and mask an entire frame when a clipped pixel lies too close to the target.

    Args:
        image_stack (3D array): Data cube of images (2D arrays of pixel values).
        bounds (tuple, optional): Bounds of box around the target. Default is (13, 18, 13, 18).
        sigma (float, optional): How many sigma something must differ by to be clipped.
            Default is 5, which shouldn't trim any real data for Ndata=64*1000.
        maxiters (int, optional): How many iterations of sigma clipping should be done.

    Returns:
        3D array: Data cube of sigma clipped images (2D arrays of pixel values).
    """
    lbx, ubx, lby, uby = bounds
    clip_kwargs = dict(sigma=sigma, cenfunc=np.ma.median, stdfunc=np.ma.std, axis=0)
    try:
        # Modern astropy spells the iteration count `maxiters`
        image_stack = sigma_clip(image_stack, maxiters=maxiters, **clip_kwargs)
    except TypeError:
        # Older astropy versions used the `iters` keyword instead
        image_stack = sigma_clip(image_stack, iters=maxiters, **clip_kwargs)
    # Mask the whole frame whenever any pixel inside the target box was clipped
    near_target_bad = np.any(image_stack.mask[:, lbx:ubx, lby:uby], axis=(1, 2))
    image_stack[near_target_bad] = np.ma.masked
    return image_stack
def clip_data(arr, highpassWidth, sigma1=5, sigma2=5, maxiters=3):
    """Sigma clip a 1D array twice — once globally, once against a boxcar-smoothed
    version — replacing clipped points with local medians after each pass.

    Args:
        arr (1D array): Data to be cleaned.
        highpassWidth (float): Width of the boxcar kernel used by the rolling clip.
        sigma1 (float, optional): Clipping threshold for the global pass.
        sigma2 (float, optional): Clipping threshold for the rolling pass.
        maxiters (int, optional): Number of sigma-clipping iterations.

    Returns:
        1D array: The cleaned array with clipped values replaced.
    """
    stat_kwargs = dict(cenfunc=np.ma.median, stdfunc=np.ma.std)
    try:
        arr = sigma_clip(arr, sigma=sigma1, maxiters=maxiters, **stat_kwargs)
    except TypeError:
        # Older astropy used `iters` rather than `maxiters`
        arr = sigma_clip(arr, sigma=sigma1, iters=maxiters, **stat_kwargs)
    arr = replace_clipped(arr)
    # Second pass: clip residuals about a smoothed version of the data
    arr = rolling_clip(arr, highpassWidth, sigma=sigma2, maxiters=maxiters)
    return replace_clipped(arr)
def replace_clipped(arr):
    """Replace masked entries of `arr` in place with the median of their (up to
    four) nearest neighbours, falling back to the global median when all
    neighbours are masked, then clear the mask.

    Args:
        arr (numpy.ma.MaskedArray): 1D array with clipped/bad values masked.

    Returns:
        numpy.ma.MaskedArray: The same array with masked entries filled in and
            the mask removed.
    """
    for i in np.where(np.ma.getmaskarray(arr))[0]:
        inds = np.array([i-2, i-1, i+1, i+2])
        # Bugfix: also drop negative indices — previously they wrapped around to
        # the end of the array, so points near the start were "repaired" with
        # values from the opposite end.
        inds = inds[(0 <= inds) & (inds < len(arr))]
        if not np.all(arr[inds].mask):
            # At least one neighbour is good: use the local median
            arr[i] = np.ma.median(arr[inds])
        else:
            # No good neighbours: fall back to the global median
            arr[i] = np.ma.median(arr)
    arr.mask = np.ma.nomask
    return arr
def rolling_clip(arr, highpassWidth, sigma=5, maxiters=3):
    """Mask points of `arr` that deviate strongly from a boxcar-smoothed
    version of the array.

    Args:
        arr (numpy.ma.MaskedArray): 1D data array.
        highpassWidth (float): Width of the boxcar smoothing kernel.
        sigma (float, optional): Clipping threshold on the residuals.
        maxiters (int, optional): Number of sigma-clipping iterations.

    Returns:
        numpy.ma.MaskedArray: `arr` with the outlying points masked in place.
    """
    smooth = highpassflist(arr, highpassWidth)
    smoothed = (arr - smooth)
    try:
        smoothed = sigma_clip(smoothed, sigma=sigma, maxiters=maxiters,
                              cenfunc=np.ma.median, stdfunc=np.ma.std)
    except TypeError:
        # Bugfix: was a bare `except:`, which would also swallow unrelated
        # errors (even KeyboardInterrupt); only the old-astropy `iters`
        # signature difference should be handled here, matching the sibling
        # functions in this file.
        smoothed = sigma_clip(smoothed, sigma=sigma, iters=maxiters,
                              cenfunc=np.ma.median, stdfunc=np.ma.std)
    # Propagate the clipped residual points onto the input array's mask
    arr[np.ma.getmaskarray(smoothed)] = np.ma.masked
    return arr
def bgsubtract(bounds=(11, 19, 11, 19), i=0):
    """Measure the background level of frame `i` of the global image stack and
    subtract it from that frame in place.

    Args:
        bounds (tuple, optional): (lbx, ubx, lby, uby) box around the target,
            excluded from the background estimate. Default is (11, 19, 11, 19).
        i (int, optional): Index of the frame to process.

    Returns:
        ndarray: [bg, bg_err] — the median background level and its scatter.
    """
    # The image cube lives in a module-level global so that multiprocessing
    # workers can access it
    global image_stack
    lbx, ubx, lby, uby = bounds
    # Start from the invalid-pixel mask of this frame
    frame = np.ma.masked_invalid(image_stack[i])
    # Exclude the target star from the background estimate
    frame[lbx:ubx, lby:uby] = np.ma.masked
    bg = np.ma.median(frame)
    bg_err = np.ma.std(frame)
    # Remove the measured background level from the stored frame
    image_stack[i] -= bg
    return np.array([bg, bg_err])
def bin_array(data, size):
    """Median bin an array.

    Args:
        data (1D array): Array of data to be binned.
        size (int): Number of data points in each bin.

    Returns:
        tuple: binned_data (1D array) Array of binned data.
            binned_data_std (1D array) Array of standard deviation for each
            entry in binned_data.
    """
    masked = np.ma.masked_invalid(np.ma.copy(data))
    positions = np.arange(masked.shape[0])
    # Bin edges at -0.5, size-0.5, 2*size-0.5, ... so each bin holds `size` points
    edges = size*np.arange(np.ceil(len(positions)/size)+1)-0.5
    medians, _, _ = binned_statistic(positions, masked, statistic=np.ma.median, bins=edges)
    deviations, _, _ = binned_statistic(positions, masked, statistic=np.ma.std, bins=edges)
    return medians, deviations
def highpassflist(signal, highpassWidth):
    """Boxcar-smooth `signal` with a kernel of width `highpassWidth`, extending
    the edge values to avoid boundary artefacts."""
    kernel = Box1DKernel(highpassWidth)
    smoothed = convolve(signal, kernel, boundary='extend')
    return smoothed
def prepare_image(savepath, AOR_snip, fnames, lens, stacks=[], ignoreFrames=[],
                  oversamp=False, scale=2, reuse_oversamp=True, saveoversamp=True,
                  addStack=False, stackPath='', maskStars=[], i=0):
    """Load, clean, calibrate and flatten a single FITS image or data cube.

    Args:
        savepath (string): Folder where oversampled images are cached.
        AOR_snip (string): Common first characters of the AOR directories.
        fnames (list): Paths to all bcd.fits files (from get_fnames).
        lens (list): Number of files per AOR (from get_fnames).
        stacks (list, optional): Paths to per-AOR correction stacks.
        ignoreFrames (list, optional): Frame indices to mask in every cube.
        oversamp (bool, optional): Whether to oversample the images.
        scale (int, optional): Oversampling factor (see oversampling()).
        reuse_oversamp (bool, optional): Reuse previously saved oversampled images.
        saveoversamp (bool, optional): Save newly computed oversampled images.
        addStack (bool, optional): Whether to add a background correction stack.
        stackPath (string, optional): Folder containing the correction stacks.
        maskStars (list, optional): Sky coordinates of background stars to mask.
        i (int): Index of the file to process (last so it can be pool.map-ed).

    Returns:
        ndarray: (nFrames, 32*32+1) array of flattened pixel values with the
            frame time stamp appended as the final column.
    """
    # Open the FITS file just long enough to grab the header and pixel data
    with fits.open(fnames[i]) as hdu_list:
        header = hdu_list[0].header
        time = get_time(header, ignoreFrames)
        if len(hdu_list[0].data.shape)==2:
            # Reshape fullframe data so that it can be used with our routines
            # Getting just the stamp around the sweet spot
            image = np.ma.masked_invalid(hdu_list[0].data[np.newaxis,217:249,9:41])
        else:
            image = np.ma.masked_invalid(hdu_list[0].data)
    # Ignore any consistently bad frames in datacubes.
    # Bugfix: the original assigned np.nan and then set `.mask` on a
    # fancy-indexed *copy* (a no-op), leaving the frames unmasked;
    # assigning np.ma.masked masks them in place.
    if len(ignoreFrames) != 0:
        image[ignoreFrames] = np.ma.masked
    # Add background correcting stack if requested
    if addStack:
        # Find which AOR (and hence which correction stack) file i belongs to.
        # Bugfix: the condition was `i > np.sum(...)`, an off-by-one that gave
        # the first file of each subsequent AOR the previous AOR's stack.
        j = 0  # counter to keep track of which correction stack we're using
        while i >= np.sum(lens[:j+1]):
            j += 1  # if we've moved onto a new AOR, increment j
        # Bugfix: the stack HDU was never closed (resource leak)
        with fits.open(stacks[j]) as stackHDU:
            image += np.ma.masked_invalid(stackHDU[0].data)
    if image.shape[0]!=1:
        # Sigma clipping within datacubes
        image = sigma_clipping(image, sigma=4.)
    # convert MJy/str to electron count
    convfact = (header['GAIN']*header['EXPTIME']/header['FLUXCONV'])
    image = convfact*image
    # Mask any other stars in the frame to avoid them influencing the background subtraction
    if maskStars != []:
        header['CTYPE3'] = 'Time-SIP' #Just need to add a type so astropy doesn't complain
        w = WCS(header, naxis=[1,2])
        mask = np.ma.getmaskarray(image)
        for st in maskStars:
            coord = SkyCoord(st[0], st[1])
            x,y = np.rint(skycoord_to_pixel(coord, w)).astype(int)
            # Mask a 3x3 box centred on each star, in every frame
            x = x+np.arange(-1,2)
            y = y+np.arange(-1,2)
            x,y = np.meshgrid(x,y)
            mask[:,x,y] = True
        image = np.ma.masked_array(image, mask=mask)
    # oversampling
    if oversamp:
        if reuse_oversamp:
            savename = savepath + 'Oversampled/' + fnames[i].split('/')[-1].split('_')[-4] + '.pkl'
            if os.path.isfile(savename):
                # Bugfix: these files are pickles written by ndarray.dump();
                # np.load refuses pickles unless allow_pickle=True (numpy>=1.16.3)
                image = np.load(savename, allow_pickle=True)
            else:
                print('Warning: Oversampled images were not previously saved! Making new ones now...')
                image = np.ma.masked_invalid(oversampling(image, scale))
                if (saveoversamp == True):
                    # THIS CHANGES FROM ONE SET OF DATA TO ANOTHER!!!
                    image.dump(savename)
        else:
            image = np.ma.masked_invalid(oversampling(image))
            if saveoversamp:
                # THIS CHANGES FROM ONE SET OF DATA TO ANOTHER!!!
                savename = savepath + 'Oversampled/' + fnames[i].split('/')[-1].split('_')[-4] + '.pkl'
                image.dump(savename)
    # Flatten each frame and append its time stamp as the final column
    # (np.prod replaces the deprecated/removed np.product)
    data = image.reshape(-1, np.prod(image.shape[1:]))
    data = np.append(data, time[:,np.newaxis], axis=1)
    return data
def prepare_images(basepath, planet, channel, AOR_snip, ignoreFrames=[],
                   oversamp=False, scale=2, reuse_oversamp=True, saveoversamp=True,
                   addStack=False, maskStars=[], ncpu=4):
    """Load, clean, sort and background-subtract every frame of an observation.

    Args:
        basepath (string): Path to the folder containing all Spitzer data.
        planet (string): Planet name; data is read from basepath+planet+'/data/'+channel.
        channel (string): Channel used for the observation, e.g. 'ch1'.
        AOR_snip (string): Common first characters of data directory eg. 'r579'.
        ignoreFrames (list, optional): Frame indices to mask in every data cube.
        oversamp (bool, optional): Whether to oversample each image.
        scale (int, optional): Oversampling factor (see oversampling()).
        reuse_oversamp (bool, optional): Reuse previously saved oversampled images.
        saveoversamp (bool, optional): Save newly computed oversampled images.
        addStack (bool, optional): Whether to add a background correction stack.
        maskStars (list, optional): Sky coordinates of background stars to mask.
        ncpu (int, optional): Number of worker processes to use.

    Returns:
        tuple: image_stack (3D array), bg (1D array), bg_err (1D array),
            time (1D array) — the cleaned, time-sorted frames, the per-frame
            background level and uncertainty, and the frame time stamps.
    """
    #folder containing properly named correction stacks (will be automatically selected)
    stackPath = basepath+'Calibration/'
    datapath = basepath+planet+'/data/'+channel
    savepath = basepath+planet+'/analysis/'+channel+'/'
    if addStack:
        savepath += 'addedStack/'
    else:
        savepath += 'addedBlank/'
    # Treat None as "nothing to mask/ignore"
    if maskStars is None:
        maskStars = []
    if ignoreFrames is None:
        ignoreFrames = []
    print('\tGetting frames', end='', flush=True)
    if len(ignoreFrames)!=0:
        print(', masking bad frames', end='', flush=True)
    print('... ', end='')
    # get list of filenames and number of files
    fnames, lens = get_fnames(datapath, AOR_snip)
    # get path where the aor breaks will be saved
    breakpath = basepath+planet+'/analysis/'+channel+'/aorBreaks.txt'
    # get & write aor breaks (first time stamp of each AOR)
    index = 0
    breaktimes = []
    for length in lens:
        with fits.open(fnames[index]) as rawImage:
            header = rawImage[0].header
            breaktimes.append(get_time(header, []).flatten()[0])
        index += length
    # Drop the first entry: the start of the observation is not a break
    breaktimes = np.sort(breaktimes)[1:]
    with open(breakpath, 'w') as f:
        f.write(str(breaktimes)[1:-1])
    # if need to add correction stack
    if addStack:
        stacks = get_stacks(stackPath, datapath, AOR_snip)
    else:
        stacks = []
    # Load all of the images using multiprocessing to speed things up
    with Pool(ncpu) as pool:
        func = partial(prepare_image, savepath, AOR_snip, fnames, lens, stacks, ignoreFrames,
                       oversamp, scale, reuse_oversamp, saveoversamp, addStack, stackPath, maskStars)
        inds = range(len(fnames))
        # Each worker returns a (nFrames, 32*32+1) block: flattened pixels + time
        results = np.ma.masked_array(pool.map(func, inds)).reshape(-1,int(32**2+1))
    # Store the frames in the module-level global so bgsubtract workers can see it
    global image_stack
    time = results[:,-1].flatten()
    image_stack = results[:,:-1].reshape(-1, 32, 32)
    # Free up a bit of RAM
    results = None
    # Sort data into chronological order
    order = np.argsort(time)
    time = time[order]
    image_stack = image_stack[order]
    # Free up a bit of RAM
    order = None
    print('Sigma clipping... ', end='', flush=True)
    # sigma clip bad pixels along full time axis
    image_stack = sigma_clipping(image_stack, sigma=5)
    print('Subtracting background... ', end='', flush=True)
    # background subtraction is done on global variable
    # NOTE(review): with process-based workers, bgsubtract's in-place
    # `image_stack[i] -= bg` happens in each worker's *copy* of the global and
    # does not propagate back to this process, so the returned image_stack may
    # not actually be background-subtracted — confirm intended behavior.
    with Pool(ncpu) as pool:
        func = partial(bgsubtract, (11, 19, 11, 19))
        inds = range(image_stack.shape[0])
        bg, bg_err = np.array(pool.map(func, inds)).T
    print('Frames loaded!', flush=True)
    return image_stack, bg, bg_err, time