
Merge pull request #40 from HeloiseS/dev1.5
The Age Wizard Release - Patch - v1.5.1
HeloiseS authored Jul 10, 2020
2 parents 1e2e79b + 1b93272 commit 58107d1
Showing 17 changed files with 100,387 additions and 58 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/testing.yml
@@ -28,8 +28,8 @@ jobs:
           python-version: '3.x'
       - name: Install dependencies
         run: |
-          python -m pip install --upgrade pip
-          python -m pip install tox
+          python3 -m pip install --upgrade pip
+          python3 -m pip install tox
       - name: Run Tox
         run: tox
9 changes: 6 additions & 3 deletions README.rst
@@ -58,26 +58,29 @@ This way it'll show you each test as they pass or FAIL. In the pip and github ve

 .. code-block:: none

-   `astropy`, `numpy`, `pandas`, `matplotlib`, `pyyaml`, `wheel`, `emcee`, `corner`
+   `numpy`, `pandas`, `matplotlib`, `pyyaml`, `wheel`, `emcee`, `corner`

 **Note:** Python 2 is not supported

 ----

 Read the docs
-^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^


 `Click Here! Click Here! Click Here! <https://heloises.github.io/hoki/intro.html>`_

 ----

 Download Tutorials
 ^^^^^^^^^^^^^^^^^^^^

 Check out these Jupyter notebooks I made - you can find them on `this repo! <https://github.com/HeloiseS/hoki_tutorials>`__

 ----

-Citations
+Paper and how to cite us!
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. image:: https://joss.theoj.org/papers/10.21105/joss.01987/status.svg
    :target: https://doi.org/10.21105/joss.01987
155 changes: 127 additions & 28 deletions hoki/age_utils.py
@@ -62,14 +62,18 @@ def __init__(self, obs_df, model):

         self.obs_df = obs_df
         self.coordinates = find_coordinates(self.obs_df, self.model)
-        self._distributions = calculate_distributions(self.obs_df, self.model)

+        # This line is obsolete but might need revival if we ever want to add the non-normalised distributions again
+        # self._distributions = calculate_distributions_normalised(self.obs_df, self.model)

         self.pdfs = calculate_individual_pdfs(self.obs_df, self.model).fillna(0)
         self.sources = self.pdfs.columns.to_list()
         self.sample_pdf = None
         self._most_likely_age = None

     def calculate_sample_pdf(self, not_you=None, return_df=False):
-        self.sample_pdf = calculate_sample_pdf(self._distributions, not_you=not_you)
+        # self.sample_pdf = calculate_sample_pdf(self._distributions, not_you=not_you)
+        self.sample_pdf = calculate_sample_pdf(self.pdfs, not_you=not_you)
         if return_df: return self.sample_pdf

     @property
@@ -80,7 +84,8 @@ def most_likely_age(self):
         if self._most_likely_age is not None: return self._most_likely_age

         if self.sample_pdf is None:
-            warnings.warn('self.multiplied_pdf is not yet defined -- running AgeWizard.combined_pdfs()', HokiUserWarning)
+            warnings.warn('self.multiplied_pdf is not yet defined -- running AgeWizard.combined_pdfs()',
+                          HokiUserWarning)
             self.calculate_sample_pdf()

         index = self.sample_pdf.index[self.sample_pdf.pdf == max(self.sample_pdf.pdf)].tolist()
@@ -91,7 +96,7 @@ def most_likely_ages(self):
         """
         Finds the most likely ages for all the sources given in the obs_df DataFrame.
         """
-        #index = self.pdfs.drop('time_bins', axis=1).idxmax(axis=0).tolist()
+        # index = self.pdfs.drop('time_bins', axis=1).idxmax(axis=0).tolist()
         index = self.pdfs.idxmax(axis=0).tolist()
         return self.t[index]
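
Taken together, the changes above mean AgeWizard now works with normalised pdfs end to end. A minimal usage sketch, not part of this commit; the file name, observational values and source names below are placeholders:

```python
import pandas as pd
from hoki import load
from hoki.age_utils import AgeWizard

# Hypothetical observations: an HRD-based wizard needs logT and logL columns
obs = pd.DataFrame({'name': ['star1', 'star2'],
                    'logT': [4.1, 3.7],
                    'logL': [4.9, 2.2]})

# Placeholder path to a BPASS HR diagram file
hrd = load.model_output('hrs-sin-imf_chab100.zem4.dat', hr_type='TL')

wizard = AgeWizard(obs_df=obs, model=hrd)
wizard.calculate_sample_pdf()        # combines the individual pdfs
print(wizard.most_likely_age)        # most likely age of the sample as a whole
print(wizard.most_likely_ages)       # one most likely age per source
```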

@@ -180,22 +185,22 @@ def _find_hrd_coordinates(obs_df, myhrd):
     # int(...) is just to make sure we get an integer, because Python sometimes adds significant figures for no reason
     # Then we append that index to our list.

-    for T, L in zip(logT, logL ):
+    for T, L in zip(logT, logL):

         try:
-            T=float(T)
+            T = float(T)
             # Finds the index that is at the minimum distance in Temperature space and adds it to the list
             T_i.append(int((np.where(abs(myhrd.T_coord - T) == abs(myhrd.T_coord - T).min()))[0]))
         except ValueError:
-            warnings.warn("T="+str(T)+" cannot be converted to a float", HokiUserWarning)
+            warnings.warn("T=" + str(T) + " cannot be converted to a float", HokiUserWarning)
             T_i.append(np.nan)

         try:
-            L=float(L)
+            L = float(L)
             # Finds the index that is at the minimum distance in Luminosity space and adds it to the list
             L_i.append(int((np.where(abs(myhrd.L_coord - L) == abs(myhrd.L_coord - L).min()))[0]))
         except ValueError:
-            warnings.warn("L="+str(L)+" cannot be converted to a float", HokiUserWarning)
+            warnings.warn("L=" + str(L) + " cannot be converted to a float", HokiUserWarning)
             L_i.append(np.nan)

     return T_i, L_i
@@ -240,37 +245,63 @@ def _find_cmd_coordinates(obs_df, mycmd):
     for col, mag in zip(colours, magnitudes):

         try:
-            col=float(col)
+            col = float(col)
             # Finds the index that is at the minimum distance in Colour space and adds it to the list
             col_i.append(int((np.where(abs(mycmd.col_range - col) == abs(mycmd.col_range - col).min()))[0]))
         except ValueError:
-            warnings.warn("Colour="+str(col)+" cannot be converted to a float", HokiUserWarning)
+            warnings.warn("Colour=" + str(col) + " cannot be converted to a float", HokiUserWarning)
             col_i.append(np.nan)

         try:
-            mag=float(mag)
+            mag = float(mag)
             # Finds the index that is at the minimum distance in Magnitude space and adds it to the list
             mag_i.append(int((np.where(abs(mycmd.mag_range - mag) == abs(mycmd.mag_range - mag).min()))[0]))
         except ValueError:
-            warnings.warn("Magnitude="+str(mag)+" cannot be converted to a float", HokiUserWarning)
+            warnings.warn("Magnitude=" + str(mag) + " cannot be converted to a float", HokiUserWarning)
             mag_i.append(np.nan)

     return col_i, mag_i


-def normalise_1d(distribution):
+def normalise_1d(distribution, crop_the_future=False):
     """
     Simple function that divides by the sum of the 1D array or DataFrame given.
     """
+    if crop_the_future:
+        distribution = _crop_the_future(distribution)
+
     area = np.sum([bin_t for bin_t in distribution])
-    return distribution/area
+    return distribution / area


+def _crop_the_future(distribution):
+    # Anything above log(age) = 10.1 is in the future - time bins 42 and up must have probability == 0
+    array_that_erases_the_future = np.array([1]*42 + [0]*9)
+    return np.array(distribution)*array_that_erases_the_future
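
The hard-coded [1]*42 + [0]*9 mask matches the BPASS time grid: 51 log-age bins from 6.0 to 11.0 in 0.1 dex steps, where every bin above log(age/yr) = 10.1 (indices 42 to 50) lies in the future, beyond HOKI_NOW, and must carry zero probability. A quick self-contained check of that arithmetic:

```python
import numpy as np

# BPASS log-age bins: 6.0, 6.1, ..., 11.0 -- 51 bins in total
time_bins = np.arange(6.0, 11.1, 0.1)
assert len(time_bins) == 51

# Bins above log(age) = 10.1 are "the future": 10.2 ... 11.0, i.e. 9 bins
future = np.round(time_bins, 1) > 10.1
assert future.sum() == 9

# The mask used by _crop_the_future zeroes exactly those bins
mask = np.array([1] * 42 + [0] * 9)
assert (mask == (~future).astype(int)).all()

# After cropping, normalise_1d renormalises so the pdf still sums to 1
distribution = np.ones(51) * mask
print((distribution / distribution.sum()).sum())  # 1.0
```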


 def calculate_individual_pdfs(obs_df, model):
-    likelihoods = calculate_distributions(obs_df, model)
+    """
+    Calculates the age pdfs of all the stars in the sample and returns them in a dataframe
+
+    Parameters
+    ----------
+    obs_df: pandas.DataFrame
+        Dataframe containing the observational data
+    model: hoki.hrdiagrams.HRDiagrams or hoki.cmd.CMD
+        BPASS HRDiagram or CMD
+
+    Returns
+    -------
+    pandas.DataFrame containing the age pdfs of each star
+    """
+    likelihoods = calculate_distributions_normalised(obs_df, model)

     pdfs = []
     for col in likelihoods.columns:
         pdfs.append(normalise_1d(likelihoods[col].values))

     return pd.DataFrame(np.array(pdfs).T, columns=likelihoods.columns)
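
Each column of the returned DataFrame is one source's age pdf, which is what most_likely_ages exploits with idxmax. A toy sketch of the same per-column normalisation step, on fictional likelihoods rather than real BPASS output:

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
# Fictional likelihoods for two sources over the 51 BPASS time bins
likelihoods = pd.DataFrame({'s0': rng.random(51), 's1': rng.random(51)})

# The loop in calculate_individual_pdfs normalises each column to sum to 1
pdfs = likelihoods / likelihoods.sum(axis=0)

print(pdfs.sum(axis=0))     # s0 1.0, s1 1.0
print(pdfs.idxmax(axis=0))  # index of the most likely time bin for each source
```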


@@ -294,7 +325,7 @@ def calculate_distributions(obs_df, model):
     if isinstance(model, hoki.hrdiagrams.HRDiagram):
         x_coord, y_coord = find_coordinates(obs_df, model)
     if isinstance(model, hoki.cmd.CMD):
-        y_coord, x_coord = find_coordinates(obs_df, model) # yeah it's reversed... -_-
+        y_coord, x_coord = find_coordinates(obs_df, model)  # yeah it's reversed... -_-

     # If source names not given we make our own
     try:
@@ -307,12 +338,12 @@

     # Time to calculate the pdfs
     for i, name in zip(range(obs_df.shape[0]), source_names):
-        xi, yi = x_coord[i], y_coord[i] # just saving space
+        xi, yi = x_coord[i], y_coord[i]  # just saving space

         # Here we take care of the possibility that a coordinate is a NaN
         if np.isnan(xi) or np.isnan(yi):
             warnings.warn("NaN Value encountered in coordinates for source: " + name, HokiUserWarning)
-            likelihoods.append([0] * 51) # Probability is then 0 at all times - That star doesn't exist in our models
+            likelihoods.append([0] * 51)  # Probability is then 0 at all times - That star doesn't exist in our models
             continue

         # Here we fill our not-yet-normalised distribution
@@ -331,7 +362,69 @@
     # Our list of pdfs (which is a list of lists) is turned into a PDF with the source names as column names
     likelihoods_df = pd.DataFrame((np.array(likelihoods)).T, columns=source_names)
     # We add the time bins in there because it can make plotting extra convenient.
-    #distributions_df['time_bins'] = hoki.constants.BPASS_TIME_BINS
+    # distributions_df['time_bins'] = hoki.constants.BPASS_TIME_BINS

     return likelihoods_df


+def calculate_distributions_normalised(obs_df, model):
+    """
+    Given observations and an HR Diagram, calculates the distribution across ages NORMALISED
+
+    Parameters
+    ----------
+    obs_df: pandas.DataFrame
+        Observational data. MUST contain a logT and logL column.
+    model: hoki.hrdiagrams.HRDiagrams or hoki.cmd.CMD
+        BPASS HRDiagram or CMD
+
+    Returns
+    -------
+    Age Probability Distribution Functions in a pandas.DataFrame.
+    """
+    # Checking whether it's an HRD or a CMD
+    if isinstance(model, hoki.hrdiagrams.HRDiagram):
+        x_coord, y_coord = find_coordinates(obs_df, model)
+    if isinstance(model, hoki.cmd.CMD):
+        y_coord, x_coord = find_coordinates(obs_df, model)  # yeah it's reversed... -_-
+
+    # If source names not given we make our own
+    try:
+        source_names = obs_df.name
+    except AttributeError:
+        warnings.warn("No source names given so I'll make my own", HokiUserWarning)
+        source_names = ["s" + str(i) for i in range(obs_df.shape[0])]
+
+    likelihoods = []
+
+    # Time to calculate the pdfs
+    for i, name in zip(range(obs_df.shape[0]), source_names):
+        xi, yi = x_coord[i], y_coord[i]  # just saving space
+
+        # Here we take care of the possibility that a coordinate is a NaN
+        if np.isnan(xi) or np.isnan(yi):
+            warnings.warn("NaN Value encountered in coordinates for source: " + name, HokiUserWarning)
+            likelihoods.append([0] * 51)  # Probability is then 0 at all times - That star doesn't exist in our models
+            continue
+
+        # Here we fill our not-yet-normalised distribution
+        distrib_i = []
+        for model_i in model:
+            # For each time step i, we retrieve the proba in CMD_i or HRD_i and fill our distribution element distrib_i
+            # with it. At the end of the for loop we have iterated over all 51 time bins
+            distrib_i.append(model_i[xi, yi])
+
+        # Then we normalise, so that we have proper probability distributions
+        # pdf_i = normalise_1d(distrib_i)
+
+        # finally our pdf is added to the list
+        likelihoods.append(normalise_1d(distrib_i, crop_the_future=True))
+
+    # Our list of pdfs (which is a list of lists) is turned into a PDF with the source names as column names
+    likelihoods_df = pd.DataFrame((np.array(likelihoods)).T, columns=source_names)
+    # We add the time bins in there because it can make plotting extra convenient.
+    # distributions_df['time_bins'] = hoki.constants.BPASS_TIME_BINS
+
+    return likelihoods_df
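
This function mirrors calculate_distributions above; the only substantive change is that each per-source distribution goes through normalise_1d(..., crop_the_future=True) before being stored. A toy version of that inner loop, with a fake stack of time slices standing in for the real HRD/CMD:

```python
import numpy as np

rng = np.random.default_rng(42)
# Fake model: 51 time slices, each a 100x100 probability grid (stand-in for HRD_i / CMD_i)
fake_model = [rng.random((100, 100)) for _ in range(51)]
xi, yi = 57, 12  # grid coordinates of one source

# Inner loop: one probability per time bin
distrib_i = np.array([model_i[xi, yi] for model_i in fake_model])

# normalise_1d(distrib_i, crop_the_future=True) amounts to:
distrib_i[42:] = 0                    # erase the future
pdf_i = distrib_i / distrib_i.sum()   # renormalise
assert np.isclose(pdf_i.sum(), 1.0)
```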

@@ -362,20 +455,26 @@ def calculate_sample_pdf(distributions_df, not_you=None):
     try:
         distributions_df = distributions_df.drop(labels=not_you, axis=1)
     except KeyError as e:
-        message = 'FEATURE DISABLED'+'\nKeyError'+str(e)+'\nHOKI DIALOGUE: Your labels could not be dropped -- ' \
-                  'all pdfs will be combined \nDEBUGGING ASSISTANT: ' \
-                  'Make sure the labels your listed are spelled correctly:)'
+        message = 'FEATURE DISABLED' + '\nKeyError' + str(e) + \
+                  '\nHOKI DIALOGUE: Your labels could not be dropped -- all pdfs will be combined' \
+                  '\nDEBUGGING ASSISTANT: Make sure the labels you listed are spelled correctly :)'
         warnings.warn(message, HokiUserWarning)

     # We also must be careful not to multiply the time bin column in there so we have a list of the column names
     # that remain after the "not_you" exclusion minus the time_bins column.
-    columns = [col for col in distributions_df.columns if "time_bins" not in col]
+    # columns = [col for col in distributions_df.columns if "time_bins" not in col]
+
+    columns = []
+    if "time_bins" not in distributions_df.columns:
+        for col in distributions_df.columns:
+            columns.append(col)

     for col in columns:
         # for col in distributions_df.columns:
         combined_pdf += distributions_df[col].values

     combined_df = pd.DataFrame(normalise_1d(combined_pdf))
     combined_df.columns = ['pdf']

     return combined_df
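
In practice the distributions_df passed in is AgeWizard.pdfs, whose columns are source names, so not_you simply drops unwanted sources before the pdfs are summed and renormalised. A small sketch with invented names:

```python
import numpy as np
import pandas as pd

# Fictional per-source pdfs over 51 time bins, each summing to 1
pdfs = pd.DataFrame({'s0': np.full(51, 1 / 51),
                     's1': np.full(51, 1 / 51),
                     'foreground_star': np.full(51, 1 / 51)})

# What calculate_sample_pdf(pdfs, not_you=['foreground_star']) boils down to:
kept = pdfs.drop(labels=['foreground_star'], axis=1)
combined = kept.sum(axis=1)
combined_df = pd.DataFrame({'pdf': combined / combined.sum()})
print(combined_df['pdf'].sum())  # 1.0
```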
@@ -400,6 +499,6 @@ def calculate_p_given_age_range(pdfs, age_range=None):
     # Selects only the rows corresponding to the range age_range[0] to age_range[1] (inclusive)
     # and then we sum the probabilities up for each column.
     probability = pdfs[(np.round(BPASS_TIME_BINS, 2) >= min(age_range))
                        & (np.round(BPASS_TIME_BINS, 2) <= max(age_range))].sum()

     return probability
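
Because each pdf column sums to 1, selecting the rows whose time bin falls in the requested range and summing gives the probability that each source's age lies in that range. A self-contained sketch (pdf values invented):

```python
import numpy as np
import pandas as pd

BPASS_TIME_BINS = np.arange(6.0, 11.1, 0.1)  # the 51 BPASS log-age bins

# Fictional pdf: uniform over the 42 non-future bins
pdfs = pd.DataFrame({'s0': [1 / 42] * 42 + [0] * 9})

age_range = (6.9, 7.5)  # log(age/yr)
in_range = (np.round(BPASS_TIME_BINS, 2) >= min(age_range)) \
           & (np.round(BPASS_TIME_BINS, 2) <= max(age_range))
probability = pdfs[in_range].sum()
print(probability)  # 7 bins x 1/42 each = 0.1666...
```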
3 changes: 2 additions & 1 deletion hoki/cmd.py
@@ -92,7 +92,8 @@ def __init__(self, file,
         self._log_ages = None
         self._ages = None
         self.mag_filter = None
-        self.filter2 = None
+        self.col_filter1 = None
+        self.col_filter2 = None

     def make(self, mag_filter, col_filters):
         """
20 changes: 18 additions & 2 deletions hoki/constants.py
@@ -8,8 +8,10 @@
 import io

 # TODO: update documentation and add mentions of set_models_path and set_default_bpass_version in the constants
+
 # module - it will change things in the CMD jupyter notebook I think.

+
 data_path = pkg_resources.resource_filename('hoki', 'data')

 path_to_settings = pkg_resources.resource_filename('hoki', 'data/settings.yaml')
@@ -23,6 +25,22 @@
 BPASS_TIME_INTERVALS = np.array([10**(t+0.05) - 10**(t-0.05) for t in BPASS_TIME_BINS])
 BPASS_TIME_WEIGHT_GRID = np.array([np.zeros((100,100)) + dt for dt in BPASS_TIME_INTERVALS])

+HOKI_NOW = 13.799e9
+
+# Create a deprecation warning when using dummy_dict
+dummy_dict = {'timestep': 0, 'age': 1, 'log(R1)': 2, 'log(T1)': 3, 'log(L1)': 4, 'M1': 5, 'He_core1': 6, 'CO_core1': 7,
+              'ONe_core1': 8, 'X': 10, 'Y': 11, 'C': 12, 'N': 13, 'O': 14, 'Ne': 15, 'MH1': 16, 'MHe1': 17, 'MC1': 18,
+              'MN1': 19, 'MO1': 20, 'MNe1': 21, 'MMg1': 22, 'MSi1': 23, 'MFe1': 24, 'envelope_binding_E': 25,
+              'star_binding_E': 26, 'Mrem_weakSN': 27, 'Mej_weakSN': 28, 'Mrem_SN': 29, 'Mej_SN': 30,
+              'Mrem_superSN': 31, 'Mej_superSN': 32, 'AM_bin': 33,
+              'P_bin': 34, 'log(a)': 35, 'M2': 37, 'MTOT': 38, 'DM1W': 39, 'DM2W': 40, 'DM1A': 41, 'DM2A': 42,
+              'DM1R': 43, 'DM2R': 44, 'DAM': 45, 'log(R2)': 46, 'log(T2)': 47, 'log(L2)': 48, '?': 49, 'modelimf': 50,
+              'mixedimf': 51, 'V-I': 52, 'U': 53, 'B': 54, 'V': 55, 'R': 56, 'I': 57, 'J': 58, 'H': 59, 'K': 60,
+              'u': 61, 'g': 62, 'r': 63, 'i': 64, 'z': 65, 'f300w': 66, 'f336w': 67, 'f435w': 68, 'f450w': 69,
+              'f555w': 70, 'f606w': 71, 'f814w': 72, 'U2': 73, 'B2': 74, 'V2': 75, 'R2': 76, 'I2': 77, 'J2': 78,
+              'H2': 79, 'K2': 80, 'u2': 81, 'g2': 82, 'r2': 83, 'i2': 84, 'z2': 85, 'f300w2': 86, 'f336w2': 87,
+              'f435w2': 88, 'f450w2': 89, 'f555w2': 90, 'f606w2': 91, 'f814w2': 92, 'Halpha': 93, 'FUV': 94, 'NUV': 95}
+
 DEFAULT_BPASS_VERSION = settings['default_bpass_version']

 dummy_dicts = settings['dummy_dicts']
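
dummy_dict maps the human-readable quantity names to their column positions in a raw BPASS stellar model ("dummy") file; note the gaps at indices 9 and 36, so the positions are not contiguous. A brief lookup sketch (the row values are stand-ins):

```python
from hoki.constants import dummy_dict

# Column positions of a few quantities in a raw BPASS stellar model file
print(dummy_dict['age'])      # 1
print(dummy_dict['log(T1)'])  # 3
print(dummy_dict['log(L1)'])  # 4

# e.g. pick log(T1) out of one parsed row of such a file
row = [0.0] * 96  # stand-in for one parsed line
logT1 = row[dummy_dict['log(T1)']]
```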
@@ -85,5 +103,3 @@ def set_default_bpass_version(version):
     print('Looks like everything went well! You can check the path was correctly updated by looking at this file:'
           '\n'+path_to_settings)
-
-
