
Merge pull request #40 from HeloiseS/dev1.5
The Age Wizard Release - Patch - v1.5.1
HeloiseS authored Jul 10, 2020
2 parents 1e2e79b + 1b93272 commit 58107d1
Showing 17 changed files with 100,387 additions and 58 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/testing.yml
@@ -28,8 +28,8 @@ jobs:
           python-version: '3.x'
       - name: Install dependencies
         run: |
-          python -m pip install --upgrade pip
-          python -m pip install tox
+          python3 -m pip install --upgrade pip
+          python3 -m pip install tox
       - name: Run Tox
         run: tox
9 changes: 6 additions & 3 deletions README.rst
@@ -58,26 +58,29 @@ This way it'll show you each test as they pass or FAIL. In the pip and github ve

 .. code-block:: none

-   `astropy`, `numpy`, `pandas`, `matplotlib`, `pyyaml`, `wheel`, `emcee`, `corner`
+   `numpy`, `pandas`, `matplotlib`, `pyyaml`, `wheel`, `emcee`, `corner`

 **Note:** Python 2 is not supported

 ----

 Read the docs
-^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^


 `Click Here! Click Here! Click Here! <https://heloises.github.io/hoki/intro.html>`_

 ----

 Download Tutorials
 ^^^^^^^^^^^^^^^^^^^^

 Check out these Jupyter notebooks I made - you can find them on `this repo! <https://github.com/HeloiseS/hoki_tutorials>`__

 ----

-Citations
+Paper and how to cite us!
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. image:: https://joss.theoj.org/papers/10.21105/joss.01987/status.svg
    :target: https://doi.org/10.21105/joss.01987
155 changes: 127 additions & 28 deletions hoki/age_utils.py
@@ -62,14 +62,18 @@ def __init__(self, obs_df, model):

         self.obs_df = obs_df
         self.coordinates = find_coordinates(self.obs_df, self.model)
-        self._distributions = calculate_distributions(self.obs_df, self.model)

+        # This line is obsolete but might need revival if we ever want to add the non-normalised distributions again
+        # self._distributions = calculate_distributions_normalised(self.obs_df, self.model)

         self.pdfs = calculate_individual_pdfs(self.obs_df, self.model).fillna(0)
         self.sources = self.pdfs.columns.to_list()
         self.sample_pdf = None
         self._most_likely_age = None

     def calculate_sample_pdf(self, not_you=None, return_df=False):
-        self.sample_pdf = calculate_sample_pdf(self._distributions, not_you=not_you)
+        # self.sample_pdf = calculate_sample_pdf(self._distributions, not_you=not_you)
+        self.sample_pdf = calculate_sample_pdf(self.pdfs, not_you=not_you)
         if return_df: return self.sample_pdf

     @property
@@ -80,7 +84,8 @@ def most_likely_age(self):
         if self._most_likely_age is not None: return self._most_likely_age

         if self.sample_pdf is None:
-            warnings.warn('self.multiplied_pdf is not yet defined -- running AgeWizard.combined_pdfs()', HokiUserWarning)
+            warnings.warn('self.multiplied_pdf is not yet defined -- running AgeWizard.combined_pdfs()',
+                          HokiUserWarning)
             self.calculate_sample_pdf()

         index = self.sample_pdf.index[self.sample_pdf.pdf == max(self.sample_pdf.pdf)].tolist()
@@ -91,7 +96,7 @@ def most_likely_ages(self):
         """
         Finds the most likely ages for all the sources given in the obs_df DataFrame.
         """
-        #index = self.pdfs.drop('time_bins', axis=1).idxmax(axis=0).tolist()
+        # index = self.pdfs.drop('time_bins', axis=1).idxmax(axis=0).tolist()
         index = self.pdfs.idxmax(axis=0).tolist()
         return self.t[index]
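
Taken together, the changes above mean AgeWizard now works with normalised pdfs end to end. A minimal usage sketch, not part of this commit; the file name, observational values and source names below are placeholders:

```python
import pandas as pd
from hoki import load
from hoki.age_utils import AgeWizard

# Hypothetical observations: an HRD-based wizard needs logT and logL columns
obs = pd.DataFrame({'name': ['star1', 'star2'],
                    'logT': [4.1, 3.7],
                    'logL': [4.9, 2.2]})

# Placeholder path to a BPASS HR diagram file
hrd = load.model_output('hrs-sin-imf_chab100.zem4.dat', hr_type='TL')

wizard = AgeWizard(obs_df=obs, model=hrd)
wizard.calculate_sample_pdf()        # combines the individual pdfs
print(wizard.most_likely_age)        # most likely age of the sample as a whole
print(wizard.most_likely_ages)       # one most likely age per source
```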

@@ -180,22 +185,22 @@ def _find_hrd_coordinates(obs_df, myhrd):
     # int(...) is just to make sure we get an integer, because Python sometimes adds significant figures for no reason
     # Then we append that index to our list.

-    for T, L in zip(logT, logL ):
+    for T, L in zip(logT, logL):

         try:
-            T=float(T)
+            T = float(T)
             # Finds the index that is at the minimum distance in Temperature space and adds it to the list
             T_i.append(int((np.where(abs(myhrd.T_coord - T) == abs(myhrd.T_coord - T).min()))[0]))
         except ValueError:
-            warnings.warn("T="+str(T)+" cannot be converted to a float", HokiUserWarning)
+            warnings.warn("T=" + str(T) + " cannot be converted to a float", HokiUserWarning)
             T_i.append(np.nan)

         try:
-            L=float(L)
+            L = float(L)
             # Finds the index that is at the minimum distance in Luminosity space and adds it to the list
             L_i.append(int((np.where(abs(myhrd.L_coord - L) == abs(myhrd.L_coord - L).min()))[0]))
         except ValueError:
-            warnings.warn("L="+str(L)+" cannot be converted to a float", HokiUserWarning)
+            warnings.warn("L=" + str(L) + " cannot be converted to a float", HokiUserWarning)
             L_i.append(np.nan)

     return T_i, L_i
@@ -240,37 +245,63 @@ def _find_cmd_coordinates(obs_df, mycmd):
     for col, mag in zip(colours, magnitudes):

         try:
-            col=float(col)
+            col = float(col)
             # Finds the index that is at the minimum distance in Colour space and adds it to the list
             col_i.append(int((np.where(abs(mycmd.col_range - col) == abs(mycmd.col_range - col).min()))[0]))
         except ValueError:
-            warnings.warn("Colour="+str(col)+" cannot be converted to a float", HokiUserWarning)
+            warnings.warn("Colour=" + str(col) + " cannot be converted to a float", HokiUserWarning)
             col_i.append(np.nan)

         try:
-            mag=float(mag)
+            mag = float(mag)
             # Finds the index that is at the minimum distance in Magnitude space and adds it to the list
             mag_i.append(int((np.where(abs(mycmd.mag_range - mag) == abs(mycmd.mag_range - mag).min()))[0]))
         except ValueError:
-            warnings.warn("Magnitude="+str(mag)+" cannot be converted to a float", HokiUserWarning)
+            warnings.warn("Magnitude=" + str(mag) + " cannot be converted to a float", HokiUserWarning)
             mag_i.append(np.nan)

     return col_i, mag_i


-def normalise_1d(distribution):
+def normalise_1d(distribution, crop_the_future=False):
     """
     Simple function that divides by the sum of the 1D array or DataFrame given.
     """
+    if crop_the_future:
+        distribution = _crop_the_future(distribution)
+
     area = np.sum([bin_t for bin_t in distribution])
-    return distribution/area
+    return distribution / area


+def _crop_the_future(distribution):
+    # Anything above log(age) = 10.1 is in the future - time bins 42 and up must have probability == 0
+    array_that_erases_the_future = np.array([1]*42 + [0]*9)
+    return np.array(distribution)*array_that_erases_the_future
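
The hard-coded [1]*42 + [0]*9 mask matches the BPASS time grid: 51 log-age bins from 6.0 to 11.0 in 0.1 dex steps, where every bin above log(age/yr) = 10.1 (indices 42 to 50) lies in the future, beyond HOKI_NOW, and must carry zero probability. A quick self-contained check of that arithmetic:

```python
import numpy as np

# BPASS log-age bins: 6.0, 6.1, ..., 11.0 -- 51 bins in total
time_bins = np.arange(6.0, 11.1, 0.1)
assert len(time_bins) == 51

# Bins above log(age) = 10.1 are "the future": 10.2 ... 11.0, i.e. 9 bins
future = np.round(time_bins, 1) > 10.1
assert future.sum() == 9

# The mask used by _crop_the_future zeroes exactly those bins
mask = np.array([1] * 42 + [0] * 9)
assert (mask == (~future).astype(int)).all()

# After cropping, normalise_1d renormalises so the pdf still sums to 1
distribution = np.ones(51) * mask
print((distribution / distribution.sum()).sum())  # 1.0
```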


 def calculate_individual_pdfs(obs_df, model):
-    likelihoods = calculate_distributions(obs_df, model)
+    """
+    Calculates the age pdfs of all the stars in the sample and returns them in a dataframe
+
+    Parameters
+    ----------
+    obs_df: pandas.DataFrame
+        Dataframe containing the observational data
+    model: hoki.hrdiagrams.HRDiagrams or hoki.cmd.CMD
+        BPASS HRDiagram or CMD
+
+    Returns
+    -------
+    pandas.DataFrame containing the age pdfs of each star
+    """
+    likelihoods = calculate_distributions_normalised(obs_df, model)

     pdfs = []
     for col in likelihoods.columns:
         pdfs.append(normalise_1d(likelihoods[col].values))

     return pd.DataFrame(np.array(pdfs).T, columns=likelihoods.columns)
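
Each column of the returned DataFrame is one source's age pdf, which is what most_likely_ages exploits with idxmax. A toy sketch of the same per-column normalisation step, on fictional likelihoods rather than real BPASS output:

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
# Fictional likelihoods for two sources over the 51 BPASS time bins
likelihoods = pd.DataFrame({'s0': rng.random(51), 's1': rng.random(51)})

# The loop in calculate_individual_pdfs normalises each column to sum to 1
pdfs = likelihoods / likelihoods.sum(axis=0)

print(pdfs.sum(axis=0))     # s0 1.0, s1 1.0
print(pdfs.idxmax(axis=0))  # index of the most likely time bin for each source
```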


@@ -294,7 +325,7 @@ def calculate_distributions(obs_df, model):
     if isinstance(model, hoki.hrdiagrams.HRDiagram):
         x_coord, y_coord = find_coordinates(obs_df, model)
     if isinstance(model, hoki.cmd.CMD):
-        y_coord, x_coord = find_coordinates(obs_df, model) # yeah it's reversed... -_-
+        y_coord, x_coord = find_coordinates(obs_df, model)  # yeah it's reversed... -_-

     # If source names not given we make our own
     try:
@@ -307,12 +338,12 @@

     # Time to calculate the pdfs
     for i, name in zip(range(obs_df.shape[0]), source_names):
-        xi, yi = x_coord[i], y_coord[i] # just saving space
+        xi, yi = x_coord[i], y_coord[i]  # just saving space

         # Here we take care of the possibility that a coordinate is a NaN
         if np.isnan(xi) or np.isnan(yi):
             warnings.warn("NaN Value encountered in coordinates for source: " + name, HokiUserWarning)
-            likelihoods.append([0] * 51) # Probability is then 0 at all times - That star doesn't exist in our models
+            likelihoods.append([0] * 51)  # Probability is then 0 at all times - That star doesn't exist in our models
             continue

         # Here we fill our not-yet-normalised distribution
@@ -331,7 +362,69 @@
     # Our list of pdfs (which is a list of lists) is turned into a PDF with the source names as column names
     likelihoods_df = pd.DataFrame((np.array(likelihoods)).T, columns=source_names)
     # We add the time bins in there because it can make plotting extra convenient.
-    #distributions_df['time_bins'] = hoki.constants.BPASS_TIME_BINS
+    # distributions_df['time_bins'] = hoki.constants.BPASS_TIME_BINS

     return likelihoods_df


+def calculate_distributions_normalised(obs_df, model):
+    """
+    Given observations and an HR Diagram, calculates the distribution across ages NORMALISED
+
+    Parameters
+    ----------
+    obs_df: pandas.DataFrame
+        Observational data. MUST contain a logT and logL column.
+    model: hoki.hrdiagrams.HRDiagrams or hoki.cmd.CMD
+        BPASS HRDiagram or CMD
+
+    Returns
+    -------
+    Age Probability Distribution Functions in a pandas.DataFrame.
+    """
+    # Checking whether it's an HRD or a CMD
+    if isinstance(model, hoki.hrdiagrams.HRDiagram):
+        x_coord, y_coord = find_coordinates(obs_df, model)
+    if isinstance(model, hoki.cmd.CMD):
+        y_coord, x_coord = find_coordinates(obs_df, model)  # yeah it's reversed... -_-
+
+    # If source names not given we make our own
+    try:
+        source_names = obs_df.name
+    except AttributeError:
+        warnings.warn("No source names given so I'll make my own", HokiUserWarning)
+        source_names = ["s" + str(i) for i in range(obs_df.shape[0])]
+
+    likelihoods = []
+
+    # Time to calculate the pdfs
+    for i, name in zip(range(obs_df.shape[0]), source_names):
+        xi, yi = x_coord[i], y_coord[i]  # just saving space
+
+        # Here we take care of the possibility that a coordinate is a NaN
+        if np.isnan(xi) or np.isnan(yi):
+            warnings.warn("NaN Value encountered in coordinates for source: " + name, HokiUserWarning)
+            likelihoods.append([0] * 51)  # Probability is then 0 at all times - That star doesn't exist in our models
+            continue
+
+        # Here we fill our not-yet-normalised distribution
+        distrib_i = []
+        for model_i in model:
+            # For each time step i, we retrieve the proba in CMD_i or HRD_i and fill our distribution element distrib_i
+            # with it. At the end of the for loop we have iterated over all 51 time bins
+            distrib_i.append(model_i[xi, yi])
+
+        # Then we normalise, so that we have proper probability distributions
+        # pdf_i = normalise_1d(distrib_i)
+
+        # finally our pdf is added to the list
+        likelihoods.append(normalise_1d(distrib_i, crop_the_future=True))
+
+    # Our list of pdfs (which is a list of lists) is turned into a PDF with the source names as column names
+    likelihoods_df = pd.DataFrame((np.array(likelihoods)).T, columns=source_names)
+    # We add the time bins in there because it can make plotting extra convenient.
+    # distributions_df['time_bins'] = hoki.constants.BPASS_TIME_BINS
+
+    return likelihoods_df
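
This function mirrors calculate_distributions above; the only substantive change is that each per-source distribution goes through normalise_1d(..., crop_the_future=True) before being stored. A toy version of that inner loop, with a fake stack of time slices standing in for the real HRD/CMD:

```python
import numpy as np

rng = np.random.default_rng(42)
# Fake model: 51 time slices, each a 100x100 probability grid (stand-in for HRD_i / CMD_i)
fake_model = [rng.random((100, 100)) for _ in range(51)]
xi, yi = 57, 12  # grid coordinates of one source

# Inner loop: one probability per time bin
distrib_i = np.array([model_i[xi, yi] for model_i in fake_model])

# normalise_1d(distrib_i, crop_the_future=True) amounts to:
distrib_i[42:] = 0                    # erase the future
pdf_i = distrib_i / distrib_i.sum()   # renormalise
assert np.isclose(pdf_i.sum(), 1.0)
```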

@@ -362,20 +455,26 @@ def calculate_sample_pdf(distributions_df, not_you=None):
     try:
         distributions_df = distributions_df.drop(labels=not_you, axis=1)
     except KeyError as e:
-        message = 'FEATURE DISABLED'+'\nKeyError'+str(e)+'\nHOKI DIALOGUE: Your labels could not be dropped -- ' \
-                  'all pdfs will be combined \nDEBUGGING ASSISTANT: ' \
-                  'Make sure the labels your listed are spelled correctly:)'
+        message = 'FEATURE DISABLED' + '\nKeyError' + str(e) + \
+                  '\nHOKI DIALOGUE: Your labels could not be dropped -- all pdfs will be combined' \
+                  '\nDEBUGGING ASSISTANT: Make sure the labels you listed are spelled correctly :)'
         warnings.warn(message, HokiUserWarning)

     # We also must be careful not to multiply the time bin column in there so we have a list of the column names
     # that remain after the "not_you" exclusion minus the time_bins column.
-    columns = [col for col in distributions_df.columns if "time_bins" not in col]
+    # columns = [col for col in distributions_df.columns if "time_bins" not in col]
+
+    columns = []
+    if "time_bins" not in distributions_df.columns:
+        for col in distributions_df.columns:
+            columns.append(col)

     for col in columns:
         # for col in distributions_df.columns:
         combined_pdf += distributions_df[col].values

     combined_df = pd.DataFrame(normalise_1d(combined_pdf))
     combined_df.columns = ['pdf']

     return combined_df
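
In practice the distributions_df passed in is AgeWizard.pdfs, whose columns are source names, so not_you simply drops unwanted sources before the pdfs are summed and renormalised. A small sketch with invented names:

```python
import numpy as np
import pandas as pd

# Fictional per-source pdfs over 51 time bins, each summing to 1
pdfs = pd.DataFrame({'s0': np.full(51, 1 / 51),
                     's1': np.full(51, 1 / 51),
                     'foreground_star': np.full(51, 1 / 51)})

# What calculate_sample_pdf(pdfs, not_you=['foreground_star']) boils down to:
kept = pdfs.drop(labels=['foreground_star'], axis=1)
combined = kept.sum(axis=1)
combined_df = pd.DataFrame({'pdf': combined / combined.sum()})
print(combined_df['pdf'].sum())  # 1.0
```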
@@ -400,6 +499,6 @@ def calculate_p_given_age_range(pdfs, age_range=None):
     # Selects only the rows corresponding to the range age_range[0] to age_range[1] (inclusive)
     # and then we sum the probabilities up for each column.
     probability = pdfs[(np.round(BPASS_TIME_BINS, 2) >= min(age_range))
                        & (np.round(BPASS_TIME_BINS, 2) <= max(age_range))].sum()

     return probability
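
Because each pdf column sums to 1, selecting the rows whose time bin falls in the requested range and summing gives the probability that each source's age lies in that range. A self-contained sketch (pdf values invented):

```python
import numpy as np
import pandas as pd

BPASS_TIME_BINS = np.arange(6.0, 11.1, 0.1)  # the 51 BPASS log-age bins

# Fictional pdf: uniform over the 42 non-future bins
pdfs = pd.DataFrame({'s0': [1 / 42] * 42 + [0] * 9})

age_range = (6.9, 7.5)  # log(age/yr)
in_range = (np.round(BPASS_TIME_BINS, 2) >= min(age_range)) \
           & (np.round(BPASS_TIME_BINS, 2) <= max(age_range))
probability = pdfs[in_range].sum()
print(probability)  # 7 bins x 1/42 each = 0.1666...
```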
3 changes: 2 additions & 1 deletion hoki/cmd.py
@@ -92,7 +92,8 @@ def __init__(self, file,
         self._log_ages = None
         self._ages = None
         self.mag_filter = None
-        self.filter2 = None
+        self.col_filter1 = None
+        self.col_filter2 = None

     def make(self, mag_filter, col_filters):
         """
20 changes: 18 additions & 2 deletions hoki/constants.py
@@ -8,8 +8,10 @@
 import io

 # TODO: update documentation and add mentions of set_models_path and set_default_bpass_version in the constants
+
 # module - it will change things in the CMD jupyter notebook I think.

+
 data_path = pkg_resources.resource_filename('hoki', 'data')

 path_to_settings = pkg_resources.resource_filename('hoki', 'data/settings.yaml')
@@ -23,6 +25,22 @@
 BPASS_TIME_INTERVALS = np.array([10**(t+0.05) - 10**(t-0.05) for t in BPASS_TIME_BINS])
 BPASS_TIME_WEIGHT_GRID = np.array([np.zeros((100,100)) + dt for dt in BPASS_TIME_INTERVALS])

+HOKI_NOW = 13.799e9
+
+# Create a deprecation warning when using dummy_dict
+dummy_dict = {'timestep': 0, 'age': 1, 'log(R1)': 2, 'log(T1)': 3, 'log(L1)': 4, 'M1': 5, 'He_core1': 6, 'CO_core1': 7,
+              'ONe_core1': 8, 'X': 10, 'Y': 11, 'C': 12, 'N': 13, 'O': 14, 'Ne': 15, 'MH1': 16, 'MHe1': 17, 'MC1': 18,
+              'MN1': 19, 'MO1': 20, 'MNe1': 21, 'MMg1': 22, 'MSi1': 23, 'MFe1': 24, 'envelope_binding_E': 25,
+              'star_binding_E': 26, 'Mrem_weakSN': 27, 'Mej_weakSN': 28, 'Mrem_SN': 29, 'Mej_SN': 30,
+              'Mrem_superSN': 31, 'Mej_superSN': 32, 'AM_bin': 33,
+              'P_bin': 34, 'log(a)': 35, 'M2': 37, 'MTOT': 38, 'DM1W': 39, 'DM2W': 40, 'DM1A': 41, 'DM2A': 42,
+              'DM1R': 43, 'DM2R': 44, 'DAM': 45, 'log(R2)': 46, 'log(T2)': 47, 'log(L2)': 48, '?': 49, 'modelimf': 50,
+              'mixedimf': 51, 'V-I': 52, 'U': 53, 'B': 54, 'V': 55, 'R': 56, 'I': 57, 'J': 58, 'H': 59, 'K': 60,
+              'u': 61, 'g': 62, 'r': 63, 'i': 64, 'z': 65, 'f300w': 66, 'f336w': 67, 'f435w': 68, 'f450w': 69,
+              'f555w': 70, 'f606w': 71, 'f814w': 72, 'U2': 73, 'B2': 74, 'V2': 75, 'R2': 76, 'I2': 77, 'J2': 78,
+              'H2': 79, 'K2': 80, 'u2': 81, 'g2': 82, 'r2': 83, 'i2': 84, 'z2': 85, 'f300w2': 86, 'f336w2': 87,
+              'f435w2': 88, 'f450w2': 89, 'f555w2': 90, 'f606w2': 91, 'f814w2': 92, 'Halpha': 93, 'FUV': 94, 'NUV': 95}
+
 DEFAULT_BPASS_VERSION = settings['default_bpass_version']

 dummy_dicts = settings['dummy_dicts']
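
dummy_dict maps the human-readable quantity names to their column positions in a raw BPASS stellar model ("dummy") file; note the gaps at indices 9 and 36, so the positions are not contiguous. A brief lookup sketch (the row values are stand-ins):

```python
from hoki.constants import dummy_dict

# Column positions of a few quantities in a raw BPASS stellar model file
print(dummy_dict['age'])      # 1
print(dummy_dict['log(T1)'])  # 3
print(dummy_dict['log(L1)'])  # 4

# e.g. pick log(T1) out of one parsed row of such a file
row = [0.0] * 96  # stand-in for one parsed line
logT1 = row[dummy_dict['log(T1)']]
```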
@@ -85,5 +103,3 @@ def set_default_bpass_version(version):
     print('Looks like everything went well! You can check the path was correctly updated by looking at this file:'
           '\n'+path_to_settings)
-
-
