From 1b09bec701e752a58e1643a6a856562188e29649 Mon Sep 17 00:00:00 2001 From: dinithins Date: Wed, 17 Apr 2024 22:37:17 +0000 Subject: [PATCH] v0.2.0 update --- LICENSE | 4 +- README.md | 22 +- .../AlignmentDistMan-checkpoint.py | 239 +++ .../ClusterUtils-checkpoint.py | 521 ++++++ .../.ipynb_checkpoints/Main-checkpoint.py | 842 +++++++++ .../MyFunctions-checkpoint.py | 94 + .../.ipynb_checkpoints/OrgAlign-checkpoint.py | 764 ++++++++ .../PathwayAnalyser-checkpoint.py | 129 ++ .../TimeSeriesPreprocessor-checkpoint.py | 221 +++ .../.ipynb_checkpoints/Utils-checkpoint.py | 198 ++ .../VisualUtils-checkpoint.py | 419 +++++ .../.ipynb_checkpoints/__init__-checkpoint.py | 12 + genes2genes/AlignmentDistMan.py | 46 +- genes2genes/BatchAnalyser.py | 104 -- genes2genes/ClusterUtils.py | 191 +- genes2genes/MVG.py | 185 -- genes2genes/Main.py | 1443 +++------------ genes2genes/MyFunctions.py | 76 +- genes2genes/OrgAlign.py | 326 +--- genes2genes/PathwayAnalyser.py | 129 ++ genes2genes/PathwayAnalyserV2.py | 294 --- genes2genes/SimulationExperimentAnalyser.py | 452 ----- genes2genes/TimeSeriesPreprocessor.py | 468 ++--- genes2genes/Utils.py | 198 ++ genes2genes/VisualUtils.py | 568 +----- genes2genes/__init__.py | 14 +- .../AlignmentDistMan.cpython-38.pyc | Bin 0 -> 5917 bytes .../__pycache__/ClusterUtils.cpython-38.pyc | Bin 0 -> 14487 bytes genes2genes/__pycache__/MVG.cpython-38.pyc | Bin 0 -> 3275 bytes genes2genes/__pycache__/Main.cpython-38.pyc | Bin 0 -> 24149 bytes .../__pycache__/MyFunctions.cpython-38.pyc | Bin 0 -> 2078 bytes .../__pycache__/OrgAlign.cpython-38.pyc | Bin 0 -> 19348 bytes .../PathwayAnalyser.cpython-38.pyc | Bin 0 -> 5026 bytes .../PathwayAnalyserV2.cpython-38.pyc | Bin 0 -> 7307 bytes ...imulationExperimentAnalyser.cpython-38.pyc | Bin 0 -> 11141 bytes .../TimeSeriesPreprocessor.cpython-38.pyc | Bin 0 -> 7962 bytes genes2genes/__pycache__/Utils.cpython-38.pyc | Bin 0 -> 5171 bytes .../__pycache__/VisualUtils.cpython-38.pyc | Bin 0 -> 16251 bytes .../__pycache__/__init__.cpython-38.pyc | Bin 0 -> 598 bytes .../G2G_logo-checkpoint.png | Bin 0 -> 24635 bytes .../G2G_logo_new-checkpoint.png | Bin 0 -> 74287 bytes images/G2G_logo.png | Bin 0 -> 24635 bytes images/G2G_logo_new.png | Bin 0 -> 74287 bytes ...s_vs_approx_time_PAM_LPS_G2G_alignment.png | Bin 0 -> 48877 bytes ...n_points_vs_time_PAM_LPS_G2G_alignment.png | Bin 0 -> 26737 bytes .../Supplementary_notebook1-checkpoint.ipynb | 248 +++ .../Supplementary_notebook2-checkpoint.ipynb | 404 ++++ .../Tutorial-checkpoint.ipynb | 1443 +++++++++++++++ notebooks/Supplementary_notebook1.ipynb | 248 +++ notebooks/Supplementary_notebook2.ipynb | 467 +++++ notebooks/Tutorial.ipynb | 1619 ++++++----------- pyproject.toml | 20 +- 52 files changed, 7770 insertions(+), 4638 deletions(-) create mode 100644 genes2genes/.ipynb_checkpoints/AlignmentDistMan-checkpoint.py create mode 100644 genes2genes/.ipynb_checkpoints/ClusterUtils-checkpoint.py create mode 100644 genes2genes/.ipynb_checkpoints/Main-checkpoint.py create mode 100644 genes2genes/.ipynb_checkpoints/MyFunctions-checkpoint.py create mode 100644 genes2genes/.ipynb_checkpoints/OrgAlign-checkpoint.py create mode 100644 genes2genes/.ipynb_checkpoints/PathwayAnalyser-checkpoint.py create mode 100644 genes2genes/.ipynb_checkpoints/TimeSeriesPreprocessor-checkpoint.py create mode 100644 genes2genes/.ipynb_checkpoints/Utils-checkpoint.py create mode 100644 genes2genes/.ipynb_checkpoints/VisualUtils-checkpoint.py create mode 100644 genes2genes/.ipynb_checkpoints/__init__-checkpoint.py delete mode 100644 genes2genes/BatchAnalyser.py delete mode 100644 genes2genes/MVG.py create mode 100644 genes2genes/PathwayAnalyser.py delete mode 100644 genes2genes/PathwayAnalyserV2.py delete mode 100644 genes2genes/SimulationExperimentAnalyser.py create mode 100644 genes2genes/Utils.py create mode 100644 genes2genes/__pycache__/AlignmentDistMan.cpython-38.pyc create mode 100644 genes2genes/__pycache__/ClusterUtils.cpython-38.pyc create mode 100644 genes2genes/__pycache__/MVG.cpython-38.pyc create mode 100644 genes2genes/__pycache__/Main.cpython-38.pyc create mode 100644 genes2genes/__pycache__/MyFunctions.cpython-38.pyc create mode 100644 genes2genes/__pycache__/OrgAlign.cpython-38.pyc create mode 100644 genes2genes/__pycache__/PathwayAnalyser.cpython-38.pyc create mode 100644 genes2genes/__pycache__/PathwayAnalyserV2.cpython-38.pyc create mode 100644 genes2genes/__pycache__/SimulationExperimentAnalyser.cpython-38.pyc create mode 100644 genes2genes/__pycache__/TimeSeriesPreprocessor.cpython-38.pyc create mode 100644 genes2genes/__pycache__/Utils.cpython-38.pyc create mode 100644 genes2genes/__pycache__/VisualUtils.cpython-38.pyc create mode 100644 genes2genes/__pycache__/__init__.cpython-38.pyc create mode 100644 images/.ipynb_checkpoints/G2G_logo-checkpoint.png create mode 100644 images/.ipynb_checkpoints/G2G_logo_new-checkpoint.png create mode 100644 images/G2G_logo.png create mode 100644 images/G2G_logo_new.png create mode 100644 images/cell_numbers_vs_approx_time_PAM_LPS_G2G_alignment.png create mode 100644 images/n_interpolation_points_vs_time_PAM_LPS_G2G_alignment.png create mode 100644 notebooks/.ipynb_checkpoints/Supplementary_notebook1-checkpoint.ipynb create mode 100644 notebooks/.ipynb_checkpoints/Supplementary_notebook2-checkpoint.ipynb create mode 100644 notebooks/.ipynb_checkpoints/Tutorial-checkpoint.ipynb create mode 100644 notebooks/Supplementary_notebook1.ipynb create mode 100644 notebooks/Supplementary_notebook2.ipynb diff --git a/LICENSE b/LICENSE index a012b08..a1ba3ea 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2023 Dinithi Sumanaweera, Teichmann Lab +Copyright (c) 2024 Dinithi Sumanaweera, Teichmann Lab Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. +THE SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md index 62d1396..1640b35 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -

+

# Genes2Genes Project page: https://teichlab.github.io/Genes2Genes @@ -30,7 +30,6 @@ conda create --name g2g_env python=3.8 conda activate g2g_env pip install git+https://github.com/Teichlab/Genes2Genes.git ``` - The package will be made available on PyPi soon. ### **Input to G2G** @@ -40,13 +39,26 @@ The package will be made available on PyPi soon. **Note:** Please ensure that you have reasonable pseudotime estimates that fairly represent the trajectories, as the accuracy and reliability of trajectory alignment entirely depend on the accuracy and reliability of your pseudotime estimation. We recommend users to inspect whether the cell density distribution along estimated pseudotime (in terms of the meta attributes such as the annotated cell type, sampling time points, etc. where applicable) well-represents each trajectory of focus. Users can choose the best pseudotime estimates to compare after testing several different pseudotime estimation tools on their datasets. -### Tutorial +### **Tutorial** Please refer to the notebook [`notebooks/Tutorial.ipynb`](https://github.com/Teichlab/Genes2Genes/blob/main/notebooks/Tutorial.ipynb) which gives an example analysis between a reference and query dataset from literature. Please also refer https://teichlab.github.io/Genes2Genes on how to read a trajectory alignment output generated by G2G.
-**Note**: The runtime of the G2G algorithm depends on the number of cells in the reference and query datasets, the number of interpolation time points, and the number of genes to align. -G2G currently utilizes concurrency through Python multiprocessing to speed up the gene-level alignment process. It creates a number of processes equal to the number of cores in the system, and each process performs a single gene-level alignment at one time. +### **Runtime** + +The runtime of the G2G algorithm depends on the number of cells in the reference and query datasets, the number of interpolation time points, and the number of genes to align. +For an idea, please see below a simple run-time analysis of G2G for 89 genes of the reference (NR = 179 cells) and query (NQ = 290 cells) from literature used in our tutorial. (Note: the number of interpolation points = 14 for the middle plot). Reference: [`notebooks/Supplementary_notebook1.ipynb`]. + +
+ + +

+ +**Further examples from the case studies of our manuscript:** Reference: [`notebooks/Supplementary_notebook2.ipynb`]. + +It took approximately 12min to align 1371 gene trajectories of 20,327 reference cells & 17,176 query cells under 14 interpolation time points; and approximately 4.5min to align 994 gene trajectories of 3157 reference cells & 890 query cells under 13 interpolation time points. + +G2G can also utilize concurrency through Python multiprocessing by creating a number of processes equal to the number of cores in the system where each process performs a single gene-level alignment at one time. However we note that sequential processing (the default setting of G2G) seems to be more efficient than parallel processing, as multiprocessing seems to add an overhead when allocating and sharing resources amongst processes, thus doubling up the runtime. ### Funding Acknowledgement diff --git a/genes2genes/.ipynb_checkpoints/AlignmentDistMan-checkpoint.py b/genes2genes/.ipynb_checkpoints/AlignmentDistMan-checkpoint.py new file mode 100644 index 0000000..74144f7 --- /dev/null +++ b/genes2genes/.ipynb_checkpoints/AlignmentDistMan-checkpoint.py @@ -0,0 +1,239 @@ +import regex +import numpy as np +from tqdm import tqdm +import pandas as pd +from tabulate import tabulate +from scipy.spatial import distance + +""" +This script defines complementary classes and functions for alignment result analysis +""" + +class _TempFSMObj: + + def __init__(self, al_str, gene): + self.al_str = al_str + self.fsm, self.counts = self._get_transition_probs(al_str) + self.al_length = len(al_str) + self.gene = gene + + def _get_transition_probs(self, al_str): + transition_counts = {'MM':0,'MI':0, 'MD':0,'MW':0,'MV':0, 'II':0, 'IM':0,'ID':0, 'IW':0, 'IV':0,'DD':0, 'DM':0,'DI':0, 'DW':0, 'DV':0, + 'WM':0,'WW':0,'WV':0,'WI':0,'WV':0, 'VM':0,'VW':0,'VV':0,'VI':0,'VV':0} + sum_transitions = 0 + for key in transition_counts.keys(): + transition_counts[key] = len(regex.findall(key,al_str, overlapped=True)) + 1 # Adding pseudocount to avoid log(0) + sum_transitions += transition_counts[key] + + transition_probs = transition_counts.copy() + + for key in transition_counts.keys(): + transition_probs[key] = transition_counts[key]/sum_transitions + return transition_probs, transition_counts + +def _compute_msg_len(transition_counts, fsm, al_len): + + msg_len = 0.0 + for key in transition_counts.keys(): + msg_len = -np.log(fsm[key])*transition_counts[key] + msg_len = msg_len/al_len + return msg_len + +def _pairwise_alignment_dist_v2(a1,a2): + + x1 = _compute_msg_len(a1.counts, a1.fsm, a1.al_length) + y1 = _compute_msg_len(a2.counts, a1.fsm, a2.al_length) + x2 = _compute_msg_len(a1.counts, a2.fsm, a1.al_length) + y2 = _compute_msg_len(a2.counts, a2.fsm, a2.al_length) + + return (np.abs(x1-y1) + np.abs(x2-y2))/2 + +def _get_region_str(al_str): + prev = '' + i=0 + regions = '' + for i in range(len(al_str)): + if(i==0): + regions += al_str[i] + continue + if(al_str[i-1]==al_str[i]): + continue + else: + regions += al_str[i] + continue + return regions + +def _test_unique_index_sums(a): + index_sum = 0 + m = {'M':0,'I':0,'D':0,'W':0,'V':0} + + l = 0 + for i in range(len(a)): + if(i==len(a)-1): + if(a[i-1]==a[i]): + m[a[i]] += (index_sum + i)/(l+1) + else: + m[a[i-1]] += index_sum/l + index_sum = 0 + m[a[i]] += (index_sum + i) + break + + if(i==0 or a[i-1]==a[i]): + index_sum += i + l+=1 + else: + m[a[i-1]] = m[a[i-1]] + (index_sum/l) + index_sum = 0 + index_sum += i + l=1 + return m + +class AlignmentDist: + + def __init__(self, aligner_obj): + self.alignments = aligner_obj.results + self.gene_list = aligner_obj.gene_list + self.results_map = aligner_obj.results_map + self.results = aligner_obj.results + + # computing pairwise polygon based distance between each pair of alignments in the set of all gene ref-query alignments + def compute_polygon_area_alignment_dist(self): + + DistMat = [] + for i in range(len(self.alignments)): + DistMat.append(np.repeat(-1,len(self.alignments))) + for i in tqdm(range(len(self.alignments))): + for j in range(len(self.alignments)): + if(DistMat[i][j] < 0): + DistMat[i][j] = Utils.compute_alignment_area_diff_distance(self.alignments[i].alignment_str, self.alignments[j].alignment_str + ,self.alignments[i].fwd_DP.S_len, self.alignments[i].fwd_DP.T_len ) + else: + DistMat[i][j] = DistMat[j][i] + DistMat = pd.DataFrame(DistMat) + DistMat.index = self.gene_list + DistMat.columns = self.gene_list + + DistMat/np.max(np.asarray(DistMat).flatten()) + + return DistMat + + def compute_alignment_ensemble_distance_matrix(self, scheme): + + PolygonDistMat = self.compute_polygon_area_alignment_dist() + if(scheme==1): + return PolygonDistMat + + FSA_objects = [] + FSA_objects_regionwise = [] + + for i in range(len(self.alignments)): + FSA_objects.append(_TempFSMObj(self.alignments[i].alignment_str,self.alignments[i].gene ) ) + region_str = _get_region_str(self.alignments[i].alignment_str) + FSA_objects_regionwise.append(_TempFSMObj(region_str,self.alignments[i].gene )) + self.alignments[i].unique_index_sums = list(_test_unique_index_sums(self.alignments[i].alignment_str).values()) + self.alignments[i].region_str = region_str + + Mat = []; Mat_ui = [] + for i in range(len(self.alignments)): + Mat.append(np.repeat(-1.0,len(self.alignments))) + Mat_ui.append(np.repeat(-1.0,len(self.alignments))) + + for i in range(len(self.alignments)): + for j in range(len(self.alignments)): + if(i==j): + Mat[i][j] = 0.0; Mat_ui[i][j] = 0.0 + if(Mat[i][j]<0): + Mat[i][j] = _pairwise_alignment_dist_v2(FSA_objects[i],FSA_objects[j]) + Mat_ui[i][j] = distance.euclidean(self.alignments[i].unique_index_sums,self.alignments[j].unique_index_sums) + + LikelihoodDistMat = pd.DataFrame(Mat) + LikelihoodDistMat.columns = self.gene_list + LikelihoodDistMat.index = self.gene_list + LikelihoodDistMat = (LikelihoodDistMat/np.max(np.max(LikelihoodDistMat ))) + IndexSumDistMat = pd.DataFrame(Mat_ui) + IndexSumDistMat.columns = self.gene_list + IndexSumDistMat.index = self.gene_list + IndexSumDistMat = IndexSumDistMat /np.max(np.max(IndexSumDistMat)) + + if(scheme==2): + return LikelihoodDistMat + elif(scheme==3): + return IndexSumDistMat + elif(scheme==0): + joint_mat = PolygonDistMat + LikelihoodDistMat + IndexSumDistMat + return joint_mat/3 + elif(scheme==4): + joint_mat = PolygonDistMat + LikelihoodDistMat + return joint_mat/2 + elif(scheme==5): + joint_mat = LikelihoodDistMat + IndexSumDistMat + return joint_mat/2 + elif(scheme==6): + joint_mat = PolygonDistMat + IndexSumDistMat + return joint_mat/2 + + def order_genes_by_alignments(self): + + indices = [] + genes = [] + gene_strs = [] + first_lengths= [] + + for a in self.results: + gene_strs.append(a.alignment_str) + genes.append(a.gene_pair) + w_index = a.alignment_str.find('W') + m_index = a.alignment_str.find('M') + v_index = a.alignment_str.find('V') + if(w_index<0): + w_index = np.inf + if(m_index<0): + m_index = np.inf + if(v_index<0): + v_index = np.inf + + if(m_index<0): + if(w_index >=0 and (w_index=0 and (v_index0.2): + adjacent_region_indices = np.append(adjacent_region_indices,regions[k][0]) + adjacent_region_indices = np.append(adjacent_region_indices, regions[k][1]) + filtered_regions=np.append(filtered_regions,[adjacent_region_start,regions[k][1] ]) + filtered_region_indices = np.append(filtered_region_indices,adjacent_region_indices) + adjacent_region_start = regions[k+1][0] + adjacent_region_indices = [] + else: + adjacent_region_indices=np.append(adjacent_region_indices,regions[k][0]) + continue + else: + if(len(adjacent_region_indices)>0): # check if there is a continuing adjacent region + ended_adjacent_region_len = regions[k][1]- adjacent_region_start + if(ended_adjacent_region_len>0.2): + adjacent_region_indices = np.append(adjacent_region_indices,regions[k][0]) + adjacent_region_indices=np.append(adjacent_region_indices,regions[k][1]) + filtered_regions=np.append(filtered_regions,[adjacent_region_start,regions[k][1] ]) + filtered_region_indices = np.append(filtered_region_indices, adjacent_region_indices) + + return list(filtered_region_indices) + + def check_inconsistent_zero_region(self, gex_arr, pseudotime_arr, trajInterpolator): + + regions = [] + window_range = trajInterpolator.interpolation_points + + for i in range(1,len(window_range)): + sliding_region = np.logical_and(pseudotime_arr>=window_range[i-1], pseudotime_arr ', prev_i, prev_j, prev_state ) + i = prev_i + j = prev_j + state = prev_state + #self.alignment_str = state + self.alignment_str + + return tracked_path + + + + def get_matched_regions(self): + D_regions = [(m.start(0), m.end(0)) for m in regex.finditer("D+", self.alignment_str)] + I_regions = [(m.start(0), m.end(0)) for m in regex.finditer("I+", self.alignment_str)] + M_regions = [(m.start(0), m.end(0)) for m in regex.finditer("M+", self.alignment_str)] + W_regions = [(m.start(0), m.end(0)) for m in regex.finditer("W+", self.alignment_str)] + V_regions = [(m.start(0), m.end(0)) for m in regex.finditer("V+", self.alignment_str)] + def resolve(regions): + for i in range(len(regions)): + x = list(regions[i]); x[1] = x[1]-1; regions[i] = x + return regions + M_regions = resolve(M_regions); D_regions = resolve(D_regions); I_regions = resolve(I_regions) + i = 0; j = 0; m_id = 0; i_id = 0; d_id = 0; c = 0 + S_match_regions = []; T_match_regions = [] + S_non_match_regions = []; T_non_match_regions = [] + a1 = ""; a2 = "" + + while(c= interpolation_points[i-1]; range_length = range_length_corner + else: + logic = np.logical_and(cell_pseudotimes <= interpolation_points[i+1], cell_pseudotimes >= interpolation_points[i-1]) + range_length = range_length_mid + + density_stat = np.count_nonzero(logic) + density_stat = density_stat/range_length + cell_density_estimates.append(density_stat) + #print('** per unit cell density: ', cell_density_estimates) + self.cell_density_estimates = cell_density_estimates + cell_density_estimates = [1/x for x in cell_density_estimates] # taking reciprocal for weighing + + #print('reciprocals: ', cell_density_estimates) + # if this has inf values, use the max weight for them (otherwise it becomes inf resulting same weights 1.0 for all cells) + arr = cell_density_estimates + if(np.any(np.isinf(arr))): + max_w = max(np.asarray(arr)[np.isfinite(arr)] ) + cell_density_estimates = np.where(np.isinf(arr), max_w, arr) + #print('** adaptive weights -- ', cell_density_estimates) + + return cell_density_estimates + + def compute_adaptive_window_denominator(self): # for each interpolation time point + + cell_density_adaptive_weights = self.reciprocal_cell_density_estimates + + # using min-max to stretch the range (for highly adapted window sizes having high window sizes) + cell_density_adaptive_weights =np.asarray(cell_density_adaptive_weights) + scaler = MinMaxScaler() + cell_density_adaptive_weights = scaler.fit_transform(cell_density_adaptive_weights.reshape(-1, 1)).flatten() + cell_density_adaptive_weights = cell_density_adaptive_weights * self.k + + # ======= enforcing the same window_size = kernel_WINDOW_SIZE for the interpolation with the least weighted kernel window size + adaptive_window_sizes = [] + for cd in cell_density_adaptive_weights: + adaptive_window_sizes.append(cd*self.kernel_WINDOW_SIZE) #weighing stadard window size + + # find the interpolation point for which the window_size weighted to be lowest -- furthest to kernel_WINDOW_SIZE + temp = list(np.abs(adaptive_window_sizes - np.repeat(self.kernel_WINDOW_SIZE,self.n_bins))) + least_affected_interpolation_point = temp.index(max(temp)) + residue = np.abs(self.kernel_WINDOW_SIZE - adaptive_window_sizes[least_affected_interpolation_point]) + if(self.k>1): # linear scaling to stretch the range of window size from 0.1 base line. + adaptive_window_sizes = adaptive_window_sizes + (residue/(self.k-1)) + else: + adaptive_window_sizes = adaptive_window_sizes + residue + + # compute adaptive window size based denominator of Gaussian kernel for each cell for each interpolation time point + W = [] + for adw in adaptive_window_sizes: + adaptive_W_size = adw**2 + W.append(adaptive_W_size) + self.adaptive_window_sizes = adaptive_window_sizes + + return W + + # compute Gaussian weights for each interpolation time point and cell + def compute_Weight_matrix(self): + if(self.adaptive_kernel): + adaptive_win_denoms_mat = np.asarray([np.repeat(a, len(self.cell_pseudotimes)) for a in self.adaptive_win_denoms]) + W_matrix = pd.DataFrame(np.exp(-np.divide(np.array(self.abs_timediff_mat**2), adaptive_win_denoms_mat))) + else: + W_matrix = pd.DataFrame(np.exp(-np.array(self.abs_timediff_mat**2)/self.kernel_WINDOW_SIZE**2)) + W_matrix.columns = self.adata.obs_names + self._real_intpl = self.interpolation_points + #self.interpolation_points = [np.round(i,2) for i in self.interpolation_points] + W_matrix.index = self.interpolation_points + #sb.heatmap(W_matrix) + return W_matrix + + def get_effective_cell_pseudotime_range(self, i, effective_weight_threshold): + effective_weights = self.cell_weight_mat.loc[self.interpolation_points[i]] + cell_names = np.asarray(effective_weights.index) + effective_weights = np.asarray(effective_weights) + cell_ids = np.where(effective_weights>effective_weight_threshold)[0] + effective_cell_names = cell_names[cell_ids] + effective_cell_pseudotimes = self.cell_pseudotimes[cell_ids] + return effective_cell_pseudotimes + + # plotting highly effective cell_contribution regions for given interpolation points based on adaptive weighted gaussian kernel + def plot_effective_regions_for_interpolation_points(self, intpointsIdx2plots, effective_weight_threshold=0.5, plot=True): + + cmap = sb.color_palette("viridis", as_cmap=True) + self.n_effective_cells = [] + for i in intpointsIdx2plots: + x = self.get_effective_cell_pseudotime_range(i, effective_weight_threshold= effective_weight_threshold) + self.n_effective_cells.append(len(x)) + if(plot): + sb.kdeplot(x, fill=True, color=cmap(i/self.n_bins), clip=(0.0,1.0)) + + +""" +The below functions define interpolation functions used by the above Interpolator object +(defined outside class for time efficiency) +""" +# ====================== interpolation process of genes +def compute_stat(row, x, cell_densities, user_given_std): + idx = row.name + if(user_given_std[idx] < 0): + cell_weights_sum = np.sum(row) + + # estimate weighted mean + weighted_mean = np.dot(row, x)/cell_weights_sum + #print(weighted_mean) + + # estimate weighted variance + real_mean = np.mean(x); n = len(row) + weighted_sum_std = np.dot(row, (x - real_mean) ** 2 ) + weighted_std = np.sqrt(weighted_sum_std/(cell_weights_sum * (n-1)/n)) + weighted_std = weighted_std * cell_densities[idx] # weighting according to cell density + else: + weighted_mean = 0.0 + weighted_std = user_given_std[idx] # + + D,_,_ = MyFunctions.generate_random_dataset(50, weighted_mean, weighted_std) + return np.asarray([weighted_mean, weighted_std, D] ) + +#row = list(trajInterpolator.cell_weight_mat.loc[intpl_i]) +def interpolate_gene_v2(i, trajInterpolator, user_given_std): + torch.manual_seed(1) + GENE = trajInterpolator.gene_list[i] + #print(GENE) + x = Utils.csr_mat_col_densify(trajInterpolator.mat ,i) + N_cells= len(trajInterpolator.cell_pseudotimes) + + trajInterpolator.cell_weight_mat.index = range(0,len(trajInterpolator.cell_weight_mat)) + cell_densities = list(trajInterpolator.cell_weight_mat.apply(np.sum, axis=1)/N_cells) + + results = trajInterpolator.cell_weight_mat.apply(compute_stat, axis=1, args = ([x,cell_densities, user_given_std]), result_type='expand') + results = pd.DataFrame(results) + + return SummaryTimeSeries(GENE, results[0], results[1], results[2], trajInterpolator.interpolation_points) + +class SummaryTimeSeries: + """ + This class defines an interpolated time series object that carries the interpolated result of a gene expression time series + """ + + def __init__(self, gene_name, mean_trend, std_trend, intpl_gex, time_points): + self.gene_name = gene_name + self.mean_trend = np.asarray([np.mean(data_bin) for data_bin in intpl_gex]) # interpolated dist mean + self.std_trend = np.asarray([np.std(data_bin) for data_bin in intpl_gex]) # interpolated dist std + self.data_bins = list(intpl_gex) + self.intpl_means = list(mean_trend) # actual weighted means + self.intpl_stds = list(std_trend) # actual weighted stds + self.time_points = np.asarray(time_points) + + self.Y = np.asarray([np.asarray(x) for x in self.data_bins]).flatten() + self.X = np.asarray([np.repeat(t,50) for t in self.time_points]).flatten() + + def plot_mean_trend(self, color='midnightblue'): + sb.lineplot(x= self.time_points, y=self.mean_trend, color=color, linewidth=4) + + def plot_std_trend(self, color='midnightblue'): + sb.lineplot(x= self.time_points, y=self.std_trend, color=color, linewidth=4) + + \ No newline at end of file diff --git a/genes2genes/.ipynb_checkpoints/Utils-checkpoint.py b/genes2genes/.ipynb_checkpoints/Utils-checkpoint.py new file mode 100644 index 0000000..7959f86 --- /dev/null +++ b/genes2genes/.ipynb_checkpoints/Utils-checkpoint.py @@ -0,0 +1,198 @@ +import numpy as np +from scipy.sparse import csr_matrix +from . import MyFunctions + +# UTIL FUNCTIONS +def csr_mat_col_densify(csr_matrix, j): + start_ptr = csr_matrix.indptr[j] + end_ptr = csr_matrix.indptr[j + 1] + data = csr_matrix.data[start_ptr:end_ptr] + dense_column = np.zeros(csr_matrix.shape[1]) + dense_column[csr_matrix.indices[start_ptr:end_ptr]] = data + return dense_column + + +def minmax_normalise(arr): + + norm_arr = [] + arr = np.asarray(arr) + arr_max = np.max(arr) + arr_min = np.min(arr) + for i in range(len(arr)): + norm_arr.append((arr[i] - arr_min )/(arr_max - arr_min )) + return norm_arr + + +# computes distributional distance under the MML framework +def compute_mml_dist(ref_adata_subset,query_adata_subset, gene): + + ref_data = np.asarray(ref_adata_subset[:,gene].X.todense()).flatten() + query_data = np.asarray(query_adata_subset[:,gene].X.todense()).flatten() + μ_S = np.mean(ref_data) + μ_T = np.mean(query_data) + σ_S =np.std(ref_data) + σ_T =np.std(query_data) + #print(μ_S,μ_T) + if(not np.any(ref_data)): + σ_S = 0.1 + if(not np.any(query_data)): + σ_T = 0.1 + + I_ref_model, I_refdata_g_ref_model = MyFunctions.run_dist_compute_v3(ref_data, μ_S, σ_S) + I_query_model, I_querydata_g_query_model = MyFunctions.run_dist_compute_v3(query_data, μ_T, σ_T) + I_ref_model, I_querydata_g_ref_model = MyFunctions.run_dist_compute_v3(query_data, μ_S, σ_S) + I_query_model, I_refdata_g_query_model = MyFunctions.run_dist_compute_v3(ref_data, μ_T, σ_T) + + match_encoding_len1 = I_ref_model + I_querydata_g_ref_model + I_refdata_g_ref_model + match_encoding_len1 = match_encoding_len1/(len(query_data)+len(ref_data)) + match_encoding_len2 = I_query_model + I_refdata_g_query_model + I_querydata_g_query_model + match_encoding_len2 = match_encoding_len2/(len(query_data)+len(ref_data)) + match_encoding_len = (match_encoding_len1 + match_encoding_len2 )/2.0 + + null = (I_ref_model + I_refdata_g_ref_model + I_query_model + I_querydata_g_query_model)/(len(query_data)+len(ref_data)) + match_compression = match_encoding_len - null + + return match_compression + + +def sample_state(x): + x = np.cumsum(x) + rand_num = np.random.rand(1) + # print(rand_num) + if(rand_num<=x[0]): + return 0#'M' + elif(rand_num>x[0] and rand_num<=x[1]): + return 1#'W' + elif(rand_num>x[1] and rand_num<=x[2]): + return 2#'V' + elif(rand_num>x[2] and rand_num<=x[3]): + return 3#'D' + elif(rand_num>x[3] and rand_num<=x[4]): + return 4#'I' + + +def compute_alignment_area_diff_distance(A1, A2, S_len, T_len): + + pi = np.arange(1, S_len+T_len+1) # skew diagonal indices + A1_ = "" + for c in A1: + A1_ = A1_ + c + if(c=='M'): + A1_ = A1_ + 'X' + A2_ = "" + for c in A2: + A2_ = A2_ + c + if(c=='M'): + A2_ = A2_ + 'X' + + pi_1_k = 0 + pi_2_k = 0 + #print(0, pi_1_k , pi_2_k ) + A1_al_index = 0 + A2_al_index = 0 + absolute_dist_sum = 0.0 + for k in pi: + #print('k=',k, A1_al_index, A2_al_index) + A1_state = A1_[A1_al_index] + A2_state = A2_[A2_al_index] + if(A1_state=='I' or A1_state=='V'): + pi_1_k = pi_1_k - 1 + elif(A1_state=='D' or A1_state=='W'): + pi_1_k = pi_1_k + 1 + if(A2_state=='I' or A2_state=='V'): + pi_2_k = pi_2_k - 1 + elif(A2_state=='D' or A2_state=='W'): + pi_2_k = pi_2_k + 1 + + absolute_dist_sum = absolute_dist_sum + np.abs(pi_1_k - pi_2_k) + #print('-----') + A1_al_index = A1_al_index + 1 + A2_al_index = A2_al_index + 1 + + return absolute_dist_sum + +def compute_chattergi_coefficient(y1,y2): + df = pd.DataFrame({'S':y1, 'T':y2}) + df['rankS'] = df['S'].rank() + df['rankT'] = df['T'].rank() + # sort df by the S variable first + df = df.sort_values(by='rankS') + return 1 - ((3.0 * df['rankT'].diff().abs().sum())/((len(df)**2)-1)) + + +def plot_different_alignments(paths, S_len, T_len, ax, mat=[]): # pass alignment path coordinates + mat=[] + # if(len(mat)==0): + for i in range(T_len+1): + mat.append(np.repeat(0,S_len+1)) + sb.heatmap(mat, square=True, cmap='viridis', ax=ax, vmin=0, vmax=0, cbar=False,xticklabels=False,yticklabels=False) + path_color = "orange" + + for path in paths: + path_x = [p[0]+0.5 for p in path] + path_y = [p[1]+0.5 for p in path] + ax.plot(path_y, path_x, color=path_color, linewidth=3, alpha=0.5, linestyle='dashed') # path plot + plt.xlabel("S",fontweight='bold') + plt.ylabel("T",fontweight='bold') + + +def check_alignment_clusters(n_clusters , cluster_ids, alignments, n_cols = 5, figsize= (10,6)): + + clusters = [] + S_len = alignments[0].fwd_DP.S_len + T_len = alignments[0].fwd_DP.T_len + + unique_cluster_ids = np.unique(cluster_ids) + n_rows = int(np.ceil(n_clusters/n_cols)) + + + fig, axs = plt.subplots(n_rows,n_cols, figsize = (20,n_rows*3)) # custom -- only for 20 clusters -- TODO change later + axs = axs.flatten() + i = 0 + k=1 + for cluster_id in range(n_clusters): + paths = [] + cluster_genes = [] + cluster_alignments = np.asarray(alignments)[cluster_ids == unique_cluster_ids[cluster_id]] + for a in cluster_alignments: + paths.append(a.fwd_DP.alignment_path) + #print(a.gene) + cluster_genes.append(a.gene);# cluster_genes.append(a.gene) + clusters.append(list(np.unique(cluster_genes)) ) + + plot_different_alignments(paths, S_len, T_len, axs[cluster_id]) + axs[cluster_id].set_title('Cluster-'+str(i) + ' | '+str(len(cluster_alignments))) + + i=i+1 + k=k+1 + + fig.tight_layout() + n = n_cols * n_rows + i = 1 + while(k<=n): + axs.flat[-1*i].set_visible(False) + k = k+1 + i=i+1 + + return clusters + + +# input: log1p gene expression vectors +def compute_KLDivBasedDist(x,y): + + # convert to probabilities + x = x.numpy() + y = y.numpy() + # convering backto counts+1 + x = np.exp(x) + y = np.exp(y) + x = x/np.sum(x) + y = y/np.sum(y) + + sum_term = 0.0 + for i in range(len(x)): + sum_term += x[i]*(np.log(x[i]) - np.log(y[i])) + + return sum_term + + \ No newline at end of file diff --git a/genes2genes/.ipynb_checkpoints/VisualUtils-checkpoint.py b/genes2genes/.ipynb_checkpoints/VisualUtils-checkpoint.py new file mode 100644 index 0000000..7e55a0e --- /dev/null +++ b/genes2genes/.ipynb_checkpoints/VisualUtils-checkpoint.py @@ -0,0 +1,419 @@ +import pandas as pd +import seaborn as sb +import matplotlib.pyplot as plt +import numpy as np +from scipy.stats import zscore +import matplotlib.colors as mcolors +import matplotlib +import matplotlib.patches as mpatches +import regex + +from . import Main + + +vega_20 = ['#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c', '#98df8a', '#d62728', + '#ff9896', '#9467bd', '#c5b0d5', '#8c564b', '#c49c94', '#e377c2', '#f7b6d2', + '#7f7f7f', '#c7c7c7', '#bcbd22', '#dbdb8d', '#17becf', '#9edae5',] + +def plot_celltype_barplot(adata, n_bins, annotation_colname, joint_cmap, plot_cell_counts = False, legend=False): + + if(plot_cell_counts): + normalize = False + else: + normalize = 'columns' + + vec = adata.obs.time + bin_edges = np.linspace(0, 1, num=n_bins) + bin_ids = np.digitize(vec, bin_edges, right=False) # use right=True if we don't need 1.0 cell to always be a single last bin + adata.obs['bin_ids'] = bin_ids + tmp = pd.crosstab(adata.obs[annotation_colname],adata.obs['bin_ids'], normalize=normalize).T.plot(kind='bar', stacked=True, + color=joint_cmap,grid = False, legend=False, width=0.7,align='edge',figsize=(9,1)) + if(legend): + tmp.legend(title='Cell-type annotations', bbox_to_anchor=(1.5, 1.02),loc='upper right') + plt.axis('off') + +def visualize_gene_alignment(alignment, adata_ref, adata_query, annotation_colname, cmap=None): + + if(isinstance(alignment,Main.AligmentObj )): + alignment = alignment.alignment_str + + matched_points_S, matched_points_T = get_matched_time_points(alignment) + + fig = plt.figure(figsize=(4,2)) + heights = [1, 1, 1] + gs = plt.GridSpec(3, 1, height_ratios=heights) + ax1 = fig.add_subplot(gs[0, 0]) + ax2 = fig.add_subplot(gs[1, 0],sharex=ax1) + ax3 = fig.add_subplot(gs[2, 0],sharex=ax1) + + if(cmap is None): + cmap = vega_20 + + plt.subplot(3,1,1) + + metaS = pd.crosstab(adata_ref.obs.bin_ids, adata_ref.obs[annotation_colname]) + metaS.apply(lambda x: x*100/sum(x), axis=1).plot(kind='bar',stacked=True,color=cmap, grid = False, legend=False, width=0.7, ax=ax1) + + metaT = pd.crosstab(adata_query.obs.bin_ids, adata_query.obs[annotation_colname]) + metaT.apply(lambda x: x*100/sum(x), axis=1).plot(kind='bar',stacked=True,color=cmap, grid = False, legend=False, width=0.7,ax=ax3) + + plt.subplot(3,1,2) + for i in range(len(matched_points_S)): + S_timebin = matched_points_S[i] + T_timebin = matched_points_T[i] + x_vals = [T_timebin+1, S_timebin+1] + y_vals = [0,1] + plt.plot(x_vals, y_vals, marker='.', color='black', linewidth=0.5) + + def set_grid_off(ax): + ax.spines['top'].set_visible(False) + ax.spines['bottom'].set_visible(False) + ax.spines['left'].set_visible(False) + ax.spines['right'].set_visible(False) + ax.set_xticks([]) + ax.xaxis.set_ticks_position('none') + ax.set_yticks([]) + ax.figure.tight_layout() + ax.grid(False) + + set_grid_off(ax1); set_grid_off(ax2); set_grid_off(ax3); + ax1.set_ylabel('Ref', rotation=90) + ax3.set_ylabel('Query',rotation=90) + fig.text(0.5, -0.05, 'Pseudotime bins with cell type composition', ha='center') + ax1.set_title('Alignment w.r.t cell type compositions') + + +def get_matched_time_points(alignment_str): + j = 0; i = 0 + FLAG = False + matched_points_S = [] + matched_points_T = [] + prev_c = '' + for c in alignment_str: + if(c=='M'): + if(prev_c=='W'): + i=i+1 + if(prev_c=='V'): + j=j+1 + matched_points_T.append(i) + matched_points_S.append(j) + i=i+1 + j=j+1 + elif(c=='W'): + if(prev_c not in ['W','V']): + i=i-1 + if(prev_c=='V'): + i=i-1 + j=j+1 + if(prev_c=='D' and not FLAG): + FLAG = True + matched_points_T.append(i) + matched_points_S.append(j) + j=j+1 + elif(c=='V'): + if(prev_c not in ['W','V']): + j=j-1 + if(prev_c=='W'): + j=j-1 + i=i+1 + if(prev_c=='I' and not FLAG): + FLAG = True + matched_points_T.append(i) + matched_points_S.append(j) + i=i+1 + elif(c=='I'): + if(prev_c=='W'): + i=i+1 + if(prev_c=='V'): + j=j+1 + i=i+1 + elif(c=='D'): + if(prev_c=='W'): + i=i+1 + if(prev_c=='V'): + j=j+1 + j=j+1 + prev_c = c + assert(len(matched_points_S) == len(matched_points_T)) + return matched_points_S, matched_points_T + + +def plotTimeSeries(gene, aligner, plot_cells = False, plot_mean_trend= False): + + al_obj = aligner.results_map[gene] + plt.subplots(1,3,figsize=(15,3)) + plt.subplot(1,3,1) + plotTimeSeriesAlignment(gene, aligner) + plt.subplot(1,3,2) + max_val = np.max([np.max(np.asarray(aligner.ref_mat[al_obj.gene])), np.max(np.asarray(aligner.query_mat[al_obj.gene]))]) + min_val = np.min([np.min(np.asarray(aligner.ref_mat[al_obj.gene])), np.min(np.asarray(aligner.query_mat[al_obj.gene]))]) + g = sb.scatterplot(x=aligner.query_time, y=np.asarray(aligner.query_mat[al_obj.gene]), alpha=0.7, color = 'midnightblue', legend=False,linewidth=0.3, s=20) + plt.title('Query') + plt.ylim([min_val-0.5,max_val+0.5]) + plt.xlabel('Pseudotime') + plt.ylabel('Gene expression') + plt.subplot(1,3,3) + g = sb.scatterplot(x=aligner.ref_time, y=np.asarray(aligner.ref_mat[al_obj.gene]), color = 'forestgreen', alpha=0.7, legend=False,linewidth=0.3,s=20 ) + plt.title('Reference') + plt.ylim([min_val-0.5,max_val+0.5]) + plt.xlabel('Pseudotime') + plt.ylabel('Gene expression') + +def plotTimeSeriesAlignment(gene, aligner): + + al_obj = aligner.results_map[gene] + sb.scatterplot(x=al_obj.S.X, y=al_obj.S.Y, color = 'forestgreen' ,alpha=0.05, legend=False)#, label='Ref') + sb.scatterplot(x=al_obj.T.X, y=al_obj.T.Y, color = 'midnightblue' ,alpha=0.05, legend=False)#, label ='Query') + al_obj.plot_mean_trends() + plt.title(al_obj.gene) + plt.xlabel('Pseudotime') + plt.ylabel('Gene expression') + plt.axis('off') + + for i in range(al_obj.matched_region_DE_info.shape[0]): + S_timebin = int(al_obj.matched_region_DE_info.iloc[i]['ref_bin']) + T_timebin = int(al_obj.matched_region_DE_info.iloc[i]['query_bin']) + x_vals = [al_obj.matched_region_DE_info.iloc[i]['ref_pseudotime'],al_obj.matched_region_DE_info.iloc[i]['query_pseudotime']] + y_vals = [al_obj.S.mean_trend[S_timebin ], al_obj.T.mean_trend[T_timebin]] + plt.plot(x_vals, y_vals, color='black', linestyle='dashed', linewidth=1.5) + + +def plot_alignmentSim_vs_l2fc(x): + ax=sb.scatterplot(x=x['l2fc'],y=x['alignment_similarity_percentage']*100,s=120, legend=False, hue =x['alignment_similarity_percentage'] , + palette=sb.diverging_palette(0, 255, s=150, as_cmap=True),edgecolor='k',linewidth=0.3) + plt.yticks(fontsize=15) + plt.xticks(fontsize=15) + plt.ylabel('Alignment Similarity %', fontsize=15, fontweight='bold') + plt.xlabel('Log2 fold change of mean expression', fontsize = 15, fontweight='bold') + plt.grid(False) + plt.axhline(50, color='black') + plt.axvline(0, color='black', linestyle='dashed') + + +def plot_alignmentSim_vs_optCost(x, opt_cost_cut=0): + sb.scatterplot(x=x['opt_alignment_cost'],y=x['alignment_similarity_percentage']*100,s=120, legend=False, hue =x['alignment_similarity_percentage'] , + palette=sb.diverging_palette(0, 255, s=150, as_cmap=True),edgecolor='k',linewidth=0.3) + plt.yticks(fontsize=15) + plt.xticks(fontsize=15) + plt.ylabel('Alignment Similarity %', fontsize=15, fontweight='bold') + plt.xlabel('Optimal alignment cost (nits)', fontsize = 15, fontweight='bold') + plt.grid(False) + plt.axhline(50, color='black') + plt.axvline(opt_cost_cut, color='black', linestyle='dashed') + plt.tight_layout() + + +def plot_alignment_path_on_given_matrix(mat, paths, cmap='viridis'): + fig,ax = plt.subplots(1,1, figsize=(7,7)) + sb.heatmap(mat, square=True, cmap='viridis', ax=ax, cbar=True) + for path in paths: + path_x = [p[0]+0.5 for p in path] + path_y = [p[1]+0.5 for p in path] + ax.plot(path_y, path_x, color='white', linewidth=6) + plt.xlabel("Reference",fontweight='bold') + plt.ylabel("Query",fontweight='bold') + ax.xaxis.tick_top() # x axis on top + ax.xaxis.set_label_position('top') + +def plot_distmap_with_clusters(aligner, cmap=None, vmin = 0.0, vmax = 1.0, genes2highlight=None): + + godsnot_64 = [ + # "#000000", # remove the black, as often, we have black colored annotation, + '#0173b2', '#de8f05', '#029e73', '#d55e00', '#cc78bc', '#ca9161', + '#fbafe4', '#949494', '#ece133', '#56b4e9', # <--added colorblind palette to this + "#FFFF00", "#1CE6FF", "#FF34FF", "#FF4A46", "#008941", "#006FA6", "#A30059", + "#FFDBE5", "#7A4900", "#0000A6", "#63FFAC", "#B79762", "#004D43", "#8FB0FF", "#997D87", + "#5A0007", "#809693", "#FEFFE6", "#1B4400", "#4FC601", "#3B5DFF", "#4A3B53", "#FF2F80", + "#61615A", "#BA0900", "#6B7900", "#00C2A0", "#FFAA92", "#FF90C9", "#B903AA", "#D16100", + "#DDEFFF", "#000035", "#7B4F4B", "#A1C299", "#300018", "#0AA6D8", "#013349", "#00846F", + "#372101", "#FFB500", "#C2FFED", "#A079BF", "#CC0744", "#C0B9B2", "#C2FF99", "#001E09", + "#00489C", "#6F0062", "#0CBD66", "#EEC3FF", "#456D75", "#B77B68", "#7A87A1", "#788D66", + "#885578", "#FAD09F", "#FF8A9A", "#D157A0", "#BEC459", "#456648", "#0086ED", "#886F4C", + "#34362D", "#B4A8BD", "#00A6AA", "#452C2C", "#636375", "#A3C8C9", "#FF913F", "#938A81", + "#575329", "#00FECF", "#B05B6F", "#8CD0FF", "#3B9700", "#04F757", "#C8A1A1", "#1E6E00", + "#7900D7", "#A77500", "#6367A9", "#A05837", "#6B002C", "#772600", "#D790FF", "#9B9700", + "#549E79", "#FFF69F", "#201625", "#72418F", "#BC23FF", "#99ADC0", "#3A2465", "#922329", + "#5B4534", "#FDE8DC", "#404E55", "#0089A3", "#CB7E98", "#A4E804", "#324E72", "#6A3A4C"] + + # ordering genes by packing them into their clusters + cluster_ordered_genes = [] + cluster_ids = [] + + cluster_lens = [] + for i in aligner.gene_clusters.keys(): + cluster_lens.append(len(aligner.gene_clusters[i])) + c_keys = np.asarray(list(aligner.gene_clusters.keys()) ) [np.argsort(cluster_lens)[::-1]] # ordered according to cluster size + for i in c_keys: + cluster_ordered_genes += aligner.gene_clusters[i] + cluster_ids += list(np.repeat(i,len(aligner.gene_clusters[i]))) + temp = pd.DataFrame([cluster_ordered_genes, cluster_ids]).transpose() + temp.columns = ['Gene','cluster_id'] + + n_clusters = len(aligner.gene_clusters.keys()) + if(n_clusters<=20): + color_list = list(sb.color_palette('colorblind'))[0:n_clusters] + else: + if(cmap is not None): + orig_cmap = plt.cm.get_cmap(cmap) + custom_cmap = orig_cmap(np.linspace(vmin, vmax, n_clusters)) + color_list = [mcolors.rgb2hex(custom_cmap[i]) for i in range(n_clusters)] + else: + color_list = godsnot_64[0:n_clusters] + #np.random.seed(3); np.random.shuffle(color_list) + + x = dict(zip(temp['cluster_id'].unique(), color_list )) + rcolors = pd.Series(temp['cluster_id']).map(x) + rcolors.name = '' + x = aligner.DistMat[cluster_ordered_genes].loc[cluster_ordered_genes] + p = sb.clustermap(x.reset_index(drop=True), cmap='viridis', + square=True, row_cluster=False, col_cluster=False, row_colors=rcolors, figsize=(10,10), xticklabels=False, + cbar_pos=(1.05, 0.54, 0.02, 0.25)) + if(genes2highlight is None): + gene_labels = [] + for tick_label in p.ax_heatmap.axes.get_yticklabels(): + tick_text = tick_label.get_text() + gene = temp.Gene.loc[int(tick_text)] + tick_label.set_color(rcolors[int(tick_text)]) + gene_labels.append(gene) + p.ax_heatmap.axes.set_yticklabels(gene_labels, rotation = 0) + else: + tick_indices = [] + for g in genes2highlight: + tick_indices.append(temp.index[temp['Gene']==g][0]) + p.ax_heatmap.axes.set_yticks(tick_indices) + p.ax_heatmap.axes.set_yticklabels(genes2highlight, rotation = 0) + + k=0 + for tick_label in p.ax_heatmap.axes.get_yticklabels(): + tick_label.set_color(rcolors[tick_indices[k]]) + k+=1 + + # plotting the legend of clusters + legend_labels = ['Cluster-'+str(k) for k in c_keys] + legend_patches = [mpatches.Patch(color=color_list[i], label=legend_labels[i]) for i in range(len(color_list))] + ax = p.ax_row_dendrogram + ax.legend(handles=legend_patches, loc='center') + ax.axis('off'); ax.set_xticks([]); ax.set_yticks([]); + + +def resolve(regions): + for i in range(len(regions)): + x = list(regions[i]); x[1] = x[1]-1; regions[i] = x + return regions + +def color_al_str(alignment_str): + + D_regions = [(m.start(0), m.end(0)) for m in regex.finditer("D+", alignment_str)] + I_regions = [(m.start(0), m.end(0)) for m in regex.finditer("I+", alignment_str)] + M_regions = [(m.start(0), m.end(0)) for m in regex.finditer("M+", alignment_str)] + W_regions = [(m.start(0), m.end(0)) for m in regex.finditer("W+", alignment_str)] + V_regions = [(m.start(0), m.end(0)) for m in regex.finditer("V+", alignment_str)] + M_regions = resolve(M_regions); D_regions = resolve(D_regions); + I_regions = resolve(I_regions) + W_regions = resolve(W_regions); V_regions = resolve(V_regions) + i = 0; j = 0; m_id = 0; i_id = 0; d_id = 0; v_id = 0; w_id = 0; c = 0 + colored_string='' + + while(c so that it controls the matching based on the number of - # total matches (i.e. it controls the degree of significant matching) -def compute_overall_alignment(aligner,mat, plot=False, GAP_SCORE = None): - - if(GAP_SCORE==None): - GAP_SCORE= -len(aligner.gene_list)*0.08 - - if(plot): - sb.heatmap(mat, cmap='viridis', square=True) - - # DP matrix initialisation - opt_cost_M = [] - for i in range(mat.shape[0]): - opt_cost_M.append(np.repeat(0.0, mat.shape[1])) - opt_cost_M = np.matrix(opt_cost_M) - # backtracker matrix initialisation - tracker_M = [] - for i in range(mat.shape[0]): - tracker_M.append(np.repeat(0.0, mat.shape[1])) - tracker_M = np.matrix(tracker_M) - for i in range(1,mat.shape[0]): - tracker_M[i,0] = 2 - for j in range(1,mat.shape[1]): - tracker_M[0,j] = 1 - - # running DP - for j in range(1,mat.shape[1]): - for i in range(1,mat.shape[0]): - m_dir = opt_cost_M[i-1,j-1] + mat.loc[i,j] - d_dir = opt_cost_M[i,j-1] + GAP_SCORE - i_dir = opt_cost_M[i-1,j] + GAP_SCORE - - a = max([m_dir, d_dir, i_dir]) - - if(a==d_dir): - opt = d_dir - dir_tracker = 1 - elif(a==i_dir): - opt =i_dir - dir_tracker = 2 - elif(a==m_dir): - opt = m_dir - dir_tracker = 0 - - opt_cost_M[i,j] = opt - tracker_M[i,j] = dir_tracker - - # backtracking - i = mat.shape[0]-1 - j = mat.shape[1]-1 - alignment_str = '' - tracked_path = [] - while(True): - tracked_path.append([i,j]) - if(tracker_M[i,j]==0): - alignment_str = 'M' + alignment_str - i = i-1 - j = j-1 - elif(tracker_M[i,j]==1): - if(mat.loc[i,j]>0): - alignment_str = 'W' + alignment_str - else: - alignment_str = 'D' + alignment_str - j = j-1 - elif(tracker_M[i,j]==2): - if(mat.loc[i,j]>0): - alignment_str = 'V' + alignment_str - else: - alignment_str = 'I' + alignment_str - i = i-1 - - if(i==0 and j==0) : - break - tracked_path.append([0,0]) - # NOTE: This alignment string does not have the same interpretation as of the 5-state gene alignment string we get. - # Here we are only interested in the path - return alignment_str, tracked_path#, opt_cost_M, tracker_M + \ No newline at end of file diff --git a/genes2genes/MVG.py b/genes2genes/MVG.py deleted file mode 100644 index 3db0a17..0000000 --- a/genes2genes/MVG.py +++ /dev/null @@ -1,185 +0,0 @@ -import torch -import seaborn as sb -import torch.nn as nn -import numpy as np -import pandas as pd -import time -import gpytorch -import matplotlib.pyplot as plt -import torch.distributions as td -import scipy -import warnings -warnings.filterwarnings("ignore") - -torch.set_default_dtype(torch.float64) - -def generate_random_MVG_dataset(d,N,DIST_SEED=1,use_zero_mean=False,MEAN_SEED=1,): - #d = n_dimensions - #N = n_data_points - input_dims = [] - for i in range(d): - input_dims.append(i) - X = torch.tensor(input_dims) # input points on x axis (dims) as in GP - kernel = gpytorch.kernels.RBFKernel() - C = kernel(X).evaluate() - μ = torch.zeros(d) # zero mean case for dimensions - if(not use_zero_mean): - # difference mean for all cases - torch.manual_seed(MEAN_SEED) - for i in range(d): - μ[i] = torch.distributions.Uniform(5,10).rsample() - D = torch.empty(N,d) # Data matrix - torch.manual_seed(DIST_SEED) - for i in range(N): - D[i] = torch.distributions.MultivariateNormal(μ, C).rsample().detach() - return μ,C,D - -# As p (n free dimensions) increases, the lower and upper bounds converge [Ref: Wallace book] -#def conway_constant_upper_bound(p): -# return ((scipy.special.gamma( (p/2)+1 )**(2/p))*scipy.special.gamma( (2/p)+1))/(np.pi*p) -# Test case: p = 100 ----- 2**log2_conway_constant_upper_bound(p) #0.0613252739213439 -def log_factorial(x): - #return scipy.special.gammaln((x+1))/np.log(2) - return scipy.special.gammaln((x+1)) -def log_conway_constant_upper_bound(p): - #return ((2/p)*log2_factorial(p/2)) + log2_factorial(2/p) -np.log2(np.pi) -np.log2(p) - return ((2/p)*log_factorial(p/2)) + log_factorial(2/p) -np.log(np.pi) -np.log(p) - -def negative_log_likelihood(μ,C,N,data, d, det_C): - #print('det_C -- ', det_C) - term1 = ((N*d)/2.0)*np.log(2*np.pi) - #term2 = (N/2.0)*np.log(det_C) - term2 = 0.0 # bcz det_C =1 due to C=I - term3 = 0.0 - #inverse_C = torch.linalg.inv(C) - inverse_C = C # inverse of the I is itself (since we use Identity matrix) - - for i in range(N): - temp = np.matrix(data[i] - μ) - x_i = torch.tensor(temp) - x_it =torch.tensor(temp.transpose()) - #term3 = term3 + torch.matmul(torch.matmul(x_i , inverse_C ), x_it).flatten()[0] - term3 = term3 + torch.matmul(x_i, x_it).flatten()[0] # because C=I - term3 = term3 * 0.5 - #print('NEG LOG:2 ', term1,term2, term3) - return (term1 + term2 + term3).detach().item() - -def I_first_part(p,d,N,det_C): - return (0.5*p*log_conway_constant_upper_bound(p)) + ((p/2)*np.log(N)) - (d/2) - (0.5*np.log(det_C)) - -def compute_mml_estimates(data,d,N): - μ_mml = torch.mean(data,axis=0) - term = 0.0 - for i in range(N): - temp = data[i] - μ_mml - temp = np.matrix(temp) - temp_C = np.matmul(temp.transpose(),temp) - term = term + temp_C - C_mml = torch.tensor(term/(N-1)) - #print(np.linalg.det(temp_C)) - - if(torch.linalg.det(C_mml)<=0): # (adding a small perturbation) regularisation to avoid numerical instability --- then it will have only positive eigenvalues and it will have the exact same eigenvectors - C_mml = C_mml + (0.001*torch.eye(len(C_mml))) - - return μ_mml,C_mml - -def compute_MML_msg_len(data): - - d = data.shape[1]; N = len(data) - μ,C = compute_mml_estimates(data,d,N) - d = len(C) - p = d*(d+3)/2 - det_C = torch.linalg.det(C).detach().numpy() # determinant of the covariance matrix - - I_model = I_first_part(p,d,N,det_C) - I_data_g_model = negative_log_likelihood(μ,C,N,data,d, det_C) + p/2 - #print('NEG LOG: ', I_data_g_model) - return I_model, I_data_g_model, C - -def run_dist_compute_v3(data_to_model, μ_base, C_base): - data = data_to_model - d = data.shape[1]; N = len(data) - μ = torch.tensor(μ_base); C = torch.tensor(C_base) - d = len(C) - p = d*(d+3)/2 - #det_C = torch.linalg.det(C).detach().numpy() # determinant of the covariance matrix - det_C = 1.0 #(if we are using C=Identity matrix) - - # I_model = I_first_part(p,d,N,det_C) - I_model = 0.0 # (because we consider same C for both) - I_data_g_model = negative_log_likelihood(μ,C,N,data,d, det_C) + p/2 - #print('NEG LOG:2 ', p/2) - - #print('msg len entropy: ', (I_model + I_data_g_model)/len(data_to_model) ) - return I_model, I_data_g_model - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/genes2genes/Main.py b/genes2genes/Main.py index be58066..e2ee9a2 100644 --- a/genes2genes/Main.py +++ b/genes2genes/Main.py @@ -1,25 +1,17 @@ -import multiprocessing -from multiprocessing import Pool -from tqdm.notebook import tqdm_notebook -import pandas as pd -import numpy as np -import seaborn as sb -import matplotlib.pyplot as plt -import textdistance -from Levenshtein import distance -import time +import regex import copy -from sklearn.cluster import AgglomerativeClustering import scipy +import anndata import scipy.sparse +import pandas as pd +import numpy as np +import seaborn as sb +from tqdm import tqdm +import multiprocessing import scipy.stats as stats -from scipy.cluster.hierarchy import dendrogram, linkage -from scipy.spatial.distance import squareform -from scipy.cluster.hierarchy import fcluster -from scipy.stats import zscore -from tabulate import tabulate -import regex -import anndata +import matplotlib.pyplot as plt +from multiprocessing import Pool +from tqdm.notebook import tqdm_notebook from . import OrgAlign as orgalign from . import MyFunctions @@ -27,11 +19,25 @@ from . import AlignmentDistMan from . import VisualUtils from . import ClusterUtils +from . import Utils + +__version__ = 'v0.2.0' + +class hcolors: + MATCH = '\033[92m' + INSERT = '\033[91m' + DELETE = '\033[91m' + STOP = '\033[0m' + class AligmentObj: + """ + This class defines an aligned object of a reference and query gene expression time series, + to carry all related results. + """ + def __init__(self,gene, S,T, fwd_DP_obj,bwd_DP_obj, landscape_obj): - self.gene = gene self.S = S self.T = T @@ -47,33 +53,32 @@ def __init__(self,gene, S,T, fwd_DP_obj,bwd_DP_obj, landscape_obj): self.non_match_regions_T = out[3] self.compute_series_match_percentage() - #if(isinstance(S,TimeSeriesPreprocessor.SummaryTimeSeries)): try: self.run_DEAnalyser() except Exception as e: print(str(e),gene) + # Printing details about the optimal alignment object def print(self): print('Fwd opt cost', self.fwd_DP.opt_cost) - print(self.fwd_DP.alignment_str) - #print('Bwd opt cost: ', self.bwd_DP_obj.opt_cost) - #print(bwd_DP.alignment_str[::-1]) - print(self.match_regions_S) - print(self.match_regions_T) - print(self.non_match_regions_S) - print(self.non_match_regions_T) + print('5-state alignment string: ', self.fwd_DP.alignment_str) + print('Ref matched time points ranges: ', self.match_regions_S) + print('Query matched time points ranges: ', self.match_regions_T) + print('Ref mismatched time points ranges: ',self.non_match_regions_S) + print('Query mismatched time points ranges: ',self.non_match_regions_T) + print('Alignment landscape plot: ') self.landscape_obj.plot_alignment_landscape() def plotTimeSeries(self, refQueryAlignerObj, plot_cells = False, plot_mean_trend= False): sb.scatterplot(x=self.S.X, y=self.S.Y, color = 'forestgreen' ,alpha=0.05)#, label='Ref') sb.scatterplot(x=self.T.X, y=self.T.Y, color = 'midnightblue' ,alpha=0.05)#, label ='Query') - # plt.legend(loc='upper left') + # plt.legend(loc='upper left') if(plot_cells): sb.scatterplot(x=refQueryAlignerObj.ref_time, y=np.asarray(refQueryAlignerObj.ref_mat[self.gene]), color = 'forestgreen' ) sb.scatterplot(x=refQueryAlignerObj.query_time, y=np.asarray(refQueryAlignerObj.query_mat[self.gene]), color = 'midnightblue' ) plt.title(self.gene) - plt.xlabel('pseudotime') - plt.ylabel('log1p expression') + plt.xlabel('Pseudotime') + plt.ylabel('Gene expression') if(plot_mean_trend): self.plot_mean_trends() @@ -84,8 +89,8 @@ def plotTimeSeriesAlignment(self): # plt.legend(loc='upper left') self.plot_mean_trends() plt.title(self.gene) - plt.xlabel('pseudotime') - plt.ylabel('log1p expression') + plt.xlabel('Pseudotime') + plt.ylabel('Gene expression') for i in range(self.matched_region_DE_info.shape[0]): S_timebin = int(self.matched_region_DE_info.iloc[i]['ref_bin']) @@ -93,26 +98,6 @@ def plotTimeSeriesAlignment(self): x_vals = [self.matched_region_DE_info.iloc[i]['ref_pseudotime'],self.matched_region_DE_info.iloc[i]['query_pseudotime']] y_vals = [self.S.mean_trend[S_timebin ], self.T.mean_trend[T_timebin]] plt.plot(x_vals, y_vals, color='black', linestyle='dashed', linewidth=0.6) - - def plotTimeSeries_for_gene_pair(al_obj, aligner, plot_cells = False, plot_mean_trend= False): - sb.scatterplot(x=al_obj.S.X, y=al_obj.S.Y, color = 'forestgreen' ,alpha=0.05)#, label='Ref') - sb.scatterplot(x=al_obj.T.X, y=al_obj.T.Y, color = 'midnightblue' ,alpha=0.05)#, label ='Query') - if(plot_cells): - sb.scatterplot(x=aligner.ref_time, y=np.asarray(aligner.ref_mat[al_obj.gene_pair[0]]), color = 'forestgreen' ) - sb.scatterplot(x=aligner.ref_time, y=np.asarray(aligner.ref_mat[al_obj.gene_pair[1]]), color = 'midnightblue' ) - plt.title(al_obj.gene) - plt.xlabel('pseudotime') - plt.ylabel('log1p expression') - - if(plot_mean_trend): - self.plot_mean_trends() - - - def get_ref_timeseries_obj(self): - return self.fwd_DP.S - - def get_query_timeseries_obj(self): - return self.fwd_DP.T def compute_series_match_percentage(self): @@ -150,7 +135,6 @@ def run_DEAnalyser(self): DE_analyser.get_matched_time_points() if(isinstance(self.S,TimeSeriesPreprocessor.SummaryTimeSeries)): - #print('DEAnalyser: get DE info') DE_analyser.get_DE_info_for_matched_regions() def plot_matched_region_dists(self): @@ -177,11 +161,6 @@ def plot_matched_region_dists(self): def print_alignment(self): - # print('S matched : ', self.S_match_regions) - # print('T matched : ', self.T_match_regions) - # print('S not matched : ', self.S_non_match_regions) - # print('T not matched : ', self.T_non_match_regions) - # print('') print(self.al_visual) print('Matched percentages: ') p1,p2,p3 = self.get_series_match_percentage() @@ -200,22 +179,52 @@ def get_opt_alignment_cost(self): class RefQueryAligner: + """ + This class defines the main aligner class of genes2genes alignment, acting as entry point to initialise alignment parameters and interpolation. + It contains all methods for running genes2genes alignment between the specified genes of the reference and query datasets. + + Parameters + ---------- + *args + adata_ref: anndata + adata_query: anndata + gene_list: list + n_interpolation_points: int + adaptive_kernel: boolean + """ + def __init__(self, *args): - if(len(args) ==4 ): + print('===============================================================================================================') + print('Genes2Genes ('+ __version__ +')') + print('Dynamic programming alignment of gene pseudotime trajectories using a bayesian information-theoretic framework') + print('===============================================================================================================') + + if(len(args) == 4 ): + self.run_init1(args[0], args[1], args[2], args[3]) + adaptive_kernel = False + elif(len(args) == 5 ): + print('Running in adaptive interpolation mode') self.run_init1(args[0], args[1], args[2], args[3]) - elif(len(args) == 6 ): - self.run_init2(args[0], args[1], args[2], args[3], args[4], args[5]) + adaptive_kernel = args[4] else: print('pls pass the required number of args') - - def set_n_threads(self,n): - self.n_threads = n + + k=1; + self.TrajInt_R = TimeSeriesPreprocessor.TrajectoryInterpolator(self.adata_ref, n_bins=self.n_artificial_time_points, adaptive_kernel=adaptive_kernel,raising_degree = k) + self.TrajInt_R.run() + self.TrajInt_Q = TimeSeriesPreprocessor.TrajectoryInterpolator(self.adata_query, n_bins=self.n_artificial_time_points, adaptive_kernel=adaptive_kernel,raising_degree = k) + self.TrajInt_Q.run() + print('Interpolator initialization completed') + self.state_params = [0.99,0.1,0.7] # parameters empirically found over our simulated dataset + self.no_extreme_cases =False + + print('Aligner initialised to align trajectories of', self.adata_ref.shape[0], 'reference cells &',self.adata_query.shape[0], 'query cells in terms of', len(self.gene_list), 'genes') # converts ref and query anndata objects to pd.DataFrames def run_init1(self, adata_ref, adata_query, gene_list, n_artificial_time_points): - - #if(not hasattr(self, 'mean_batch_effect' )): - #self.mean_batch_effect = BatchAnalyser.BatchAnalyser().eval_between_system_batch_effect(adata_ref, adata_query) + + self.adata_ref = adata_ref[:, gene_list] + self.adata_query = adata_query[:, gene_list] if(isinstance(adata_ref.X, scipy.sparse.csr.csr_matrix) or isinstance(adata_ref.X,anndata._core.views.SparseCSCView) @@ -239,33 +248,18 @@ def run_init1(self, adata_ref, adata_query, gene_list, n_artificial_time_points) self.run_init2(ref_mat, ref_time, query_mat, query_time, gene_list, n_artificial_time_points) - def run_init2(self, ref_mat, ref_time, query_mat, query_time, gene_list, n_artificial_time_points, CONST_STD=False): + def run_init2(self, ref_mat, ref_time, query_mat, query_time, gene_list, n_artificial_time_points): self.ref_mat = ref_mat self.query_mat = query_mat self.ref_time = ref_time self.query_time = query_time self.gene_list = gene_list self.pairs = {} - self.n_threads = multiprocessing.cpu_count() - self.CONST_STD = CONST_STD - - # to preserve the number of time points ratio - time_lens = [len(self.ref_time), len(self.query_time)] self.n_artificial_time_points = n_artificial_time_points - #self.n_q_points = int(n_artificial_time_points * time_lens[0]/time_lens[1]) - self.n_q_points = n_artificial_time_points - - # self.ref_processor = TimeSeriesPreprocessor.Prepocessor(self.ref_mat, self.ref_time, 50) - # self.query_processor = TimeSeriesPreprocessor.Prepocessor(self.query_mat, self.query_time, n_q_points) - - - - - - + # util functions def extract_significant_regions_only(self, regions): - if(len(regions)==0): + if(len(regions)==0): return [] adjacent_region_start = regions[0][0] filtered_regions = np.asarray([], dtype=np.float64) @@ -275,7 +269,6 @@ def extract_significant_regions_only(self, regions): if(k!=len(regions)-1): if(regions[k][1] != regions[k+1][0]): ended_adjacent_region_len = regions[k][1]- adjacent_region_start - #print(ended_adjacent_region_len) if(ended_adjacent_region_len>0.2): adjacent_region_indices = np.append(adjacent_region_indices,regions[k][0]) adjacent_region_indices = np.append(adjacent_region_indices, regions[k][1]) @@ -287,10 +280,8 @@ def extract_significant_regions_only(self, regions): adjacent_region_indices=np.append(adjacent_region_indices,regions[k][0]) continue else: - #print(regions[k][1]- adjacent_region_start) if(len(adjacent_region_indices)>0): # check if there is a continuing adjacent region ended_adjacent_region_len = regions[k][1]- adjacent_region_start - #print(ended_adjacent_region_len) if(ended_adjacent_region_len>0.2): adjacent_region_indices = np.append(adjacent_region_indices,regions[k][0]) adjacent_region_indices=np.append(adjacent_region_indices,regions[k][1]) @@ -298,44 +289,30 @@ def extract_significant_regions_only(self, regions): filtered_region_indices = np.append(filtered_region_indices, adjacent_region_indices) return list(filtered_region_indices) - - def check_inconsistent_zero_region(self, mat, time_arr, g): + + def check_inconsistent_zero_region(self, gex_arr, pseudotime_arr, trajInterpolator): regions = [] - window_range = np.linspace(0,1, self.n_artificial_time_points) + window_range = trajInterpolator.interpolation_points + for i in range(1,len(window_range)): - logic = np.logical_and(time_arr>=window_range[i-1], time_arr=window_range[i-1], pseudotime_arr=len(bin_times)): - break - s = [] - t = [] - for k in range(SLIDING_WINDOW): - s.append(S.mean_trend[i+k]) - t.append(T.mean_trend[i+k]) - cc = stats.pearsonr(s,t)[0] - #print('Pearson correlation: ', stats.pearsonr(s,t)[0]) - correlation_coefficients.append(cc) - return correlation_coefficients - - def get_correlation_coefficient_trend_for_all_genes(self): - - cc = [] - for gene in tqdm_notebook(self.gene_list): - pcc = self.get_correlation_coefficient_trend(gene, SLIDING_WINDOW=10) - cc.append(pcc) - df = pd.DataFrame(cc) - df.index = self.gene_list - return df - def get_match_stat_for_all_genes(self): m_p = [] @@ -834,307 +563,14 @@ def get_match_stat_for_all_genes(self): df.columns = ['match %', 'match % S', 'match % T', 'cluster_id'] return df - - #interpolated gene expression heat matrix - def __prepare_intpl_df(self,intpl_df, intpl_time): - intpl_df = pd.DataFrame(intpl_df) - intpl_df = intpl_df.transpose() - intpl_df['time'] = intpl_time - intpl_df = intpl_df.sort_values(by='time') - intpl_df = intpl_df.iloc[:,intpl_df.columns!='time'] - intpl_df.columns = self.gene_list - df_zscore = intpl_df.apply(zscore) - return df_zscore - - def __get_zscore_expr_mat(self,expr_mat, expr_time): - expr_mat['time'] = expr_time - expr_mat = expr_mat.sort_values(by='time') - df = expr_mat - df = df.iloc[:,df.columns!='time'] - df_zscore = df.apply(zscore) - #df_zscore = df_zscore.iloc[:,df_zscore.columns!='time'] - return df_zscore - - # z normalised for plotting purposes - def __save_interpolated_and_noninterpolated_mats(self): - ref_intpl_df = [] - query_intpl_df = [] - for gene in self.gene_list: - ref_intpl_df.append(self.pairs[gene][0].Y) - query_intpl_df.append(self.pairs[gene][1].Y) - self.ref_intpl_df = self.__prepare_intpl_df(ref_intpl_df, self.pairs[gene][0].X) - self.query_intpl_df = self.__prepare_intpl_df(query_intpl_df, self.pairs[gene][1].X) - self.ref_expr_df = self.__get_zscore_expr_mat(self.ref_mat[self.gene_list], self.ref_time ) - self.query_expr_df = self.__get_zscore_expr_mat(self.query_mat[self.gene_list], self.query_time ) - #return ref_intpl_df, query_intpl_df, ref_expr_df, query_expr_df - - def __plot_comparative_heatmap(self, ref_df, query_df, cluster_id = None): - - if(cluster_id!=None): - ref_df = ref_df[self.gene_clusters[cluster_id]] - query_df = query_df[self.gene_clusters[cluster_id]] - fig, axs = plt.subplots(1,2, figsize=(10,ref_df.shape[1]*0.5)) - else: - fig, axs = plt.subplots(1,2, figsize=(10,10)) - - # plt.subplot(1,2,1) - sb.clustermap(ref_df.transpose(), xticklabels=False, vmin=-2, vmax=2, cbar=False,cmap = 'YlGnBu', col_cluster=False)#, ax=axs[0]) - # plt.subplot(1,2,2) - sb.clustermap(query_df.transpose(), xticklabels=False, vmin=-2, vmax=2,cmap = 'YlGnBu',col_cluster=False)#,ax=axs[1]) - fig.tight_layout() - - def prepare_interpolated_non_interpolated_mats(self): - self.__save_interpolated_and_noninterpolated_mats() - - def plot_comparative_heatmap_intpl(self, cluster_id = None): - if(not hasattr(self, 'ref_intpl_df' )): - self.__save_interpolated_and_noninterpolated_mats() - self.__plot_comparative_heatmap(self.ref_intpl_df, self.query_intpl_df,cluster_id=cluster_id) - - def plot_comparative_heatmap_expr(self, cluster_id = None): - if(not hasattr(self, 'ref_expr_df' )): - self.__save_interpolated_and_noninterpolated_mats() - self.__plot_comparative_heatmap(self.ref_expr_df, self.query_expr_df,cluster_id=cluster_id) - - - def run_MVG_alignment(self, mvg_genes,MVG_MODE_KL=True): - - D_ref = [] - D_query = [] - i = 0 - for gene in mvg_genes: - S = self.pairs[gene][0] - T = self.pairs[gene][1] - - if(i==0): - for bin_id in range(len(S.data_bins)): - D_ref.append(pd.DataFrame(S.data_bins[bin_id])) - else: - for bin_id in range(len(S.data_bins)): - D_ref[bin_id] = pd.concat([D_ref[bin_id], pd.Series(S.data_bins[bin_id]) ], axis=1) - - if(i==0): - for bin_id in range(len(T.data_bins)): - D_query.append(pd.DataFrame(T.data_bins[bin_id])) - else: - for bin_id in range(len(T.data_bins)): - D_query[bin_id] = pd.concat([D_query[bin_id], pd.Series(T.data_bins[bin_id]) ], axis=1) - - if(i==0): - S_time = S.X # no need to do this at every iteration since it is the same artificial time points for all genes - T_time = T.X - - i=i+1 - - S = TimeSeriesPreprocessor.SummaryTimeSeriesMVG(S_time, D_ref) - T = TimeSeriesPreprocessor.SummaryTimeSeriesMVG(T_time, D_query) - state_params=[0.99,0.5,0.4] #[0.95,0.5,0.4] - fwd_DP = orgalign.DP5(S,T, free_params = self.state_params, backward_run=False, zero_transition_costs= False, prohibit_case = False, MVG_MODE_KL = MVG_MODE_KL)#,mean_batch_effect=self.mean_batch_effect) - fwd_opt_cost = fwd_DP.run_optimal_alignment() - alignment_path = fwd_DP.backtrack() - fwd_DP.alignment_path = alignment_path - landscapeObj = orgalign.AlignmentLandscape(fwd_DP, None,len(S.data_bins), len(T.data_bins), alignment_path, the_5_state_machine = True) - landscapeObj.collate_fwd() #landscapeObj.plot_alignment_landscape() - #return fwd_DP.alignment_str, fwdlandscapeObj - return AligmentObj(str(mvg_genes), S,T,fwd_DP,None, landscapeObj) - - - - def compute_cluster_MVG_alignments(self,MVG_MODE_KL=True, RECOMPUTE=False): - - if((not hasattr(self, 'mvg_cluster_average_alignments')) or RECOMPUTE): - print('run MVG alignment') - self.mvg_cluster_average_alignments = [] - - for cluster_id in tqdm_notebook(range(len(self.gene_clusters))): - group = self.gene_clusters[cluster_id] - if(len(group)>1): - al_obj = self.run_MVG_alignment(group,MVG_MODE_KL=MVG_MODE_KL) - self.mvg_cluster_average_alignments.append(al_obj) - else: # don't run MVG because there is only one gene in this cluster - self.mvg_cluster_average_alignments.append(self.get_cluster_alignment_objects(cluster_id)[0]) - - return - - n_col = 5; n_row = int(np.ceil(len(self.mvg_cluster_average_alignments)/n_col)) - fig,axs =plt.subplots(n_row,n_col,figsize=(20,n_row*3)) - i=1 - for a in self.mvg_cluster_average_alignments: - # plt.subplot(4,5,i) - # plot_alignment_landscape(a.landscape_obj,i) - plt.subplot(n_row,n_col,i) - ax = sb.heatmap(a.landscape_obj.L_matrix, square=True, cmap="jet") - path_x = [p[0]+0.5 for p in a.landscape_obj.alignment_path] - path_y = [p[1]+0.5 for p in a.landscape_obj.alignment_path] - ax.plot(path_y, path_x, color='black', linewidth=3, alpha=0.5, linestyle='dashed') # path plot - plt.xlabel("S",fontweight='bold') - plt.ylabel("T",fontweight='bold') - i=i+1 - - - def plot_mvg_alignment(self, cluster_id): - - mvg_path = None - if(len(self.gene_clusters[cluster_id])>1): - mvg_path = self.mvg_cluster_average_alignments[cluster_id].landscape_obj.alignment_path - avg_alignment, path = self.get_cluster_average_alignments(cluster_id) - else: - path = self.get_cluster_alignment_objects(cluster_id)[0].landscape_obj.alignment_path - mvg_path = path - avg_alignment = self.get_cluster_alignment_objects(cluster_id)[0].alignment_str - self.__plot_avg_alignment_landscape_in_cluster(cluster_id, path, mvg_path) - - - def __plot_avg_alignment_landscape_in_cluster(self,cluster_id, path, mvg_path=None): - - avg_DP_M_matrix = None - avg_DP_W_matrix = None - avg_DP_V_matrix = None - avg_DP_D_matrix = None - avg_DP_I_matrix = None - - cluster_al_objects = self.get_cluster_alignment_objects(cluster_id) - for a in cluster_al_objects: - if(avg_DP_M_matrix is None): - avg_DP_M_matrix = a.fwd_DP.DP_M_matrix - avg_DP_W_matrix = a.fwd_DP.DP_W_matrix - avg_DP_V_matrix = a.fwd_DP.DP_V_matrix - avg_DP_D_matrix = a.fwd_DP.DP_D_matrix - avg_DP_I_matrix = a.fwd_DP.DP_I_matrix - else: - avg_DP_M_matrix = avg_DP_M_matrix + a.fwd_DP.DP_M_matrix - avg_DP_W_matrix = avg_DP_W_matrix + a.fwd_DP.DP_W_matrix - avg_DP_V_matrix = avg_DP_V_matrix + a.fwd_DP.DP_V_matrix - avg_DP_D_matrix = avg_DP_D_matrix + a.fwd_DP.DP_D_matrix - avg_DP_I_matrix = avg_DP_I_matrix + a.fwd_DP.DP_I_matrix - - avg_DP_M_matrix = avg_DP_M_matrix/len(cluster_al_objects) - avg_DP_W_matrix = avg_DP_W_matrix/len(cluster_al_objects) - avg_DP_V_matrix = avg_DP_V_matrix/len(cluster_al_objects) - avg_DP_D_matrix = avg_DP_D_matrix/len(cluster_al_objects) - avg_DP_I_matrix = avg_DP_I_matrix/len(cluster_al_objects) - - L_matrix = [] - T_len = self.results[0].fwd_DP.T_len - S_len = self.results[0].fwd_DP.S_len - for i in range(T_len+1): - L_matrix.append(np.repeat(0.0,S_len+1)) - L_matrix = np.matrix(L_matrix) - - if(mvg_path != None): - paths = [path, mvg_path] - else: - paths = [path] - for a in cluster_al_objects: - paths.append(a.landscape_obj.alignment_path) - - for i in range(0,T_len+1): - for j in range(0,S_len+1): - _i = T_len-i - _j = S_len-j - temp = [ avg_DP_M_matrix[i,j],avg_DP_W_matrix[i,j] ,avg_DP_V_matrix[i,j], avg_DP_D_matrix[i,j], avg_DP_I_matrix[i,j]] - L_matrix[i,j] = min(temp) - - mat = L_matrix - fig, ax = plt.subplots(1,1, figsize=(5,5)) - sb.heatmap(mat, square=True, cmap='jet', ax=ax, cbar=False,xticklabels=False,yticklabels=False) - path_color = "black" - alpha = 2.0; linewidth = 4 - i=0 - for path in paths: - path_x = [p[0]+0.5 for p in path] - path_y = [p[1]+0.5 for p in path] - ax.plot(path_y, path_x, color=path_color, linewidth=linewidth, alpha=alpha, linestyle='dashed') # path plot - if((i>=1)): - alpha = 0.5 - linewidth = 1 - path_color = 'black' - else: - path_color = 'brown' - i=i+1 - - plt.xlabel("S",fontweight='bold') - plt.ylabel("T",fontweight='bold') - - def get_cluster_average_alignments(self, cluster_id, deterministic=True): - - cluster_alobjs = self.get_cluster_alignment_objects(cluster_id) - i = self.results[0].fwd_DP.T_len - j = self.results[0].fwd_DP.S_len - - avg_alignment = '' - tracked_path = [] - tracked_path.append([i,j]) - - while(True): - if(i==0 and j==0): - break - backtrack_states_probs = {} - backtrack_states_probs['M'] = 0 - backtrack_states_probs['W'] = 0 - backtrack_states_probs['V'] = 0 - backtrack_states_probs['D'] = 0 - backtrack_states_probs['I'] = 0 - for a in cluster_alobjs: - backtract_state = a.landscape_obj.L_matrix_states[i,j] - if(backtract_state=='0'): - backtrack_states_probs['M']+=1 - elif(backtract_state=='1'): - backtrack_states_probs['W']+=1 - elif(backtract_state=='2'): - backtrack_states_probs['V']+=1 - elif(backtract_state=='3'): - backtrack_states_probs['D']+=1 - elif(backtract_state=='4'): - backtrack_states_probs['I']+=1 - for state in backtrack_states_probs.keys(): - backtrack_states_probs[state] = backtrack_states_probs[state]/len(cluster_alobjs) - - if(deterministic): - cs = np.argmax(np.asarray(list(backtrack_states_probs.values())) ) - else: - cs = MyFunctions.sample_state(np.asarray(list(backtrack_states_probs.values()) ) ) - if(cs==0): - i = i-1 - j = j-1 - avg_alignment = 'M' + avg_alignment - elif(cs==1 or cs==3): - j= j-1 - if(cs==1): - avg_alignment = 'W' + avg_alignment - else: - avg_alignment = 'D' + avg_alignment - elif(cs==2 or cs==4): - i=i-1 - if(cs==2): - avg_alignment = 'V' + avg_alignment - else: - avg_alignment = 'I' + avg_alignment - - tracked_path.append([i,j]) - - return avg_alignment, tracked_path - - - def get_pairwise_match_count_mat(self): - mat = [] - nT_points = len(self.results[0].T.time_points) - nS_points = len(self.results[0].S.time_points) - for i in range(nT_points + 1): - mat.append(np.repeat(0.0, nS_points+1)) - - # counts of total matches between the each pair of ref and query timepoints across all alignments - for a in self.results: - matchS = a.match_points_S+1 - matchT = a.match_points_T+1 - for i in range(len(matchS)): - mat[matchT[i]][matchS[i]] = mat[matchT[i]][matchS[i]] + 1 - - return pd.DataFrame(mat) class DEAnalyser: + """ + This class defines complementary functions for alignment results analysis. + """ + def __init__(self, al_obj): self.al_obj = al_obj self.alignment_str = al_obj.alignment_str @@ -1276,9 +712,8 @@ def get_matched_regions(self): self.al_obj.al_visual = self.index_line + ' Alignment index \n' + self.al_visual + '\n'+ self.alignment_str + ' 5-state string ' - # returns each 1-1 matching of time bins matched through M,W,V + # returns each 1-1 matching of time bins matched through M,W,V def get_matched_time_points(self): - #print('alignment string: ', self.alignment_str) j = 0 i = 0 FLAG = False @@ -1292,12 +727,6 @@ def get_matched_time_points(self): i=i+1 if(prev_c=='V'): j=j+1 - # if(FLAG): - # if(prev_c=='I'): - # j=j+1 - # if(prev_c=='D'): - # i=i+1 - # FLAG=False matched_points_T.append(i) matched_points_S.append(j) i=i+1 @@ -1356,7 +785,6 @@ def get_matched_time_points(self): self.l2fold_changes_in_matches.append([np.log2(S_bin_mean/T_bin_mean), self.al_obj.fwd_DP.S.time_points[self.match_points_S[i]], self.al_obj.fwd_DP.T.time_points[self.match_points_T[i]] ]) - #print('**** ', self.match_points_S[i], self.match_points_T[i], np.log2(S_bin_mean/T_bin_mean)) # Sanity checker for non-significant DE in matched regions @@ -1380,6 +808,9 @@ def get_DE_info_for_matched_regions(self): S_bin = self.al_obj.S.data_bins[s[i]] T_bin = self.al_obj.T.data_bins[t[i]] + self.al_obj.S.data_bins[s[i]] = np.asarray(self.al_obj.S.data_bins[s[i]]) + self.al_obj.S.data_bins[t[i]] = np.asarray(self.al_obj.S.data_bins[t[i]]) + if(not np.any(self.al_obj.S.data_bins[s[i]] - self.al_obj.S.data_bins[t[i]] )): wilcox_p.append(0.0) ks2_p.append(0.0) @@ -1405,461 +836,7 @@ def get_DE_info_for_matched_regions(self): -class hcolors: - MATCH = '\033[92m' - INSERT = '\033[91m' - DELETE = '\033[91m' - STOP = '\033[0m' - #### test cases for 1-1 match point retrieval: - # al_str = 'MMMVVVVVVWWWWWW' - # al_str = 'MMMWWWWWWVVVVVV' - # al_str = 'DDDIIIVVVIIIMMM' - # al_str = 'IIIDDDWWWIIIMMM' - # al_str = 'MMMIIIDDWWDDDIIVVDDMM' - - - - -# ======================================================== NEW CODE FOR GENE VS GENE WITHIN SYSTEM COMPARISON - -class GeneAligner: - def __init__(self, *args): - if(len(args) ==4 ): - self.run_init1(args[0], args[1], args[2], args[3]) - elif(len(args) == 6 ): - self.run_init2(args[0], args[1], args[2], args[3], args[4], args[5]) - else: - print('pls pass the required number of args') - - self.init_gene_pairs() - - def set_n_threads(self,n): - self.n_threads = n - - # converts ref and query anndata objects to pd.DataFrames - def run_init1(self, adata_ref, adata_query, gene_list, n_artificial_time_points): - - if(isinstance(adata_ref.X, scipy.sparse.csr.csr_matrix) - or isinstance(adata_ref.X,anndata._core.views.SparseCSCView) - or isinstance(adata_ref.X,scipy.sparse.csc.csc_matrix)): - ref_mat = pd.DataFrame(adata_ref.X.todense()) - else: - ref_mat = pd.DataFrame(adata_ref.X) - - ref_mat.columns = adata_ref.var_names - ref_mat = ref_mat.set_index(adata_ref.obs_names) - ref_time = np.asarray(adata_ref.obs['time']) - - self.run_init2(ref_mat, ref_time, None, None, gene_list, n_artificial_time_points) - - def run_init2(self, ref_mat, ref_time, query_mat, query_time, gene_list, n_artificial_time_points, CONST_STD=False): - self.ref_mat = ref_mat - self.ref_time = ref_time - self.gene_list = gene_list - self.pairs = {} - self.genes = {} - self.n_threads = multiprocessing.cpu_count() - self.CONST_STD = CONST_STD - - # to preserve the number of time points ratio - self.n_artificial_time_points = n_artificial_time_points - - def init_gene_pairs(self): - - #genes = {} - #for g in tqdm(gene_list) : - # genes[g] = self.run_interpolation(g) - - pairs = {} - self.GENE_PAIRS = [] - for i in tqdm_notebook(range(len(self.gene_list))): - for j in range(i, len(self.gene_list)): - if(i==j): - continue - gene1 = self.gene_list[i] - gene2 = self.gene_list[j] - self.GENE_PAIRS.append( (gene1, gene2) ) - #pairs[ (gene1, gene2) ] = [ self.genes[gene1], self.genes[gene2] ] - - print('n_gene_pairs for comparison: ', len(self.GENE_PAIRS)) - - - def run_interpolation(self, gene): - ref_processor = TimeSeriesPreprocessor.Prepocessor(self.ref_mat, self.ref_time, 15, 0.1, False) - return ref_processor.prepare_interpolated_gene_expression_series(gene, WEIGHT_BY_CELL_DENSITY = True) - - def align_single_pair_within_system(self, KEY, state_params = [0.99,0.5,0.4], zero_transition_costs=False, prohibit_case = False): - - # KEY = (gene1, gene2) - if( (KEY not in self.pairs.keys()) ): - - if( (KEY[1],KEY[0]) in self.pairs.keys() ): - KEY = (KEY[1],KEY[0]) - self.pairs[KEY] = [ self.genes[KEY[0]], self.genes[KEY[1]] ] - else: - gene1 = KEY[0] - gene2 = KEY[1] - - if(gene1 not in self.genes.keys()): - self.genes[gene1] = self.run_interpolation(gene1) - if(gene2 not in self.genes.keys()): - self.genes[gene2] = self.run_interpolation(gene2) - self.pairs[KEY] = [ self.genes[gene1], self.genes[gene2] ] - - S = self.pairs[KEY][0] - T = self.pairs[KEY][1] - - fwd_DP = orgalign.DP5(S,T, free_params = self.state_params, backward_run=False, zero_transition_costs= zero_transition_costs, prohibit_case = prohibit_case) #,mean_batch_effect=self.mean_batch_effect) - fwd_opt_cost = fwd_DP.run_optimal_alignment() - alignment_path = fwd_DP.backtrack() - fwd_DP.alignment_path = alignment_path - - landscapeObj = orgalign.AlignmentLandscape(fwd_DP, None, len(S.mean_trend), len(T.mean_trend), alignment_path, the_5_state_machine = True) - landscapeObj.collate_fwd() - - return AligmentObj(KEY, S,T, fwd_DP, None, landscapeObj) - - - def align_all_pairs_within_system(self): - - print('WINDOW_SIZE=',self.WINDOW_SIZE) - with Pool(self.n_threads) as p: - results = list(tqdm_notebook(p.imap(self.align_single_pair_within_system, self.GENE_PAIRS), total=len(self.GENE_PAIRS))) - self.results = results - - self.results_map = {} - for a in self.results: - self.results_map[a.gene] = a - - - def get_cluster_average_alignments(self, cluster_id, deterministic=True): - - cluster_alobjs = self.get_cluster_alignment_objects(cluster_id) - i = self.results[0].fwd_DP.T_len - j = self.results[0].fwd_DP.S_len - - avg_alignment = '' - tracked_path = [] - tracked_path.append([i,j]) - - while(True): - if(i==0 and j==0): - break - backtrack_states_probs = {} - backtrack_states_probs['M'] = 0 - backtrack_states_probs['W'] = 0 - backtrack_states_probs['V'] = 0 - backtrack_states_probs['D'] = 0 - backtrack_states_probs['I'] = 0 - for a in cluster_alobjs: - backtract_state = a.landscape_obj.L_matrix_states[i,j] - if(backtract_state=='0'): - backtrack_states_probs['M']+=1 - elif(backtract_state=='1'): - backtrack_states_probs['W']+=1 - elif(backtract_state=='2'): - backtrack_states_probs['V']+=1 - elif(backtract_state=='3'): - backtrack_states_probs['D']+=1 - elif(backtract_state=='4'): - backtrack_states_probs['I']+=1 - for state in backtrack_states_probs.keys(): - backtrack_states_probs[state] = backtrack_states_probs[state]/len(cluster_alobjs) - - if(deterministic): - cs = np.argmax(np.asarray(list(backtrack_states_probs.values())) ) - else: - cs = MyFunctions.sample_state(np.asarray(list(backtrack_states_probs.values()) ) ) - if(cs==0): - i = i-1 - j = j-1 - avg_alignment = 'M' + avg_alignment - elif(cs==1 or cs==3): - j= j-1 - if(cs==1): - avg_alignment = 'W' + avg_alignment - else: - avg_alignment = 'D' + avg_alignment - elif(cs==2 or cs==4): - i=i-1 - if(cs==2): - avg_alignment = 'V' + avg_alignment - else: - avg_alignment = 'I' + avg_alignment - - tracked_path.append([i,j]) - - return avg_alignment, tracked_path - - - def get_pairwise_match_count_mat(self): - mat = [] - nT_points = len(self.results[0].T.time_points) - nS_points = len(self.results[0].S.time_points) - for i in range(nT_points + 1): - mat.append(np.repeat(0.0, nS_points+1)) - - # counts of total matches between the each pair of ref and query timepoints across all alignments - for a in self.results: - matchS = a.match_points_S+1 - matchT = a.match_points_T+1 - for i in range(len(matchS)): - mat[matchT[i]][matchS[i]] = mat[matchT[i]][matchS[i]] + 1 - - return pd.DataFrame(mat) - - - def cluster_all_alignments(self, n_clusters=None, possible_dist_threshold=None, linkage_method='complete', scheme=0): - - # compute the pairwise alignment distance matrix - if(not hasattr(self, 'DistMat' )): - print('computing the Distance matrix') - DistMat = AlignmentDistMan.AlignmentDist(self).compute_alignment_ensemble_distance_matrix(scheme=scheme) - #c = sb.clustermap(DistMat,figsize=(10,30)) - self.DistMat = DistMat - if(n_clusters!=None): - gene_clusters, cluster_ids = self.cluster_alignments_v1(n_clusters=n_clusters, linkage_method= linkage_method) - else: - gene_clusters, cluster_ids = self.cluster_alignments_v2(linkage_method, possible_dist_threshold=possible_dist_threshold) - self.gene_clusters = gene_clusters - self.cluster_ids = cluster_ids - - def cluster_alignments_v1(self, n_clusters, linkage_method): - - cluster = AgglomerativeClustering(n_clusters=n_clusters, affinity='precomputed', linkage=linkage_method) - x = cluster.fit_predict(self.DistMat) - gene_clusters = orgalign.Utils().check_alignment_clusters(n_clusters, x, - self.results, n_cols=4, figsize=(10,10)) - return gene_clusters, x - - def cluster_alignments_v2(self, linkage_method, possible_dist_threshold = None): - - X = squareform(self.DistMat) - #print(X) - Z = linkage(X, linkage_method) - if(possible_dist_threshold==None): - possible_dist_threshold = np.quantile(squareform(self.DistMat),0.25) - x = fcluster(Z, possible_dist_threshold , criterion='distance') # cluster ids - n_clusters = len(np.unique(x)) - gene_clusters = orgalign.Utils().check_alignment_clusters(n_clusters, x, - self.results, n_cols=4, figsize=(10,10)) - x = x-1 # to make cluster ids 0-indexed - return gene_clusters, x - - - - def show_cluster(self, cluster_id): - - for i in range(len(self.cluster_ids)): - if(self.cluster_ids[i]==cluster_id): - print('Gene: ', self.results[i].gene) - print(self.results[i].al_visual) - self.results[i].plotTimeSeries(self, plot_cells=True) - plt.show() - print('----------------------------------------------') - - - def show_cluster_alignment_strings(self,cluster_id): - for i in range(len(self.cluster_ids)): - if(self.cluster_ids[i]==cluster_id): - print(self.results[i].alignment_str) - self.results[i].cluster_id = cluster_id - - def get_cluster_alignment_objects(self, cluster_id): - cluster_al_objects = [] - for i in range(len(self.cluster_ids)): - if(self.cluster_ids[i]==cluster_id): - #print(self.results[i].alignment_str) - self.results[i].cluster_id = cluster_id - cluster_al_objects.append(self.results[i]) - return cluster_al_objects - - def show_cluster_plots(self, cluster_id, show_alignment = False): - - temp = np.unique(self.cluster_ids == cluster_id, return_counts=True)[1][1] - n_cols = 4 - n_rows = int(np.ceil(temp/n_cols)) - fig,axs = plt.subplots(n_rows,n_cols,figsize=(20,n_rows*3)) - - k = 1 - for i in range(len(self.cluster_ids)): - if(self.cluster_ids[i]==cluster_id): - plt.subplot(n_rows, n_cols, k ) - if(show_alignment): - self.results[i].plotTimeSeriesAlignment() - else: - self.results[i].plotTimeSeries(self, plot_cells=True, plot_mean_trend=True) - plt.title(self.results[i].gene) - k = k+1 - fig.tight_layout() - n = n_cols * n_rows - i = 1 - while(k<=n): - axs.flat[-1*i].set_visible(False) - k = k+1 - i=i+1 - - - - def show_cluster_table(self): - - info = [] - for cluster_id in range(len(self.mvg_cluster_average_alignments)): - mvg_obj = self.mvg_cluster_average_alignments[cluster_id] - al_str = mvg_obj.al_visual - al_str = al_str.replace('5-state string','') - al_str = al_str.replace('Alignment index','') - al_str = al_str.replace('Reference index','') - al_str = al_str.replace('Query index','') - - n_genes = len(self.gene_clusters[cluster_id]) - if(n_genes<15): - genes = self.gene_clusters[cluster_id] - else: - genes = self.gene_clusters[cluster_id][1:7] + [' ... '] + self.gene_clusters[cluster_id][n_genes-7:n_genes] - info.append((cluster_id, n_genes, genes, mvg_obj.get_series_match_percentage()[0],mvg_obj.get_series_match_percentage()[1],mvg_obj.get_series_match_percentage()[2], al_str)) - - print(tabulate(pd.DataFrame(info), headers=['cluster_id','n_genes','gene_set','A%','S%','T%','cell-level alignment'], - tablefmt="grid",maxcolwidths=[None,None,None,30,None,None,None])) - - - - def show_ordered_alignments(self): - - return AlignmentDistMan.AlignmentDist(self).order_genes_by_alignments() - - - def show_pairwise_distance_matrix(self, al_obj): # pairwise log compression matrix - - # check compression of each matched pair - temp_mat = al_obj.fwd_DP.DP_util_matrix - compression_dist_mat = [] - for i in range(1,temp_mat.shape[0]): - row = [] - for j in range(1,temp_mat.shape[1]): - x = np.abs(temp_mat[i,j][2]) - row.append(float(x)) - compression_dist_mat.append(row) - - x = pd.DataFrame(np.log10(np.asarray(compression_dist_mat) )) - min_x = np.nanmin(np.asarray(x).flatten()) - x = x.fillna(min_x) - sb.heatmap(x, cmap='jet') - - - # separately get correlation coefficient of ref and query mean trends along the trajectory by - # first doing distributional interpolation with number of time bins and then take sliding window to compute - # pearson correlation coefficient - def get_correlation_coefficient_trend(self, gene, SLIDING_WINDOW = 10, n_bins = 50): - - # correlation coefficient trend over sliding window of 10 bins - rp = TimeSeriesPreprocessor.Prepocessor(self.ref_mat, self.ref_time, n_bins) - qp = TimeSeriesPreprocessor.Prepocessor(self.query_mat, self.query_time, n_bins) - S = rp.prepare_interpolated_gene_expression_series(gene,WEIGHT_BY_CELL_DENSITY = self.WEIGHT_BY_CELL_DENSITY) - T = qp.prepare_interpolated_gene_expression_series(gene,WEIGHT_BY_CELL_DENSITY = self.WEIGHT_BY_CELL_DENSITY) - Y1 = S.Y; Y2 = T.Y - X1 = S.X; X2 = T.X - bin_times = np.unique(X1) - correlation_coefficients = [] - for i in range(len(bin_times)): - if(i+SLIDING_WINDOW>=len(bin_times)): - break - s = [] - t = [] - for k in range(SLIDING_WINDOW): - s.append(S.mean_trend[i+k]) - t.append(T.mean_trend[i+k]) - cc = stats.pearsonr(s,t)[0] - #print('Pearson correlation: ', stats.pearsonr(s,t)[0]) - correlation_coefficients.append(cc) - return correlation_coefficients - - def get_correlation_coefficient_trend_for_all_genes(self): - - cc = [] - for gene in tqdm_notebook(self.gene_list): - pcc = self.get_correlation_coefficient_trend(gene, SLIDING_WINDOW=10) - cc.append(pcc) - df = pd.DataFrame(cc) - df.index = self.gene_list - return df - - - def get_match_stat_for_all_genes(self): - m_p = [] - m_ps = [] - m_pt = [] - for a in self.results: - m_p.append(a.get_series_match_percentage()[0]) - m_ps.append(a.get_series_match_percentage()[1]) - m_pt.append(a.get_series_match_percentage()[2]) - - df = pd.DataFrame([m_p,m_ps,m_pt,self.cluster_ids]).transpose() - df.columns = ['match %', 'match % S', 'match % T', 'cluster_id'] - return df - - - #interpolated gene expression heat matrix - def __prepare_intpl_df(self,intpl_df, intpl_time): - intpl_df = pd.DataFrame(intpl_df) - intpl_df = intpl_df.transpose() - intpl_df['time'] = intpl_time - intpl_df = intpl_df.sort_values(by='time') - intpl_df = intpl_df.iloc[:,intpl_df.columns!='time'] - intpl_df.columns = self.gene_list - df_zscore = intpl_df.apply(zscore) - return df_zscore - - def __get_zscore_expr_mat(self,expr_mat, expr_time): - expr_mat['time'] = expr_time - expr_mat = expr_mat.sort_values(by='time') - df = expr_mat - df = df.iloc[:,df.columns!='time'] - df_zscore = df.apply(zscore) - #df_zscore = df_zscore.iloc[:,df_zscore.columns!='time'] - return df_zscore - - # z normalised for plotting purposes - def __save_interpolated_and_noninterpolated_mats(self): - ref_intpl_df = [] - query_intpl_df = [] - for gene in self.gene_list: - ref_intpl_df.append(self.pairs[gene][0].Y) - query_intpl_df.append(self.pairs[gene][1].Y) - self.ref_intpl_df = self.__prepare_intpl_df(ref_intpl_df, self.pairs[gene][0].X) - self.query_intpl_df = self.__prepare_intpl_df(query_intpl_df, self.pairs[gene][1].X) - self.ref_expr_df = self.__get_zscore_expr_mat(self.ref_mat[self.gene_list], self.ref_time ) - self.query_expr_df = self.__get_zscore_expr_mat(self.query_mat[self.gene_list], self.query_time ) - #return ref_intpl_df, query_intpl_df, ref_expr_df, query_expr_df - - def __plot_comparative_heatmap(self, ref_df, query_df, cluster_id = None): - - if(cluster_id!=None): - ref_df = ref_df[self.gene_clusters[cluster_id]] - query_df = query_df[self.gene_clusters[cluster_id]] - fig, axs = plt.subplots(1,2, figsize=(10,ref_df.shape[1]*0.5)) - else: - fig, axs = plt.subplots(1,2, figsize=(10,10)) - - # plt.subplot(1,2,1) - sb.clustermap(ref_df.transpose(), xticklabels=False, vmin=-2, vmax=2, cbar=False,cmap = 'YlGnBu', col_cluster=False)#, ax=axs[0]) - # plt.subplot(1,2,2) - sb.clustermap(query_df.transpose(), xticklabels=False, vmin=-2, vmax=2,cmap = 'YlGnBu',col_cluster=False)#,ax=axs[1]) - fig.tight_layout() - - def prepare_interpolated_non_interpolated_mats(self): - self.__save_interpolated_and_noninterpolated_mats() - - def plot_comparative_heatmap_intpl(self, cluster_id = None): - if(not hasattr(self, 'ref_intpl_df' )): - self.__save_interpolated_and_noninterpolated_mats() - self.__plot_comparative_heatmap(self.ref_intpl_df, self.query_intpl_df,cluster_id=cluster_id) - - def plot_comparative_heatmap_expr(self, cluster_id = None): - if(not hasattr(self, 'ref_expr_df' )): - self.__save_interpolated_and_noninterpolated_mats() - self.__plot_comparative_heatmap(self.ref_expr_df, self.query_expr_df,cluster_id=cluster_id) diff --git a/genes2genes/MyFunctions.py b/genes2genes/MyFunctions.py index c7ff8ec..36e6f5c 100644 --- a/genes2genes/MyFunctions.py +++ b/genes2genes/MyFunctions.py @@ -1,14 +1,11 @@ import torch -import seaborn as sb -import torch.nn as nn import numpy as np -import pandas as pd -import time -import gpytorch -import matplotlib.pyplot as plt -import torch.distributions as td + torch.set_default_dtype(torch.float64) +""" +This script defines all methods required for computing mml distance between two gene expression distributions as Gaussian +""" def negative_log_likelihood(μ,σ,N,data): data = torch.tensor(data) @@ -17,36 +14,9 @@ def negative_log_likelihood(μ,σ,N,data): sum_term = torch.sum(((data - μ)/σ)**2.0)/2.0 return ((N/2.0)* torch.log(2*torch.tensor(np.pi))) + (N*torch.log(σ)) + sum_term - # print('arr sum: ',torch.sum(((data - μ)/σ)**2.0)) - # print('arr grad sum: ', torch.neg(torch.sum((data - μ)/(σ**2))) ) - reimplemented_mode = True - # Reimplementation ============================================================= - if(reimplemented_mode): - ts = time.time() - sum_term = 0.0 - #grad1_term = 0.0 - for n in range(N): - sum_term = sum_term + (((data[n] - μ)/σ)**2.0) - #grad1_term = grad1_term - ((data[n] - μ)/(σ**2)) - sum_term = sum_term/2.0 - - te = time.time() - # print('TIME: ', te-ts) - return ((N/2.0)* torch.log(2*torch.tensor(np.pi))) + (N*torch.log(σ)) + sum_term - # ================================================================================ - else: - Gaussian_dist = torch.distributions.Normal(μ,σ) - sum_term = 0.0 - for n in range(N): - sum_term = sum_term - Gaussian_dist.log_prob(torch.tensor(data[n])) - return sum_term - def compute_expected_Fisher_matrix(μ,σ,N): return torch.tensor([[N/(σ**2),0],[0,(2*N)/(σ**2)]]) # depends on σ - #### ---- expected_Fisher = compute_expected_Fisher_matrix(μ_base,σ_base,N) # compute the closed form of matrix determinant instead - -# def compute_observed_Fisher_matrix(μ,σ): -# return torch.autograd.functional.hessian(negative_log_likelihood ,(μ,σ)) + #### ---- expected_Fisher = compute_expected_Fisher_matrix(μ_base,σ_base,N) # compute the closed form of matrix determinant instead def I_prior(μ,σ): R_μ = torch.tensor(15.0) # uniform prior for mean over region R_μ @@ -98,25 +68,27 @@ def generate_random_dataset(N_datapoints, mean, variance): #print('True params: [ μ=',μ.data.numpy(), ' , σ=', σ.data.numpy(),']' ) return D,μ,σ - -def sample_state(x): - x = np.cumsum(x) - rand_num = np.random.rand(1) - # print(rand_num) - if(rand_num<=x[0]): - return 0#'M' - elif(rand_num>x[0] and rand_num<=x[1]): - return 1#'W' - elif(rand_num>x[1] and rand_num<=x[2]): - return 2#'V' - elif(rand_num>x[2] and rand_num<=x[3]): - return 3#'D' - elif(rand_num>x[3] and rand_num<=x[4]): - return 4#'I' - - + + + + + + + + + + + + + + + + + + + diff --git a/genes2genes/OrgAlign.py b/genes2genes/OrgAlign.py index 154a610..c6a215d 100644 --- a/genes2genes/OrgAlign.py +++ b/genes2genes/OrgAlign.py @@ -1,41 +1,25 @@ import torch -import time import regex import numpy as np -import pandas as pd -import gpytorch import seaborn as sb import matplotlib.pyplot as plt -import torch.distributions as td -from tqdm import tqdm -from scipy.spatial import distance -from scipy.special import softmax -from scipy.special import kl_div from . import MyFunctions from . import TimeSeriesPreprocessor -from . import MVG -import pickle -import scipy torch.set_default_dtype(torch.float64) -# M,I,D as usual -# Additional states: W(Wd) , V(Wi) (representing insert direction warps and delete direction warps) - class FiveStateMachine: + """ + This class represents a symmetric and probabilistic finite state machine with 5 alignment states (M,W,V,I,D) + to define transition probabilities between each pair of states. + """ + # M,I,D as usual + # Additional states: W(Wd) , V(Wi) (representing insert direction warps and delete direction warps) def __init__(self, P_mm, P_ii, P_mi, PROHIBIT_CASE): # ====== M STATE - #self.P_mm = P_mm/3.0 - #self.P_wm = self.P_mm - #self.P_vm = self.P_mm - #self.P_im = (1.0 - self.P_mm - self.P_wm - self.P_vm)/2.0 - #self.P_dm = self.P_im - #print(self.P_mm + self.P_wm + self.P_vm + self.P_im + self.P_dm ) - #assert(self.P_mm + self.P_wm + self.P_vm + self.P_im + self.P_dm == 1.0) - self.P_mm = P_mm k = (1.0 - self.P_mm)/4.0 self.P_wm = k @@ -49,23 +33,15 @@ def __init__(self, P_mm, P_ii, P_mi, PROHIBIT_CASE): self.P_vw = self.P_vm self.P_iw = self.P_im self.P_dw = self.P_dm - #print(self.P_ww + self.P_mw + self.P_vw + self.P_iw + self.P_dw ) - #assert(self.P_ww + self.P_mw + self.P_vw + self.P_iw + self.P_dw == 1.0) self.P_vv = self.P_mm self.P_mv = self.P_vm self.P_wv = self.P_wm self.P_iv = self.P_im self.P_dv = self.P_dm - #print(self.P_vv + self.P_mv + self.P_wv + self.P_iv + self.P_dv) - #assert(self.P_vv + self.P_mv + self.P_wv + self.P_iv + self.P_dv == 1.0) # ====== I STATE # prohibit any transition from I or D to a warp state - # self.P_ii = P_ii/2.0 # USE P_II for prohibitive case - # self.P_mi = P_mi - # self.P_wi = 0.0 - # self.P_vi = self.P_ii # USE 0 for prohibitive case if(PROHIBIT_CASE): self.P_ii = P_ii @@ -73,23 +49,13 @@ def __init__(self, P_mm, P_ii, P_mi, PROHIBIT_CASE): self.P_wi = 0.0 self.P_vi = 0.0 else: - # self.P_ii = P_ii/2.0 - # self.P_mi = P_mi - # self.P_wi = 0.0 - # self.P_vi = self.P_ii - - # NEW TEST =====+++++ - self.P_ii = P_ii # USE P_II for prohibitive case + self.P_ii = P_ii # USE P_II for prohibitive case self.P_mi = P_mi self.P_wi = 0.0 self.P_vi = P_ii # USE 0 for prohibitive case - # NEW TEST =====+++++ - - - + self.P_di = 1.0 - self.P_ii - self.P_mi - self.P_wi - self.P_vi - #print(self.P_ii + self.P_mi + self.P_wi + self.P_vi + self.P_di) - #assert(self.P_ii + self.P_mi + self.P_wi + self.P_di == 1.0) + # ====== D STATE as equivalent to I STATE self.P_md = self.P_mi; @@ -97,8 +63,6 @@ def __init__(self, P_mm, P_ii, P_mi, PROHIBIT_CASE): self.P_id = self.P_di self.P_wd = self.P_vi #self.P_wi; self.P_vd = 0.0 #self.P_vi - #print(self.P_dd + self.P_md + self.P_wd + self.P_vd + self.P_id) - #assert(self.P_dd + self.P_md + self.P_wd + self.P_vd + self.P_id == 1.0) # ===================================================== # encoding length terms @@ -240,15 +204,17 @@ def reverse(self): class DP5: + + """ + This class defines the dynamic programming algorithm and related functions + used to find the optimal alignment between two gene expression time series + """ - def __init__(self, S,T, backward_run, free_params, zero_transition_costs = False, prohibit_case=True, MVG_MODE_KL=True):#, mean_batch_effect=0.0): + def __init__(self, S,T, backward_run, free_params, zero_transition_costs = False, prohibit_case=True): self.S = S self.T = T self.S_len = len(S.data_bins) self.T_len = len(T.data_bins) - #self.mean_batch_effect = mean_batch_effect - self.MVG_MODE_KL = MVG_MODE_KL - #print('*** ', self.S_len, self.T_len) self.FSA = FiveStateMachine(free_params[0], free_params[1], free_params[2], PROHIBIT_CASE= prohibit_case) self.backward_run = backward_run if(backward_run): @@ -332,19 +298,13 @@ def init(self): self.backtrackers_I[0][j] = [np.inf,np.inf,np.inf] for i in range(1,self.T_len+1): - #if(isinstance(self.S, TimeSeriesPreprocessor.SummaryTimeSeries)): cost_D, cost_I = self.compute_cell(i-1,0, only_non_match=True) - #elif(isinstance(self.S, TimeSeriesPreprocessor.SummaryTimeSeriesMVG)): - # cost_D, cost_I = self.compute_cell_as_MVG(i-1,0, only_non_match=True) - - #self.DP_I_matrix[i,0] = self.DP_I_matrix[i-1,0] + cost_I + self.FSA.I_ii - - # [START] NEW TEST 21122022 ==== + if(i==1): - self.DP_I_matrix[i,0] = self.DP_I_matrix[i-1,0] + cost_I -np.log(ProbI)#-np.log(1/3) + self.DP_I_matrix[i,0] = self.DP_I_matrix[i-1,0] + cost_I -np.log(ProbI) else: self.DP_I_matrix[i,0] = self.DP_I_matrix[i-1,0] + cost_I + self.FSA.I_ii - # [END] NEW TEST 21122022 ==== + #self.backtrackers_I[i][0] = [i-1,0,4] if(not self.backward_run): @@ -358,21 +318,13 @@ def init(self): self.backtrackers_D[i][0] = [np.inf,np.inf,np.inf] for j in range(1,self.S_len+1): - #if(isinstance(self.S, TimeSeriesPreprocessor.SummaryTimeSeries)): cost_D, cost_I =self.compute_cell(0,j-1, only_non_match=True) - #elif(isinstance(self.S, TimeSeriesPreprocessor.SummaryTimeSeriesMVG)): - # cost_D, cost_I =self.compute_cell_as_MVG(0,j-1, only_non_match=True) - - #self.DP_D_matrix[0,j] = self.DP_D_matrix[0,j-1] + cost_D + self.FSA.I_dd - - # [START] NEW TEST 21122022 ==== + if(j==1): self.DP_D_matrix[0,j] = self.DP_D_matrix[0,j-1] + cost_D -np.log(ProbD)#-np.log(1/3) else: self.DP_D_matrix[0,j] = self.DP_D_matrix[0,j-1] + cost_D + self.FSA.I_dd - # [END] NEW TEST 21122022 ==== - #self.backtrackers_D[0][j] = [0,j-1,3] if(not self.backward_run): self.backtrackers_D[0][j] = [0,j-1,3] else: @@ -382,31 +334,19 @@ def run_optimal_alignment(self): # initial state probabilities ProbM = 0.99 - - #for i in tqdm(range(1,self.T_len+1)): + for i in range(1,self.T_len+1): for j in range(1,self.S_len+1): - #if(isinstance(self.S, TimeSeriesPreprocessor.SummaryTimeSeries)): match_len,non_match_len_D,non_match_len_I = self.compute_cell(i-1,j-1) # here we use i-1 and j-1 to correctly call the time bin to use - #elif(isinstance(self.S, TimeSeriesPreprocessor.SummaryTimeSeriesMVG)): - # match_len,non_match_len_D,non_match_len_I = self.compute_cell_as_MVG(i-1,j-1) - - + if(not self.backward_run): # filling M matrix - # temp_m = [ self.DP_M_matrix[i-1,j-1] + match_len + self.FSA.I_mm, # 0 - # self.DP_W_matrix[i-1,j-1] + match_len + self.FSA.I_mw, # 1 - # self.DP_V_matrix[i-1,j-1] + match_len + self.FSA.I_mv, # 2 - # self.DP_D_matrix[i-1,j-1] + match_len + self.FSA.I_md, # 3 - # self.DP_I_matrix[i-1,j-1] + match_len + self.FSA.I_mi # 4 - # ] - # [START] NEW TEST 21122022 ==== if(i==1 and j==1): - temp_m = [ self.DP_M_matrix[i-1,j-1] + match_len - np.log(ProbM), #-np.log(1/3), #self.FSA.I_mm, # 0 - np.inf,# self.DP_W_matrix[i-1,j-1] + match_len + np.inf, # -np.log(1/5), #+ self.FSA.I_mw, # 1 - np.inf,# self.DP_V_matrix[i-1,j-1] + match_len + np.inf, # -np.log(1/5), #+ self.FSA.I_mv, # 2 - np.inf,# self.DP_D_matrix[i-1,j-1] + match_len -np.log(1/5), #+ self.FSA.I_md, # 3 - np.inf# self.DP_I_matrix[i-1,j-1] + match_len -np.log(1/5) #+ self.FSA.I_mi # 4 + temp_m = [ self.DP_M_matrix[i-1,j-1] + match_len - np.log(ProbM), # 0 + np.inf, # 1 + np.inf, # 2 + np.inf, # 3 + np.inf # 4 ] else: temp_m = [ self.DP_M_matrix[i-1,j-1] + match_len + self.FSA.I_mm, # 0 @@ -501,10 +441,6 @@ def run_optimal_alignment(self): self.DP_D_matrix[i,j] = tot_d #+ non_match_len_D self.DP_W_matrix[i,j] = tot_w #+ match_len self.DP_V_matrix[i,j] = tot_v #+ match_len - - # print(i,j,match_len,non_match_len_I,non_match_len_D ) - # print('** ', self.DP_M_matrix[i,j],self.DP_I_matrix[i,j],self.DP_D_matrix[i,j] ) - # print('temp ', temp_m, temp_i,temp_d ) # save backtracker info self.backtrackers_M[i][j] = [i-1,j-1,min_idx_m] @@ -532,8 +468,6 @@ def run_optimal_alignment(self): return - - #def criterion2_compute_cell(self,i,j,only_non_match=False): def compute_cell(self,i,j,only_non_match=False): # Maximising the COMPRESSION ============ if(only_non_match): @@ -559,7 +493,8 @@ def compute_cell(self,i,j,only_non_match=False): null = (I_ref_model + I_refdata_g_ref_model + I_query_model + I_querydata_g_query_model)/(len(query_data)+len(ref_data)) match_compression = match_encoding_len - null - #match_compression = match_compression - self.mean_batch_effect # [POSSIBLE METHOD]constant adjustment for accounting for batch effect + #match_compression = match_compression - self.mean_batch_effect # [POSSIBLE METHOD] + # constant adjustment for accounting for batch effect non_match_encoding_len_D = 0.0 non_match_encoding_len_I = 0.0 @@ -568,54 +503,6 @@ def compute_cell(self,i,j,only_non_match=False): return match_compression.numpy(), non_match_encoding_len_D, non_match_encoding_len_I - def compute_cell_as_MVG(self,i,j,only_non_match=False): - - if(only_non_match): - return 0.0,0.0 - - ref_data = torch.tensor(np.asarray(self.S.data_bins[j]) ) - query_data = torch.tensor(np.asarray(self.T.data_bins[i]) ) - - # μ_S, C_S = MVG.compute_mml_estimates(ref_data, ref_data.shape[1], ref_data.shape[0]) - # C_S = torch.eye(ref_data.shape[1]) - # μ_T, C_T = MVG.compute_mml_estimates(query_data, query_data.shape[1], query_data.shape[0]) - # C_T = torch.eye(query_data.shape[1]) - - μ_S = self.S.mean_trends[j] - μ_T = self.T.mean_trends[i] - - if(self.MVG_MODE_KL): - return Utils().compute_KLDivBasedDist(μ_S,μ_T), 0.0,0.0 - - #return distance.euclidean(μ_S,μ_T), 0.0,0.0 - - C_S = torch.eye(ref_data.shape[1]) - C_T = torch.eye(query_data.shape[1]) - - I_ref_model, I_refdata_g_ref_model = MVG.run_dist_compute_v3(ref_data, μ_S, C_S) - I_query_model, I_querydata_g_query_model = MVG.run_dist_compute_v3(query_data,μ_T, C_T) - I_ref_model, I_querydata_g_ref_model = MVG.run_dist_compute_v3(query_data, μ_S, C_S) - I_query_model, I_refdata_g_query_model = MVG.run_dist_compute_v3(ref_data, μ_T, C_T) - - match_encoding_len1 = I_ref_model + I_querydata_g_ref_model + I_refdata_g_ref_model - match_encoding_len1 = match_encoding_len1/(len(query_data)+len(ref_data)) - match_encoding_len2 = I_query_model + I_refdata_g_query_model + I_querydata_g_query_model - match_encoding_len2 = match_encoding_len2/(len(query_data)+len(ref_data)) - match_encoding_len = (match_encoding_len1 + match_encoding_len2 )/2.0 - - null = (I_ref_model + I_refdata_g_ref_model + I_query_model + I_querydata_g_query_model)/(len(query_data)+len(ref_data)) - - #n_dimensions = self.S.data_bins[0].shape[0] - match_compression = (match_encoding_len - null) - non_match_encoding_len_D = 0.0 - non_match_encoding_len_I = 0.0 - self.DP_util_matrix[i+1,j+1] = [null,match_encoding_len,match_compression] - - assert(null>=0.0) - assert(match_encoding_len>=0.0) - - return match_compression,non_match_encoding_len_D,non_match_encoding_len_I - def _backtrack_util(self,backtracker_pointer): @@ -682,7 +569,6 @@ def backtrack(self): self.DP_M_matrix[i,j]] min_idx = last_cell_costs.index(min(last_cell_costs)) - #print('tot_msg_len_of_alignment = ', min(last_cell_costs)) if(min_idx==0): # match state = 'I' elif(min_idx==1): @@ -694,8 +580,6 @@ def backtrack(self): elif(min_idx==4): # insert state = 'M' - # self.alignment_str = state + self.alignment_str - # self._align_str_util(state) while(True): if(i==0 and j==0): break @@ -869,157 +753,9 @@ def plot_alignment_landscape(self): # pass alignment path coordinates path_x = [p[0]+0.5 for p in self.alignment_path] path_y = [p[1]+0.5 for p in self.alignment_path] ax.plot(path_y, path_x, color='black', linewidth=3, alpha=0.5, linestyle='dashed') # path plot - plt.xlabel("S",fontweight='bold') - plt.ylabel("T",fontweight='bold') - - -class Utils: - - def compute_alignment_area_diff_distance(self, A1, A2, S_len, T_len): - - # print(A1) - # print(A2) - pi = np.arange(1, S_len+T_len+1) # skew diagonal indices - A1_ = "" - for c in A1: - A1_ = A1_ + c - if(c=='M'): - A1_ = A1_ + 'X' - A2_ = "" - for c in A2: - A2_ = A2_ + c - if(c=='M'): - A2_ = A2_ + 'X' - - pi_1_k = 0 - pi_2_k = 0 - #print(0, pi_1_k , pi_2_k ) - A1_al_index = 0 - A2_al_index = 0 - absolute_dist_sum = 0.0 - for k in pi: - #print('k=',k, A1_al_index, A2_al_index) - A1_state = A1_[A1_al_index] - A2_state = A2_[A2_al_index] - if(A1_state=='I' or A1_state=='V'): - pi_1_k = pi_1_k - 1 - elif(A1_state=='D' or A1_state=='W'): - pi_1_k = pi_1_k + 1 - if(A2_state=='I' or A2_state=='V'): - pi_2_k = pi_2_k - 1 - elif(A2_state=='D' or A2_state=='W'): - pi_2_k = pi_2_k + 1 - #print(k, pi_1_k, pi_2_k) - #print(A1_state, A2_state) - absolute_dist_sum = absolute_dist_sum + np.abs(pi_1_k - pi_2_k) - #print('-----') - A1_al_index = A1_al_index + 1 - A2_al_index = A2_al_index + 1 - - return absolute_dist_sum - - def compute_chattergi_coefficient(self, y1,y2): - df = pd.DataFrame({'S':y1, 'T':y2}) - df['rankS'] = df['S'].rank() - df['rankT'] = df['T'].rank() - # sort df by the S variable first - df = df.sort_values(by='rankS') - return 1 - ((3.0 * df['rankT'].diff().abs().sum())/((len(df)**2)-1)) - - - def plot_different_alignments(self, paths, S_len, T_len, ax, mat=[]): # pass alignment path coordinates - mat=[] - # if(len(mat)==0): - for i in range(T_len+1): - mat.append(np.repeat(0,S_len+1)) - sb.heatmap(mat, square=True, cmap='viridis', ax=ax, vmin=0, vmax=0, cbar=False,xticklabels=False,yticklabels=False) - path_color = "orange" - # else: - # sb.heatmap(mat, square=True, cmap='viridis', cbar=False,xticklabels=False,yticklabels=False, vmax=1) - # path_color = "black" - - for path in paths: - path_x = [p[0]+0.5 for p in path] - path_y = [p[1]+0.5 for p in path] - ax.plot(path_y, path_x, color=path_color, linewidth=3, alpha=0.5, linestyle='dashed') # path plot - plt.xlabel("S",fontweight='bold') - plt.ylabel("T",fontweight='bold') - - - def check_alignment_clusters(self, n_clusters , cluster_ids, alignments, n_cols = 5, figsize= (10,6)): - - clusters = [] - S_len = alignments[0].fwd_DP.S_len - T_len = alignments[0].fwd_DP.T_len - - unique_cluster_ids = np.unique(cluster_ids) - n_rows = int(np.ceil(n_clusters/n_cols)) - - - fig, axs = plt.subplots(n_rows,n_cols, figsize = (20,n_rows*3)) # custom -- only for 20 clusters -- TODO change later - axs = axs.flatten() - i = 0 - k=1 - for cluster_id in range(n_clusters): - paths = [] - cluster_genes = [] - cluster_alignments = np.asarray(alignments)[cluster_ids == unique_cluster_ids[cluster_id]] - for a in cluster_alignments: - paths.append(a.fwd_DP.alignment_path) - #print(a.gene) - cluster_genes.append(a.gene);# cluster_genes.append(a.gene) - clusters.append(list(np.unique(cluster_genes)) ) - # self.plot_different_alignments(paths, S_len, T_len, ax=axs[cluster_id]) - - #### - # mat = [] - # for i in range(T_len+1): - # mat.append(np.repeat(0,S_len+1)) - # sb.heatmap(mat, square=True, cmap='viridis', ax=axs[cluster_id], vmin=0, vmax=0, cbar=False,xticklabels=False,yticklabels=False) - # path_color = "orange" - # for path in paths: - # path_x = [p[0]+0.5 for p in path] - # path_y = [p[1]+0.5 for p in path] - # axs[cluster_id].plot(path_y, path_x, color=path_color, linewidth=3, alpha=0.5, linestyle='dashed') # path plot - # plt.xlabel("S",fontweight='bold') - # plt.ylabel("T",fontweight='bold') - - self.plot_different_alignments(paths, S_len, T_len, axs[cluster_id]) - axs[cluster_id].set_title('Cluster-'+str(i) + ' | '+str(len(cluster_alignments))) - - i=i+1 - k=k+1 - - fig.tight_layout() - n = n_cols * n_rows - i = 1 - while(k<=n): - axs.flat[-1*i].set_visible(False) - k = k+1 - i=i+1 - - return clusters - - - # input: log1p gene expression vectors - def compute_KLDivBasedDist(self,x,y): - - # convert to probabilities - # x = softmax(x) - # y = softmax(y) - x = x.numpy() - y = y.numpy() - # convering backto counts+1 - x = np.exp(x) - y = np.exp(y) - x = x/np.sum(x) - y = y/np.sum(y) - - sum_term = 0.0 - for i in range(len(x)): - sum_term += x[i]*(np.log(x[i]) - np.log(y[i])) - # print(x,y,' ---------- ',sum_term) - return sum_term + plt.xlabel("Reference",fontweight='bold') + plt.ylabel("Query",fontweight='bold') + plt.title('Alignment cost landscape') diff --git a/genes2genes/PathwayAnalyser.py b/genes2genes/PathwayAnalyser.py new file mode 100644 index 0000000..c539455 --- /dev/null +++ b/genes2genes/PathwayAnalyser.py @@ -0,0 +1,129 @@ +import gseapy as gp +from gseapy import barplot, dotplot +import numpy as np +import pandas as pd +from tqdm import tqdm +from tabulate import tabulate +from gsea_api.molecular_signatures_db import MolecularSignaturesDatabase + +from . import ClusterUtils +from . import VisualUtils + +""" +This script defines Wrappers for GSEAPY enrichr and other functions related to analysing pathway gene sets. +""" + +def run_overrepresentation_analysis(gene_set, TARGET_GENESETS=['MSigDB_Hallmark_2020','KEGG_2021_Human']): + enr = gp.enrichr(gene_list=gene_set, + gene_sets=TARGET_GENESETS, + organism='human', + outdir=None, + ) + df = enr.results[enr.results['Adjusted P-value']<0.05] + if(df.shape[0]==0): + return df + df = df.sort_values('Adjusted P-value') + df['-log10 Adjusted P-value'] = [-np.log10(q) for q in df['Adjusted P-value']] + max_q = max(df['-log10 Adjusted P-value'][df['-log10 Adjusted P-value']!=np.inf]) + #df.columns = ['Gene_set']+list(df.columns[1:len(df.columns)]) + qvals = [] + for q in df['-log10 Adjusted P-value']: + if(q==np.inf): + q = -np.log10(0.00000000001) # NOTE: For -log10(p=0.0) we replace p with a very small p-val to avoid inf + qvals.append(q) + df['-log10 FDR q-val'] = qvals + df = df.sort_values('Adjusted P-value',ascending=True) + return df + +def plot_overrep_results(df): + height = df.shape[0]*(1/(np.log2(df.shape[0])+1)) + ax = barplot(df, + column="Adjusted P-value", + group='Gene_set', # set group, so you could do a multi-sample/library comparsion + size=10, + top_term=20, + figsize=(5,height), + color=['darkred', 'darkblue'], # set colors for group + ) + +def plot_gsea_dotplot(df, size=100, figsize=(3,4), n_top_terms = 5): + ax = dotplot(df, + column="P-value", + x='-log10 Adjusted P-value', # set x axis, so you could do a multi-sample/library comparsion + size=size, + top_term=n_top_terms, + figsize=figsize, + xticklabels_rot=45, # rotate xtick labels + show_ring=False, # set to False to revmove outer ring + marker='o', + ) + +def run_cluster_overrepresentation_analysis(aligner): + + overrep_cluster_results = {} + cluster_overrepresentation_results = [] + + for cluster_id in tqdm(range(len(aligner.gene_clusters))): + df = run_overrepresentation_analysis(aligner.gene_clusters[cluster_id]) + if(df.shape[0]==0): + continue + n_genes = len(aligner.gene_clusters[cluster_id]) + pathways = list(df.Term) + pathway_specific_genes = list(df.Genes) + sources = [str(s).split('_')[0] for s in list(df.Gene_set)] + + if(n_genes<15): + genes = aligner.gene_clusters[cluster_id] + else: + genes = aligner.gene_clusters[cluster_id][1:7] + [' ... '] + aligner.gene_clusters[cluster_id][n_genes-7:n_genes] + + cluster_overrepresentation_results.append([cluster_id,n_genes,genes,pathways, pathway_specific_genes, sources]) + overrep_cluster_results[cluster_id] = df + + results= pd.DataFrame(cluster_overrepresentation_results) + print(tabulate(results, headers=['cluster_id','n_genes', 'Cluster genes', 'Pathways','Pathway genes','Source'],tablefmt="grid",maxcolwidths=[3, 3, 3,30,40,40,10])) + + +def get_pathway_alignment_stat(aligner, GENE_LIST, pathway_name, cluster=False, FIGSIZE = (14,7)): + + print('Gene set: ======= ', pathway_name) + perct_A = [] + perct_S = [] + perct_T = [] + for gene in GENE_LIST: + series_match_percent = aligner.results_map[gene].get_series_match_percentage() + perct_A.append(series_match_percent[0]) + perct_S.append(series_match_percent[1]) + perct_T.append(series_match_percent[2]) + + print('mean matched percentage: ', round(np.mean(perct_A),2),'%' ) + #print('mean matched percentage wrt ref: ',round(np.mean(perct_S),2),'%' ) + #print('mean matched percentage wrt query: ', round(np.mean(perct_T),2),'%' ) + average_alignment, alignment_path = ClusterUtils.get_cluster_average_alignments(aligner, GENE_LIST) + mat = ClusterUtils.get_pairwise_match_count_mat(aligner,GENE_LIST ) + print('Average Alignment: ', VisualUtils.color_al_str(average_alignment), '(cell-level)') + print('- Plotting average alignment path') + VisualUtils.plot_alignment_path_on_given_matrix(paths = [alignment_path], mat=mat) + VisualUtils.plot_mean_trend_heatmaps(aligner,GENE_LIST, pathway_name,cluster=cluster, FIGSIZE=FIGSIZE) + + +class InterestingGeneSets: + + def __init__(self, MSIGDB_PATH, version): + self.SETS = {} + self.dbs = {} + self.msigdb = MolecularSignaturesDatabase(MSIGDB_PATH , version=version) + self.dbs['kegg'] = self.msigdb.load('c2.cp.kegg', 'symbols') + self.dbs['hallmark'] = self.msigdb.load('h.all', 'symbols') + #self.dbs['gobp'] = self.msigdb.load('c5.go.bp', 'symbols') + #self.dbs['gocc'] = self.msigdb.load('c5.go.cc', 'symbols') + #self.dbs['reac'] = self.msigdb.load('c2.cp.reactome', 'symbols') + + def add_new_set_from_msigdb(self, db_name, dbsetname, avail_genes, usersetname): + self.SETS[usersetname] = np.intersect1d(list(self.dbs[db_name].gene_sets_by_name[dbsetname].genes), avail_genes) + + def add_new_set(self, geneset, usersetname, avail_genes): + geneset = np.asarray(geneset) + self.SETS[usersetname] = geneset[np.where([g in avail_genes for g in geneset])] + + diff --git a/genes2genes/PathwayAnalyserV2.py b/genes2genes/PathwayAnalyserV2.py deleted file mode 100644 index 12265f0..0000000 --- a/genes2genes/PathwayAnalyserV2.py +++ /dev/null @@ -1,294 +0,0 @@ -import gseapy as gp -from gseapy import barplot, dotplot -import anndata -import time -import numpy as np -import pandas as pd -import scanpy as sc -import seaborn as sb -import scipy.stats as stats -import matplotlib.pyplot as plt -import os,sys,inspect -import pickle -from tqdm import tqdm -from tabulate import tabulate -from gsea_api.molecular_signatures_db import MolecularSignaturesDatabase -from adjustText import adjust_text -from mpl_toolkits.axes_grid1.inset_locator import inset_axes -from scipy.stats import zscore - -from . import ClusterUtils -from . import VisualUtils - - -def run_overrepresentation_analysis(gene_set, TARGET_GENESETS=['MSigDB_Hallmark_2020','KEGG_2021_Human']): - enr = gp.enrichr(gene_list=gene_set, - gene_sets=TARGET_GENESETS, - organism='human', - outdir=None, - ) - df = enr.results[enr.results['Adjusted P-value']<0.05] - if(df.shape[0]==0): - return df - df = df.sort_values('Adjusted P-value') - df['-log10 Adjusted P-value'] = [-np.log10(q) for q in df['Adjusted P-value']] - max_q = max(df['-log10 Adjusted P-value'][df['-log10 Adjusted P-value']!=np.inf]) - #df.columns = ['Gene_set']+list(df.columns[1:len(df.columns)]) - qvals = [] - for q in df['-log10 Adjusted P-value']: - if(q==np.inf): - q = -np.log10(0.00000000001) # NOTE: For -log10(p=0.0) we replace p with a very small p-val to avoid inf - qvals.append(q) - df['-log10 FDR q-val'] = qvals - df = df.sort_values('Adjusted P-value',ascending=True) - return df - -def plot_overrep_results(df): - height = df.shape[0]*(1/(np.log2(df.shape[0])+1)) - ax = barplot(df, - column="Adjusted P-value", - group='Gene_set', # set group, so you could do a multi-sample/library comparsion - size=10, - top_term=20, - figsize=(5,height), - color=['darkred', 'darkblue'], # set colors for group - ) - -def plot_gsea_dotplot(df, size=100, figsize=(3,4), n_top_terms = 5): - ax = dotplot(df, - column="P-value", - x='-log10 Adjusted P-value', # set x axis, so you could do a multi-sample/library comparsion - size=size, - top_term=n_top_terms, - figsize=figsize, - xticklabels_rot=45, # rotate xtick labels - show_ring=False, # set to False to revmove outer ring - marker='o', - ) - -def run_cluster_overrepresentation_analysis(aligner): - - overrep_cluster_results = {} - cluster_overrepresentation_results = [] - - for cluster_id in tqdm(range(len(aligner.gene_clusters))): - df = run_overrepresentation_analysis(aligner.gene_clusters[cluster_id]) - if(df.shape[0]==0): - continue - n_genes = len(aligner.gene_clusters[cluster_id]) - pathways = list(df.Term) - pathway_specific_genes = list(df.Genes) - sources = [str(s).split('_')[0] for s in list(df.Gene_set)] - - if(n_genes<15): - genes = aligner.gene_clusters[cluster_id] - else: - genes = aligner.gene_clusters[cluster_id][1:7] + [' ... '] + aligner.gene_clusters[cluster_id][n_genes-7:n_genes] - - cluster_overrepresentation_results.append([cluster_id,n_genes,genes,pathways, pathway_specific_genes, sources]) - overrep_cluster_results[cluster_id] = df - - results= pd.DataFrame(cluster_overrepresentation_results) - print(tabulate(results, headers=['cluster_id','n_genes', 'Cluster genes', 'Pathways','Pathway genes','Source'],tablefmt="grid",maxcolwidths=[3, 3, 3,30,40,40,10])) - - - - -def get_pathway_alignment_stat(aligner, GENE_LIST, pathway_name, cluster=False, FIGSIZE = (14,7)): - - # print('PATHWAY ======= ',pathway_name) - # GENE_LIST = IGS.SETS[pathway_name] - perct_A = [] - perct_S = [] - perct_T = [] - for gene in GENE_LIST: - series_match_percent = aligner.results_map[gene].get_series_match_percentage() - perct_A.append(series_match_percent[0]) - perct_S.append(series_match_percent[1]) - perct_T.append(series_match_percent[2]) - - print('mean matched percentage: ', round(np.mean(perct_A),2),'%' ) - print('mean matched percentage wrt ref: ',round(np.mean(perct_S),2),'%' ) - print('mean matched percentage wrt query: ', round(np.mean(perct_T),2),'%' ) - average_alignment, alignment_path = ClusterUtils.get_cluster_average_alignments(aligner, GENE_LIST) - mat = ClusterUtils.get_pairwise_match_count_mat(aligner,GENE_LIST ) - print('Average Alignment: ', average_alignment) - VisualUtils.plot_alignment_path_on_given_matrix(paths = [alignment_path], mat=mat) #AAAAAAAA - # plt.xlabel('Ref pseudotime') - # plt.ylabel('Organoid pseudotime') - # plt.savefig('Ref_organoid_'+pathway_name+'_overall_alignment.png') - plot_mean_trend_heatmaps(aligner,GENE_LIST, pathway_name,cluster=cluster, FIGSIZE=FIGSIZE) - -def plot_DE_genes(pathway_name): - PATHWAY_SET = IGS.SETS[pathway] - ax=sb.scatterplot(x['l2fc'],x['sim']*100,s=50, legend=False, hue =x['sim'] ,palette=sb.diverging_palette(15, 133, s=50, as_cmap=True),edgecolor='k',linewidth=0.3) - plt.yticks(fontsize=12) - plt.xticks(fontsize=12) - plt.ylabel('Alignment Similarity %', fontsize=12, fontweight='bold') - plt.xlabel('L2FC mean expression', fontsize = 12, fontweight='bold') - plt.grid(False) - plt.tight_layout() - - TEXTS = [] - for label, a, b in zip(x.index, x['l2fc'],x['sim']*100): - if(label in PATHWAY_SET):# and b<=50): - TEXTS.append(ax.text(a, b, label, color='white', fontsize=9, fontweight='bold',bbox=dict(boxstyle='round,pad=0.1', fc='black', alpha=0.75))) - adjust_text(TEXTS, expand_points=(2, 2),arrowprops=dict(arrowstyle="->", color='black', lw=2)) - plt.title(pathway_name,fontweight='bold', fontsize=15) - -# smoothened/interpolated mean trends + Z normalisation -def plot_mean_trend_heatmaps(aligner, GENE_LIST, pathway_name, cluster=False, FIGSIZE=(14,7)): - S_mat = [] - T_mat = [] - S_zmat = [] - T_zmat = [] - - for gene in GENE_LIST: - - fS = pd.DataFrame([aligner.results_map[gene].S.mean_trend, np.repeat('Ref', len(aligner.results_map[gene].S.mean_trend))]).transpose() - fT = pd.DataFrame([aligner.results_map[gene].T.mean_trend, np.repeat('Organoid', len(aligner.results_map[gene].T.mean_trend))]).transpose() - f = pd.concat([fS,fT]) - f[0] = np.asarray(f[0], dtype=np.float64) - from scipy.stats import zscore - f['z_normalised'] = zscore(f[0]) - S_mat.append(np.asarray(f[f[1]=='Ref'][0])) - T_mat.append(np.asarray(f[f[1]=='Organoid'][0])) - S_zmat.append(np.asarray(f[f[1]=='Ref']['z_normalised'])) - T_zmat.append(np.asarray(f[f[1]=='Organoid']['z_normalised'])) - S_mat = pd.DataFrame(S_mat) - T_mat = pd.DataFrame(T_mat) - S_zmat = pd.DataFrame(S_zmat) - T_zmat = pd.DataFrame(T_zmat) - - S_mat.index = GENE_LIST #IGS.SETS[pathway_name] - T_mat.index = GENE_LIST #IGS.SETS[pathway_name] - S_zmat.index = GENE_LIST#IGS.SETS[pathway_name] - T_zmat.index = GENE_LIST#IGS.SETS[pathway_name] - - # print('Interpolated mean trends') - # plot_heatmaps(S_mat, T_mat, pathway_name, cluster=cluster) - print('Z-normalised Interpolated mean trends') - plot_heatmaps(S_zmat, T_zmat, GENE_LIST, pathway_name,cluster=cluster, FIGSIZE=FIGSIZE) - -def plot_heatmaps(mat_ref,mat_query,GENE_LIST, pathway_name, cluster=False, FIGSIZE=(14,7)): - - if(cluster): - g=sb.clustermap(mat_ref, figsize=(0.4,0.4), col_cluster=False, cbar_pos=None) - gene_order = g.dendrogram_row.reordered_ind - df = pd.DataFrame(g.data2d) - df.index = GENE_LIST[gene_order] - else: - df=mat_ref - plt.close() - - plt.subplots(1,2,figsize=FIGSIZE) #8,14/7 ****************************************************** - max_val = np.max([np.max(mat_ref),np.max(mat_query)]) - min_val = np.min([np.min(mat_ref),np.min(mat_query)]) - plt.subplot(1,2,1) - ax=sb.heatmap(df, vmax=max_val,vmin=min_val, cbar_kws = dict(use_gridspec=False,location="top")) - plt.title('Reference') - ax.yaxis.set_label_position("left") - for tick in ax.get_yticklabels(): - tick.set_rotation(360) - plt.subplot(1,2,2) - if(cluster): - mat_query = mat_query.loc[GENE_LIST[gene_order]] - ax = sb.heatmap(mat_query,vmax=max_val, vmin=min_val,cbar_kws = dict(use_gridspec=False,location="top"), yticklabels=False) - plt.title('Query') - plt.savefig(pathway_name+'_heatmap.png', bbox_inches='tight') - plt.show() - - - -class InterestingGeneSets: - - def __init__(self, MSIGDB_PATH ='../OrgAlign/msigdb/' ): - self.SETS = {} - self.dbs = {} - self.msigdb = MolecularSignaturesDatabase(MSIGDB_PATH , version='7.5.1') - self.dbs['kegg'] = self.msigdb.load('c2.cp.kegg', 'symbols') - self.dbs['hallmark'] = self.msigdb.load('h.all', 'symbols') - #self.dbs['gobp'] = self.msigdb.load('c5.go.bp', 'symbols') - #self.dbs['gocc'] = self.msigdb.load('c5.go.cc', 'symbols') - #self.dbs['reac'] = self.msigdb.load('c2.cp.reactome', 'symbols') - - def add_new_set_from_msigdb(self, db_name, dbsetname, avail_genes, usersetname): - self.SETS[usersetname] = np.intersect1d(list(self.dbs[db_name].gene_sets_by_name[dbsetname].genes), avail_genes) - - def add_new_set(self, geneset, usersetname, avail_genes): - geneset = np.asarray(geneset) - #print(geneset) - self.SETS[usersetname] = geneset[np.where([g in avail_genes for g in geneset])] - - - - - - - - - - - - - - - - - - - -# ATTIC - -def get_ranked_genelist(aligner): - #print('make ranked gene list') - matched_percentages = {} - for al_obj in aligner.results: - matched_percentages[al_obj.gene] = (al_obj.get_series_match_percentage()[0]/100 ) - x = sorted(matched_percentages.items(), key=lambda x: x[1], reverse=False) - x = pd.DataFrame(x) - x.columns = ['Gene','Alignment_Percentage'] - x = x.set_index('Gene') - return x - -# get the top k DE genes (k decided on matched percentage threshold) -def topkDE(aligner, DIFF_THRESHOLD=0.5): - ranked_list = get_ranked_genelist(aligner) - top_k = np.unique(ranked_list ['Alignment_Percentage'] < DIFF_THRESHOLD , return_counts=True)[1][1] - print(top_k, ' # of DE genes to check') - clusters = pd.DataFrame([ranked_list[0:top_k].index, np.repeat(0,top_k)]).transpose() - clusters.columns = ['Gene','ClusterID'] - clusters = clusters.set_index('Gene') - return list(clusters.index), ranked_list - -def run_GSEA_on_rankedlist(rankedDEgenes): - pre_res = gp.prerank(rnk=rankedDEgenes, # or rnk = rnk, - gene_sets=['MSigDB_Hallmark_2020','KEGG_2021_Human','Reactome_2022','GO_Biological_Process_2021'],#targets5, - threads=4, - min_size=5, - max_size=1000, - permutation_num=1000, - outdir=None, - seed=6, - verbose=True, - ) - pre_res.res2d[pre_res.res2d['FDR q-val']<0.05] - - df = pre_res.res2d[pre_res.res2d['FDR q-val']<0.05] - df['Name'] = [str(t).split('_')[0] for t in df.Term] - df = df.sort_values('FDR q-val') - df['-log10 FDR q-val'] = [-np.log10(q) for q in df['FDR q-val']] - max_q = max(df['-log10 FDR q-val'][df['-log10 FDR q-val']!=np.inf]) - #df.columns = ['Gene_set']+list(df.columns[1:len(df.columns)]) - qvals = [] - for q in df['-log10 FDR q-val']: - if(q==np.inf): - q = -np.log10(0.00000000001) # NOTE: For -log10(p=0.0) we replace p with a very small p-val to avoid inf - qvals.append(q) - df['-log10 FDR q-val'] = qvals - #df['Name'] = df['Gene_set'] - sb.set(rc={'figure.figsize':(10,15)}) - sb.factorplot(y='Term', x='-log10 FDR q-val', data=df, kind='bar', hue='Name',dodge=False) - plt.xlim([0,max_q]) - - return pre_res diff --git a/genes2genes/SimulationExperimentAnalyser.py b/genes2genes/SimulationExperimentAnalyser.py deleted file mode 100644 index 3a44f24..0000000 --- a/genes2genes/SimulationExperimentAnalyser.py +++ /dev/null @@ -1,452 +0,0 @@ -# NEW ALIGNMENT ACCURACY STATISTIC CODE -import numpy as np -import regex as re -import pandas as pd -import seaborn as sb -import matplotlib.pyplot as plt -from tqdm import tqdm - -from . import ClusterUtils - -class SimulationExperimenter: - - def __init__(self, adata_ref, adata_query, aligner_all, CP_25, CP_05, CP_75, pattern_map): - self.adata_ref = adata_ref - self.adata_query = adata_query - self.aligner_all = aligner_all - self.CP_25 = CP_25 - self.CP_05 = CP_05 - self.CP_75 = CP_75 - self.pattern_map = pattern_map - - def compute_match_statistics(self, existing_method_df, existing_method=True, print_stat=True): - - group_alignment_strings, gene_group = self.get_group_alignment_strings('AllMatch', existing_method, existing_method_df) - #print(gene_group) - n_false_mismatches = 0 - n_false_mismatched_alignments = 0 - alignment_state_count = 0 - false_mismatch_counts = [] - for alignment_string in group_alignment_strings: - mismatch_count = alignment_string.count('I') + alignment_string.count('D') - if(mismatch_count>0): - n_false_mismatched_alignments+=1 - n_false_mismatches += mismatch_count - false_mismatch_counts.append(mismatch_count) - alignment_state_count += len(alignment_string) - - if(print_stat): - print('Number of false mismatched alignments ', n_false_mismatched_alignments, ' = ',n_false_mismatched_alignments*100 / len(group_alignment_strings), '%' ) - print('Number of false mismatches ', n_false_mismatches, ' = ', n_false_mismatches*100 / alignment_state_count, '%' ) - print('mean false mismatch count for an alignment = ', np.mean(false_mismatch_counts)) - print('*****') - return n_false_mismatched_alignments*100 / len(group_alignment_strings) - - def get_group_alignment_strings(self, pattern, existing_method=False, existing_method_df = None): - - gene_group = list(self.adata_ref.var_names[self.adata_ref.var.gene_pattern == pattern]) - #print(gene_group) - if(existing_method): - gene_group =np.intersect1d(list(existing_method_df.index), gene_group) - df = existing_method_df.loc[gene_group] - group_alignment_strings = [] - for i in range(df.shape[0]): - group_alignment_strings.append(df['alignment_string'][i]) - group_alignment_strings = [a.replace(' ','') for a in group_alignment_strings] - else: - group_alignment_strings = [] - for g in gene_group: - group_alignment_strings.append(self.aligner_all.results_map[g].alignment_str) - - return group_alignment_strings, gene_group - - - def get_accuracy_stat_divergence(self, alignment_str, divergence_mode = True): - - if(not divergence_mode): - alignment_str = alignment_str[::-1] - - expected_pattern = '^[M/W/V]+[I/D]+$' - swapped_pattern = '^[I/D]+[M/W/V]+$' - - false_start_mismatch_len = 0 - false_end_match_len = 0 - n_matches = 0 - n_false_intermediate_mismatches = 0 - end_mismatch_len = 0 - status = '' - - if(alignment_str.count('M') + alignment_str.count('W') + alignment_str.count('V') == 0): - status = 'complete_mismatch' - false_start_mismatch_len = -1 - false_end_match_len = -1 - n_false_intermediate_mismatches = -1 - n_matches = -1 - - elif(alignment_str.count('I') + alignment_str.count('D')== 0 ): - status = 'complete_match' - false_start_mismatch_len = -1 - false_end_match_len = -1 - n_false_intermediate_mismatches = -1 - n_matches = -1 - else: - res = re.findall(expected_pattern, alignment_str) - res_alt = re.findall(swapped_pattern, alignment_str) - if(len(res)==1): - status = 'expected_pattern' - n_matches = alignment_str.count('M') + alignment_str.count('W') + alignment_str.count('V') - end_mismatch_len = alignment_str.count('I') + alignment_str.count('D') - elif(len(res_alt)==1): - status = 'swapped_pattern' - false_start_mismatch_len = -1 - false_end_match_len = -1 - n_false_intermediate_mismatches = -1 - n_matches = -1 - end_mismatch_len = -1 - else: - status = 'complex_pattern' - - # check for false start mismatches - false_start_mismatch_len = 0 - c=0 - while(alignment_str[c] in ['I','D']): - false_start_mismatch_len+=1 - c+=1 - - false_end_match_len = 0 - c=len(alignment_str)-1 - while(alignment_str[c] in ['M','W','V']): - false_end_match_len +=1 - c-=1 - - # find intermediate number of false mismatches within matched region - # by first extracting the region between the first match and the last match - match_regions = [] - for m in re.finditer('[M/V/W]+', alignment_str): - if(m.start(0) != m.end(0)): - match_regions.append([m.start(0), m.end(0)-1]) - - if(false_end_match_len==0): - first_match_region = match_regions[0] - last_match_region = match_regions[len(match_regions)-1] - else: - first_match_region = match_regions[0] - last_match_region = match_regions[len(match_regions)-2] - - intermediate_str = alignment_str[first_match_region[0]: last_match_region[1]+1] - - n_matches = intermediate_str.count('M') + intermediate_str.count('W') + intermediate_str.count('V') - n_false_intermediate_mismatches = intermediate_str.count('I') + intermediate_str.count('D') - - - indel_regions = [] - for m in re.finditer('[I/D]+', alignment_str): - if(m.start(0) != m.end(0)): - indel_regions.append([m.start(0), m.end(0)-1]) - last_indel_region = indel_regions[len(indel_regions)-1] - end_mismatch_len = len(alignment_str[last_indel_region[0]:last_indel_region[1]+1]) - - # main statistics - #print('False start mismatch len: ', false_start_mismatch_len) - #print('False end match len: ', false_end_match_len) - #print('[start] Match length', n_matches) - #print('end mismatch length') - #print('# of false intermediate mismatches', n_false_intermediate_mismatches) - #print('End mismatch end', end_mismatch_len) - - return status, false_start_mismatch_len, false_end_match_len, n_matches, n_false_intermediate_mismatches, end_mismatch_len - - - def plot_validation_stat(self, accuracy_results, n_bins = 15, divergence=True): - - # plt.subplots(1,3, figsize=(15,3)) - # plt.subplot(1,3,1) - # sb.heatmap(CP_25, square=True, cmap='jet') - # plt.subplot(1,3,2) - # sb.heatmap(CP_05, square=True, cmap='jet') - # plt.subplot(1,3,3) - # sb.heatmap(CP_75, square=True, cmap='jet') - - #plt.savefig('changepoint_kernels.pdf') - - - # plt.subplots(1,3, figsize=(15,3)) - # plt.subplot(1,3,1) - a = pd.DataFrame(self.CP_25 > 0.01) - # sb.heatmap(a, square=True) - approx_bifurcation_start_point_25 = np.min(np.where(a.iloc[299]==True)) - #approx_bifurcation_start_point_25 = np.round((0.5* approx_bifurcation_start_point_25/150) ,2) - approx_bifurcation_start_point_25 = np.round((approx_bifurcation_start_point_25/300) ,2) - - - # plt.subplot(1,3,2) - a = pd.DataFrame(self.CP_05 > 0.01) - # sb.heatmap(a, square=True) - approx_bifurcation_start_point_05 = np.min(np.where(a.iloc[299]==True)) - #approx_bifurcation_start_point_05 = np.round((0.5* approx_bifurcation_start_point_05/150),2) - approx_bifurcation_start_point_05 = np.round((approx_bifurcation_start_point_05/300),2) - - - # plt.subplot(1,3,3) - a = pd.DataFrame(self.CP_75 > 0.01) - # sb.heatmap(a, square=True) - approx_bifurcation_start_point_75 = np.min(np.where(a.iloc[299]==True)) - #approx_bifurcation_start_point_75 = np.round((0.5* approx_bifurcation_start_point_75/150),2) - approx_bifurcation_start_point_75 = np.round((approx_bifurcation_start_point_75/300),2) - - if(divergence): - expected_len_25 = [n_bins*approx_bifurcation_start_point_25, n_bins*0.25] - expected_len_05 = [n_bins*approx_bifurcation_start_point_05, n_bins*0.5] - expected_len_75 = [n_bins*approx_bifurcation_start_point_75, n_bins*0.75] - - expected_mismatch_len_25 = [n_bins*(1-0.25),n_bins*(1-approx_bifurcation_start_point_25)] - expected_mismatch_len_05 = [n_bins*(1-0.5), n_bins*(1-approx_bifurcation_start_point_05)] - expected_mismatch_len_75 = [n_bins*(1-0.75),n_bins*(1-approx_bifurcation_start_point_75)] - - y1 = 'start_match_len' - y2 = 'end_mismatch_len' - y3 = 'false_start_mismatch_len' - y4 = 'n_false_intermediate_mismatches' - - y1_ = 'Start match length' - y2_ = 'End mismatch length' - y3_ = 'False start mismatch length' - y4_ = 'Number of false intermediate mismatches' - - print('Approx. bifurcation start i for cp=0.25 = ', approx_bifurcation_start_point_25 ) - print('Approx. bifurcation start i for cp=0.5 = ', approx_bifurcation_start_point_05 ) - print('Approx. bifurcation start i for cp=0.75 = ', approx_bifurcation_start_point_75 ) - - # divegence - df_025 = accuracy_results['Divergence_025'] - df_05 = accuracy_results['Divergence_05'] - df_075 = accuracy_results['Divergence_075'] - else: - expected_len_25 = [n_bins*(approx_bifurcation_start_point_75),n_bins*0.75] - expected_len_75 = [n_bins*(approx_bifurcation_start_point_25),n_bins*0.25] - expected_len_05 = [n_bins*(approx_bifurcation_start_point_05),n_bins*0.5] - - expected_mismatch_len_25 = [n_bins*(1-0.75),n_bins*(1-approx_bifurcation_start_point_75)] - expected_mismatch_len_75 = [n_bins*(1-0.25), n_bins*(1-approx_bifurcation_start_point_25)] - expected_mismatch_len_05 = [n_bins*(1-0.5), n_bins*(1-approx_bifurcation_start_point_05)] - - y1='end_match_len' - y2= 'start_mismatch_len' - y3='false_end_mismatch_len' - y4 = 'n_false_intermediate_mismatches' - - y1_='End match length' - y2_= 'Start mismatch length' - y3_='False end mismatch length' - y4_ = 'Number of false intermediate mismatches' - - print('Approx. convergent start i for cp=0.25 = ', 1-approx_bifurcation_start_point_75 ) - print('Approx. convergent start i for cp=0.5 = ', 1-approx_bifurcation_start_point_05 ) - print('Approx. convergent start i for cp=0.75 = ', 1-approx_bifurcation_start_point_25 ) - - # divegence - df_025 = accuracy_results['Convergence_025'] - df_05 = accuracy_results['Convergence_05'] - df_075 = accuracy_results['Convergence_075'] - - print('Expected match len for cp=0.25 = ', expected_len_25 ) - print('Expected match len for cp=0.5 = ', expected_len_05 ) - print('Expected match len for cp=0.75 = ', expected_len_75 ) - - print('Expected mismatch len for cp=0.25 = ', expected_mismatch_len_25 ) - print('Expected mismatch len for cp=0.5 = ', expected_mismatch_len_05 ) - print('Expected mismatch len for cp=0.75 = ', expected_mismatch_len_75 ) - - df_075['BF_approx'] = np.repeat('0.75', len(df_075)) - df_025['BF_approx'] = np.repeat('0.25', len(df_025)) - df_05['BF_approx'] = np.repeat('0.5', len(df_05)) - df = pd.concat( [df_025, df_05, df_075]) - df = df[df.status!='complete_mismatch'] - df = df[df.status!='swapped_pattern'] - df = df[df.status!='complete_match'] - - # get the max mismatch length (across Is and Ds segments) - mismatch_regions = [] - for a in df['alignment_str']: - temp_reg = re.findall('[I/D]+',a) - if(not divergence): - mismatch_regions.append(temp_reg[0]) # first mismatch region - else: - mismatch_regions.append(temp_reg[len(temp_reg)-1]) # last mismatch region - mismatch_lengths = [] - for a in mismatch_regions: - mismatch_lengths.append(np.max([a.count('I'),a.count('D')])) - - if(divergence): # because the indel length will always be twice the expected length (# of Is == # of Ds) - df['end_mismatch_len'] = mismatch_lengths - else: - df['start_mismatch_len'] = mismatch_lengths - - plt.subplots(1,4, figsize=(15,4)) - plt.subplot(1,4,1) - g = sb.violinplot(data=df, y = y1, x='BF_approx', cut=0) - plt.xlabel('Approx bifurcation point') - plt.title(y1_) - plt.ylim([0,18]) - g.axhspan(expected_len_25[0], expected_len_25[1], xmin=0, xmax=0.35, alpha=0.2) - g.axhspan(expected_len_05[0], expected_len_05[1], xmin=0.35, xmax=0.65,facecolor='orange', alpha=0.2) - g.axhspan(expected_len_75[0], expected_len_75[1], xmin=0.65, xmax=1.0,facecolor='green', alpha=0.2) - plt.ylabel(y1_) - - plt.subplot(1,4,2) - g = sb.violinplot(data=df, y = y2, x='BF_approx', cut=0) - plt.xlabel('Approx bifurcation point') - plt.ylabel('') - plt.ylim([0,18]) - plt.title(y2_) - g.axhspan(expected_mismatch_len_25[0], expected_mismatch_len_25[1], xmin=0, xmax=0.35,alpha=0.2) - g.axhspan(expected_mismatch_len_05[0], expected_mismatch_len_05[1],xmin=0.35, xmax=0.65,facecolor='orange', alpha=0.2) - g.axhspan(expected_mismatch_len_75[0], expected_mismatch_len_75[1],xmin=0.65, xmax=1.0,facecolor='green', alpha=0.2) - plt.ylabel(y2_) - - plt.subplot(1,4,3) - sb.violinplot(data=df, y = y3, x='BF_approx', cut=0) - plt.xlabel('Approx bifurcation point') - plt.ylabel('') - plt.ylim([0,18]) - plt.title(y3_) - plt.ylabel(y3_) - - plt.subplot(1,4,4) - sb.violinplot(data=df, y = y4, x='BF_approx', cut=0) - plt.xlabel('Approx bifurcation point') - plt.ylabel('') - plt.ylim([0,18]) - plt.title(y4_) - plt.ylabel(y4_) - - plt.tight_layout() - - return df - - - def compute_divergence_convergence_statistics(self, existing_method = False, tr_df = None, print_stat=True): - - divcov_alignment_accuracy_results = {} - - for PATTERN in [ 'Convergence_025', 'Convergence_05', 'Convergence_075','Divergence_025', 'Divergence_05', 'Divergence_075']: - - if(not existing_method): # G2G - group_alignment_strings, gene_group = self.get_group_alignment_strings(PATTERN) - else: # TrAGEDy - group_alignment_strings, gene_group = self.get_group_alignment_strings(PATTERN, existing_method=True, existing_method_df=tr_df) - - accuracy_status = [] - for al in group_alignment_strings: - status, false_start_mismatch_len, false_end_match_len, n_matches, n_false_intermediate_mismatches, end_mismatch_len = self.get_accuracy_stat_divergence(al, divergence_mode=PATTERN.startswith('Div')) - accuracy_status.append([status, false_start_mismatch_len, false_end_match_len, n_matches, n_false_intermediate_mismatches, end_mismatch_len]) - - d = pd.DataFrame(accuracy_status) - if(PATTERN.startswith('Div')): - d.columns = ['status','false_start_mismatch_len','false_end_match_len','start_match_len','n_false_intermediate_mismatches','end_mismatch_len'] - else: - d.columns = ['status','false_end_mismatch_len','false_start_match_len','end_match_len','n_false_intermediate_mismatches','start_mismatch_len'] - d['alignment_str'] = group_alignment_strings - d['gene'] = gene_group - if(print_stat): - print(PATTERN, len(gene_group), np.unique(d['status'] , return_counts=True)) - - divcov_alignment_accuracy_results[PATTERN] = d - - return divcov_alignment_accuracy_results - - # clustering related - - def computeE(self, alignment_strings, metric): - # compute distance matrix - print('compute distance matrix') - dist_mat_functions = {'hamming': ClusterUtils.compute_hamming_dist_matrix, 'levenshtein': ClusterUtils.compute_levenshtein_dist_matrix} - compute_dist_matrix = dist_mat_functions[metric] - E = compute_dist_matrix(alignment_strings) - return E - - def run_clustering(self, alignment_strings, metric, gene_names, DIST_THRESHOLD=0.2, experiment_mode=False, E=None): - - if(E is None): - # compute distance matrix - E = self.computeE(alignment_strings, metric) - - if(experiment_mode): - scores = []; n_clusters = []; dist_thresholds = np.arange(0.01,1.0,0.01); score_modes = []; n_small_clusters = [] - eval_dists = [] - for D_THRESH in tqdm(dist_thresholds): - gene_clusters, cluster_ids, silhouette_score, silhouette_score_mode, n_small_cluster = ClusterUtils.run_agglomerative_clustering(E, gene_names, D_THRESH) - - if(len(gene_clusters.keys())==1): - break - scores.append(silhouette_score) - n_clusters.append(len(gene_clusters.keys())) - score_modes.append(silhouette_score_mode) - n_small_clusters.append(n_small_cluster) - eval_dists.append(D_THRESH) - - plt.rcParams.update({'font.size': 14}) - plt.subplots(1,3,figsize=(10,5)) - plt.subplot(1,3,1) - sb.lineplot(x=eval_dists, y=scores, color = 'blue', marker='o') - plt.xlabel('Distnace threshold') - plt.ylabel('Mean Silhouette Score') - plt.subplot(1,3,2) - sb.lineplot(x=n_clusters, y=scores, color='red', marker='o') - plt.xlabel('Number of clusters') - plt.ylabel('Mean Silhouette Score') - plt.subplot(1,3,3) - sb.lineplot(x=eval_dists, y=n_clusters, color='green', marker='o') - plt.xlabel('Distance threshold') - plt.ylabel('Number of clusters') - plt.tight_layout() - df = pd.DataFrame([eval_dists,scores,n_clusters]).transpose() - df.columns = ['Distance threshold', 'Mean Silhouette Score','Number of clusters'] - return df - - else: - print('run agglomerative clustering | ', np.round(DIST_THRESHOLD,2) ) - gene_clusters, cluster_ids, silhouette_score, silhouette_score_samples, n_small_cluster = ClusterUtils.run_agglomerative_clustering(E, gene_names, DIST_THRESHOLD) - print('silhouette_score: ', silhouette_score) - return gene_clusters - - def compute_misclustering_rate(self, gene_clusters, alignment_strings): - misclustered_count = 0 - cid = 0 - for i in range(len(gene_clusters)): - cluster = gene_clusters[i] - cluster_pattern =[] - for g in cluster: - cluster_pattern.append(self.pattern_map[g]) - - pattern_types = np.unique(cluster_pattern, return_counts=True)[0] - pattern_counts = np.unique(cluster_pattern, return_counts=True)[1] - - if(len(pattern_types)>1): - max_count = np.max(pattern_counts) - # recording the number of outliers in a cl - for c in pattern_counts: - if(c!=max_count): - misclustered_count += c - #print(cid, pattern_types, pattern_counts, misclustered_count)#, ' || misclustered count = ',misclustered_count) - cid+=1 - print('misclustered rate: ', misclustered_count*100/len(alignment_strings),'%') - return misclustered_count*100/len(alignment_strings) - - def compute_cluster_diagnostics(self, alignment_strings, gene_names, distance_metric = 'levenshtein'): - - E = self.computeE(alignment_strings, metric=distance_metric) - df = self.run_clustering(alignment_strings, metric=distance_metric, gene_names=gene_names, experiment_mode=True, E=E) - - print('computing misclustering rates for different distance thresholds') - misclustering_rates = [] - distance_thresholds = [] - dist_range = list(df['Distance threshold']) - for dist_thresh in dist_range: - gene_clusters = self.run_clustering(alignment_strings, metric=distance_metric, gene_names=gene_names, DIST_THRESHOLD=dist_thresh , experiment_mode=False, E=E) - mc = self.compute_misclustering_rate(gene_clusters, alignment_strings) - misclustering_rates.append(mc) - distance_thresholds.append(dist_thresh) - df['misclustering_rate']= misclustering_rates - - return E, df diff --git a/genes2genes/TimeSeriesPreprocessor.py b/genes2genes/TimeSeriesPreprocessor.py index ae7e515..dac1f80 100644 --- a/genes2genes/TimeSeriesPreprocessor.py +++ b/genes2genes/TimeSeriesPreprocessor.py @@ -1,307 +1,221 @@ import numpy as np import seaborn as sb +import pandas as pd import torch -from optbinning import ContinuousOptimalBinning +import multiprocessing +from scipy.sparse import csr_matrix +from sklearn.preprocessing import MinMaxScaler from . import MyFunctions -from . import MVG +from . import Utils -class SummaryTimeSeries: +class TrajectoryInterpolator: - def __init__(self, time_points, mean_trend, std_trend, data_bins, X,Y, cell_densities): - self.time_points = np.asarray(time_points) - self.mean_trend = np.asarray(mean_trend) - self.std_trend = np.asarray(std_trend) - self.data_bins = data_bins - self.X = X - self.Y = Y - self.cell_densities = cell_densities - self.intpl_means = None - self.intpl_stds = None + """ + This class defines an interpolator function for a given gene expression time series, which prepares required summary statistics for interpolation + """ + + def __init__(self, adata, n_bins, adaptive_kernel = True, kernel_WINDOW_SIZE=0.1, raising_degree = 1): + self.n_bins = n_bins + self.adata = adata[np.argsort(adata.obs['time'])] + + self.cell_pseudotimes = np.asarray(self.adata.obs.time) + self.interpolation_points = np.linspace(0,1,n_bins) + self.kernel_WINDOW_SIZE = kernel_WINDOW_SIZE + self.adaptive_kernel = adaptive_kernel + self.k = raising_degree # the degree of stretch imposed for the window sizes from baseline kernel_WINDOW_SIZE = 0.1 - def plot_mean_trend(self, color='blue'): - sb.lineplot(x=self.time_points, y=self.mean_trend, linewidth=3, color=color) + self.mat = csr_matrix(self.adata.X.todense().transpose()) + self.N_PROCESSES = multiprocessing.cpu_count() + self.gene_list = self.adata.var_names + def run(self): + #print('computing absolute time diffs') + self.abs_timediff_mat = self.compute_abs_timediff_mat() + if(self.adaptive_kernel): + #print('Running in adaptive interpolation mode') + #print('computing an cell densities for adaptive interpolation') + self.reciprocal_cell_density_estimates = self.compute_cell_densities() + #print('computing adaptive win denomes') + self.adaptive_win_denoms = self.compute_adaptive_window_denominator() + #print('computing cell weight matrix') + self.cell_weight_mat = self.compute_Weight_matrix() - def reverse_time_series(self): + def compute_abs_timediff_mat(self): # interpolation time points x cells matrix + df = [] + for i in self.interpolation_points: + # absolute difference between actual pseudotime point of a cell and interpolation time point (needed to compute gaussian kernel later on) + abs_dist = np.abs(np.asarray(self.cell_pseudotimes) - i) #np.repeat(i,len(self.cell_pseudotimes)) + df.append(abs_dist) + df = pd.DataFrame(df); df.columns = self.adata.obs_names; df.index = self.interpolation_points + return df - self.time_points = self.time_points[::-1] - self.mean_trend = self.mean_trend[::-1] - self.std_trend = self.std_trend[::-1] - # self.data_bins = self.data_bins[::-1] - self.X = self.X [::-1] - self.Y = self.Y[::-1] - self.cell_densities = self.cell_densities[::-1] - self.intpl_means = self.intpl_means[::-1] - self.intpl_stds = self.intpl_stds[::-1] - - -class Prepocessor: - - def __init__(self, *args): - if len(args)>1: - GEX_MAT =args[0] - pseudotime_series = args[1] - m = args[2] - WINDOW_SIZE = args[3] - optimal_binning = args[4] - opt_binning = args[5] - self.GEX_MAT = GEX_MAT - self.pseudotime_series = pseudotime_series - self.compute_cell_density_trend(WINDOW_SIZE, m=m, optimal_binning=optimal_binning, opt_binning = opt_binning) - else: - self.GEX_MAT = None - self.pseudotime_series = None - self.cell_densities = None + def compute_cell_densities(self): # cell density vector across interpolation time points + # compute cell density estimate for each interpolation point + cell_density_estimates = [] + interpolation_points = self.interpolation_points + cell_pseudotimes = self.cell_pseudotimes + range_length_mid = interpolation_points[2] - interpolation_points[0] # constant across + range_length_corner = interpolation_points[1] - interpolation_points[0] # constant across + for i in range(len(interpolation_points)): + prime_point = interpolation_points[i] + cell_density = 0.0 # per discrete point cell density = # of cells falling within interpolation time points [i-1,i+1] range window / window length + if(i==0): + logic = cell_pseudotimes <= interpolation_points[i+1]; range_length = range_length_corner + elif(i==len(interpolation_points)-1): + logic = cell_pseudotimes >= interpolation_points[i-1]; range_length = range_length_corner + else: + logic = np.logical_and(cell_pseudotimes <= interpolation_points[i+1], cell_pseudotimes >= interpolation_points[i-1]) + range_length = range_length_mid + + density_stat = np.count_nonzero(logic) + density_stat = density_stat/range_length + cell_density_estimates.append(density_stat) + #print('** per unit cell density: ', cell_density_estimates) + self.cell_density_estimates = cell_density_estimates + cell_density_estimates = [1/x for x in cell_density_estimates] # taking reciprocal for weighing - - def create_summary_trends(self, X, Y): - # remember we have 100 synthetic cells per each time point - mean_trend = [] - std_trend = [] - data_bins = [] - for t in range(len(self.artificial_time_points)): - data_points = Y[X== self.artificial_time_points[t]] - mean_trend.append(np.mean(data_points) ) - std_trend.append(np.std(data_points) ) - data_bins.append(data_points) - - return SummaryTimeSeries(self.artificial_time_points, mean_trend, std_trend, data_bins,X,Y,self.cell_densities) - - - # data = dataframe, pseudotime series = array of pseudotimes for cells - def create_equal_len_time_bins(self, gene, N_BINS = 10): - bins_indices = np.linspace(np.min(self.pseudotime_series), np.max(self.pseudotime_series), N_BINS+1 ) # bin margins - bins_indices[N_BINS] = np.max(self.pseudotime_series) + 0.00001 # small jitter added to consistently mark the bin boundaries as [), [), .... ]] - data_bins = [] - time_bins = [] - bin_compositions = [] - - for i in range(len(bins_indices)): - if(i==len(bins_indices)-1): - break - t = np.logical_and(self.pseudotime_series >= bins_indices[i], self.pseudotime_series < bins_indices[i+1]) - data_bins = data_bins + list(self.GEX_MAT.loc[t,gene]) - bin_compositions.append(len(self.GEX_MAT.loc[t,gene])) - time_bins = time_bins + list(np.repeat(bins_indices[i+1], len(self.GEX_MAT[t]))) - return bins_indices[1:len(bins_indices)], time_bins, np.asarray(data_bins), bin_compositions - - # **** CellAlign paper's interpolation method based on Gaussian Kernel - def interpolate_time_series(self, gene): # WINDOW_SIZE = 0.1 # default value used in CellAlign - - intpl_gex = [] - for intpl_i in range(len(self.artificial_time_points)): - weights = self.cell_weights[intpl_i] - weighted_sum = 0.0 - for cell_i in range(len(self.pseudotime_series)): - weighted_sum = weighted_sum + (weights[cell_i]*self.GEX_MAT[gene][cell_i]) - weighted_sum = weighted_sum/np.sum(weights) - intpl_gex.append(weighted_sum) - intpl_gex = np.asarray(intpl_gex).flatten() - - # min max normalisation - scaled_intpl_gex = [] - for i in range(len(intpl_gex)): - scaled_intpl_gex.append((intpl_gex[i] - np.min(intpl_gex))/(np.max(intpl_gex) - np.min(intpl_gex) )) - return scaled_intpl_gex, self.artificial_time_points - - - # Interpolation of distributions based on Gaussian kernel (similar to above method but we get a distribution of artificial cells for interpolated time points now) - # weighted mean and weighted std based dist interpolation - # Extending the CellAlign interpolation method based on Gaussian Kernel - def interpolate_time_series_distributions(self, gene, N=50, CONST_STD= False,WEIGHT_BY_CELL_DENSITY=False, ESTIMATE = True, user_given_std =[]): + #print('reciprocals: ', cell_density_estimates) + # if this has inf values, use the max weight for them (otherwise it becomes inf resulting same weights 1.0 for all cells) + arr = cell_density_estimates + if(np.any(np.isinf(arr))): + max_w = max(np.asarray(arr)[np.isfinite(arr)] ) + cell_density_estimates = np.where(np.isinf(arr), max_w, arr) + #print('** adaptive weights -- ', cell_density_estimates) + + return cell_density_estimates + + def compute_adaptive_window_denominator(self): # for each interpolation time point - torch.manual_seed(1) - intpl_gex = [] - all_time_points = [] - intpl_means = [] - intpl_stds = [] + cell_density_adaptive_weights = self.reciprocal_cell_density_estimates - for intpl_i in range(len(self.artificial_time_points)): + # using min-max to stretch the range (for highly adapted window sizes having high window sizes) + cell_density_adaptive_weights =np.asarray(cell_density_adaptive_weights) + scaler = MinMaxScaler() + cell_density_adaptive_weights = scaler.fit_transform(cell_density_adaptive_weights.reshape(-1, 1)).flatten() + cell_density_adaptive_weights = cell_density_adaptive_weights * self.k + + # ======= enforcing the same window_size = kernel_WINDOW_SIZE for the interpolation with the least weighted kernel window size + adaptive_window_sizes = [] + for cd in cell_density_adaptive_weights: + adaptive_window_sizes.append(cd*self.kernel_WINDOW_SIZE) #weighing stadard window size - # estimate weighted mean and weighted variance - #if(ESTIMATE): - if(user_given_std[intpl_i] <0): - weights = self.cell_weights[intpl_i] - weighted_sum = 0.0 - for cell_i in range(len(self.pseudotime_series)): - weighted_sum = weighted_sum + (weights[cell_i]*self.GEX_MAT[gene][cell_i]) - weighted_sum = weighted_sum/np.sum(weights) - dist_mean = weighted_sum - - if(CONST_STD): # for getting just the average trend across - dist_std = 0.1 - else: # tweighted standard deviation - real_mean = np.mean(self.GEX_MAT[gene]) - weighted_sum_std = 0.0 - for cell_i in range(len(self.pseudotime_series)): - weighted_sum_std = weighted_sum_std + (weights[cell_i]*(( self.GEX_MAT[gene][cell_i] - real_mean) ** 2)) - n = len(self.pseudotime_series) - weighted_std = np.sqrt(weighted_sum_std/(np.sum(weights) * (n-1)/n)) - if(WEIGHT_BY_CELL_DENSITY): - weighted_std = weighted_std * self.cell_densities[intpl_i] # weighting according to cell density - dist_std = weighted_std - if(dist_std==0 or np.isnan(dist_std)): # case of single data point or no data points - #print('!!!! ALERT ---- DIST STD =0 nan') - dist_std = 0.01#0.1 #np.mean(summary_series_obj.std_trend) - else: - dist_mean = 0.0 - dist_std = user_given_std[intpl_i] # - D,temp1,temp2 = MyFunctions.generate_random_dataset(N, dist_mean, dist_std) + # find the interpolation point for which the window_size weighted to be lowest -- furthest to kernel_WINDOW_SIZE + temp = list(np.abs(adaptive_window_sizes - np.repeat(self.kernel_WINDOW_SIZE,self.n_bins))) + least_affected_interpolation_point = temp.index(max(temp)) + residue = np.abs(self.kernel_WINDOW_SIZE - adaptive_window_sizes[least_affected_interpolation_point]) + if(self.k>1): # linear scaling to stretch the range of window size from 0.1 base line. + adaptive_window_sizes = adaptive_window_sizes + (residue/(self.k-1)) + else: + adaptive_window_sizes = adaptive_window_sizes + residue + + # compute adaptive window size based denominator of Gaussian kernel for each cell for each interpolation time point + W = [] + for adw in adaptive_window_sizes: + adaptive_W_size = adw**2 + W.append(adaptive_W_size) + self.adaptive_window_sizes = adaptive_window_sizes - intpl_gex.append(D) - intpl_means.append(dist_mean) - intpl_stds.append(dist_std) - all_time_points.append(np.repeat(self.artificial_time_points[intpl_i], N)) - - return [np.asarray(intpl_gex).flatten(), np.asarray(all_time_points).flatten(), self.artificial_time_points, intpl_means, intpl_stds] - - - def prepare_interpolated_gene_expression_series(self, gene, CONST_STD=False, WEIGHT_BY_CELL_DENSITY=False, ESTIMATE = True, user_given_std =[]): - intpl_out = self.interpolate_time_series_distributions(gene, CONST_STD=CONST_STD, WEIGHT_BY_CELL_DENSITY= WEIGHT_BY_CELL_DENSITY, ESTIMATE=ESTIMATE, user_given_std=user_given_std) - X = intpl_out[1]; Y = intpl_out[0]; artificial_time_points = intpl_out[2] - obj = self.create_summary_trends(X,Y) - obj.intpl_means = intpl_out[3] - obj.intpl_stds = intpl_out[4] + return W - self.all_time_points = X # [TODO] -- repeats the same thing! To be done efficiently later!!!! - - return obj - - - def get_optimal_binning(self, time_var_arr, n_points): - x = time_var_arr - optb = ContinuousOptimalBinning(name='pseudotime', dtype="numerical", max_n_bins=n_points) - # this pacakge uses mixed integer programming based optimization to determine an optimal binning - optb.fit(x, x) - return optb.splits - + # compute Gaussian weights for each interpolation time point and cell + def compute_Weight_matrix(self): + if(self.adaptive_kernel): + adaptive_win_denoms_mat = np.asarray([np.repeat(a, len(self.cell_pseudotimes)) for a in self.adaptive_win_denoms]) + W_matrix = pd.DataFrame(np.exp(-np.divide(np.array(self.abs_timediff_mat**2), adaptive_win_denoms_mat))) + else: + W_matrix = pd.DataFrame(np.exp(-np.array(self.abs_timediff_mat**2)/self.kernel_WINDOW_SIZE**2)) + W_matrix.columns = self.adata.obs_names + self._real_intpl = self.interpolation_points + #self.interpolation_points = [np.round(i,2) for i in self.interpolation_points] + W_matrix.index = self.interpolation_points + #sb.heatmap(W_matrix) + return W_matrix - def compute_cell_density_trend(self, WINDOW_SIZE = 0.1, m=50, optimal_binning = False, opt_binning = []): # TODO LATEST TEST 07/01/2023 earlier used 0.15 for early Jan runs + def get_effective_cell_pseudotime_range(self, i, effective_weight_threshold): + effective_weights = self.cell_weight_mat.loc[self.interpolation_points[i]] + cell_names = np.asarray(effective_weights.index) + effective_weights = np.asarray(effective_weights) + cell_ids = np.where(effective_weights>effective_weight_threshold)[0] + effective_cell_names = cell_names[cell_ids] + effective_cell_pseudotimes = self.cell_pseudotimes[cell_ids] + return effective_cell_pseudotimes + + # plotting highly effective cell_contribution regions for given interpolation points based on adaptive weighted gaussian kernel + def plot_effective_regions_for_interpolation_points(self, intpointsIdx2plots, effective_weight_threshold=0.5, plot=True): - artificial_time_points = [] - if(optimal_binning): - artificial_time_points = opt_binning - else: - for j in range(1,m): - artificial_time_points.append((j-1)/(m-1)) - artificial_time_points.append(1.0) + cmap = sb.color_palette("viridis", as_cmap=True) + self.n_effective_cells = [] + for i in intpointsIdx2plots: + x = self.get_effective_cell_pseudotime_range(i, effective_weight_threshold= effective_weight_threshold) + self.n_effective_cells.append(len(x)) + if(plot): + sb.kdeplot(x, fill=True, color=cmap(i/self.n_bins), clip=(0.0,1.0)) - artificial_time_points = np.asarray(artificial_time_points) - artificial_time_points = artificial_time_points[artificial_time_points >= np.min(self.pseudotime_series)] - artificial_time_points = artificial_time_points[artificial_time_points <= np.max(self.pseudotime_series)] - if(artificial_time_points[0]!=0.0): - artificial_time_points = np.asarray([0] + list(artificial_time_points)) + +""" +The below functions define interpolation functions used by the above Interpolator object +(defined outside class for time efficiency) +""" +# ====================== interpolation process of genes +def compute_stat(row, x, cell_densities, user_given_std): + idx = row.name + if(user_given_std[idx] < 0): + cell_weights_sum = np.sum(row) + + # estimate weighted mean + weighted_mean = np.dot(row, x)/cell_weights_sum + #print(weighted_mean) + + # estimate weighted variance + real_mean = np.mean(x); n = len(row) + weighted_sum_std = np.dot(row, (x - real_mean) ** 2 ) + weighted_std = np.sqrt(weighted_sum_std/(cell_weights_sum * (n-1)/n)) + weighted_std = weighted_std * cell_densities[idx] # weighting according to cell density + else: + weighted_mean = 0.0 + weighted_std = user_given_std[idx] # + + D,_,_ = MyFunctions.generate_random_dataset(50, weighted_mean, weighted_std) + return np.asarray([weighted_mean, weighted_std, D] ) - cell_densities = [] - cell_weights = {} +#row = list(trajInterpolator.cell_weight_mat.loc[intpl_i]) +def interpolate_gene_v2(i, trajInterpolator, user_given_std): + torch.manual_seed(1) + GENE = trajInterpolator.gene_list[i] + #print(GENE) + x = Utils.csr_mat_col_densify(trajInterpolator.mat ,i) + N_cells= len(trajInterpolator.cell_pseudotimes) - for intpl_i in range(len(artificial_time_points)): - weights = [] - for cell_i in range(len(self.pseudotime_series)): - w_i = np.exp(-((self.pseudotime_series[cell_i] - artificial_time_points[intpl_i])**2)/(WINDOW_SIZE**2)) - weights.append(w_i) - # weighted cell density - cell_densities.append(np.sum(weights)) - cell_weights[intpl_i] = np.asarray(weights) - cell_densities = np.asarray(cell_densities) - cell_densities = cell_densities/len(self.pseudotime_series) + trajInterpolator.cell_weight_mat.index = range(0,len(trajInterpolator.cell_weight_mat)) + cell_densities = list(trajInterpolator.cell_weight_mat.apply(np.sum, axis=1)/N_cells) - self.cell_weights = cell_weights - self.artificial_time_points = artificial_time_points - self.cell_densities = cell_densities - - return cell_weights, artificial_time_points, cell_densities + results = trajInterpolator.cell_weight_mat.apply(compute_stat, axis=1, args = ([x,cell_densities, user_given_std]), result_type='expand') + results = pd.DataFrame(results) - + return SummaryTimeSeries(GENE, results[0], results[1], results[2], trajInterpolator.interpolation_points) -# Later TODO: make superclass SummaryTimeSeries for both univariate and multivariate cases -class SummaryTimeSeriesMVG: +class SummaryTimeSeries: + """ + This class defines an interpolated time series object that carries the interpolated result of a gene expression time series + """ - def __init__(self, time_points, data_bins): + def __init__(self, gene_name, mean_trend, std_trend, intpl_gex, time_points): + self.gene_name = gene_name + self.mean_trend = np.asarray([np.mean(data_bin) for data_bin in intpl_gex]) # interpolated dist mean + self.std_trend = np.asarray([np.std(data_bin) for data_bin in intpl_gex]) # interpolated dist std + self.data_bins = list(intpl_gex) + self.intpl_means = list(mean_trend) # actual weighted means + self.intpl_stds = list(std_trend) # actual weighted stds self.time_points = np.asarray(time_points) - self.data_bins = data_bins - self.mean_trends = [] - for data_bin in data_bins: - data_bin = torch.tensor(np.asarray(data_bin) ) - μ, C = MVG.compute_mml_estimates(data_bin, data_bin.shape[1], data_bin.shape[0]) - self.mean_trends.append(μ) - - - -class Utils: - - def minmax_normalise(arr): + self.Y = np.asarray([np.asarray(x) for x in self.data_bins]).flatten() + self.X = np.asarray([np.repeat(t,50) for t in self.time_points]).flatten() - norm_arr = [] - arr = np.asarray(arr) - arr_max = np.max(arr) - arr_min = np.min(arr) - for i in range(len(arr)): - norm_arr.append((arr[i] - arr_min )/(arr_max - arr_min )) - return norm_arr - - - # computes distributional distance under the MML framework - def compute_mml_dist(ref_adata_subset,query_adata_subset, gene): - - ref_data = np.asarray(ref_adata_subset[:,gene].X.todense()).flatten() - query_data = np.asarray(query_adata_subset[:,gene].X.todense()).flatten() - μ_S = np.mean(ref_data) - μ_T = np.mean(query_data) - σ_S =np.std(ref_data) - σ_T =np.std(query_data) - #print(μ_S,μ_T) - if(not np.any(ref_data)): - σ_S = 0.1 - if(not np.any(query_data)): - σ_T = 0.1 - - I_ref_model, I_refdata_g_ref_model = MyFunctions.run_dist_compute_v3(ref_data, μ_S, σ_S) - I_query_model, I_querydata_g_query_model = MyFunctions.run_dist_compute_v3(query_data, μ_T, σ_T) - I_ref_model, I_querydata_g_ref_model = MyFunctions.run_dist_compute_v3(query_data, μ_S, σ_S) - I_query_model, I_refdata_g_query_model = MyFunctions.run_dist_compute_v3(ref_data, μ_T, σ_T) - - match_encoding_len1 = I_ref_model + I_querydata_g_ref_model + I_refdata_g_ref_model - match_encoding_len1 = match_encoding_len1/(len(query_data)+len(ref_data)) - match_encoding_len2 = I_query_model + I_refdata_g_query_model + I_querydata_g_query_model - match_encoding_len2 = match_encoding_len2/(len(query_data)+len(ref_data)) - match_encoding_len = (match_encoding_len1 + match_encoding_len2 )/2.0 - - null = (I_ref_model + I_refdata_g_ref_model + I_query_model + I_querydata_g_query_model)/(len(query_data)+len(ref_data)) - match_compression = match_encoding_len - null - #print(match_compression) - #sb.kdeplot(ref_data, fill=True) - #sb.kdeplot(query_data, fill=True) - return match_compression - - -def refine_pseudotime(adata): - average_ctype_mean_times = {} - - for ctype in np.unique(adata.obs.ANNOTATION_COMB): - average_ctype_mean_times[ctype] = np.mean(adata[adata.obs.ANNOTATION_COMB==ctype].obs.time) - - adata.obs['refined_time'] = adata.obs.time + def plot_mean_trend(self, color='midnightblue'): + sb.lineplot(x= self.time_points, y=self.mean_trend, color=color, linewidth=4) + + def plot_std_trend(self, color='midnightblue'): + sb.lineplot(x= self.time_points, y=self.std_trend, color=color, linewidth=4) - ctype_Ls = {} - ctype_Us = {} - - for ctype in np.unique(adata.obs.ANNOTATION_COMB): - ctype_adata = adata[adata.obs.ANNOTATION_COMB==ctype] - Q1,Q3 = np.percentile( ctype_adata.obs.time, [25,75]) - IQR = Q3-Q1 - U = Q3+(1.5 * IQR) - L = Q1-(1.5 * IQR) - ctype_Ls[ctype] = L - ctype_Us[ctype] = U - - - for i in range(adata.shape[0]): - ctype = adata.obs.ANNOTATION_COMB[i] - if(adata.obs.time[i]ctype_Us[ctype]): - adata.obs['refined_time'][i] = average_ctype_mean_times[ctype] - - return Utils.minmax_normalise(np.asarray(adata.obs.refined_time)) - + \ No newline at end of file diff --git a/genes2genes/Utils.py b/genes2genes/Utils.py new file mode 100644 index 0000000..7959f86 --- /dev/null +++ b/genes2genes/Utils.py @@ -0,0 +1,198 @@ +import numpy as np +from scipy.sparse import csr_matrix +from . import MyFunctions + +# UTIL FUNCTIONS +def csr_mat_col_densify(csr_matrix, j): + start_ptr = csr_matrix.indptr[j] + end_ptr = csr_matrix.indptr[j + 1] + data = csr_matrix.data[start_ptr:end_ptr] + dense_column = np.zeros(csr_matrix.shape[1]) + dense_column[csr_matrix.indices[start_ptr:end_ptr]] = data + return dense_column + + +def minmax_normalise(arr): + + norm_arr = [] + arr = np.asarray(arr) + arr_max = np.max(arr) + arr_min = np.min(arr) + for i in range(len(arr)): + norm_arr.append((arr[i] - arr_min )/(arr_max - arr_min )) + return norm_arr + + +# computes distributional distance under the MML framework +def compute_mml_dist(ref_adata_subset,query_adata_subset, gene): + + ref_data = np.asarray(ref_adata_subset[:,gene].X.todense()).flatten() + query_data = np.asarray(query_adata_subset[:,gene].X.todense()).flatten() + μ_S = np.mean(ref_data) + μ_T = np.mean(query_data) + σ_S =np.std(ref_data) + σ_T =np.std(query_data) + #print(μ_S,μ_T) + if(not np.any(ref_data)): + σ_S = 0.1 + if(not np.any(query_data)): + σ_T = 0.1 + + I_ref_model, I_refdata_g_ref_model = MyFunctions.run_dist_compute_v3(ref_data, μ_S, σ_S) + I_query_model, I_querydata_g_query_model = MyFunctions.run_dist_compute_v3(query_data, μ_T, σ_T) + I_ref_model, I_querydata_g_ref_model = MyFunctions.run_dist_compute_v3(query_data, μ_S, σ_S) + I_query_model, I_refdata_g_query_model = MyFunctions.run_dist_compute_v3(ref_data, μ_T, σ_T) + + match_encoding_len1 = I_ref_model + I_querydata_g_ref_model + I_refdata_g_ref_model + match_encoding_len1 = match_encoding_len1/(len(query_data)+len(ref_data)) + match_encoding_len2 = I_query_model + I_refdata_g_query_model + I_querydata_g_query_model + match_encoding_len2 = match_encoding_len2/(len(query_data)+len(ref_data)) + match_encoding_len = (match_encoding_len1 + match_encoding_len2 )/2.0 + + null = (I_ref_model + I_refdata_g_ref_model + I_query_model + I_querydata_g_query_model)/(len(query_data)+len(ref_data)) + match_compression = match_encoding_len - null + + return match_compression + + +def sample_state(x): + x = np.cumsum(x) + rand_num = np.random.rand(1) + # print(rand_num) + if(rand_num<=x[0]): + return 0#'M' + elif(rand_num>x[0] and rand_num<=x[1]): + return 1#'W' + elif(rand_num>x[1] and rand_num<=x[2]): + return 2#'V' + elif(rand_num>x[2] and rand_num<=x[3]): + return 3#'D' + elif(rand_num>x[3] and rand_num<=x[4]): + return 4#'I' + + +def compute_alignment_area_diff_distance(A1, A2, S_len, T_len): + + pi = np.arange(1, S_len+T_len+1) # skew diagonal indices + A1_ = "" + for c in A1: + A1_ = A1_ + c + if(c=='M'): + A1_ = A1_ + 'X' + A2_ = "" + for c in A2: + A2_ = A2_ + c + if(c=='M'): + A2_ = A2_ + 'X' + + pi_1_k = 0 + pi_2_k = 0 + #print(0, pi_1_k , pi_2_k ) + A1_al_index = 0 + A2_al_index = 0 + absolute_dist_sum = 0.0 + for k in pi: + #print('k=',k, A1_al_index, A2_al_index) + A1_state = A1_[A1_al_index] + A2_state = A2_[A2_al_index] + if(A1_state=='I' or A1_state=='V'): + pi_1_k = pi_1_k - 1 + elif(A1_state=='D' or A1_state=='W'): + pi_1_k = pi_1_k + 1 + if(A2_state=='I' or A2_state=='V'): + pi_2_k = pi_2_k - 1 + elif(A2_state=='D' or A2_state=='W'): + pi_2_k = pi_2_k + 1 + + absolute_dist_sum = absolute_dist_sum + np.abs(pi_1_k - pi_2_k) + #print('-----') + A1_al_index = A1_al_index + 1 + A2_al_index = A2_al_index + 1 + + return absolute_dist_sum + +def compute_chattergi_coefficient(y1,y2): + df = pd.DataFrame({'S':y1, 'T':y2}) + df['rankS'] = df['S'].rank() + df['rankT'] = df['T'].rank() + # sort df by the S variable first + df = df.sort_values(by='rankS') + return 1 - ((3.0 * df['rankT'].diff().abs().sum())/((len(df)**2)-1)) + + +def plot_different_alignments(paths, S_len, T_len, ax, mat=[]): # pass alignment path coordinates + mat=[] + # if(len(mat)==0): + for i in range(T_len+1): + mat.append(np.repeat(0,S_len+1)) + sb.heatmap(mat, square=True, cmap='viridis', ax=ax, vmin=0, vmax=0, cbar=False,xticklabels=False,yticklabels=False) + path_color = "orange" + + for path in paths: + path_x = [p[0]+0.5 for p in path] + path_y = [p[1]+0.5 for p in path] + ax.plot(path_y, path_x, color=path_color, linewidth=3, alpha=0.5, linestyle='dashed') # path plot + plt.xlabel("S",fontweight='bold') + plt.ylabel("T",fontweight='bold') + + +def check_alignment_clusters(n_clusters , cluster_ids, alignments, n_cols = 5, figsize= (10,6)): + + clusters = [] + S_len = alignments[0].fwd_DP.S_len + T_len = alignments[0].fwd_DP.T_len + + unique_cluster_ids = np.unique(cluster_ids) + n_rows = int(np.ceil(n_clusters/n_cols)) + + + fig, axs = plt.subplots(n_rows,n_cols, figsize = (20,n_rows*3)) # custom -- only for 20 clusters -- TODO change later + axs = axs.flatten() + i = 0 + k=1 + for cluster_id in range(n_clusters): + paths = [] + cluster_genes = [] + cluster_alignments = np.asarray(alignments)[cluster_ids == unique_cluster_ids[cluster_id]] + for a in cluster_alignments: + paths.append(a.fwd_DP.alignment_path) + #print(a.gene) + cluster_genes.append(a.gene);# cluster_genes.append(a.gene) + clusters.append(list(np.unique(cluster_genes)) ) + + plot_different_alignments(paths, S_len, T_len, axs[cluster_id]) + axs[cluster_id].set_title('Cluster-'+str(i) + ' | '+str(len(cluster_alignments))) + + i=i+1 + k=k+1 + + fig.tight_layout() + n = n_cols * n_rows + i = 1 + while(k<=n): + axs.flat[-1*i].set_visible(False) + k = k+1 + i=i+1 + + return clusters + + +# input: log1p gene expression vectors +def compute_KLDivBasedDist(x,y): + + # convert to probabilities + x = x.numpy() + y = y.numpy() + # convering backto counts+1 + x = np.exp(x) + y = np.exp(y) + x = x/np.sum(x) + y = y/np.sum(y) + + sum_term = 0.0 + for i in range(len(x)): + sum_term += x[i]*(np.log(x[i]) - np.log(y[i])) + + return sum_term + + \ No newline at end of file diff --git a/genes2genes/VisualUtils.py b/genes2genes/VisualUtils.py index 2458c1a..7e55a0e 100644 --- a/genes2genes/VisualUtils.py +++ b/genes2genes/VisualUtils.py @@ -1,14 +1,8 @@ import pandas as pd import seaborn as sb import matplotlib.pyplot as plt -import anndata import numpy as np -from adjustText import adjust_text -from mpl_toolkits.axes_grid1.inset_locator import inset_axes from scipy.stats import zscore -import colorcet as cc -from optbinning import ContinuousOptimalBinning -from scipy.stats import gaussian_kde import matplotlib.colors as mcolors import matplotlib import matplotlib.patches as mpatches @@ -20,185 +14,30 @@ vega_20 = ['#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c', '#98df8a', '#d62728', '#ff9896', '#9467bd', '#c5b0d5', '#8c564b', '#c49c94', '#e377c2', '#f7b6d2', '#7f7f7f', '#c7c7c7', '#bcbd22', '#dbdb8d', '#17becf', '#9edae5',] - -class VisualUtils(): - - def __init__(self, adata_ref, adata_query, cell_type_colname, S_len, T_len, titleS = 'Reference', titleT = 'Query', mode='comp', write_file=False, optimal_binning=True, PLOT=True): - self.write_file = write_file - if(mode=='comp'): - self.titleS = titleS - self.titleT = titleT - - n_points = S_len - while(True): - # later to replace with a better optimal binning that gives exact number we request - print('# trying max n points for optimal binning =', n_points) - adata_ref, bm1 = self.pseudotime2bin_celltypes(adata_ref, n_points, optimal_binning=optimal_binning) - adata_query, bm2 = self.pseudotime2bin_celltypes(adata_query, n_points, optimal_binning=optimal_binning) - - if(not (len(bm1) == len(bm2))): - n_points=n_points-1 - if(n_points<=5): - print('Consider equal length binning') - break - else: - print('====================================================') - print('Optimal equal number of bins for R and Q = ',len(bm1)) - break - - if(PLOT): - plt.subplots(1,2, figsize=(10,3)) - x = list(adata_ref.obs.time) - plt.subplot(1,2,1) - sb.kdeplot(list(adata_ref.obs.time), color='ForestGreen' , fill=True) - for s in bm1: - plt.axvline(x=s, color='ForestGreen') - x = list(adata_query.obs.time) - plt.subplot(1,2,2) - sb.kdeplot(list(adata_query.obs.time),color='midnightblue', fill=True) - for s in bm2: - plt.axvline(x=s, color='midnightblue') - - meta1 = self.plot_cell_type_proportions(adata_ref, cell_type_colname, 'bin_ids',None,'tab20') - meta2 = self.plot_cell_type_proportions(adata_query, cell_type_colname, 'bin_ids',None,'tab20') - if(not optimal_binning): - meta1 = self.simple_interpolate(meta1,S_len) - meta2 = self.simple_interpolate(meta2,T_len) - # meta1.loc[1] = meta1.loc[0] + meta1.loc[1] - # meta2.loc[1] = meta2.loc[0] + meta2.loc[1] - # meta1.loc[0] = np.repeat(0.0,len(np.unique(adata_ref.obs[cell_type_colname])) ) - # meta2.loc[0] = np.repeat(0.0,len(np.unique(adata_query.obs[cell_type_colname]))) - - temp1 = pd.Series(np.repeat(0.0,len(np.unique(adata_ref.obs[cell_type_colname])) )) - temp1.index = meta1.columns - meta1 = pd.concat([pd.DataFrame(temp1).transpose(),meta1.loc[:]]).reset_index(drop=True) - - temp2 = pd.Series(np.repeat(0.0,len(np.unique(adata_query.obs[cell_type_colname])) )) - temp2.index = meta2.columns - meta2 = pd.concat([pd.DataFrame(temp2).transpose(),meta2.loc[:]]).reset_index(drop=True) - - self.metaS = meta1 - self.metaT = meta2 - - self.optimal_bining_S = bm1 - self.optimal_bining_T = bm2 - - - def get_optimal_binning(self, time_var_arr, n_points): - x = time_var_arr - optb = ContinuousOptimalBinning(name='pseudotime', dtype="numerical", max_n_bins=n_points) - # this pacakge uses mixed integer programming based optimization to determine an optimal binning - #kde = gaussian_kde(x); #density_values = kde(x); #optb.fit(x, density_values) - optb.fit(x, x) - #sb.kdeplot(x, fill=True) - #for s in optb.splits: - # plt.axvline(x=s) - #print(len(optb.splits)) - return optb.splits - - - # annotates cells with their respective bins based on interpolated pseudotime points - def pseudotime2bin_celltypes(self, adata, n_points, optimal_binning = True): - - adata.obs['bin_ids'] = np.repeat(-1,adata.shape[0]) - if(optimal_binning): - bin_margins = self.get_optimal_binning(np.asarray(adata.obs.time) , n_points) - else: - #bin_margins = np.linspace(0,1,n_points+1) - bin_margins = np.linspace(0,1,n_points)#[1:-1] - #print('computed the margins for ' + str(len(bin_margins)) + ' bins') - #print(bin_margins) - bin_ids = [] - k = 0 - for i in range(len(bin_margins)): - - if(i==0): - logic = np.logical_and(adata.obs.time >= 0, adata.obs.time < bin_margins[i+1]) - #print('i==0', adata[logic].shape[0]) - elif(i==len(bin_margins)-1): - logic = np.logical_and(adata.obs.time >= bin_margins[i], adata.obs.time <= 1.0) - else: - logic = np.logical_and(adata.obs.time >= bin_margins[i], adata.obs.time < bin_margins[i+1]) - adata.obs['bin_ids'][logic] = i - return adata, bin_margins - - # for plotting or getting celltype freq counts per bin - def plot_cell_type_proportions(self, adata, cell_type_colname, covariate_colname, sorter, color_scheme_name="Spectral", plot=False): - meta = pd.DataFrame(np.vstack((adata.obs[cell_type_colname],adata.obs[covariate_colname])).transpose(),columns=[cell_type_colname,covariate_colname]) - - meta['COUNTER'] = 1 - meta = meta.groupby([covariate_colname,cell_type_colname])['COUNTER'].sum().unstack() - meta = meta.fillna(0) - #meta = meta.transpose() - #meta = meta.sort_values(by=covariate_colname, key=sorter) - if(plot): - p = meta.apply(lambda x: x*100/sum(x), axis=1).plot(kind='bar',stacked=True, color=sb.color_palette(color_scheme_name, 20), grid = False) - #p.legend(labels = ['not infected','infected'], loc='center left', bbox_to_anchor=(1.25, 0.5), ncol=1) - p.legend(loc='center left', bbox_to_anchor=(1.25, 0.5), ncol=1) - return meta - - def simple_interpolate(self,meta, n_points): - for i in range(n_points): - #print(k) - if(i not in meta.index): - k=i - while(k not in meta.index): - k = k-1 - _temp = meta.loc[k].copy() - _temp.name = i - meta = meta.append(_temp) - meta = meta.sort_index() - return meta - - - def get_celltype_composition_across_time(adata_ref, adata_query, n_points, ANNOTATION_COLNAME, optimal_binning=True - , order_S_legend=None, order_T_legend=None, PLOT=True, ref_cmap = None, query_cmap =None, plot_celltype_counts=False): - - a = sb.color_palette(cc.glasbey_hv, n_colors=3) - vega_20 = [ - '#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c', '#98df8a', '#d62728', - '#ff9896', '#9467bd', '#c5b0d5', '#8c564b', '#c49c94', '#e377c2', '#f7b6d2', - '#7f7f7f', '#c7c7c7', '#bcbd22', '#dbdb8d', '#17becf', '#9edae5', - ] - - if(ref_cmap==None): - ref_cmap = vega_20 - if(query_cmap==None): - query_cmap = vega_20 - - vs = VisualUtils(adata_ref, adata_query, cell_type_colname = ANNOTATION_COLNAME, - S_len=n_points, T_len=n_points, titleS='Reference', titleT='Query', - write_file=False, optimal_binning=optimal_binning, PLOT=PLOT) - - if(plot_celltype_counts): - - ax = vs.metaS.apply(lambda x: x, axis=1).plot(kind='bar',stacked=True,color=ref_cmap, grid = False, legend=True, width=0.7,align='edge',figsize=(10,3)) - ax.legend(bbox_to_anchor=(1.1, 1.44)) - if(order_S_legend is not None): - handles, labels = ax.get_legend_handles_labels() - ax.legend(handles=[handles[idx] for idx in order_S_legend],labels=[labels[idx] for idx in order_S_legend],bbox_to_anchor=(1.0, 1.0)) - - ax = vs.metaT.apply(lambda x: x, axis=1).plot(kind='bar',stacked=True,color=query_cmap, grid = False, legend=True, width=0.7,align='edge',figsize=(10,3)) - ax.legend(bbox_to_anchor=(1.1, 1.05)) - if(order_T_legend is not None): - handles, labels = ax.get_legend_handles_labels() - ax.legend(handles=[handles[idx] for idx in order_T_legend],labels=[labels[idx] for idx in order_T_legend],bbox_to_anchor=(1.0, 1.0)) - - vs.metaS.apply(lambda x: x*100/sum(x), axis=1).plot(kind='bar',stacked=True,color=ref_cmap, grid = False, legend=False, width=0.7,align='edge',figsize=(10,1)) - plt.axis('off') - vs.metaT.apply(lambda x: x*100/sum(x), axis=1).plot(kind='bar',stacked=True,color=query_cmap, grid = False, legend=False, width=0.7,align='edge',figsize=(10,1)) - plt.axis('off') +def plot_celltype_barplot(adata, n_bins, annotation_colname, joint_cmap, plot_cell_counts = False, legend=False): - return vs - + if(plot_cell_counts): + normalize = False + else: + normalize = 'columns' + + vec = adata.obs.time + bin_edges = np.linspace(0, 1, num=n_bins) + bin_ids = np.digitize(vec, bin_edges, right=False) # use right=True if we don't need 1.0 cell to always be a single last bin + adata.obs['bin_ids'] = bin_ids + tmp = pd.crosstab(adata.obs[annotation_colname],adata.obs['bin_ids'], normalize=normalize).T.plot(kind='bar', stacked=True, + color=joint_cmap,grid = False, legend=False, width=0.7,align='edge',figsize=(9,1)) + if(legend): + tmp.legend(title='Cell-type annotations', bbox_to_anchor=(1.5, 1.02),loc='upper right') + plt.axis('off') - def visualize_gene_alignment(self, alignment, cmap=None): +def visualize_gene_alignment(alignment, adata_ref, adata_query, annotation_colname, cmap=None): if(isinstance(alignment,Main.AligmentObj )): alignment = alignment.alignment_str - matched_points_S, matched_points_T = self.get_matched_time_points(alignment) + matched_points_S, matched_points_T = get_matched_time_points(alignment) fig = plt.figure(figsize=(4,2)) heights = [1, 1, 1] @@ -211,8 +50,13 @@ def visualize_gene_alignment(self, alignment, cmap=None): cmap = vega_20 plt.subplot(3,1,1) - self.metaS.apply(lambda x: x*100/sum(x), axis=1).plot(kind='bar',stacked=True,color=cmap, grid = False, legend=False, width=0.7, ax=ax1) - self.metaT.apply(lambda x: x*100/sum(x), axis=1).plot(kind='bar',stacked=True,color=cmap, grid = False, legend=False, width=0.7,ax=ax3) + + metaS = pd.crosstab(adata_ref.obs.bin_ids, adata_ref.obs[annotation_colname]) + metaS.apply(lambda x: x*100/sum(x), axis=1).plot(kind='bar',stacked=True,color=cmap, grid = False, legend=False, width=0.7, ax=ax1) + + metaT = pd.crosstab(adata_query.obs.bin_ids, adata_query.obs[annotation_colname]) + metaT.apply(lambda x: x*100/sum(x), axis=1).plot(kind='bar',stacked=True,color=cmap, grid = False, legend=False, width=0.7,ax=ax3) + plt.subplot(3,1,2) for i in range(len(matched_points_S)): S_timebin = matched_points_S[i] @@ -233,135 +77,13 @@ def set_grid_off(ax): ax.grid(False) set_grid_off(ax1); set_grid_off(ax2); set_grid_off(ax3); - ax1.set_ylabel('Ref', rotation=0) - ax3.set_ylabel('Query',rotation=0) + ax1.set_ylabel('Ref', rotation=90) + ax3.set_ylabel('Query',rotation=90) fig.text(0.5, -0.05, 'Pseudotime bins with cell type composition', ha='center') ax1.set_title('Alignment w.r.t cell type compositions') - - - def plot_comprehensive_alignment_landscape_plot(self, aligner, gene = None, order_S_legend=None, order_T_legend=None, paths_to_display=None, cmap='viridis'): - - if(gene!=None): - al_obj = aligner.results_map[gene] - if(paths_to_display==None): - al_obj.landscape_obj.alignment_path.append([0,0]) - paths_to_display=[al_obj.landscape_obj.alignment_path] - match_points_S = np.unique(al_obj.match_points_S) + 1 - match_points_T = np.unique(al_obj.match_points_T) + 1 - landscape_mat = pd.DataFrame(al_obj.landscape_obj.L_matrix) - else: - al_str, path = self.compute_overall_alignment(aligner) - match_points_S, match_points_T = self.get_matched_time_points(al_str) - match_points_S = np.unique(match_points_S) + 1 - match_points_T = np.unique(match_points_T) + 1 - if(paths_to_display==None): - paths_to_display=[path] - landscape_mat = aligner.get_pairwise_match_count_mat() - - nS_points=len(aligner.results[0].S.time_points) - nT_points=len(aligner.results[0].T.time_points) - - fig, ((ax3, ax1, cbar_ax), (dummy_ax1, ax2, dummy_ax2)) = plt.subplots(nrows=2, ncols=3, figsize=(9*2, 6*2), sharex='col', sharey='row', - gridspec_kw={'height_ratios': [2,1], 'width_ratios': [0.5, 1, 0.5]}) - g = sb.heatmap(landscape_mat.transpose(), xticklabels=True, yticklabels=True, cmap=cmap, cbar_ax=cbar_ax, ax=ax1, cbar=False) - g.tick_params( labelsize=10, labelbottom = True, bottom=True, top = False)#, labeltop=True) - ax1.set_xlabel('pseudotime') - x_ticks = np.asarray(range(0,nS_points+1)) - y_ticks = np.asarray(range(0,nT_points+1)) - - # first barplot (Reference) --- left horizontal barplot - p= self.metaS.apply(lambda x: x*100/sum(x), axis=1).plot(kind='barh',stacked=True, title=self.titleS ,color=sb.color_palette('deep', 20), grid = False, ax=ax3,legend=False, width=0.7,align='edge') - for patch in p.patches: - if(patch.get_y() in match_points_S): - p.annotate(str('M'), (100, patch.get_y() * 1.005) ) - handles, labels = ax3.get_legend_handles_labels() - for spine in p.spines: - p.spines[spine].set_visible(False) - if(order_S_legend!=None): - dummy_ax1.legend(handles=[handles[idx] for idx in order_S_legend],labels=[labels[idx] for idx in order_S_legend]) - else: - dummy_ax1.legend(handles,labels) - # second barplot (Query) --- bottom barplot - p = self.metaT.apply(lambda x: x*100/sum(x), axis=1).plot(kind='bar',stacked=True, title=self.titleT, color=sb.color_palette('deep', 20), grid = False, ax=ax2, legend=False,width=0.7,align='edge') - for patch in p.patches: - # print(patch.get_height()) - if(patch.get_x() in match_points_T): - p.annotate(str('M'), (patch.get_x() * 1.005, 100) ) - handles, labels = ax2.get_legend_handles_labels() - for spine in p.spines: - p.spines[spine].set_visible(False) - if(order_T_legend!=None): - dummy_ax2.legend(handles=[handles[idx] for idx in order_T_legend],labels=[labels[idx] for idx in order_T_legend],loc='upper left') - else: - dummy_ax2.legend(handles,labels, loc='upper left') - dummy_ax1.axis('off') - dummy_ax2.axis('off') - cbar_ax.axis('off') - if(paths_to_display!=None): # for max 2 paths - styles = ['solid', 'dashed']; i = 0 - for path in paths_to_display: - path_x = [p[0]+0.5 for p in path] - path_y = [p[1]+0.5 for p in path] - ax1.plot(path_x, path_y, color='black', linewidth=9, alpha=1.0, linestyle=styles[i]) # path plot - i=i+1 - ax1.axis(ymin=0, ymax=nS_points+1, xmin=0, xmax=nT_points+1) - plt.tight_layout() - # plt.show() - - if(self.write_file): - plt.savefig('comprehensive_alignment_landscape_plot.pdf',bbox_inches = 'tight') - - def plot_match_stat_across_all_alignments(self, aligner): - - nS_points = len(aligner.results[0].S.time_points) - nT_points = len(aligner.results[0].T.time_points) - S_line = np.repeat(0, nS_points+1) - T_line = np.repeat(0, nT_points+1) - - for a in aligner.results: - matchS = a.match_points_S+1 - matchT = a.match_points_T+1 - for i in range(len(matchS)): - S_line[matchS[i]] = S_line[matchS[i]] + 1 - T_line[matchT[i]] = T_line[matchT[i]] + 1 - - S_line = S_line/np.sum(S_line)*100 - T_line = T_line/np.sum(T_line)*100 - - plt.subplots(2,2,figsize=(17,6)) - plt.subplot(2,2,1) - sb.barplot(np.asarray(range(nS_points+1)) , np.cumsum(S_line), color='midnightblue') - plt.ylabel('cumulative match percentage') - plt.subplot(2,2,3) - sb.barplot(np.asarray(range(nT_points+1)) , np.cumsum(T_line), color='forestgreen') - plt.ylabel('cumulative match percentage') - plt.xlabel('pseudotime bin') - plt.subplot(2,2,2) - sb.barplot(np.asarray(range(nS_points+1)) , S_line, color='midnightblue') - plt.ylabel('match percentage') - plt.subplot(2,2,4) - sb.barplot(np.asarray(range(nT_points+1)) , T_line, color='forestgreen') - plt.ylabel('match percentage') - plt.xlabel('pseudotime bin') - # plt.show() - - if(self.write_file): - plt.savefig('match_stat_plot_across_all_alignments.pdf',bbox_inches = 'tight') - -# def plot_alignment_path_on_given_matrix(mat, paths, cmap='viridis',annot=True): -# fig,ax = plt.subplots(1,1, figsize=(7,7)) -# sb.heatmap(mat, square=True, cmap='viridis', ax=ax, cbar=True, annot=annot,fmt='g') -# for path in paths: -# path_x = [p[0]+0.5 for p in path] -# path_y = [p[1]+0.5 for p in path] -# ax.plot(path_y, path_x, color='black', linewidth=6) # path plot -# plt.xlabel("PAM (Reference)",fontweight='bold') -# plt.ylabel("LPS (Query)",fontweight='bold') -# ax.xaxis.tick_top() # x axis on top -# ax.xaxis.set_label_position('top') - - def get_matched_time_points(self, alignment_str): + +def get_matched_time_points(alignment_str): j = 0; i = 0 FLAG = False matched_points_S = [] @@ -414,160 +136,7 @@ def get_matched_time_points(self, alignment_str): prev_c = c assert(len(matched_points_S) == len(matched_points_T)) return matched_points_S, matched_points_T - - # computes simple DP alignment (using match score = pairwise total match count frequency) across all gene-level alignments - # gap score is taken as penalising 8% of the total number of tested genes => so that it controls the matching based on the number of - # total matches (i.e. it controls the degree of significant matching) - def compute_overall_alignment(self, aligner, plot=False, GAP_SCORE = None): - - if(GAP_SCORE==None): - GAP_SCORE= -len(aligner.gene_list)*0.08 - - mat = aligner.get_pairwise_match_count_mat() - if(plot): - sb.heatmap(mat, cmap='viridis', square=True) - - # DP matrix initialisation - opt_cost_M = [] - for i in range(mat.shape[0]): - opt_cost_M.append(np.repeat(0.0, mat.shape[1])) - opt_cost_M = np.matrix(opt_cost_M) - # backtracker matrix initialisation - tracker_M = [] - for i in range(mat.shape[0]): - tracker_M.append(np.repeat(0.0, mat.shape[1])) - tracker_M = np.matrix(tracker_M) - for i in range(1,mat.shape[0]): - tracker_M[i,0] = 2 - for j in range(1,mat.shape[1]): - tracker_M[0,j] = 1 - - # running DP - for j in range(1,mat.shape[1]): - for i in range(1,mat.shape[0]): - m_dir = opt_cost_M[i-1,j-1] + mat.loc[i,j] - d_dir = opt_cost_M[i,j-1] + GAP_SCORE - i_dir = opt_cost_M[i-1,j] + GAP_SCORE - # w_dir = opt_cost_M[i,j-1] + mat.loc[i,j] - # v_dir = opt_cost_M[i-1,j] + mat.loc[i,j] - - a = max([m_dir, d_dir, i_dir]) # ,w_dir, v_dir]) - if(a==d_dir): - opt = d_dir - dir_tracker = 1 - elif(a==i_dir): - opt =i_dir - dir_tracker = 2 - elif(a==m_dir): - opt = m_dir - dir_tracker = 0 - # elif(a==w_dir): - # opt = w_dir - # dir_tracker = 3 - # elif(a==v_dir): - # opt = v_dir - # dir_tracker = 4 - #if(i==1 and j==4): - # print(a, opt_cost_M[i-1,j-1], mat.loc[i,j], opt_cost_M[i,j-1] ,opt_cost_M[i-1,j] ) - - opt_cost_M[i,j] = opt - tracker_M[i,j] = dir_tracker - # print(tracker_M) - - # backtracking - i = mat.shape[0]-1 - j = mat.shape[1]-1 - alignment_str = '' - tracked_path = [] - while(True): - # print([i,j]) - tracked_path.append([i,j]) - if(tracker_M[i,j]==0): - alignment_str = 'M' + alignment_str - i = i-1 - j = j-1 - elif(tracker_M[i,j]==1): - alignment_str = 'D' + alignment_str - j = j-1 - elif(tracker_M[i,j]==2): - alignment_str = 'I' + alignment_str - i = i-1 - # elif(tracker_M[i,j]==3): - # alignment_str = 'W' + alignment_str - # j = j-1 - # elif(tracker_M[i,j]==4): - # alignment_str = 'V' + alignment_str - # i = i-1 - if(i==0 and j==0) : - break - tracked_path.append([0,0]) - return alignment_str, tracked_path#, opt_cost_M, tracker_M - - - - - -def plot_heatmaps(mat_ref,mat_query,pathway_name, IGS, cluster=False): - - if(cluster): - g=sb.clustermap(mat_ref, figsize=(0.4,0.4), col_cluster=False) - gene_order = g.dendrogram_row.reordered_ind - df = pd.DataFrame(g.data2d) - df.index = IGS.SETS[pathway_name][gene_order] - else: - df=mat_ref - plt.subplots(1,2,figsize=(8,12)) - max_val = np.max([np.max(mat_ref),np.max(mat_query)]) - min_val = np.min([np.min(mat_ref),np.min(mat_query)]) - plt.subplot(1,2,1) - ax=sb.heatmap(df, vmax=max_val,vmin=min_val, cbar_kws = dict(use_gridspec=False,location="top")) - plt.title('Reference') - ax.yaxis.set_label_position("left") - for tick in ax.get_yticklabels(): - tick.set_rotation(360) - plt.subplot(1,2,2) - if(cluster): - mat_query = mat_query.loc[IGS.SETS[pathway_name][gene_order]] - ax = sb.heatmap(mat_query,vmax=max_val, vmin=min_val,cbar_kws = dict(use_gridspec=False,location="top"), yticklabels=False) - plt.title('Query') - #plt.show() - - -# smoothened/interpolated mean trends + Z normalisation -def plot_mean_trend_heatmaps(pathway_name, IGS, aligner, cluster=False): - S_mat = [] - T_mat = [] - S_zmat = [] - T_zmat = [] - - for gene in IGS.SETS[pathway_name]: - - fS = pd.DataFrame([aligner.results_map[gene].S.mean_trend, np.repeat('Ref', len(aligner.results_map[gene].S.mean_trend))]).transpose() - fT = pd.DataFrame([aligner.results_map[gene].T.mean_trend, np.repeat('ATO', len(aligner.results_map[gene].T.mean_trend))]).transpose() - f = pd.concat([fS,fT]) - f[0] = np.asarray(f[0], dtype=np.float64) - from scipy.stats import zscore - f['z_normalised'] = zscore(f[0]) - S_mat.append(np.asarray(f[f[1]=='Ref'][0])) - T_mat.append(np.asarray(f[f[1]=='ATO'][0])) - S_zmat.append(np.asarray(f[f[1]=='Ref']['z_normalised'])) - T_zmat.append(np.asarray(f[f[1]=='ATO']['z_normalised'])) - S_mat = pd.DataFrame(S_mat) - T_mat = pd.DataFrame(T_mat) - S_zmat = pd.DataFrame(S_zmat) - T_zmat = pd.DataFrame(T_zmat) - - S_mat.index = IGS.SETS[pathway_name] - T_mat.index = IGS.SETS[pathway_name] - S_zmat.index = IGS.SETS[pathway_name] - T_zmat.index = IGS.SETS[pathway_name] - - print('Interpolated mean trends') - plot_heatmaps(S_mat, T_mat, pathway_name, IGS, cluster=cluster) - print('Z-normalised Interpolated mean trends') - return plot_heatmaps(S_zmat, T_zmat, pathway_name, IGS, cluster=cluster) - def plotTimeSeries(gene, aligner, plot_cells = False, plot_mean_trend= False): @@ -581,10 +150,14 @@ def plotTimeSeries(gene, aligner, plot_cells = False, plot_mean_trend= False): g = sb.scatterplot(x=aligner.query_time, y=np.asarray(aligner.query_mat[al_obj.gene]), alpha=0.7, color = 'midnightblue', legend=False,linewidth=0.3, s=20) plt.title('Query') plt.ylim([min_val-0.5,max_val+0.5]) + plt.xlabel('Pseudotime') + plt.ylabel('Gene expression') plt.subplot(1,3,3) g = sb.scatterplot(x=aligner.ref_time, y=np.asarray(aligner.ref_mat[al_obj.gene]), color = 'forestgreen', alpha=0.7, legend=False,linewidth=0.3,s=20 ) plt.title('Reference') plt.ylim([min_val-0.5,max_val+0.5]) + plt.xlabel('Pseudotime') + plt.ylabel('Gene expression') def plotTimeSeriesAlignment(gene, aligner): @@ -593,7 +166,7 @@ def plotTimeSeriesAlignment(gene, aligner): sb.scatterplot(x=al_obj.T.X, y=al_obj.T.Y, color = 'midnightblue' ,alpha=0.05, legend=False)#, label ='Query') al_obj.plot_mean_trends() plt.title(al_obj.gene) - plt.xlabel('pseudotime') + plt.xlabel('Pseudotime') plt.ylabel('Gene expression') plt.axis('off') @@ -642,14 +215,6 @@ def plot_alignment_path_on_given_matrix(mat, paths, cmap='viridis'): ax.xaxis.tick_top() # x axis on top ax.xaxis.set_label_position('top') -def plot_alignment_clustermap(): - - p = sb.clustermap(aligner.DistMat,cmap='viridis', figsize=(10,10)) - p.ax_heatmap.set_xticklabels(p.ax_heatmap.get_xmajorticklabels(), fontsize = 12) - p.ax_heatmap.set_yticklabels(p.ax_heatmap.get_ymajorticklabels(), fontsize = 12) - p.ax_row_dendrogram.set_visible(False) - - def plot_distmap_with_clusters(aligner, cmap=None, vmin = 0.0, vmax = 1.0, genes2highlight=None): godsnot_64 = [ @@ -731,8 +296,6 @@ def plot_distmap_with_clusters(aligner, cmap=None, vmin = 0.0, vmax = 1.0, genes ax.axis('off'); ax.set_xticks([]); ax.set_yticks([]); - - def resolve(regions): for i in range(len(regions)): x = list(regions[i]); x[1] = x[1]-1; regions[i] = x @@ -786,10 +349,71 @@ def plot_any_legend(text2color_map): ax.legend(handles=legend_patches, loc='center') ax.axis('off'); ax.set_xticks([]); ax.set_yticks([]); -def show_gene_alignment(gene, aligner, vs, cmap=None): - vs.visualize_gene_alignment(aligner.results_map[gene].alignment_str, cmap=cmap) +def show_gene_alignment(gene, aligner, adata_ref, adata_query, annotation_colname, cmap=None): + visualize_gene_alignment(aligner.results_map[gene].alignment_str, adata_ref, adata_query, annotation_colname, cmap=cmap) plotTimeSeries(gene, aligner, plot_cells=True) aligner.results_map[gene].alignment_str print(color_al_str(aligner.results_map[gene].alignment_str)) print('Optimal alignment cost:', round(aligner.results_map[gene].fwd_DP.opt_cost,3),'nits') print('Alignment similarity percentage:', aligner.results_map[gene].match_percentage,'%' ) + + +# smoothened/interpolated mean trends + Z normalisation +def plot_mean_trend_heatmaps(aligner, GENE_LIST, pathway_name, cluster=False, FIGSIZE=(14,7)): + S_mat = [] + T_mat = [] + S_zmat = [] + T_zmat = [] + + for gene in GENE_LIST: + + fS = pd.DataFrame([aligner.results_map[gene].S.mean_trend, np.repeat('Ref', len(aligner.results_map[gene].S.mean_trend))]).transpose() + fT = pd.DataFrame([aligner.results_map[gene].T.mean_trend, np.repeat('Organoid', len(aligner.results_map[gene].T.mean_trend))]).transpose() + f = pd.concat([fS,fT]) + f[0] = np.asarray(f[0], dtype=np.float64) + f['z_normalised'] = zscore(f[0]) + S_mat.append(np.asarray(f[f[1]=='Ref'][0])) + T_mat.append(np.asarray(f[f[1]=='Organoid'][0])) + S_zmat.append(np.asarray(f[f[1]=='Ref']['z_normalised'])) + T_zmat.append(np.asarray(f[f[1]=='Organoid']['z_normalised'])) + S_mat = pd.DataFrame(S_mat) + T_mat = pd.DataFrame(T_mat) + S_zmat = pd.DataFrame(S_zmat) + T_zmat = pd.DataFrame(T_zmat) + + S_mat.index = GENE_LIST + T_mat.index = GENE_LIST + S_zmat.index = GENE_LIST + T_zmat.index = GENE_LIST + + print('- Plotting z-normalised interpolated mean trends') + plot_heatmaps(S_zmat, T_zmat, GENE_LIST, pathway_name,cluster=cluster, FIGSIZE=FIGSIZE) + +def plot_heatmaps(mat_ref,mat_query,GENE_LIST, pathway_name, cluster=False, FIGSIZE=(14,7), write_file=False): + + if(cluster): + g=sb.clustermap(mat_ref, figsize=(0.4,0.4), col_cluster=False, cbar_pos=None) + gene_order = g.dendrogram_row.reordered_ind + df = pd.DataFrame(g.data2d) + df.index = GENE_LIST[gene_order] + else: + df=mat_ref + plt.close() + + plt.subplots(1,2) #8,14/7 ****************************************************** + max_val = np.max([np.max(mat_ref),np.max(mat_query)]) + min_val = np.min([np.min(mat_ref),np.min(mat_query)]) + plt.subplot(1,2,1) + ax=sb.heatmap(df, vmax=max_val,vmin=min_val, cbar_kws = dict(use_gridspec=False,location="top")) + plt.title('Reference') + ax.yaxis.set_label_position("left") + for tick in ax.get_yticklabels(): + tick.set_rotation(360) + plt.subplot(1,2,2) + if(cluster): + mat_query = mat_query.loc[GENE_LIST[gene_order]] + ax = sb.heatmap(mat_query,vmax=max_val, vmin=min_val,cbar_kws = dict(use_gridspec=False,location="top"), xticklabels=True, yticklabels=False) + plt.title('Query') + if(write_file): + plt.savefig(pathway_name+'_heatmap.png', bbox_inches='tight') + plt.show() \ No newline at end of file diff --git a/genes2genes/__init__.py b/genes2genes/__init__.py index 79c9e1d..7f0c4b2 100644 --- a/genes2genes/__init__.py +++ b/genes2genes/__init__.py @@ -1,14 +1,12 @@ -"""Aligning transcriptomic trajectories of single-cell reference and query systems""" -__version__ = "0.1.0" +"""A tool for aligning gene expression trajectories of single-cell reference and query systems""" +__version__ = "0.2.0" from . import AlignmentDistMan -from . import BatchAnalyser from . import ClusterUtils from . import Main -from . import MVG from . import MyFunctions -from . import OrgAlign -from . import PathwayAnalyserV2 -from . import SimulationExperimentAnalyser +from . import OrgAlign +from . import PathwayAnalyser from . import TimeSeriesPreprocessor -from . import VisualUtils +from . import Utils +from . import VisualUtils \ No newline at end of file diff --git a/genes2genes/__pycache__/AlignmentDistMan.cpython-38.pyc b/genes2genes/__pycache__/AlignmentDistMan.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fe54a43deacb7478099ea671643eacfe0c8aee98 GIT binary patch literal 5917 zcmb7I&2JmW72lct;PO+HBJ0C48pm;)(lBgCO_KmsTPJ1hDvgw&a%5oJbhF~FC{pB- znqAtGu)IXM6lj3_0Xa~Y0x1gg)W)6t6`yTswb>^afwoy=(ufcIE@C_Ob_N|U4k+!nqSy-?L4%~GKaR{@gy;%LpgGLf zFmmQ|^rQQ_Zy+{&6Fpe6=spO#{d-#*@9eZ2H1+XEX$n6;h;D);EYVyo(MBv`r|f4O zISP7gBLrZ z7$egOLYYSe#Lak58qFXKYPvL{ptCD-=wFmUKGVbuqJP(K@AbOD?RM|ckr&?H3?921 zo>;xJ`pWJ5s28m&dYjsaRu#SdMyJ&byFnP&v1}V&c&mRTZP#sut=M%dXfv`wG-k0m zR$#~TQ|oeTEY_fv9e)-Qml4zgjgY_!|Gz)}>GBXeGt?74(oVIy_WsSG5wn=L^#sdG zd-bt?!hCa4!{QoiS^~+$Sk_+ChWbGrwTx43h=tE;&kv2XJkeLQ#H1PrA0p-PiyiR#;w#(r=Vi6R7~DS~EjD0925(DynW zX*7EMqnagXJJg=b*(t3_HN}~U2Hh!Tg6g0_r#dC5)Jy3}N~+Wx?e086^i2?reT{ML zso$3tH&}^XVHIxk5)|S1g>!1~EH&P&nWRUd*9}}(7F@U6^9LPDmt6N?;C0dxF^gW+ zAf^UJsw~v#Or%N(kpW_J+Y-b%XOP68p!DrkB$VE0+0I`fy%}*BDYWC16=A59nkPLe zk=CprmFUm{zWEnaR$omQD@aHl_F;g%X3Z4OBQNqG4p}dZ(aCkACMCpGDpEt9j9XUS zMz7l+#DSYhU4u}h09GyKWo9$+5_nIU&SWZkD2*}PHZY_O1ZnFh`jCUeG7eK5J(FP* z86%S+PAX%@#)wczU&0cow2_#UUc_Rp0GBZNCuGCWF&dPhfvVXGT72OO*C8=VjV-a3S&5!P3Ff zc`3?c1&mFpDHsq0UY~Iop%*p-Wso%^wK}6Of=80IWDW)jz>Y4?dPX67T1XK8Ei@I8 z&1=k1zbf{<#5kL01`Nqy;x&|=n>X!$4nfU(9xI~eeFmWXQ}Y>X@HYS)6rI+DW{jEs zh7^4=d#F5kA#^~gze#HXlK_0;;9JyI0GztEriHJk5^23m3XqwW(ny|eAuVItC+wh# z9@##R{`ic(CG`h*X4IpC>Fd*dsqvsE1?ZEJuc_IOYx+8j7PZuFrI2k(wjtTNWOd0N zO17;wPpo4706)xg@<7nw&;>L~hJQzV8$}m)ZPASbDCQt+9Sj1Td|8d?O~^*y0HNKh z!fvbh&9iI#3i3xS1|4`8g^6uOGEe@1 z+kuJ7Vpi^Uy?%|WW5+2f(LlJpo%V$VR2cp~n$BDW8_Qv{u&SRzOTT(*$DpB$n+WMIF+38Q^vG|x!9VPR(NnsC^?o>oLxXXG#5IiFT+vY z*Tk>l+{jkA3T`{EQYgz(#4EImlk!5lbPBg4Z2_*s1Ee9X@>AS{H{tOI7-kLJfsGf|dpDBHV<~Pf^$sK|kt5{L6F9g3*BLSQ7hCr&Q)BV%Q0l3mml z`Qmu6{a)v&38z;L^=xc(M)+tbx(cGf=;qjEwg6g%$(6Lymlb^pjwm#YFY)7-&+FL# z(DpeiBG;K5AfiT!N;!S64$@ynrkfpa*_!%xod^G}#~)z_08%{0janDxXSr^j3vm$8q7 zKvd%@4q1}all-!F!p9P8T0A?N8_%p=QYlEl!1!5vbd(%?5kEJYPjWCOTANEr!ClDW z#iW2)E>TDdr!Xh@a0Ml8Lz>L?Qc}#Mi|%4loJbcl=_2;mRCD@QQrLnK`WA9r7)ep( z43)cv1FC=nsko>OOG(MM$W?-DPL-~urK-ftN^DiKofIZ@#wE0ZT=Y?A+?pyORIU{^ z)v?tih1ayd|3MI*r@ja|R#r9L8H_111}UmCg$5j>O?k)Y!G<|q*hi~ zAg!pe7jy%s@@{JD88kS>ds}bNITAGD$XS@mxak1MZA?#U2dz2!h8H^)h&yPBpAcvE z?bd$KY3=oT{vXCT zZv;;c#$;t_$P8iniVvBgomgb*VmCJ17Nl}27pq(@u~qIvm9w+5&s7ami~wdfY9b!y z!^=w66u*Pd1i>w{Q<96UXuq03O z@-09~9sp7Ftwk+X^Em;`1F&uaa`tF`NpYEsU8P70xpGp5$y<=xOA5XP-xcPq;$mn} zJ%o8cJ0mcHPqGl(qdW|qd=8^&;IuE`NU8WXJ#qQPrjbS}Vo@-ZJ) zz%37FuvgV&1~;24!`Y;Yc4x6`m1Gt$Inz|Tg&seTt1(LHaSXL*KZZ|)otpui%x#MA zKn29>MAnFq$0hC&p}P%kx2i}9QMokLQxH8fWp6=F8CD=*JgGM<%eyTB4>^5hti<;jqGyS3U^bN zX96rTubwXYJzxRBi!4Sx3@Jq~K{rD2Lu&96{F0ss{Hc2;J+7I30rMAxofrhUCNhWa zT8$u5m9v3BG!h@6RD2i2$vsYIjNAP<@hDq^V!8&(84};8_Ly)Aje!uj>EY?oDTKjc zoTerBcbzJY7vnnft4%ypc7ha^FFY75_XO5U$wuzZ(b;68-=eF?2O(!j{cv+U`_auK z3xKF9atPewSxV&qItsXO>jdyr6f_m+DTgnx3R~b7_%bz8ujPR^dUUsK%LjG(7kX@-Mt|0q3z zl%s3)J-mcP>8V3o3I%Pn`bW2-z8AN=jyj&wgYPAjrbiZCt<{mI+J4qhq`j8DN_~$k XBw7F=lqY<~0$Y^@+qU!ejJ^CHp4^} zYIb*qYH%_p2gKlnBqE4EdQgPeD2fOH_3FLv_)GUs}+Vg|Oiq1JHEww?1%$6fWD zW(SWc)pWgjr{N$y_t45pySwJ-y4O7KJks8DJx4b?D=3NvFVwdl>eSm?uA`9^m$C-y4}NhZ$QX~{a-Rk9u=G2oX{;DM z(>G2U&p+I=kd8_E=AP}x`le_3mT&J^UaW6>ao^f8LG2xL#o9HmGIr5=3vr$O|@pXKy7bNQI>rO}M7)BLXo%JBLR=*g; z+D@ku4+^g1)w;_y7i~H#ZeTtdSneG`Om{CJapxsN&!E|>58k$liM*}VJ-=1&+_vak zsGX|oJ8!@9_-&6n9ml;>=xr;K7J6GW-7-_h*0+Kqjqcj|rsvc~#w>GRTdU*U7x@~u z2x6Ei(~hp2%$%v`kT*8hHZnNhGS_z_abT`XJ&W_k`Q0(Yx#JxBrf({1H-=S+VJ2gX z+eZd7xmsUa(X|E1fl`euIov5$uAGdEbCr1n5gK@qmy(56Nm54fO_XJK%wCS~>E%%`@2C8XpW_%R$;BYMuAOM6)g?Q~bZU)ILAcda zeR;XrX?j~hvfb>Q!=zSA1HPzXfgN|X+g3V>iLJx}^VuNLt}i)lH%Nq{r>Ph&QyWw! zf~0${?bLNANJjl@vThJtZhApn*E=hYo?u2Y?4uUg=bSA!3EPI zx6#!Y&}dG5ZN2TdLELRLA!%_aK+g^AHSlJoAUqbNDU`J~STZP_eDciM+Sw=G`RJJ^ z-tp8)eUxvHvvNDgMq`9bt_Nl_h&7r@A7h32(f4r#H8*8(i*4CW0 zB}e01IyHB#-fj=)d+)7HmOK|9FMO&FO=Vwlrr#y{ zHiV@Gp;<6mabdNRm++Fx{9UUTS5`0KrCRBJMv2NsA4yy(Txrkn6Vl6X_iU+?^-}#D z`j1gXDSOun860(Ueg+JF6l|M?XvX|}C|Z7wg4(hM&l@j*txX6zvFF5k(N@c-A5s$#J>JI#$vuuEuQ zsxzG$>~6_s&r8jL3e#_8 z{}f#Pb0ERQKA8(FwY*=_7AR>^2l$-Ya#u&gIEgM?u7(k_%@RZ^0|T8iZ3BO}&oc#~ zE0`r%rIa~qmE_7m{PZ(u^~&#jg1>J(@YtAfBGU|w)B7xuIQxLa0T2n-+dlYjPvSyw zJb@hF)#xY`B?$Wqd9<#83fEF}n1 z*Xm7pIhYE{*~GXUi45f_Lohd%{ygB>5B)SoR}z`ZL{cU-B!}`kC8<18(~>G6HNn)z zix^#!qw@-_iGER~RJtjWi04U_P#GfimtrjMm*hFI%crT>1&+$ktU&Yi4~h1` zSk&}N4Ep0LeCr&)l{y3Id91`?0FctIwPT_6V8%x^*KBVZH5dF6YxtACQM=kNf?gx< zO6@{3*N(_K8LcC=tA?pESFd($#7Wmgy8HTG95t>X7h#o4y*cPkqjl7uTQF+Zy=-l? za6P9CXtYC%+jEbEV08oz3P{!WgJ-wzc=X~r90?-6db><(cVF4LxZds(LZiv@vZGN} z9zq58m0z0v%;VqRe(`}7w8!s<4+whLzUtOn%a4Sx1MTW7ov!OO8)bCmx!dXGZpS<3 zHn*M2#BrBtttytjk!fOxOJz5&jfG65f+oi=#n(T6SxjaxTL3~9!PABY-Zq`VD zpFG|5sMs+l4d}F{s(X%Gh8o$M#o>(HSFV@a-3DO5R=K&8Q5kPX?-$CQ1LH4m-vu>S9s?AEkK^L` zR~T#gZQEmLi?eiJxpMuLOqG5Zv!{t6LU)1Ce9uDZ$N9(!79bK@qu6@e3sSmq8g^<8 zwqkRg6BneQ^@R_BAj1^=v85mdMCvd^#&UtuDtOqoHG6Jib zW8P=kG(s?jewyT7kicBoZ`KOqX03r*?m9?h%VH@&Mp!K?0ZNNj5UVD34It7+X$e;d zD~kYCu8V7kIoe_AEI89%>c&KW8- zXLvLCz9JR(ocNSJMluIO?HA-;*!U2D@hr^_{Laj@(VNCdQR+{hGhz0B(a*K!RNBu^ z8}Lo%RYt}VwH@^Jvbk|g)O~hw`^^I#{Nzb!fIY1zk&JS)1AXKSv_uGQ2}^aA3gKfpzdDR2wx$Z;_3_R_b@A&o&CaH!dT%pG+k$aT-{Hirb{!8 z5DA1@#L&vo|AXKj4uu@gVfci?UBq>xnKDET0LT)yrcC`Y^w6N09sKCeGXzBC?tTaa zQUJIL5Cbk1=y(Va!)DZ}A5(}wF@IO&V58{SEtIL4Lfnt?0SL;!#7k)gXCcD`C3p7_ zzE$ZL;hvqOWTyZD_ny*z43U^Lq$fdONci^>LOL6#g^MQ^g$ac_&j!i!P2Gh0e5{g! z?uBQi0TCK>>Mex+?DGh9kuu#_s_P(o5wPQ26vfDHjXWf};ysPU-)O)XWg;uPMDd6Q z8P69H=R}|zu^N2$hpi18P|A|=yn-tDeeVHTzDrhM9lJd+*Y_`G<^)l~W9vL6dWUjx#H5^;01Nz7c5~}X5 zItn4bB|fHUs4p~?w;IIj?e*1qnB#g|5Upe=zM@4U*VS2f05RKanp{Pai)Hh}D;fP; z$ki{9P`>mhKp4dJ#MV|bloQG*@Se?dH6b|{qx;sriPrB!`>*11OCSbpToD%QFf12+ zI9Mu{>t{h4T;kks#+ckA=ILH@`&Xa0lXCb z;2KWHP;rOh=h5mCFelqobHbe{V;Be$B0!Qru{wF+H3+|+UkM}W`=-Av`x&qE#_yv? zmwMWO=0~8yJZkFC;_31<$e1!`2IXC*y={szM=Xu(*do{hSc=jX!pz({01!c-I*v2| zmv2Q-D(xj(DJW&Aba>Y>fl>%*V|g+NRcHHYK&h-k7+S@9c`w7*1VrNVy#hpJ0#_0J zWqo-4SlpCfU~COgYSN!Xt=j>vN*GP9pGTlvHifXD6a9RcE1;$*PX!UJk3h`w-h_-6 z(SR_j=M@nVOaOFc*}Fh4N%aw7@JfD8AXgHQt3+@%0=Y0^K&~9%67Ttqwx8)w3*^F= z0&-2^i%{xM3FKP!v#l8gU?-4kRv~PUu|{qEs4qaSyM2b=GmByu^dDgb_Ms^4Gp|ST z%OH=hn5ZZ|PrUmVhFg93I#r z&!P{I2qGgA(jO%GH4p?N1-8-F>t81k2y`t{)zm=PM+Wv1tNaGZZ<4%BLLS#2CfOvR zBCqBLA|sNgKf(gy8~ss|eLO$kSallb_M~dS{q7G?**yV51x|cJEF<_vo^8-s!8`?v zk#NB{sKWr=0OMe9?TctXrjHWHpq7u}a+g8Cye9k*OT}JfFdS?}Y_em1(0D&Lgb_@8 z5#biWE8+g81=hW#XCq)0M+@w24tA4(XyOWcNYwMN7w}+&ZI8=dTxzizKlQF>o;*1= z8jeCn!qFT80nR8vxJ+0cAgpEU8TbGjmS{F}QRc1^)4zv`VKCNg?4Qr{3C{Y(b$x#c zQZIJg2FlpJwu}9dRY2x&Pj3WR4C;>!j7Es6dkU9JWg-wL*E2r=WxEi;pS^bj2yswA zTdQ?_$*g91uX$a>eZ8>OEQ$qQObv!5(C6rD4(qk?4kwKIS$0!pT&W&_Sp><9G4)M7+Jgoc1Fw@SI*L3Mx)CM zAY;gdBOBV-Lud}gYaIwQV?Puqghc$;b!=!RVWPHy(lI|i1C za~IQ3K|=66A0nZ7`0Utd0qT$bRzi+nW=I=ed7_`;Ze>494G6jV3#(HK=TAkpjOkZB7FEsc<>l4kU|1pHhf%!rIXlA%`zVwHrP+CO|w6E*4VDv z0YX5|EZPtg%+}_Hsd_=!Rr*JZ-q z*Nt-3_M^mYt4*+yZma@Q4L1eB6}1aZz}m63NA8GV4kbW18caDj(h=Af>RRbCmK~$I z7Prcw5k|ce86kFjn5jod#HbXQA`9s+kbI6r?8%c%y`6-rQCCRbOhWY;+M;1lO;dwv zP4X0pOTr*nB^Sb;yO^;^a`&JJQN1oUf?e+^8BLH9B}Bnhp?T>&D4Z-NSx>}=B59VqfjJt04`GljzzF2X+v1` z`*;Q9=M8V74y7zMkTSGRh@1{^FgIWmkex(7h1gX*@~#*0oTh)Vul66%Kc{a(Pb5Op zAAzjOGtUhmMwU`GwsO7%00*fDP_q5xvIVsKGD_ko`8GL*3& zOY9GD4x3_;)K5pUK3$s`re;xY)aFF$Cx2S0*MCN4_;ZqRmK0%sm`p|{5nJ}ek7d$P zD}i$ktInG9+VXCm3e0y&&?7LP5vB~xlfsvQ`K10U4(TsQzDiOi`5FkK*hGi>c9=)P z_IsM(66$dSg{PB7;^{vl6~G-Dg&p4_BVKZNDBSuD17rH@9Hhj~%@#Z&I!-jVudfD< z|1>lFRxZ+mc->%kciLt|fCnl_!@wDXXS@h98R^cT#sTf{9rQ6~JvdcdeyUjaQy>JB zxFX{liR49wwrO;g1p&DmtRK6Fhvp!XxLX@wUMZ!ZJ~b#E=2A$P4%FrT>_yZ~vZrC) z6mq9gm$~d0@0g>3XTUD4GBEIVsCwX|WETTN|24)On0M%J;jS6@(SIvBx9h*dee7EY zi0}*w#t`8FOIb0+bBWNG{$qA~1>xBm6Q0{gkB{v?a&{#lftS6<=)%;W!;1jn5+Ohf z%rk*`HjE%mjCKyQvwt{5R80=2j0F7+G6)%i+alu^BBuxN7vlg!tMCIn{EYC|^&|M} zjUp;WU~!@hx)r*Ah^SIPRB0fpG_>OYQNe}fMn7~bHqN0brvz91HTuQ@71`nc2T)~S6HtZfQ;^*^`1;pZn~(pDCeP=R+=%z> z$5gL@roM}sV~$D&8ACz&HXeR5K|yVvX@CxUemV@gdVm0}c;sB%>m_kK62Vtx;`EE{ zr2#ddV<8SU!~rK~;Ese2MzWQKJAj!ZGQU`crI; zoGXJ3x)>qYNG^uw#dw%iUqey`!I6W_&Kon)!cnvdkBvP~oocEyo zl^}ZNwK?i4b}L@Z>oe1jcWGW%5~D-EA3wlKfax^81*?ciGB>M3U>wm@Q4zN-R znPPhX1c{K9u#hmo@NX@~mWj=WIkx2R^e=X(Ms~EUK8`mE-p1bYa7XL7&p_sL+_`E2 zdjm^@fm#{XfMx{)Nh*ziDZJvy(BDG88SH{1I6%N)Mc!F&e=hL%dpFLCFAT30#@LPm zh7^t!T=K)&R*Bk)7xyp7s%CMJ#Hp=!fg@cVASX0|^h!bxkC0A#tXcrBYph0fYiIU7 zpWVZI*nT;M20@B(pc?i^^{3fB-8maIM)J(yo#N3AH!Q0JkB!wc(f#Z`vl<*`{CvIM z)EBsc84jp{BN}qD?{nk;Zm!YZmhgv78ab@M!w5z57E6B*WoO6SFFLS8{c$%C2^Jml zE&71}a3G6KXNX~h4NA2o%n)nYqB#an@m>Pk|8jB_Mv{lTaSBvI56~UJPzqa+a>tpe zW7q~tV;cg|?-RgxxFftL_jq(qM0cJTg=_cx$6-)2Jg&fgml^Lyq+Ai3eTBWm$rR~( z5cdNcd=w=alzf-{9w?zh^*<!%c=`qJ`Vluod|`zlgJq%4hd zZqa+fGE2`Qlr(4wQDw{VwcR+)qZ!NGew&QM?H|EUf`Rt|U_rn7pIJ(F)Bgk#n0E%| zU4eN#4CpK7(+R6ghOrh(UFc>>JpVm>d;z4IZ{xU@+o-QQweC`@I`LF=t~K;{+;FQ; z1fBYv$9Y!}6bYj5>7MBTL{#|c38Aw81C=JU2la&-26o#eV8RXHE8mcuz2it zwsLb2M?83aD>{X}j`gg&qCs(X&8aVSbw@U~^=*8`n@FIWj9?Nfk2~D$<`Vuxh^ZhE z-ep6l8kc_+l5^!a7|s7}IObyhallai$EG?F{cFXsXek8=RObh|(LYZd8)(=tGABGu z%P2d?!ZhIly{nmbg5(*JizFW-c^}DVNIpyQMUpR*NHpv_ObH?(oD)MVnpq}$MB~q} zbQUB?oxowa&9-wtR^D(Q1{p3f+^ON899qdGQkhgbb2xJ(^HAn!E}to+vYDf)Lgt8N R=6;kpmYT{O%3PhA{y$mBZ;Ai_ literal 0 HcmV?d00001 diff --git a/genes2genes/__pycache__/MVG.cpython-38.pyc b/genes2genes/__pycache__/MVG.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3dad7a71a354ad1e6a2814f80f7f6670675759fe GIT binary patch literal 3275 zcmZ`*&2Jn@74Pb=>G_O5Ch=ExH*kO*KH{u5EFdBh){a8}TZoVqv=+76Q{{0x-93}) z9>+#C5|UTk;Dk7jFG%}8>|y_cJ#efO5(nhluCyn_f#0hh8^=J8rs}===zjHkzxV1r zZ?_v3p1(Yw-~aWzW&NF$tB(Wa5&p%eXt>2$YK2wLLfhz0=7uh-#=OusW3@0aeLbw3 zz7aM|-^^NJd$zWd-3aGq@_aaN<`=>Ru`pfa_8DWAu%=yM9o{6F-{c zR=C9Lyn%k1H+c*F3UBic`c-~|&!J!A^LzpQZNA96=-=S%nbo^>j#F5>+D``gP>OQ~ zZM*N$;OuFkCw&utbLN3H@k&csr)_S8|{8JpBf_t2aAXVxT8 zfvTOdX$^86^MSH3-%w6*46UwQ+IjiD)rnu)hmC2IGo0C;w$3coQ2qn!<%d|^Rt&rC zNz?2!m|!O}mb^LJ*}?jHwR3s{yPIbBpJ`Mz_Z^(s**&KNos*w?bJ~+}J`mcEMKhSlOb8!ca<+!YcUjsjCU7Px4Be&RXCclC; zed622L$Zs%VH&-sv<}(SR*11Ba0Z^UcO`{i&(U7dPhd%{7>RxYxN3tq18jMZX?ARm zS2cIAT({HVAli@n@FUjtFjNp2mMy=A_W#@P9%eZhw`Tas*%~<{x-@d&h%`AE4*5N7F6e8P zecN^s2Cm&=3kZmgy~+Yx{s1$5qW#4~@@2yP9u%S*(T-AX7cIBpR|*Uyz%Gzi@bDUZ z#+exf8N1L-qLX*A-=N)dB=P1y*n*nhqyaL`03*Lc5@m)fq0@RA?I*G*qfsnN!!Hv# zOg_N6J_X~&Lq54A;UA$;^iu3{`w#ZS0p;8hCL6kKOgv>N4^(rOLCMkq^>Bk2$x}Yv z8_?}ZO}Rv*iTiRD^d!yZ?lR&LYXiJL!2J=NG0t8v`FkQCRyE3|3Qk!C4XEg-VBa?M zGdx4|x$$vQ$Zx|$lT8Ny`uK~#?(F@Xm`Ci=E~Qn&x(cmzTKNt&SzXfyOSY5F?wH~Yg3xm?g( zB#JTt#YM4=jTM<#hh5~?Ign|=nPazXxrO=H@)>!hhQD#$?;t4a5I$M4CLjwDYGT7H zpb$LcdKXBdO;mOwr3tkIqC9J*7F?rZXoxzGJFP#*17{;7*QzYetFBlAB0 z6qNz(q|~5lr~+Jb`?^&QKPT=~%xoZ=AH+y?@6!++p^EZ{Bt4?WgtM2Av(bqmo(Y=& z0{cp!8)g%qb?s@C4Y^2l=c&1(qCur!$9wSt@132WMOlFe7WoHwL9u|wvKv;w<{16E zc89In@+TO5jsH|9T_T-AQP#v!&J1lr)d8v=Di=qAom8zrLI?vlMitV;1$OQg!~^b5 z>fA$WMsxr{&!AfeuE7k~CaRj|1-lKPkMUuH>Hq?s&mS71_dH3|sSv(Qp%siR)G5e_ zQHgqjkouBN0nYB>IuHF#(_f?Y|3X^+m_G27nrk^^QI2!eBuF9`7$3dc$GRDjT}I$G ztOmhryN=6NUAcpKq%1nqc6H~!Sp{dU5b@qn=GrdybYLp8yE(Kxmrv;1Z==!v2z5zZ z;I`!myCf5mYCS!V7#gL+GEMeS#*l=SRqu?_QZH7od#!4dRETK1_jDV;48ox2=ACL oK7d!osrVt?k_8c+^q16nP|9q$Hmx9Vy-&#hACExXXt|kM`{J z?A*-G&g@qA-rbI8Wk*LPfFZ_|lUR!2$DX6eW+D)QAt;Cg*n#XIG3*40VJA-iktkLW z`4Yo13`YqVAjMgs7fh5gJhNq#_1!F+lD5j0+!N)5-0dixET<%$E<1A1l(V>7t=v|woHKPB`Ep*K zO>7m)g(z=wt5}|rc%qfxnl4XE+-}Wm&6Z~oPkO1=v8}oCyu{OpFO<gcgr=B&uf;ah^;Z5GO%J(8w^rn!SLaKz+ zv^Rs)3{v+YHR~Ni>KIa|J#*Dqn!C+8TUjzgJJ|8Icz>bOY2iM>wA=0kwe?PC^EM{Y zK(6yKRe!qGsJ9W%UD><5+pY$UPTNOvZmqFZTdk=^&3{4FwpFKE^Zky(lcLnvseP}HG>O^ z#WS&xR@ugt^b%M^8v{&+>8sUFtE2pG^28^9WO++&k8O4B6Q6u^Yh@`NIU!v?xE55LA!~fqoel{uNjyD+e_k}^p1OJ&w0(nTx7f~a?)PT%j52N6J7!L zj5p~OanE{F-ZbtxZ^oO&J?|a!=5U|z=Dh{n3*I8;@9N^&7Tw%+Xi^=t{hwW|%k zRBcs!zvR_68W?1$(k@j9BQ>woS#Q>=L8-G*s+3f1qo!)@Y7GUv(#~#8?UgtOrP_^c z1y}(bl>)#}$rm7cNSY5irD{c?WTn+Y3$03kR#AGl75L{$Qly&3G{nCQB18<&;PQV3 zLC@?NuFwvM%P@^@9Eh$>^q-z zUNt^pw5?Z>BBZV;g0)H_xNR}uIxkIyHYX`GS3`3xoREp-3b=lt!ig59(yvywYc6Iw zOl@5E+>0-iQ|odMXX>@U-KqrDt2NJ6wK{idI4K!%(p??TSPN&`owlwvRO*P#wea3* zXKQ;msJVK{qDtE}RYeag^;%d^yKVR4rKj7K)*i})nM*gSwQZt|FuAQ7?I28G97~zd z_G_&T#SNfNFyO{6XZ3hEwsKaFiE(ub55q)fHwewzVDqUtW@9@y+Ks?<&m!fYMqrpJ z`8)bmhKpjz|EK0(dc-7S|LhKp0YqC> z+EE-3)=Wlmz*#dD#R+feD4xD)nt->abC4kv2}C$6-TdY2*hJewsoL>_?(ruc_5ZO+bHB9G@b6maOw`aj z6g$pIZA&lvU|QO4b%OJyrL5xesCyBF4#wwleCi(N-N%6IrtW9(E(Toi@&fC+gALGc z^i5Me$WnbYERY~?_%QPQA_BuK;Gd8Fj@g|WUUC7&%T=2NqaWibT(aOqBmtHe3uF;_ zAhbQxv%JLXHWy!>B;`pGPe6k7lb)q+$WybK3epEo&unH;JG*GyGEqPIdLpWAHgnN) zAd=`gkhnabI4Cgh762reIGFTOQLN~t8S5p~SAl${hQKDwZFGPmgSx8K+TZ=k-{J4g z3vb$?c_TFU!lV$yFj;ABU#*1st%lcb)UO8Xt=-yELcJGbRgW`x0zsI4!LRLl9U|tp zC!ZZf*Gto(<*$c1V4wiRo9iRHH#A=j%}>Z44jrIPD-wUTcM^J$)|Z8rc_@%$xQprAaBmIXis z{A%iBK#W-_T&=ZQz<*;E$G|2m@cDq3`6wpApG9Dp$IauGZIm{RDE`klh&_uJ~r5BBx=0)SvH}=h7 zQu2yDkTuU?{=PMmmytZe_C!;c0+bG=a*IaY+9xWrno~U?iOnQpAc@FN2I=NBNNLi` zleO?BUr(CGPD{{OaYa3f5v#`-EHijNgAXwH5eCGq8X(pi?J!z@MhgUz?LPd4MGYE5 zImz`_rMelWyowJRSAVbkzH_ZGl zb`+OfU1V^Hfe@w3j6I7WObS8`=OPptkw_3Z(4CD=nDnoL9ESgJjPX=Ex*tbkhQKA-xYQxSzlU zOa>_I?UT(i3B#Kybsn&i+O*U)ux4g2xoFHA`xY3qIGzAB#c{$_8fAa>j%6W3;raQI z=R@^(&Ijq96*#)zCk!t$Yk1k4WC}BQl8`6aX6~jr@+^;Mwmh3S^sFGTom*Lh^fIAp z!LDk9i0y*n4%oP*L}*?K&6h*-6-AywRT+2)ZZl9^tEEYG0k<%<+in27L%RhTNho1x zdyOiPV7IXy+MBgKAE1X9cwwSdYllUFKX<@uX>^%{Ipjj-&^({oMYSa-%no>FRYF!c zKg`OLoVuF53$qO$iyc%TS7~HMGE<&f-QC)%sJ#Il3-4tM5teenAB+nIOO~deIh3hH z{QB7RsRz*ZU&ZB9#4;>L{)I6zILh*m%n++lT9X<2V?;d6PGAI>mq2sOn5z?Dj)Xj6 zez>8*Sp$*G8XJ~pzn=J%wGV_6q~r2J;9dp+N`k!-RN^J)jav!frZdefzy#=H(O5M0 z$#3T*Cm-dc1CYsH%1e3a*U7~*m5x(8mwSnWNxg18YZek)FS(!bl0ajbp0jDH{{%_0 z=plC&tshKzLXdkX0aTU&%Hln_^G!h%*_GQI{B3d)Y7xPkW|-b+)cr=cCL$m4h`rHh zwQheOzspP0>J=6h++bI0jh1R59TuWZExf(EDr72j{M~iZ1YfPOOgc(xcxba3(zUU= zYeEIX^rly1-7p`J33OYPJqlL#21IRec7T~{fUxye4LtjUY$ZiJN8;3WRp+`NCNPRH zQMuu(Px5hcmDndtuF1`8hvw!O6ord?-x&D09?+iu1+<`LHZWHU=1FVX)c=B6gtVsB zA`b8k82B^8Tl9`Cj)0$y=6MPDk;-#}HUVtp!acczlg1YPRAL|e(SAb2I#9Bk04Kz4 z0%8BQ`gU_bHPAjJ70R+Jk!nTT<|w2GNBW~+(0%; zh~yGxDlJYulu+s#x*D;!Y6o$p82Ajv@Y|$FG6Ny$mr%yfBQPNLI*|m_JuxzM6vGEK zMkZ_o@s`jErVA4$gkQ_67E;iT3q;2+S-@*FQiB zp9^GIKq@CkdMO|?;Pwx7lUA~sYz8qbAWy!jH8PvSM*Gw`Xr3%xo>vgKGs~%?7Sq4AK;Kvw{j>_r^W>r7K8zU!{K zL*PVc{!*UfWpGb>SucmXLk1uBj8-~BbNDV%F`LjGigFpc!#QsrB__NDZxQzbw2&uo zpY%?8_uyXiPI>p@KIN6X`*5H3PJ8#`KI2V63w9=)d~sBMtNeQ`?VVJO!POd+9*tPx zs3J`y^o^ojU6ydL`5r1&ps#M%OBKHa?FrP1LPbl^GE!;O008*4LGjXdML{(T{)b9U z4`nFjc7Pe7z>2hw&rvr6WfzDrDz-`}ezoKIB~aKBC}Ep#JF+jO^;&Qp@_C74@V9H# z#zq6WP2FP*fZ4zghg7Cy@PL+~{nYADnVJ{+9hKz6{huTstf;y#u@Zh2VG#xORN8GO zbVGw@5hjYB_A9b$F4@!bC5HX{HYA^+iHSIq^PH}J8N3u~ziTyjv!>d$*7*{w3N37B zDKN5`x&OI`DQx+VV(nwpOk^CG^M;q02Un#L@-XU#SOOI8B_RlufyN<9cqxdmCdFWc z5PA_p%)JGbjhEfG+LxmmIn?+vTFO%aw&-P7?C%Ncs7b3 z6JZSXyt$j{2nWxvbUPpV|NHAsJPY86>>H)CZ<|Zq_QgF|Pa4${OhI*Qqb=EYF@~i{ zfbdir2=k>tRcI~f(1cXl1zU@KT3WB{)%*rb3Y4xDxF2l2hp{)H;0nO-+J~x zc4y!EZx`xXUH6YK)Hme!pD%PDBJpYC?Qvxv9RkV{ay2?@osI6)0BMNAtMov3UN&?| zCoz6()-4}n36p|tmWrC=$hqz{pgK%2$dz!L8S?Uc>tl*rvOKpIFVWucsKBelI3-+Y zyA>4xmsNpHMWYc*Dj8NrHTT6Zfp){3t|Wa6`wu@J=0)Um1^4=4vE6ZNH=yE!$WsO4 z^A*(`VWy9aBE~4RiR9GJ6AHe_U_dy9W|I8=7A~JmtN~uAh|96^P>iHRSS>-L2U-fY zGbV$AFqS=pd?%?Y%JN10;N? z)~db=vj{|g+_(XyvL<6;5`3Ggg~@A;8U&MxRVn|>>N77R&1%raA&9G4+g_Nt2=aHC zI3!F5*#2#_pSf01ViNYlG$ftfE$C7*6oeXWuXZEMbk=?SScEh66uX!}X4u~?%S#!N zm4-(1CPzbVTs_TzaH43OP#}3JCcH6L?f1j zR(Ek6Ui({k#7;^n+`=UQ%hyD9Qc@ig6in=-2oqu%wHu&|u#(bZ3g?KJu}&X37!DQI zy-Gv*GUBDQo(vhdj9Er610PJw*cALa8u?$ie6rKA1y9@Y?mWchDu)#RxF7qlg&~D7E3ZpbLi~uMR~-lfn!oztOZkmQF(FDXmfc@P_&6_hFZ$qV^LD z#%AITQ3Fp7^C%w)~lTb zPzPw+Q5r58fQZD3z<{uDP>9Y#m>9@j5lbPfKLCv|*XqLRb@$g`3L&Mfl z-@DK13wrma zM$jJ}iuwlZZ7g(P9K;4}6H8=8X?nvjoqz@viwW7nYNnBm;y0lUm3siz zBoGs$pvq)W7)htvU)43#Pol-i9+uZjVDY~`RL-s&FmBrbqd{9hpwOl=H`!j-?^55A z7Qlo^3x7IPI&NY3?EtN)g%o()LCZww!{wM3FsxgvM!%2gO%tPS_un{3LkBXLm0${n zPMM{i?PXq1pyfCRfGatvH>~#eM%uO1^r2E|0Q?L+lA3mt5}kc3vF`-Odg*2nNH}`} zI^kLEN!MxO1=sCBnCyhg`rrLHdjd{gdqI`cX`{m&Mqg zOMUZDDM+7W=cFC>(Te(*J+hCM`od5jt>$?+;H*DqorP{Qy zs(0ukXYjMuXUQ?9xU-h>DATu+P16>A7b=y5@f56YevYo_`*usFl^{);@82 z83op$BmQmN%g&|Mwdbxp4d2SbF3fap9V(+XT;06OLI~Rv&VGfSCpQ6D^e6jrL=n`e!J%G@*GNM>^E-Zj>R=-*A<_?UV~I!ClpT z_PLiYt+=ae7quGxB->BH*`T(yttnd;T8J$O{peUZ1&0JYC}$$}$z2^p*TyN^l$I^x znStWqy(DJ8fHxQxITsjh5=F$O&ACL@v<&O}DY@IHp(X)tE9_YMUcW-@!A4^ z1JYPD=Z?xtPXlcsGr)=8L}XDKa_D}ogPDZD+^kJ*+5r@!ufMXDhsVV6Lq^KuZ4);jNAg1Uax|z+1T6yu)4zM+(#flchaSg97@*c2OGiBL!Pz z2D>pmu{dkEm=r|P1xDkm{V+RfFXWJ3~%+Gc5FT(bM2+dHU(N)bp|DtLFbc!S-_4^z;yR;WsMgkH}`J z{x!QX>Rc)|p>m!crhWgzKVT#IzF#3d$IQwhpqDOzhi}*6DNZr>=Lz&|9Y!_)S`k;l zTA+FffOi^7rs*R9?ocb2SE>oN5&g)C6YMA}`~MRGl)&0TX{xVLm$d1qvyi-Ctt2a_ zjpSmRgiLtT{OM&XrQw&803!gEbKrP(Aijs>;cr=S91It|b!<8%f82P{coqJH2RSi% zC$wub(&E@mKy?eE2%s-n0dhc*z>4@J8Luaeou6r6*3atze76uJ11EkD-U>7 zfy1uuD!2m%qRf4$Z-Keq2(Fg8S?Z#BECI5c5Dkc%K_{rRcIk=J2d#lt_P){|{oLna zia(3T@RNqqc_V58s~={n+q>8r@cB_(IT1emRuo~J;*z)QACO++9E{b3~ZEKLFUckWKK9}DPkihp z>)@x|VlNH<4iQw*!nBtHmp((g(k%=4*Le~SWypz62zc2|SerJ0{ZV$7_?f-LJqLP_ zZXQFsxw=I=D#8L}k#sNzB`Ho~(C#BHzjFr*lAfd=OR$LJJLqe$7#x@W%Q*zdM6=*L z%~?6lqi083UhyUa8K|k;D?xFowyk+K^_rr$UPepw$N}u0#aHgl-lhfG3_pPWJ zKq0iBg~=zpSHMOeVrXnNS{2m@_J%y!!s)}tc=*LFZ&YEhpi8qh^E|;86VTv9T;{hg zl${E6vn;rF860P(;zRF40bQ#S#NE!)JmXhTS?wd}E?`X@PVle?yi5*?O?OCLF1q4n4@3OgIHjI%e~$NWk3bAcHg_e*i#IiMgUjGPF0lUSpQCaIkf3ZtG>zS|A@~s!lXdyQd7Ui%zwh* zUov=^j}w3gI5Bk;yqJLHf%Y%**-ZwoF(6`Bn+(n~_-hOnTuSOfPO)P38*GdS`PF(~Dk!w~P6rv&DP zHioah%NEUg*;Zim%k~b!(DF85J|Q>%N9_I>AmxVO<97&2)WB2K#D5mt@f`iy&C_65 z3P{i44{jYiIF(SRplvK7w}8B9^5S4^vR3!RU0`rzM-XcJG~gN%TIWQ3f6Ia=ADz(2 zF_TZ$%4-~}5WN6!muCq(V|xiYya$;edys<*7IamR+Tn8t)&fig9ClO4A-)0~ar5x1 zoak9~;BxKLCtgX;%G)zf!?sze*GhdrhbLD4EafcCO)PaM&J1n-^WEf?-YdPA^=>8V zc+;X)Be?3T-$iPvpmMl{r#Qb=eR|QY=ywcwSzo|CJjp_26|cj;g!ZTERn0hvElPk; zU?mR*7M_{lhT#LO)!hP|MZfQ0j;3>!LSsuLr?fO}{H3k?7vIf~uSKrsyk7Z04ILjcB! z%cS5NVJIBNi?%#xTVV4}+-S`8mxMJz|97S*u$h&*M{SG_Z$g zv+FMZC8ueqk&ziAm)Te8eiRA1&T{KS-1XxSN6AXj;!G=7A%F~kIfH=7JgC_s2_nvM zogMLG*M4%D0kazBVvue?SJ=h$qW6E+m0ax`07t(#yP$3iWc zH%Vzmu?Llwo~P7co)ivz8=CW$fs?Mq8;lv{>A`?`bP?_?+D~?TcJ3KLBe9ShS4nYl z^sckH@5BV$efTJF8oVz$d>6Mdf|$|Xz*U{=)|W1!q5xwqObZ~%bQ3!T4MSjzQ@1FO zSu5QSKBwK*I?B^dG4z$$fWa4X7m-`3Q`K@95Bs({aO4@-Sg<1no&<`HnHdEKGr z2!OGp6ww8F(Nh%85SThG-2~vv;rIYkG7rOGI-lQ%y#jJdoDa>BlCFK za_->T)6o6kz+f*;9v)K4<08h3puBT3fJYf>WpP0lbPFX|1F5{^V3Si|C+`obr2snt zpHAXX7fvlx+>cOYDl)JEO7%&UFwv|9WbvvHJ(s5R;mpFt7u;PO(icM2xS=7Ryp^U- z|AR3CeV7!B>=0{~1jF{&quM>0G;ch*%T(aoWx7q8;ACmENGwnvObSXL_=C2 zK#;y03Hq1AtSU?qP_{LP7y#tZKD%psBm0clx{tAMAOSEK7lO}6Lac%>BJjMXwHR%GFuxp(m^`AJ$b<=vQp6OvG$a%+S+7estcG*VYOBm?AiPJ5I=z;J=@@6f* z@JI@^6Qse|*;tfWv}<5FG^a`qbXK_3?n6f*zB@N{d1YyK%q?4ei5(Z0>;uN=ZX(uU zQNN4EQmb#i!w1+hhT`crRb(*3V3q;DEuv_^RqtjnKtU3lW8E(!_y#UH)&T#qf^*V2 z;bfhXQ*fHjvFxJrZpU#__-F8+lJZrQANgT6jHht<41O@LZGgh0x{hYLFNyE%5#Mjn zPKlJyae}C{5r5-DT$=lcH2w$!9HV)*ztR611N$2P80RHel6y>ebmB_#>X3>NS_6Ng zggVV>Bx|Am7J?YROYUHDWnQ^ zfjJT-WtSV0`Y2ePc|Hs0j(9k6XEw^Ehw<|`PKa&ir52!P8F|WsinDm)Y+}pkQo007 zqhw-dmTx@z37L$g%u1+@T5p>Vzir~Y$Y(@<5t!L<6y?8MyEZz8Vw%J(uh z3Jsz@?X<4dUgF5e+zZcb6pgV_fQGQx2#!91(tTd>JYwLEAx*$YoK&+s>hhW#Q1hs? zOxyxF=aD{O*f^j3b)0Vj>&E$1R>t`Nhkib_@NvG4V-<0Ex`M~~XcL&7-j{eQaXez! zGW5AY3|u~9!1i>~Kn#olaDIM2g)5B<>OLG)%kO7#;e#*xd0Z2qJOx~nxZwS^KjlsA zPve?Fd=~d(y$M(g(1V-irU7&C3gf*AtXKgvVX8mGdclR>n*_J4WBEm60cn8rT(1Bt z1K5?`9NhCK7b2gY-XwDQAI+6C>;O_4td1jXIK63{A$QQ)bZ;h#&Ge?dVsCDiXM>U3 zk0T!?GX1iQ&GcsBzETiMd5rbFg087czTdzI;}K0^MEEiTYz{cpE+f*h;St@v?>Xsn zZltHvNA{Fs8T4;zsDD#;?B9$yD8O5)haO4)bZoePN6&aXr+MjLerQfdXEN^V@Jw?4 zW`_DVbI1P8O8?#h-+|*4(mx#=?%&j1*B&1p!kT4d{<5-q)84Unm_M#+7Ka}M9*$Vm zIrPd_7lBpq@9f0&C724sop$Xs$la%oq6PPc2eyH);;P;59R@sSa`$U;Zc`~ z-ZZo#3&>fN6jsq&MCy2N(K|kG^ro>Zd7p{yjPs*NFnqX$9;|upWUq*@*qcH))q|@K zJh|a$d@Rl(;dD$D8u6NR{odcI+g3h=GLr&#!ln6`3$FebPz4X4Jv zmsE-62|#~{h+*X+TSk8?3%(J5gfn{l8W#=nlt7iyE*IR4RFuW!UDXX z@RZNUVKX20lLcMpopX;WiM*^Wy%5U`S`=;_e0zfSnNyI227kxup1M0i96@~4Fj8SH z{QOrjH4u-?SVRJ!C`v4R&xIZ(FfilX!%xYl@@$Aa0jL%QqbDIQQOvQBrYQ70jGs*7 zTQ1OFhzT893?P2tG~%*R88}}q(_oHzzlZt`T&ljn_HaHTZV%4IQ95oHA2&q#Z}bxW zH*l;I?uSRbCo0?ieYB2VfuH?A)c&8ae`ud4CGcK6!Fi`BC+=NJoJ37^yoYlg{_#7$ zB`V);<1KVDHPv>%)wR)i!LyD2%*6HHhk6(bo_(!fPsTr(PdxjDe!X}G;LN#m3bGbX zZ)h)lw&8y(NPJG)Ol&%&xmwi-Spo61c<4A1!mx%~{4AXofB(K%5pq0l#$3v8a95)X zbgo&DJe>BDJgOllMZrjA#M%Hu~TXkhA{q z3qIVaKLIqz!G}qqB)#TVH7@-dZ2qSal(X_p4wts`a#lXfA@OlOh5E_gB|_v_W5t_R zzdDQ%p!9x!LW z-#vW?)HO6&Bj}41l*T?mUtJ6YAj6H_*UQH!4EGc+Y=V7k0robws0!Zyo_p><0 zNWaXah!Q*^r4_J)yjUq~50#75vZ#mSoJ}6^f*^ve!!41*kxqV}M(5;d%N{SMH_D(U z{D+21MRkWtNsg4_X^fF~_R~?V1&jbF0m$_LpC^>jIQ>{Aj?j z=mhvqY3h@J5OP$nQ|9Umv?%Q@eX+g>MVTp4*NGkLezXlPD=O$5Cx*!v>mDE8C+qo z!r)^F!sNwT3wqb@GM>8LXjMBm!o;S(3{S-XpR$9Wdmj@GsR6&m`Y#e=h^}-D+g#w& zrx^Sz1p1p^e(31d@hMhk(|4Es%GNggY4{QzaMNBf#7+typf(1E9o5(QtC#Q;-jTE) zhNd5yL0NyE2q;N^*KUy3AGa%m!yFiL=(Pk}Ort8JEl2%<{UC|);+EE(+14y%ADyXB zvOk|<@M#1%F(@;3gpEOU(X;skFxo{#3_3HCX&d~-#hh4&PUH`m4_FTg{>fWsP5fd1 z!QVOuOAx-EC-|@X;22K2%Llbq;M`NdYX={L8L~Q%y;mX-lvsqQS9&{ylSPUYT{qSB z9{@g+!?3U+*iO=!#-*Ge6;=My{aW6zs~vnv`s-|qCP|#6=fUZ7(Yfi=He9AFzN}=J z#0L#fmXLw(Mb#?n9o5!8PqKiqZv5s$6GX|0)ckNh( zsm6z2_~r0(@t4Du&GtkhBjBswXCTl|PE!oN;wPouG{}LfUt}QC(L7^cX22;pYHmX3 pN&U@kiVB8LB{b>E@IU?%9q5eE8vS)TrfjQ_F8qWsi;MsC{{ggsF~$bV)(NC91MHy;Wes~x zMzW4Ql9%LyY`)-~x?Ge?kTfL!mc^G(p_jG0K5g?6V6S0*iCI3xV%QO{yeXe}phV?W zY>id?8mw7iQ!YVG{7TS{b59DoZ~VPr%2#o|$|{n06(_pID!;|>ve)iIHI1!Pfi(_o zU<%#cwV_dYS!nB(EdebmPwYS1MQKhFjPBj&|$%{e$gs-Zfc~m$yd;ww0@X zYO;Nmz<^|sJyC;fwPukljczdGcof^FtZZ6_J&tkCa4@+Q>0!SxFGE`{dJ zATQ2BeG|;gawMQY8X{p=5pY;fR{NBG2(jNjC1+1>(Lz?n!3ASUoAt}5+RW@0mP_k( zlZ|(6T-r&xMgA;3psh4l@m)!dF|o!qaocZ|6ajY@2ga*Y0RD#Ms#HsO*0vVF`Yiv?8AzDK1 zB{wy?b0}I&MtCEr+>I8SL{*L6lyyRMQm>+_-V=yaqwVr=JLYy#tEb~kt1{{9RJt5J zC2#xxz54sN`;WibjJaMT9uwpD%{lScW*nh{RemAFR4|(2>>})9<&cdVokgnU#1zS} zkZNG)((&u$aat-zpBy^6(?E9QCMgYayGe?uB04vvq6U!VDb9*sVs;!FP44psYhYdWSH*Sj zrdSgC22S4U4G{!X53baH06_`sV5iOscmTWWq)(XMGHj31m%>=Mwb# z%1Z*QeiFFT5ZxK4lb}y&QKQ7U$G*EUsa4@oWa?EwnvNQn3saoYpW|t-a$og4fF903 zpW}CHtN+uFG)_15GR%D#H_wr@ixdu>Js!KJX={)2tXJrvt?4oyjs{9!!zVUuLzWI~ zpwqmsY?zKlDwj6s4T{v*5U8c-`9VG&jt=6e(`-8|Mg<@)tv^(0ZlnEFXKCJ5=Uv1$ zW@_)j1k3MPp1%uNi`u$Q(G&5oEQU@kGU+aiDk)i+AiyHN%$L28hkOa(HTiX+Kfzhd zx8sGGuOEVS32U$5^F*p%Iv!v*2P0)`&W*cw+<(CJR+r4Pv?lzWwhOkMx4GzZr1^ND XzMz&?QdvYi63sBGMSc`SL8J9QTa?Ng literal 0 HcmV?d00001 diff --git a/genes2genes/__pycache__/OrgAlign.cpython-38.pyc b/genes2genes/__pycache__/OrgAlign.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..584aabab4ac58b1c0b3a27d59370fe65e582ba7f GIT binary patch literal 19348 zcmcIsYm6M(Rj#V8e)r7uyk~63+w1l2y1ic8>-8oOHoM-$IGGSTo7l0VR+dV8s%G4? zJ>4@^?HPO2O(0$pA+pGT6dvNoSVDs3vAhFB0g1;C2nc=wqQs97Kw%N$^$UfS!uOqf ztGlXZX7hkdtIoaW-dnd$-Fxo2=W%O3GB#E+@b{UYe`w{?pEittAQJz}0J(rG_&xyF zaLu;SP^sB41-BX*N$o~f(%gE!Q7{eHax?AXdZ|&8yp4RhQ8o?F*sXZR+SmirG~R2t zSvU8J;pXmHjd5Ukw*agFYyw!(EdeV5tGeb@qh5XkG#FRjFahgkSiXGs((TT2uhs1Y zKE_*@H^_O{1{Lcb%0atJqz%%$34a+t07NGbQqhY(Yn?(xB z+&t2pTX2g=^KOYWgw;!}E$?ct+4C+pmv6N?-p&_FHT+$>)e36M?Pd_v_;@eyI=!IQ ztOa-1*S((KTCO!aZVhzZY~F0OTR{&cE3Hne=hXuCR$Euy)wWx`TeW9u&30?GvyM(w zbx=ET`P7Y5*H0~-x>&Et6MNm7>!Ay;*7KX4pv93GJ)nnysNM8>+n(2{dFZ>g(QNs( z?uzd3wCZFzi(!cVa>#JT--9c7Cq}Ao_6-NK-Z$2a1_OJGZ&tID^fjleB|ErG5o*^*j; z+X7nxU6pT1t-x)8TLN8G?y}Yu*b=xcaLWy?&PJGNcUMDuNyu9g@|GNux8#t#C5Pmp z9Au!4K$3@Yfg}>`K?Z6Cl04K3B$4O`WS~|c$wPaABo8e?3~B|EFw_ccx%FIV2VQ%{ zFJMIdd4dIiaO}kofAD=v?_Rp*yyu0h-yNDSzh?N)qvEU2p1IXs_s*I;-Sr#J3~1;?R)a-13T6>I<)nVdOp=F~4! zF!j$G=CZb~Em*phb<~b(b9U9#ztJ7B+^9Cn?JTCH?dgc7Wo`q}AM-XY5nP{w;OaqW z-NP1x6e5HIX3e^n5gcNKf@aOuI0YAlR!-y0Q)m@5PC-VYRZ=)|?9ra5h{*{o2rSJT z`iXg+7IezHDdsA=M~Xg*xp9rN4&rF7s&R_KNnBGJXIqM{8HFRq7US~Ns=z6MGkYi) z*A%dK3cESoBZT1EJgxYq>h5{gu;=dI6Wp$JudJufhYD*a7a_Y z_o?GLse6QIU3&u8DUI{}p2T%p;}ErLXKIH`XM*ExkFcM{hrdFp9_X9b>96o?S4 zUD=tstoQfs?cQp))meRKZ728Q`g-kMOt*EqrPG#9T}fx&wa?$q)Q$am+}>6_uIqG5 zr!Aejl1^RU@7}E~eXmZpb=uOYE9unIe)qOo`d*!G>vT(}uB1~J_q*41^}RaX*6Ef` zTTJUuhuICk)#>?!6AaT6C5UZ2f^D2 zzJuTh!FLioMDR|68o_rF90hpI^q*y(GF^cS%)J|68CP%xz<@B0#AekzCSvtl$9LxT z2ws_kJyI94R!QBTMuy6lQWqUl@=&Z%k`ZVN%nHm2%nK|CED9_MEDNj%91}P$a6%xA z0rWX3P}DtVTJkdjX9dm)oENwtE77{*=Rsk8QE`zlknoPMjc|-Gi|~oCi1384i1384 zgK&aOy>Nm|z0A5yxXiUov&^teuFR`UsmvxfsMIuu(@x;^oMyZ2?0A0HS?&hC;6B;~ z?B0lWTQe8!s9z_yb9~R#?el=dM3O%P9Lpl)9naGNU*xVe=E+|tSeZc~*3x396mIN4+4s$zf4^g$8q5>N!&0~Eoc z07cGZ9~3zYeNY6O0~9${9~8kZ0Y%PK9~43Vr&Tb~2SrY%PfMWCr(H1C2Sv_&9~3!u z9~8l207bA(KoP77Y)VeC_y24y=>BqAo33d*tMP|5p4a#>jTbcjh{lT=r&aMV=;UUK zEz_Y@aSSOpQ*5|mRiMq#s)($B$f}5}fXJ$dwEW1bh^&Ihs)($D)94=?q{A%~dFL$B zQ%JdmVw-i&A;tFVC~LvIQa`UmyGU>m-Xh;cw2Mp?5i9aj1S@$~2~m-s6q8DR)|eBS zFXCS0yNGs?`6AFozKie{sV)NjVcvF(;1Pn6SQv?l(Q=nXK3em#(q+xdA|I{!k>n7e zA>ukS@a`Lm_W0ZzuH2ej?*0V9Zg<&n{1-SL z++F>P1m8_C8WhQ0U>SWU9P)reSupK<;e&-I3lB;8GKvy^6tEX?1s?-QIG*Ok+V$amS{GN9@3rBUa%(G*lTA5{ zZU-oX>j{o0r0A);(QBYHnsd))qQaZkxjft>Cn7MTIg|Kq7x%~2W({~;Jc1Q6Rxh$-Tt$h)0 zktW@RUJy=737f;gxBN2JU^q5(3pzp14{f%n=atO3j9z@Ig8nu3S6=Zw&)H~Vc7t$M zREv?HP-z+A7^eSL>n3dc7_f+^K-y z_P5MARupD3p{Fp0t57Z+1x?9Y0y6?_ft*lGm21+Hx@f$#E2@aGTj&*cOJdJML!37r zzyXJ;gpzV#jdfs6=D-^Jz?!VC;WJ|tY@P#MRCVAm>hf>!^8E8wnajPz7!s#E5n z=w}_V#^F7ZpS3eXmPxaGLMye=7v+Jl&g;ksa~#MzJ?f<}EHu;f2Vk#(C8Z-)nt>(@ zrhu;>zg0|wX>7imQ@2|{Ib_3#r4l&qN#J0g1P-Pd;hU!tlSjc(FNNdrh3s5T;Y%s} zVhX>J!ms-`(XGs!Op*jTLVLNpakp+Od9Z>8VaD%nhvr&(#;&98Yq(-dTl{sHFWa2L zFESl>W{2|>r#~@KT;D^XeGU7ZYYt4TQxhSB3_=DF)q@N?aS+FArVG#6JqxS)lLH8G z<%xs9wO6cX4dT(YN|Ph&W+~8BjZE)BE%T|Cg#-I6MlCmwYnnXo5}k)l*Rm83}lKWh5XjR4^%;2N{VyZDl0ji;emr7s*Iug7Zl?7C|X<4RO%%Zl`_M>2y1k z@yoZ~pcH)rYtpFCDv?2vu7rH1)mibWjrywu4#671#|d60;LhN`L~sirtSooeH*WVl zXW47Fm9=!0m0}rD>+rvqphZA?s9sQ8M)pO-Juf#TSyIVGzr$7(QjHva9?nIbE~SOl zN71H=_fvi@%Z3hOXJ#Ox4(aQNmB6I#D9RSBl6lHHfdIrI>qxp39|tMxv4U_Igzq*BqSMk0DIdH!@n-#&F}1%bAG>SpCW(6vw9 zoYaA)ed^|g9=fZBW0s&tMKp%vm8e@xj#Z*g#XyGRl&Cu#(WAH_QHQ7r#)pnG&=aFK zEB6%F-X8YhJ>>J@m?g$T zMeBy+m8etvIvlG+o#ON1I3?=T`WTK;q7Lgy^LcW7Vtlt}e4;wVugUR=>J*& z>mxZnQ61J5=pmmc$0y=R@SUP(AD(XPFV-FIB-lH(KAsr8W@pQsLTOwd#AJ>JjX zGvPMR@1E#a9+-nMl$201-Y=nK93>MdDWjy?FQWuUJ_eI0L8KXu^a4t7$YU^#k|IiG z`bCt$feV*9?Nb>G(MGE3&HEX_=$aQ-LjU{$y0Xd2*Aq*!T0nzvBJ)HrR`CS;A?g zIf*mu5U*$GA&xWrFvM50;#N9MO-p)D)S_kj?sV_zrzfb3_pVLD)z{zq_d7GMLzfe8 z(wVe5*Ms&UejKHs(I{8?R=}JnU5_v)N*g`LN@+XIoakWUlNQbGX_g%)I6)vzXmL@W zCn`a(lgx>;T0&LN5Ph6Lywl>l7WeksS@sx#_^zK~PF&tp0K*Kth+!6fLGO;zC`FkR zJyJA6Q6UTP7mD|(KBvso+w6pDpeTf4QPfuY%o-CzEtV+s!&JdiBiHlRH=K2yZtHYQ zr>;(0jcl*mL$e|XbXx8mWTnI%mAI;;H7e0}ZI4Q}Rhz9*$yQ2L4QJugbF@bkAw7MB z?=qFwzs4&X8QYpO=b(7PgKZvztu4PdbNQO7e{b5QBc}enY3F14LM$IEFBD?;#>&5u zd*QK$BQJH0yfm33=7Ri=K!qOq1l}xUZTJbahv3aZZfB87^uBMtO!Fi9W7vW&bJ42b zk1>H$7SZ3Ul(k%okR)QC@q`WYTthso!#q8LHrl|=EBd63Hq3L6v(bk6VlTU!Lm1HR zXCKhMMHo^_iUX))WMTgkvvn7sn`9KB@5UonJQ~<>uLXkyg%Eyy}ohRZz5lxSF3lpaQzLG_Bh>t@df9q zWd8D=bG1?My%mRE@u==vRCldW+Pv-gccr#bUUJwHp&75;m=#2P>8r8gu_c6Sc!z4W zs8QW6UNxVr8ug3{Ap6`gt>li^S?;)!3)-Tw`i)%h2xJZnqngs)oa+;~kBK zkqnD>4Pkar%^vf&hcW#dN)QTn1Lm)7RSY`f1!5)2&#IU+D8rShnOLEaEGMj;DpzJS zjWpVq$q&&V7bB-!79`~pn2ZYvON)_|K?V#-r=1b>$bN8pnJ`sPx!Hle`RBTX#_Mx# zE>;3JivOaUkChZqa?>rwN=hjCbGJOOJ0Fep{3$896zlmXc~|GWE@98Vz@9tDbP0R@ z0-xTg>Jrs6DC|Qafm@M_qY{hWeARr?{~o+*XkHG@8=-mK_XWG?Z%Vp^$j&w7>RDm% z#&pc>>1^NtSI_gq9Oie+Y3Xz=ED0{>xYIkrgfr|wT<`zGndEk9$PpbI-tr1UD5)!x z!clKqJJX!aRDTJ-1$`D20y3cuOtqYVp?rn^0?-;*Qld}fz(Fov*5C`M%VEGwtxM%& zc{sL*<;7UO6y>40+I@Ss+^h8Md+=27h`9JPUIF{$I&Y_s57YExV4EB~X7E1(+r>2I zI){L93SY-G=Q{8Ux}3hxgAH<>p8-w7XCNPccATa-t;2UXh$eVC4@vLODXM^-PF0G(_?`gGxR%L1RW!Nx zuu3r0E|vRIG|pvI)JtlU8hRSTii)JVh>P}aH74-626j1!MI8^#YXvdPs3j&M8XV+8Yx{O5?t#Q9E{#I`L)BE^#H*b7>kha z!*VKKQMrXf2~m4G@}lw#4k<+CbYMm0HV!RBW&>R=KKZlFfX;1@T!1kpD zOtHC(kH!$!Hoy5f*&ajM~C&>M)I72DwmxqHD-YUNeJN<0CyWDIC&mj}txno^?;{Qgp{~0{jf0f`q zK~gCAKaMglL7uhr?SJA*@`;zf{ZCv?KJmu4|B1JfPrROdVg}<*ju7Uf7o%tNpz_>< zj6tyZ1j086-f$zPtikq;oXc}P1< zh4wm}CpdE?&?PjN!-=bE*+n;n6W3A&(^p5~$YHDLYx@*h&8I@Ec@7^r_3#O0a$$wj z`>b$UGXZr2Lpv(yt|-{*n4k@Q&_5Gtbiwgb#ZEeB%<&*0j_tvWLK#Yki&2%tO4iQ& z0TTzK`x1h#m+E8wC63j53EoGrMDPLu56Ae|2^h!pcL-?GDgEV#nfpb8(d$a?0?XbH z@E5p(Ie>y?XG_(>p~7V0U4>5=<<{Qy2%4VIf5-lb*Q7119eqN7t?I5CIgcA_(wD@@py_#kO4I<&3|AV|=^^9%s3yf|U1+;U3oU9_Y|=&jjwR0d@kJj;cyzqBb{fU)8ZT@)`Wa5Ebd&a*KOC~ zVJ?ZS578yjbBE|f@$L@M8~(@9?-0En7H;ZZhPkC-?&2_aW0xAY@4*%P7xoH-?!g4H zfWG}wC}Dn=&W{S-Pi`GxesPl5KFYLR}5 z`V=wVNBSuYP8J8*`i;uJIOWN*@+3h_^^n zihw5OHr^slsS1b2`6Rt=_VQYqQWb8E_mQSl1#9Ci(v*T5zafr}ubVWbd)Ho?QWa2+ z_mQUbaiJsLB2B4Uo$(gwF~tgBUBn?dIN2D^#OJQElA+f=wpryi^%@Qo)8bO0eCH^JU+73(Crgi zPOD)#YoX0hVNA)oz{>n`x6Q8{et|pCHIx_`gy-OGDPgxcmj3;Bp|qzg-6oCyvwe8y z&^|kqesIDR5unrPcOZG@a7b>_XyaLXovenZCR_CLAG-t)e>jU zh8r4mwqZUS%a3}gqG&@cZNDpAdP>$Y{yP76(Ut%E1WDZ@EbBpbpqu<1XLtdP4EWP#;7cn(gE(m&r@sF+ zJ2PeO96flJ*g;~Z=uo2$EHm{AGCish@C3hVBJKzSGde}=Le++*n|;-WgG;{d!8m8d z;YJ5q{uK%zg`*vO@HX^K{No$9On2F7YS7y8@NvqTN8>dY;BzP6!#VNw<_1iyJBhc#5RFV|j+OXt zs`THu2F>MpL-}nGYNQnT3NHM8>3JFW*DU`>frVyzm5EE_KRN!;xs=D|`F+NEqm3AS zGYXkI?a1=j&x`Xdlg(H3zepgw@@K=`$maTSl>Jwn+Aw>wjn8kCE1UO*Iky?y^4z)| zX2D`zKP8wejQh=Q+r`9`R=OShXP(yT ztzMY@5JQ?f3&SC)@ic60`2P5GAhXf#g@xeuO^#6zTEWdQe+!>yU=l^@iH;rGtPOK_ z+Rd9@JIvixX|~tuwLKlp3Nzpc|4-!|=mB@`AcV#AZhGx395fyO#-XXA2l_R5?6wNS zncsBmJTZKOq1AZ$pkA)fXwIjNW*P(j9}xTz!Iud}YkD+Azl4e(A{}CfTBU~y=L%Ck zOY4(7qv0>#@_&jq_X!>b2=js0yxH|T{@)T~pq8YyTQ~8KPSm)m)c+gaLIcjv5RhdX zll(v%U(Ypfw~_YlZg^N)?QXO8_Ve;#m)PL){Qbxc3(u+#4xi(q3HUKpWZYEf{--1V M6UKXR@%P;S0{>}PLjV8( literal 0 HcmV?d00001 diff --git a/genes2genes/__pycache__/PathwayAnalyser.cpython-38.pyc b/genes2genes/__pycache__/PathwayAnalyser.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..458e6ed888f0fb2abeeb5251bd1b048097d1b773 GIT binary patch literal 5026 zcma)AOK;rP73Q0qhaR>S%daMrSDR^S*;cYBTse(mTZ++Ibt5H3*#rbNm*P<3a7OoX zBx}HF7IGgOFS;nub|o*0!uKgaZSt^MjT_N%S64e4%% ze315Y>9T%KLR*(szRP-edU?1xOv5~aeDQ8SjXJ3D^(4;1e8?klE5v|MM8`T7!~&Dk1dwiG${k@3uXaGfuvOLU_einpkav~O%(tamQ#Rm{3D?FLMtajV_ z8X|vW2mk)^tH0hD%~m5;Zhg|)CAc=Ag|$$0qKqY3+^$O_9{6sQ@ubt`(#5NXX)dHK zy5S&_4bkU$pimal?1lIFG&U^FB-`RMBst+=fTh~5^yr%doS8qfvHZ#E?Tujd_S)_B z+Z*e?iRGoiwxo>}f?a9vVnxAEVSy8T`Dt43wQqo77K6lhQE0lO8}$3ivFG$@-O}s& zS^a`x=}kyY{5-Vs*D>-rjUaxi;Z_!O3qql7gQZ{_My?+jliDMl8S~mjZBB!(m<9NW zSzo`nS!SmdPWZIi!B zZ*aO%5_fZH;JBWuZwc=l@!3Spg6gmdPC@8v`WwJep^DPcohSSnT8|lu$Hp;1cT5V) z-$v0PX>!q*HE=6bX%jhc1t%E=_@po<=sm2BErpl6HgYTLr+v?kiT z!X~=szn!c+u4M}onN;^T29(^5^?6cx^=`~?&S`~BJ@eYQRv1Th9G6{yx(m1PVnyJE zMTfYgVPB1-W?{f-)bpu)x~P*GusRVsn~EJa&1TeZmYu@ybnLP-53NJE0CnVZrHMR; z+H>?wYNP1+EYWiTtd8K+V2CizlBB`PdY}vqk|}Zyni`}@-ZnV#g4D$e=gQBZ`3~+A zp8XLKOsDk96GQm6e0xlkr)>O~41Hv`78Vv-vOZZZVbaY4#r>nH%Ja8W)%Pl2AVyP_ zRCY*by+7ogNLn#Z7=Itj9upA9WCi?96m3(w-6&)c7b^Tnqpe;pn_yCK!G6N>u4vD2 zGDd0hFpDE;rcoxR6dhMP6N(2FX-JFQhqN}pE51!6CHNcku1mCnF<`PrLCgvddy%vU zFt41Gb+;SR4O0X<5%=dSKlK#Hib(u7CP=)*kyT^(GQEsGk$;53_Xeet6#m&t3dA7l zBwIqdFCPQ2VP+3x6kFAB3(FU&h##AAa9nvB3Dj;^kCEF+#lt+h;w z(?i$w)~NYTCrZ=HX|xxm?a}L(Tle6SajY$D<)SJq;JY4~E^KO91E>8BMOifDe z%X4ZF?IOrI0;&KepW{13dy0QTYt|^53VwQLeM2@UhLXWX`RC~6U!d?^`ocU|o(Ss` zVFT=fqx9!qhKD@$wU3n3&+nH1ir(>p$z3RMP!aA4KKSA={{@DN777h^1Utizf-(Bd z8t2Se1ZStwGOP0xV>&eO*^4ymL)_vm6p>0FeS?AtBymVnn1!bb#_+V6i}1^nXYOQ( zlp;ZjL~6T^+@WJ){^TN=NK)DB3WNl1FepoM(69ieKZK(W5bbSKb_5suh#WU;4!KL& zWF6Ps*ddL0`ffH>sHxb+m;qZ z$I@h*f>ToGJ3W!aY!ejS4;gsh5>dM4H}0w8^elIAY9ARZa`lRWjcVC9Y|C!f$Hn=63&WBV;TtzWS z9>+)_AsfHb@b?4hr$=;uh_n)kGsRd&1(3P+wXla4rRYS(m@4dP(sIAocAPTUIr+q= z;Z6Kl=X;n_c2UHx0-P^tnR1hqNPfd-zATYMLZR|HHxwb~;eoXGyRfG6e5~qx=nMfb z->3JUY=a;!lZ(bPV2&qy(%@J}uYJRw){p{0i|NdRK-Pnx*Js0&D|!0~^UrZOp& zMo2pw_Ry-3aRy<=LcxDVLkMdX6!|TcGK7OBkuI@qcpc^;C5!udX0M1yHKD1A0z|_fB*mh literal 0 HcmV?d00001 diff --git a/genes2genes/__pycache__/PathwayAnalyserV2.cpython-38.pyc b/genes2genes/__pycache__/PathwayAnalyserV2.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..410cb8941b505bf1cac8ff2fb4b756c021676d35 GIT binary patch literal 7307 zcma)B+i%?1c_%r%4QED@Em@K;vB&m$H4V**4^_}U1a0#)F3<;~FD=lbL9ZxK^r`51m+cp8b; zHGHF6@=JQ&Zu+L`w|q-IZQoH(*DtH5=U3FT>etkBsyppZ7jtL2XZ*7TJ?qb^{yBe6 zJx(ROmNd&$3~Xeaqn_p*Ok(QY!; zz2aX1U1nZV?_TwVM3v3ZB zZ?ZSoMRo}-Z?PNfOYAbJFS9q<6;=n;Skc&3cI}zQu01vUuP}X0YrJ(xTwQG#(%A_4 zAnB*lVf~bdhAz!?hjsCE({N)q3DXGl#gF<))IyD~#qD00?(#_74>2GV)LjW#XIG>_ z8tu^vUfdH=8iae1c%e&sC|Z3U9pcP1bXLEaU}(fYP2)t++|OdM8z!pi&wYgT@Be!6 zVVES{kZ%XeH zkr^Fzv(kBuncLcVZB)uiof5N9x3&%b^{lj}Wu>vnO3(FCiP_9~hB}SEuRYa4xp%bG zqUU*PN156kXB+)b^^w`uMn&I!?ejNAc4n(yga2`Ak6l)Nu2(d)TH|tNKi73_=ldCr z_wHb=4&KQsta@P3o^Uv|RcUR}S2j#pZb!W+NMe!tISIB3-|h2u*o#HixBI&(i}}l$ zdy4-NtAG6V<1pEc+CTXd`n~uDC$6`snEZ%$7Vb6NVi84K%e2pDMtZ7^^-QO|{-LpB zu4ykc2#C=eNGs{LZ{4Iy^QP2y_!&%6#mo1Xw))*@sndVFANH13qbI>fA;&qrv$TSZ zi)B?Vtt_uBeH^A+Ps06sy)fAq5&vwtIoOxp9s0CZzdN{lopxiQur)&){wm+?1^ven z=g|O?7WL9Fjr+YIRMTV8yp!}>VIuB=n5^(};cd0g`p@y=$87&Ue&=U@_Wp3D81eA_ zC-og-)H>v^9Ew)dV{xzD@TAcm_)gU0achf92d~~uQXws|6%Ha<5q+KpioZgZy5Sx_ ziw(lX}DuY#)La?#3fVB`r| zAV4kSR=RT^M5c8BtI!G~)yKx9_FQMif_71x(;y|K48X@sW|5X;`li7k;cbI?2hPY~ z5UWOIRY@JMU_m~ELb@af0oZX!0Dm|~1<4>_2TIN(CS4*nAh``&+O2-F+wDoK&HK9p zX^MCl0gd_t;0y0cXEScACJW>GykYW7SSmjY6C$w8NyHl^evRH>Z$)u?E0qS0>qu~O z_RbODO~foH4toic1f8pgu%J0yYS4HPY+CO`Tz`gXvfQs3iEJUQ)+-$db0 z@1=2TI|(Q8UJCvWEqRJBXB1!B zA_{}TD%{2>L8qp@!jghD|HJR1{e)Q2$5(MH2Fw6_Vh9dE(nz&bPf4swPj$Kgmp>hq zQj=P;0!%ZrV<{2T0nB%HF685jUMscH?*McGv7lT~4g*V{0gzEI)0${=GmC(p|6sE6 zsN8E#XcFDuH6Y|pTVEiNSMSCQdr`|Q>RHf6<;)m+I4&y#XlG95wiR?|CLQ7(4g0E$ ztC<0F;ia{7D)UGUm`6|!yKR%zRu`-`uCr~2%^sLf0siX9=W-P}!L<{>Ok$&ed7gkd z1Fyy~XHY~aXAaN6V?9uc2C5`d4wxDwaoR9Aos`tYsdMEEX#NEEG0y%P0Zc2m#}h^P zW%2fK8cQi1KO_@8wCc@fvo5{Kaxs%mFCZKg!&+hU>#FLzg*8A&LFFCNUhD7jRwT_f zj~V|umOUg!9DbQz^%jaoNjh6m$RaM3f09I--BebAq`-nFF-x~ZW15pPN{ffRb|gzl z)RQ#@$HmTs!a)TZ(j@aC&2`|4ZxKidPI$`ew1P2UvP@piLmqY`X$_!WDJSi2RHPFo z@Ps1nFBEp_h>r!3_}3AZo>%aVzJfbcEs% zY?5LnzesaW>F-;=mUBs zz31s&xBR$_eeOqc*X5H(7_^MQZ7sS0zQ8sm(qFoXl3n@|DaLhpJI=) zQb=Xc4F~dK8y-A7wKx)}W81;;rgQbMV4Zucu)DN7PM9w*)5MABhxvX2r7W}`Aumz? zAdLBwSVYCrEv%Fh%}J8vb!C8#=3sQX6~O&#$B(0)8W!)#MKy@t8>AdwR{&f{v2%gX z`HWU6lgkzS=A*TBS)FK84~on~^zsc9zC+)X2KOe!+JsmK1OW~Fxz}+(p82`Yl2a#l z%L97HDZRXqf4Opmk2w+uIJe)!@In1| zUYUWm6EiB9;bk85hKyr1wV*>)iljhQnR#GP?GE{nR#NLujcU;OT0R1iR+ZU^imdT8 zbR6?gcVMXD3ZPV&P+-K#e&bZX^V<0~p@^FAp6bu%ANB9dsjmjYj4VX~Q{Zq=d;}C3 zIf`;YfwP$v%%HSY>8R2TX3{eZt_cw1EV&Lxm{C5k3(W9VW$BY>(|5l`v0NY4j$$-~ zBNj{dA(iBn%jz)b^?4VfB_gIw?eK>G_Hk4FQ4b#RpieIGN1k@8x*~^5ZG>gS-Ob{EpEnK_1Mp1)8M9g>`_$W^Um0;MR`Z z)E$?fYmfxD<5AQ^Zsq}5EVz%#g@jD1CnQs8Oc}Kr-C5pKqnKRvcvx#nMPBA}W%D}I zkS*A%bOw8Jn0sKbGWJ!$xHHUCl3wYY9nZ2#PLJoFBcJj4)ChVtKF6xmy7O;Gt6H=w zzVv@Hy)&PG2QnpaKk**@)O!q8JFtp3P0>2|z6InorWZ6yR4Hreju+|6mBV`$*{1)> zlM?^_4gLf0_8}rPibrGxuG_?5NVkQIQ~(LgqsQ-J00J2`u7s!s#;m#vvqAnLc}CKu zL;&Rm;2D~a;RjL~_lkA4p9ooonnf@W>o!|()ujq}nF8%JI*S z#4v{)>!h?9e9L9VNA&*EAb}3F64E_a>~4_y2&JN&TwvrI@KsLHp`2hccG6^V3-i)A zP2i#KhkLP*XDRunVnbqNOx%&vWP4uy#3~I!ac4Ft`Tt~m~7Of14Tq! zyeZ2>C@15!8mzaar#x)+oq61ixqFl=!6I>w2o-7~xrn&+lwsuG#|QoqZb4BlIiqJ` z{gAZ)SuK|m%#1BuY}qMxK(l_Qb}STx&bXIAtkD^7f76% zd7cj`Vi!@kCubF$Mj}NNO|ifll5H<%o6RL)^gVK@mbxNtvyG(_`A2f^DBw_7K`wQS zkTS?i3U(2)F#}dl3y?YKs2n3RpHmR5GYhsZZ?liu9N0$63b?C7>ub$-o3|iw4%{e# zV;p*|Y999M^t#qn`-hr0USxY9eXsyIV!-v|&k;5>B$jHUuYh^X+{9(1U)+ zph~8Ql1;zz(b}UG=Dp*%nbIuV^OA6aR#A$!3BM_;pB#@g5V2Wh+63_#=ng<_+O*&&nUrz zFpFSgKT!VNZ&T-Ap+N3~_Kn>ChU!7`e*+_F5K`8U!#F9TstPa#ORJvKELaE`3wlUx zqCeQ={cezR@t?7>pu^Yn;a6YBiC-|U^=hh!XgV4aSV1VKTnhmH2&q`m#t$|80{{Vf zrkpr9T3{E6OlA}eDy_DFXGgz@G|r+$Wo%Rup-RjBp0;i0se|KBERqHM&oTUk&i@vV zyp!^eDq--iv{1fHO3Z%2hrTW`3#_Gl8XuyN){`wDVSYMhaXNH@U@reV8g;ydf1s+$ zgW~Unqa*kiw5t7R(Es6;FWAp2jK^PM2C|9}WjgpEkX{gU`)oHMx*7o2!X)qEKc!dx zJr(~%#j%^AD8-Y%KnDq)pks3l&vQ$h+F;Gz!-Ic~3Q7q!jECHR-SexyO}VwfKEFU? z>BLl~4t`dzM<#(llJX6`-7W?Z@1#8l(Iu4e@NLlwdq|%ff-pM@H~PHCe@x3{G}cmX znLGo`CL@x>8_2a2g;ms(|AdAaeME#}Uq}c4*1(ifH{}!~l|E5v4!^FFfZ-r+b|?S; z1c1Qe51@c;C|r?V{`b#1{{7*U29P(pg9J&&ezF~>qDlV)DT{rpng21QY<8b>aBi%ds$=)TM5Og3eTtD}fi<04sxp^TI Wqs$CSb7tPHcvbg;x9DB*F8?3F8E_l` literal 0 HcmV?d00001 diff --git a/genes2genes/__pycache__/SimulationExperimentAnalyser.cpython-38.pyc b/genes2genes/__pycache__/SimulationExperimentAnalyser.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a7d31cb06c0bee3effc24999d6d9e1830ea7ac58 GIT binary patch literal 11141 zcmb7KNo*rmdajMdVsQ~gQCqKcPkW(z+-j>Wx5v}&_M&dvfvs`evYpWG&}_aEMTv{_ zs@QIE9+69jnDSFMrpQC!3G8Xor|0-*|}tOyOEDs+UJFc9h@hR_fu!kCC7G?{{4)Rkh~ zPgGdNtCYF3<|l*bcE{m6NHprTmcz@HM#GQ2_D*?mNxm)xuPaM_a;xHDNUd_SvQ@Nv z!*v>KoW{dx*|A^b->+Pnqu+hG|Q)w-%IG>c?uJFZ6i_a~* z?z9|tQHBdO8JFS0QA_eltJ2tU9e#0Z$G6JmdaLf0%haozL7=GS$iI8`C<i#mRH@^KhkPdwqc4Ml7GFzA^GpS5iBl$@$)(72vY;9RDwM*2;XyEgwS4!3^jmFy*ueyFf zllIj&eRcUjMYQ|GN~d|p;f3~EVXe||okFwjHmRn=3UX)Aesl|6z3@`ufT6_$_3`e> zV;iC7Gfk(`Iz+5cZFgE;VXe&zC>ziP&2H>z`eB2ftT|q}#@n4OY(;EI>bUHByxyw0 zeoPYZ&C1r6(_+5fa9VzB3uT_KwYGeN`Yfh+f@YUOykItjwIb}JC()2i;AlLP?qqS+) zrA*XSP(>c6AV|_Y=c?`IRtIF|_mWegc~|9$aL=QPqUAJ04gQvzS1tLs)eLe^sdH); z>F$xo4Tt48>Ni)$P7f_PokMwgeu{{P^RuCD0Gtr&8-`FfwRvS%?HNMj9_?7v&^O|{ zsxbC(x)8U7F|YJuI9aBb5U~xLsnh~zY=+KHKPhkm=drWr6*@oLZ!w)2_Z49-6J4`) zj#|nyIoGpfePtuHkrvjxvXPN31Wuu`M&KV?;c1MLpG8omBM|)v(FnnA;c@2>)G(3T z*`6v?zySbZU;Cl9ZGeRKKrQ%yRX@(1twyEl6g6MFc*$2Qhd+^hwSiAUD|u~!$P(3* z4tJfZ_bl^c^%isP;V`wUevDUIHOG&+>lJXJY1Ea#?kBn9b{d{b;P0o$wrDXCoVR$? z!%1p&f`T*Dm&VrkX^N1oHu$Fo&<{l=&9fkI=GJ3OUgLT z4&o{BgXFHt(&*~}%RG8SI4?BLv0i(LPu12hKn47PsC=sO^K9bbF<{(O^UqlpWx;Bs zA!?-O6*jqPqSd=NhiP!Fm`KZa`crlL5^9>DtrG1yr5Clx$rks)wWx(jEnYw=THL{D z&0u~8o1*zGE4N?lO^As;Rv&9M(Z?6qS2uO^_nFA_vM#|h+12pb|FBnM%M#F`0c znprvcKm5Ac-}>d4v4S5?W-U7>ihmAK~=*c zo)U@yoWv_WLA5f&LJ|;omNK&rq}HnEuyTk3{)GDpap^%uX(DKMFRD@0hD3`xf1WzH zLct3Nx)uib{=x^hFB}jcKQMz)7cHJg($~1-$Jc;ckXHE&@MGC}Fz+%+7*jzB2!=eCG7}^SlYr#< zXCwYEQMKu?I&t}E=};jnn}$m%Ntz^abO6x6)e`uJ3xUD&)oLjnaB=R`pbEIY+AQVP z>fH6ho`d|-WTSF8D?8RXctI&mxZZ%MFG+=MM)M9~v44u#PNiDy@Je+@3KHeva?w|D zzlT5pD3j!1&=dW&b5TsSw7HRZN~OP{o`IZ^8RP@jHAAP?8323=lu|*=Fd&W0sOK~r z`8mx33z*Z+sCm?t`3O(h-OG<)2gi;7fHv=#$U@wS#1<*C{0W2?Pw7LW+R&J~3Fzq= zp5CV?CX5Y;{eT926M%z~7;@tPK?6c~4A2}4TLSXGq9q^_Edf?xO9(--&%{@@jLVi_ zNny*lAZWY?^&|#$Kx1Yea0szS6Nn)lm+1u3h#?K70p)fOLkIzDPLX-Zptj62!@Lwy z0iA@1hk0pc_cAQmn*ig=vUG0}OehBiG6hDFXOq2Y&t^FhlS+sUP&dO^Uc@N|b~_DN zoxyts?^(QO@t$k_{LmO_4x@9zm=DkCp|Q~%NG%$jR~o5c?g=rN7n{Qz<}rs8;T%q4 z4yQ1O(}Ovj!P;QX;popIh1OxJ5Ug7`pU1-aoD1jk4Vn*D;k>ZI`j3l57<&S$9;s(G zdlGCsh&=^X9K?zuEn|8=$)5I7Vr~~KQOvOmSD^+X@0-Y*k$E$Nyl0R%EAwV$p3+Z? z88Iv7YEVUc^8){k47<2V;!TuV7)xCWQeea<#5{X;UI7!nhJ8QL&$2}+mY>8f)7_`V ziTC1yr9Op*s01Q=J}Ajbdl^ z6|BlUgqxcyUw$|Lm#_bO_x|tW>IqR4+<59E}Iki8S< zCPvKPM0I|Hf;SL!bGJx%C`cw;KzYqu?@rxlv4K4#$~<{fnn@(NhYFuOY#15k&4b#G z+kN_#tu5ZZcd>A%zSiMYY5yw(H0#n#Qr&v#(#6FkvXB(NR$W>3>yf=eb^XlpP$Ien zqhES7y)<47D`0wD3d)7+IV#)CjEGAk`Q5BsM_T!45vRk|4r@i(^TFDpr@>l9MT14C zwp)Q9=N-3RPetqXsH)>D`>2}1YE8Tr^$P)#mMb%svE+|exU$rJjO@8284owr;V)Zw zR24bGb4Sz|QjtUJo*&j3SAMkD{9H^9zVL$kXF#6ap~ffrBdS6n`KD)*v4IC zEG~8ROBa{;x9CWbT+LskfP`;;jRI+P8E6v+WD0>M@Et1p5W(j#r>xa$ZoTUi4c{PR zwXg2@D(n;0j@O+FKI+(c*=pl>9gvZ~wkCi7di~2k{v-PR~An+4wFy&O+jW*|NNPme3zyG@K$7_1=t z|235G>lD090U0lR9m5ZFAAJj&g5`GZY&F`R>&L^Gueo^! zdv&kjl#HE5y;+J^?yb99m6mVr1f?mjUR(Fdjml2D;}y?HRjHI62+^V0$2VK$JN1?e z55@p3IGJrqj|HC|r2_yX&4GJm`B~sb_{u7mzPbvw8BDjKh6z|Evf~*bGM<4ZbAu+4 zE=G_~ZI3Adkz-1LeA>>lcos|A&SgB$mGqrw%Lo=JxJNM!h0!5URmOc zv{Ko@AR>*-@|@-t>1QKl0nc&ybM)oOaS{a5(wnF6TD}YG8;%9kL#d+u7 zFrQ_Okx(T9FW{;wp>oQDe*EeWt*K=9WKn`WMJi?Ih0}^hqs5HqWEJ# ztZMu2&>=IlaD?Diiof&9>gtVmSEQL*8m&tc0|dl~$WWtI8i!ZnbsETEC47|~KCeZx zc#&sj2%4C5D7Qdzrmw3?I*B~!zN93Q3E>*r8qk=>@0nyI~(>| zV3NySA@TJ}6P&Q-CmYUPr{%7DPQ3+gD|uh)wMNH<#qK??-f;bs(b&T# zShT>z1xo(esBnbLXa($U>GPW$^GryWp$YFFuUGwCM6bNoX;le6U4JSf8W#zS z(F&{G_?SKuVTl6E6j`p|H#^SB9JS$9RIkRVQTQW%PD?JA`xhVydyujT%ccY(IGkZo zGDQs5ogV{sB=nFQfbJo{`=s|0$hGk#@sLv>Azfq}y|e@v6Q3y!hjomgsF&?c_Hv#{ z2K0S6$;g7mJ%G_)iFn@+HVPj;4^H||omKRy7Au_k%RKmfSUg7WHBl9{1Sp?n0TD#?4B+2$b z%Dmvldai`I7e6@{J@-bzWDHDEg$fLg5tnf{TcXP`)8!d>b1>mIP zuD2VkJN-8K+i%qy>+Oz%iyDPn)i!seR!mlA*3Au-(rR$BT~Ep8tqOECpb*5_l2~Z<{sw9!=sDXqLxl zmWMP03E0?lcH98@JJiJD)ptOs>H2183qp^>?@|w+P;gIHfV2zeYun>Hl=oc<1O-FJ zk?=4QcdLzCBR`-L$v?IciyQ-=Qtl5C_<^H8aGhqB-@LV2UVY=;8@Jy0&RffLbs)Nz z6L=0jqZX!|yX)IvXW^ofj{(RvSWDMW%1W{od;t*Kns8eT>)Q@QI*DekZ!L#i`f2I4 z9S}{1v2vY(#^J^Hrw`}I-u#I}ok>a5pL?jXTOogsdl>jK^u>nXXr17XHZ|Z+OF_Id z6hq12Awzx&oGOdA9SVCXDN+svJ@V?32pGKS8Uow`dJb*@TkSr63_lz1e~f979yvhK ze?Wq%aNv^F9&C#kltjfJkccN@U|S|(sTASz5ar@46lT=J9FFaz;P@J4# z!NIX8MrV-!E(IP1wC9WIKtlR!%Ah@8%8gPE#4%{ub;d8F6arVUq2ag-B9Y@x>L#Uh zbREpw*@6!%6D0$w&QCyJ4M_60s6#@zAdsreiHP$KhX7zH)9ijkyNr}<1-psdJ|y*! zGbo2AMJfeSq=L+;-Di*4H^)=wzYoG=`audA12@ngAR*;{T&QE}9^FeJX`imwY0wA^ zY>yb+J&^oKKEjrg+X_rL#<0mC6(Gp}kk-)7kgT?otC4j{n&<&(-A2(EDBDe__ zM-2D3z?feJdxQ!{|ndz7ixZ3HCpuni5izP)DGY><_?J$y#uWa)`5>c6CW>Ru6Bqfmj)B zVmN1Ot+u=%^%u0$=%R`Og*1sUpkP zYEi$rfPh@}DE504 z3_pTg`oE>T;aQV0I&d6Ql`_6dKw)WC!k)FK?2ql=R*H7Pvg~K9Sy_h$M|F=P;#*gP NF<#SAmJPVw;0bXW6xk>t(l;`;iY0Hin6k# zE?KIQGfVCtN-+7P7p-oi3o_;@QpJU_+FNzR=4e) z9kvvimH(NQ@8$QteER0fif`cYnyud7e8n*SlQPSXh0Hq$@)Jbd;B06FOux;*(r-Jc z=(iI%`t1fT-saF7Rf8&{KGx75)q)z*Hm?lpqm^JqryZmlL4z6Z8{FmI6N7t?&0v*R zx&Oonn!LvAcwa#63U8pcf!a0Bc8&JxGpxqww3+g|QiP)@7s&x~^*c#=Cpy^eM#ES< zV`ziI+MUDqCTTZMvQ#2l`DUIBWtRq6eik2k7p1ZO$iUx^aa1mWUrt5)t#3oQy`1IBdy0$`hFnAkkhOWJc)FwBu!*)m6 z^!0)yub11tq?|OopQKWm=~%gu=*vvx%F6CbRf%|%M<|q$5YeHiqL*rRw z8cQl~i0@{o+Y^p6l<4M@fCTSkjI^l=E{lUApc6V>> zs_LUiglR;JtCIA?Aw<}Alr7_7Pc-P`=~Rmiiq>h`hKLdxGURbz#PL%@v{3v%zqPZM zjpCg__UJH5cRKNYcqbCq-n#bHo%b=Uyr$zFnnGUF@y=ar$*$I|51~`#QZsRNe5kxI zOwuF|!!?X7X=;NxY|~t0O}z0lH<{0le)7d<;Oe}0mt;48C{5d@73>-kg>h_X(L)gi zrg$BBOXs0F#}%D&kaBg(Bc*uCI_;|3>Sm+yB#*=Bz6?pMJn8iaH&pX{iQYA}Iq#s= zf#j9sI8f>p5qGtSqG7mHbr{AtL>LcEUYmDa=)iuG@@yY_pJt;Zr6aG_mn+K}sV>fY z-iwp|UOvM$wW=rDUz9y%;>*|zEy6N-1zBZ@N!mg}YD71E=2*Vzn@5*lvg2reX0z#d z(}~ub{UH*4V@lg>+%!IVeQMH~M!@MD*f_uD7EbomD(F;OCyW^n{oFZr3x-naKM<<=!*+jzjH)uJ->(@x=(J-M}Ipw(ydR$J7nGHdll zZj+dn`_FS_yAu;F?WvD;PEmyjYelW_uNZhU2(_jKe7zv}^|U#JP;UZ`A!;_QqgNHd zFX|@{|7T3Rnb(l7Qy%XXdT&$f733Q8mIaktGyTF|#$Zfizn1?~Q<+!aQtaTIHYoo(fhI}!#y4oNQu|Hg=l6oN@N0OT8N;6^@aQA z_FVilM*l?w`Bg-QX=}UCgnei-59iTiO~av_;h3*88wlpHy6LcnZL>AD#Wt*?uY4il zoKYPDoW&XaFJuUsrZcKSpp{#X4V+i=*d|cpjGHIs2|IR92nGq#+zq5~vH^NHr5iZK z*0D#VWl-G!7y<%sR#=GZRF1H6qM;JnUnwfk{?(%Lm_X9eEvbz~XI;aoOS9m# z+v0CgI?HVtSBy)THT9^BQdk4hSi>exZIxT=#?&u7ti{)>_y@Ez!e-%VO7!f1@stPR zm+{r@3*u++(z;keuZ65y>A`g85UT{W94QxMaW5Lj%I$%o=Fr`X)QR-7BosDjySPeS z9T0&S3|vzVlB~E%#TS*dZmsx{tfVpg#o-Tn-g3BCz?Q=Bo^pl=5{%6#ZcAs{R-ttcoeFC)6pnD3_UP$e*>N{F|jTMK>rSWNe$_Yj`OZ39`ziREWrk>xc@3Q%rv6b2j@7UwRQFi<#3-WP_50GY z{Ac`*_*~%Ko6)-RgY~EDul`x|K0=t$6Ng=9ZejWZd3G zkRKuf#(I^nMR6A9UuwN=mctrb`@Y<1#d9yfvZz);X*~s zjyBtEPy8H)hpEfDryQ@u7!3>Wp|%vgSK27DHtKS$Pcz+>W)wHyBlO9JJh*VaPU6B( z)=^)60#BzCE`yEJhD-HJL+jF|TV1*PEm{%nzIv@6=i#|QPMe3?-hGUQlBk{ma|Ggu z*rO}W%b??sP{^RW~iD>@N0i&Qr_Pa>r5Evl=w>~weHXXST!h-?$4G0AX4!32K z+o&Vaz<}kI!XdO^Mq!C9^md3ulK4n;SAZl$;y$%uvbOZi+3hIzQ6duX=DYBb;7S~J zN6{EeD!}wOs=tC)9|6yQzumT!-Ajf;RRI&nWybCflX2Tqro2y#21Vf*9vIk-;6i$C zSEcwi#waZbVS#cV@OV7TO8e1v%j2^~CrV$hZ}Wp|R4w&Nluc<&h{b8LB7TiVYb_!z z%pGd9G_Q#J%%sR+sQfS-WqdNEbUh3oPNLzg#tlQBb;D4+N&~<}XXuNJ=2C(zmLT9LH+7Fv zt+SEF7E-1%jsNl*#mI)LJH7@j!XumgaQMg-h4Xzt>-9Q)6?yQvYU#9OVZYwU!PxBiHe`#Bjja7t@SpW_NSs1D%E z8#I7nKrqr0w5DEEzRDSp30K~}MGoRmZRkz&_(I`o4O)ZjL5Xo$cJ&q7-Vgk_~LA3Oq60Jyjb;DGppH zsaqlY*Y1u!-!v}ELlBm{KXyWSAiE}cvw=NjH+mlmsrMy z-O~4Z>Ppkr66RCe!)6XA@$Ch%&4hS_{(=4et z-f(i_GyDxug=FgD3A!Bp(U0WXhC`a1ax<@WE4xo#@>aeV<*hDg81C7~#7n+#bSOW& zmGxTD4|9R*`JeMzF9^|hu}JOKFn}YbX4^(#ms}fGmM(JmQgHC)QnIhM_C-5L!EJ+< zgmoakND~naTpb ziBB;5ibm=e5lwPp%$t>W4PVUhvHmgjQfK#2zRR%SE6Vc`bRJqKcHQ1{`;mRnEFHL6LBEu>Z*I3`!cnv$=&I0|2rB=y-0TS z%JW}!uJ!--bHtxxn%RoQU(nk0Vo4N&AnN=o8+zV_9w;=KFS7NLMJ^iSZ&UGS=rDh| zBB2=u6g{A5ND)a`e2=2F(*7vBzWY+MAeUk)nu+lHIHmK)`B~Y^)6-Su4Vu03(uCd#Y`> z-P1F!>hXG(8j$ejgjNb1A`wN(7zsqU2MG!BCxk1i6A}mTjRPkx@Ow2g{@4u{dQ?@f z-m9vvdG$Vi)o<5op@rwO-#xi`;c?6Q8+Fb;4mvNRNP&u5oMl#vp|vx+X6dp}dMB z-$bRX9ZRuG*3cePoGn`DIp{HEE9ZS1J-4t&#Bn)B9E{}^?+j&oy?j@otbxCf=y+r$vr|m>GJwe;kY^PP0MJ$SlX8B3Zi4e41 zXduaD61BTozmtDzi5i&y{>IhqZYR0A)7>A$`PH@LZge{qE6=aIc=fd;PvnZJuWlLI z)K}jw(oA0I4fOm(U$kOGYp0t7dXcQ6vRKGw@T;>16SL^INu|?=R7XRN`o0Td%Nnx6 z8Zkv`u|PqMplnc37$~Q33#jUl9kNyH-6w{wVk1vk%DrSMXUQT3E?JLbB(wIeC^}fS z*7SvswY4A1ScrI_olbm!D$R9C#Q9dDoh-?<7x#KlRI{WV3~L2>S9Bs!m?y{}4q9UZ z(^zV>Hm+u9?K)P&kccM5St=9yuaZQT?Lr6itFvV$E`iggQ=L8wAWlqZ71|m9f0(}Y zzOC#{7|agLcSB8m225OiwIg;rf8n^8JOo~=xub+s)Ii7aOV{|}cjsJ$@F+~51Uh(-K zqxC6q#}I!$oNz$g(UsRDn%lv4Gkw8y%qzFfuxqbJ<5kC_E={_Vac4z~=S8P0Lx9c~ z%$P?sY;Q+N-tO`=--_TZ*Yp=KS9Igik=o7sS*GX45>lZ^q(mfr#D%8n?Q{@Fpl_rJ zOFoOrvM;mC6DJDokdZ^Vb^{>=KfB5nm?;<)*y06@KNn;uAd^L zt$@fV?{^+nZB@pdUY0~i2}SZvEGCywQEaY4eJ)!%SH!mo+Rk3~?aHm%u4Q@_V!) zB!40ab`hvqGvjl#G~+L-Y-W@(W7f?02U?o(Pm*9pSyf>)whB~v32_*lstS!HR29=! z_6}4P2Ur;&phkeXEgpl~%u)V*sz}XV*X$k5RyF%x)0rG-JV`?FhHTcg{mM0z6={xHSG%83WRYM0S6=p=&YP}=CCeuJ>r%LVq`mP zw-sQ#F$Yj9W9nh6d1~sANN8JmC&>$>!z9Ksn+RlZ&^T`=-@#v#H|YP_3$UWV{K!Qo z6tbU7g*Z0x>LwGOv~~mB&=v%q&Os|O{FA)6jn3F@731t<8xD)yNbWkakC64b1IT1^ zxYnd!qW+Ep(C9tDSOSWW!`&!{j!G!rk+onIzOoM)FV9=tC)}_^P8hC{JFRPWN8HAH zTLcnz*TvcieFu=x-WV8gHpwo*%i=}In}N1_T$fi7+HZ-tlNk5h)fL$V7Tb@rej)+l zpg^&T;Yc-bR+CE{Z4YqpgB5M_&4*9oOVg8RZ&RWYTPbo;vbmYIQ=AGRW@*qhl#3r+ zTKoXKHmPs=ki3FoqTp{q0BQj$Kq)Y11EwA$96DlzO}R~KCbu>~s$RIjrY<3H1sQ%N z^Yb2*fyI5eJGuSWhEOsu-?x;%iWT9&;h7%b#Eyp)9>Cdw(g~qI@Bq@@D*T;*a5|rR zU^5)ZPMJ5RJrfh>&&0$#+TTw_3gf#4ReN%;4~x`pyA$`c4N&Lq6RJZ^bbBKfx^hsY z?cFTiNHVD_gEJkkYy9q!TtR_v9mT}REg<@L>y#Zha6IYc=k^{VdDA}Y*nfS=oY<$f zwy!*|S?}TV3$Ftb7IcU=?$gTj3eU2wd(ZWPYB;DFBi~}`8F;DAWpvs_fPb9r0F0?H-w7(6C&eA2l zXs4GI+B-0Zr@et`o1rfC;$mBh*Jz1vQ}r$?0`-NUw5CIn(`^Jhb&1xV;T%lb!NbRM z>3^QWkFcJ+h{{3~t0PY?qYccBW(loG6+zH`3QyPh}a7MV$7;s(!O&1%7E;I0^GWiMohkUclods(X;qC!}^Ke*El~Ea2 z1Eiv173s1Da6I8)|k`RSoQ@Mjo%~ zrZ;1}{}-w>sur_mZ*|%WaSMXx9;IMf>(IpIFZhc`N9DG&2&qqxGa+o+zVdK`Vs;3xqHrKb@})|cy2WpwYPbfN2_n0 zF{!a>_VX0TSvzT7Aer_XuPYDoVN3;mHf7 z_AscMNpWcQIURrc$8=--pQcNh1u#i;?@H}JSGf2bNaIK14O$~SeUrFLjM|i8J_G2+ zw0mX~EY|E1c)!%%PTISGB~vUh-Q3Txsw7ZKH{~i2X~-JRGLQmc212JUyNKS5J;N3N z`kyv1eBV- zE&-$W2IKhWBnLh5Huz&ZBHP9Hi0&5fJ}E`B0ohw8g4K4$=VF5PN#xcSr{Lg?H&@gB zo3TvzDva$jOqR5o|bs`(HD71!A_w)i#p$YDQ5BYr~ktQZk(?7k}7X>V{v_F^Fu z^Fc{mCm|`-e8e$dG0aWN+!YLXGmyhrxH%2;hBJZobicuMTv7X3a)T~8Cb;4VS=9;3 SK{a?Xs0UvO%GG-K%zpufRry;0 literal 0 HcmV?d00001 diff --git a/genes2genes/__pycache__/VisualUtils.cpython-38.pyc b/genes2genes/__pycache__/VisualUtils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..62bad2afb31e5c2fe30f755564dd16c16169f1ba GIT binary patch literal 16251 zcmd6Odypf?c^_u*ek}HRw|mFqadk&h2a!Btu~-00op{7PKv5x=6mLaQuow`F9$+dz68X1ZC@Voc3kJP{SqR;nl_;B<$ zg@@;GnP)YhPxyq^_LVht*UNr&4{Qa?AD?WsrQ{tRBkDSxuQSlh=4~fg-aq(f4 zoDt87l=uiz4~wFBN?b(htaw_aQR@*gCo;Itig}U6{X=3&EQlOxoD<8UAQq82FIL1e z;-g4CDzq!U)apIX+*(R=LVISdD~)@YGanuUFIAhJz3}I8(|581w!uch_FmfrGnMQu$4_x=3{o&$E+jtEVJ@Eo|TweZS_jVh3?C zc}F8fVI8K{N=~@eZEtUNOvxpXegLe4zq7rCTTnI|o0b#aXm%=1VIn8m>B_BYySZmL z{*9_!f3L27|L(FAFhs*h>Gz^TG!Z9owb>C)*tDv(tA=ob=&CE7Ktnc#6KWd`1Es+m zO<`?1K_G2(oN&F_FoAvV#In(DKV|LqjM-|Z)3vHrv)eIK`g=f+-|t>-y!!KJ-}?8T z{LbaQ_;#;n$XPYkl-~(jO{;A<$&HQfPQ~h0s-4;?{2{*={hn-_b#&WT~~oY@LikQsa_^HBhw9?(L1K#yo+_{X$!n(iC^zlRd! z%8RI13$nKNJApgLuc-Z-AR`MjYH2O~rY;OZ&cE*EVS<(RShk+a|oWgHcSXQH)y?cJQWlQ(N)9vD6>6gZAhtUw_OFsr$G+F8tFz zdyKH5_b0%?{kTtJX2$FZdvd_f_k)`6ws!5DI3-Ts^7W_eDLZx(I~w=6cTcE$tTibf zBDdeyQJTQh+2K>THHEX`5psTUZa;wX*ue>Le#jXfUBgxmU{UYs0CzRXMONJ5p2s3a zHVra|isa73#&r3I;^Vris4~DZLgr>w8at?wCJxr95e$Az6Xz9N<`{sD!*faz(M|0k zWZ%`-?g`}I1z$3^w{X*T)P8WmQz&vGm)g~>4N-kIkEh?k<=gwnbw&Hw%oXm1iefNZ zM#s8%39O*nHlIbpZIsgFV`w7Dv)4^l{fNoI`_a8z=ffLldQ0yuhg=lAy-a)E6* zKDkK19ewx)@}qcshFO;ghEwHe7oFfn8w}Lre7|uSJTA0Vl~)a^2IfS;z6`}hKX^bp zK&lQ7?Ue5Tn~w{vp0OE@JqPZ44jz3z&>Pr}s-^!RAVO~XAUMno9fa@rSbh+3`%uSB zc+K%!-CoHFZFDWGyX6GhM%|KOjL8Xfx*a1G1(o#h7N!$74Xc8cYi@vt$M~>gHEUN* zC%8j~>r609TY-2Qfr4r{urnB_`Ev_BIJow z!P&5a3MSXsr#PU?1ry+eF>UYS|CyeSqH_GN8ufc+paQ{0q<4Zj!B1`*a<}nQKgr+S zU)cM|E2gn6x;%?!d4SB`Xj+@IJRfG&;aBT!4c{|V1N9mbdrIG|?mb@QiAjQU@q zv>x2ZOr3OMn9dH)u};m9+|%Vao3iE08!cyKxG^fGC7tO8=3}dB)iw=*iQY0W&p5qI z`6yb-mjIl|^O&wHJsei?st^@(dxNKl6TWUVs+CL{2fmvlXW8UK1P>9Y^DJ2H_1e4g zJTf3(R67kw5^ZC!i0$fz(Z{w3JU1}WWsS{3gI7%dW;#Qb^I z>x>Ovz#V(#%KLKG9X}|=3AmoGm>+5(9Xm z-0`D?g0-777+pxjRht>xe_JzWKCln_u3;GemnHjXpp&i}@WnUe( zQ}2H`pD6q3VY|T$g!`duXDrPQ+|&ReED(qJ4NLzb^6X$;htQr_Q)g(&@j2Q{j`nGF z*gD#4IACvJu_al(qpg5qU*JiB?FKPhzRBEV(KHQ7p1CSzSEeHJbyc|~ySSGFr5B6O z%S|L@llh^ZG_F@_j#j&mp5n;HzJNCP#}tv0P$P@c^1jnEIGEbd+JZ^aVk-rp)@ zB_L61t>pdXNA>#sF!V(2U{oZy&5(k;C`M*{c2JB^PV{oeMbb-6h*2-)zOAm_%IyiV zq#DWatza#{=2R90NV{H76TiQn)Db_q=E{5-tOq1VB{eZE=LnQEL#fJ1Y&AuPC-X*o z+bF#^uYSLN`Q8-Ug(U?bN86<~*n`xlCXYM`UWsiKFp$+cI$u_SCuOM7$%- zly9*1G!MLW9FA8E*)+`INh}{`seh}wOc(I^$u zERU-6mQn3gEW91m?rOWz#i<|07HDp5l{Z1Vlqkc=nZAd@|a6ihdJ^xnmQp- zH6iH9lXyt^kLVMowc9o%w_WOl{3E!@A1CRl%YWw8f|5E7>oXwqa8JH)JnMvvU(pCZDa zB;eU4dj!`AB!NkATnLk--=D!{o&oUbG3^ZgJdk+I#PlhR|55brE=9XP zjf4dQ9`#hLHO#o68~>CK28W>ggi=TX1LN#|z%|RkD5qJVaniv)nK>!_FdYIjPSSX* z4l5Y;V8lK042>$NDA*5^o-*!Itp|=6dOATvFWV`qI<-~~v@`XZ^T<$Wu9(fOX1gkz z)@}vHGAa_)h9Pr!m3tHzN{W*1eUHyvs`b0N%NN1Ml;O(JuF5Z>;BMpyfs%82Cb7+J z!wL7QZ3BGFiPCcTWyE=IoW>z-AwF0vp}7K-Yf-4V)OO z-%wVDz4I@28=2WUN@r`ER2pWx^;uHJ5w3qP!{LyuVXsh({?9V4yXilNbSi#?ipNFs zx*;3QPNU+D+hz5kfhOrHo-XAo8m0vRY<6dp3O5tinSk7-^!5iSm!}l_9j*f2wQ}98 zaBcn?aG5gzKJBbFMk^caYY7met8*qb`3;mEW1z=~@T(~OgAt)3n*ZlS2vK%OgqHsx zPBXxO7$>?t>(KlHyC?L_pT+nU`N_Y4luLf{E;Iikz`FcPO#Wp8Ql0!cf-ew!jo_~m za0=yL2iSY~E+)g_yvMB0B_yP3BDb>tJGj=IL>pNGY=#Hv{*)iUj!DDZ2W_0U`&&& z1I@Y#jk{^q{H*N#c$U(HqIeQVN^-!-sYKp=Ka8z@oMSkXz>~B0{cfi3Y-JPXa%LPQ|gvd3-SAl4Ff3^J1U{gRQdtFclT! zCpj$kEDQBA`^bF2Bj_08BsEwqYTZ4VG{hL5YWD`h5(Ta{m=vYTGoxc;6U< zvIid<*snJ2`!W4m0P2xvh4Ldh5qrsE?eu9q+z-|NNi-KBoCsfYO`-@TtSu zHFr!Yqeo1W-$wp>EacBieCP5p-voCTr22NgMV|godOqQQ{TTeLm*Gb(!auqUf9R?xiRZ<~#S7vS z;tz=z#YAHOik`UExYmb3wjaV3#ucHN_dvS?GtV0{@T)`)bPFs2TCx&>xlOPA zPb+W`w3LUDHE&Ye4M0r%HM$RCW;07Y*OMlfOPw%<4nBnWn6%@qbFGK0^Y-LT2$k^aOdLFFj}D%>O7{4HZcj0FTfg>Y7f;P8iT>qqf0=46<$MGH}gw&H)DWu+@B}29UEpHqjwofSBYE-}$7+@1O z`qRh8I;+NtG1({VX;On~H!W^rji+z>uNA~C%r*Q-YM%ACeQ)b^UA*Pazvai=D$f_@ zcLsPb^iSERS}A)*{HSCB>$pGQJ0EEwrD617^su))ZHT6J+QH-|)hqgprU>|f0CkAzV(=H}UIA-9n= z7Lh37r<%_#ujWc6l{26964_#w@dW8~VKGaD>2$7CWKMBDonCO8uPm){Jo#dFQQ=AB zr|RV9OQqs6ik9+=`5XtIPG?uL?7dK0N~%--h82m#8NTsjwgriy@4%fimZcS6&E?4 zQfV>0%(qxtOwSkDY6Z%vd(Z=JWZ?oEmefw6vfwFK6&xD<~?a^NUNYv%H+nXSq7d>7~Ub4vckF7wBL$ z?IO$;7CD(Ytd={O%S$Uc&d=)VGG-cy>_ToO&!H~m^Gi97Ctob&i=451q2N|76c!d# z97(CTl3rx1QmIf}bl-g;ujXfIbvdgR7YK7%^hPK~)x$d$4j#(Z`@m*HD1Ws8L+ zCb;Tq4zmlH<%}BYd~RNir#Qb{Q1gtpnww{*i}QtIfdgB}FU)7$7c8wVvuG*3u%xE0 zu)N|D$^6n{-d(|LDZfA>SuPak)O*dX=2le^*La0R#eCkK=iGcQUnJ&YdZ94SqTEtC ztp=9QXL9Z;qnS&GgDw`bi>rCI!s_SL{AAK|xr`!!Om?oIM!1}DCwy_SxUx)&nlEOu zIo4UsWZc(TSjsNUb4^PttA!QLMmC*YU0@yVrXq=Uc`3iT$muO+R|{!Yp3h`g^CW;= zalV*czV|)+oB+*yPOR46HeoD7M2ftLV;va`E7^c9CX^QYJ~@ms(j)}{N&`x8oPZ{d zL-8HR!enHw=%@sgln#rSLTEZ++1SWz8asD&`47-i85-bxi`US9cT1_ou23@+(V&4J zifDTmm4pMioibL1j=hREcJo2*ydLWVffJ;}WFi7`y%rfK;IIkA9iI?_6$t z{5SWee(w3-yxjO4Z+m~@GKFXSe)Tf!BTUP`2LMB6_r{=4jJu1V6L<2Qm@47Knaa2C z#Fe_Mq&yRmFA%^f?8#S&i_+8kW|biz?-R!b^a7aG55;k9FS6`*`4ryF56utKs+vuu z+#Oe!l$iVI(K&oyufw%Ghwjx{p5|J%ytRA>3IPr}-@fjy>0Qkej-8--HE08oEy$(t zZo;Mf3KQ~Q0u%CGGOAE1a7NtVq>4Ko_bu{cM+YcB<+cvPDk0hk+Bt=1Wt&Sq>5Qnr zgTb039Lm@=<>y%8O@fCB{ulrp;_NkCl?}5iEhi+69wc))%d+SST%09|Kq4n?XW|(Y%EI`7Xo##&R0QffYN=*l}X8AbSb~qoFXNqwG?dPX=_v3_5e=Kjj#{N_@(# ztH$h%64}xEL4{eq$a>>IPa+j`Uv(Rp0M}-(A~jw>jnrw^2uN~w0@rCw2dwUtF$_jP zZNkq0Kldg~>W!{25kywWWy>eL<*0O}fWcp=;1I+e46+gDR!$5a;FvaD-&m+t;p)*f zz14GKoxw&j%h4_(&)kWA1|xwr)@2{^TYQQu67Z%Yrc^5RP>#Cl73bzQDK5L= zSpyZ(#;D96PSPVqk7y5L7+|jbkF52t2>ufR*^qPkgPD$la&sNj>kAB{x_tT|#%pkQ zBfgG59o8DegZ=+r{++-iJd%i5OTsmA0cPH$b`Fm6n4W~OcLv7a1fFK}g#RQo%aF3V z9h;Ni3;AOjeUUvv^2<#4|9W<6KHR6%IG{s)ErSK z)A02*N}^f-8zEh6{1f{RFNNG15;$C&Y;1M2E(NSXiG8M~z4- zxBTPK6C-#+1fG|IOJ@epaL|u3A`f@ytLn~(Ts}waq!nl}dNFB_PWh%0)rhC?v=16k z(!)Ncu-l03gVRW1cT*3-9#>f7L#$&Th;>|H{WXLD0FT1zrjBAAj5VRKCWctYKM?B# zuCzHTQ~#PYE%2J(c>&XYrCkx&xB49$WEkOJ=T4s*dBtv-|ol!%DqK8S!Q z9JPL&E}nenXj}K$sFXaC;jZC6R>naXmLW62n$2LT2T5^z`pOgkcVm<%(W9IuI8AVT zH>)wt9~+bY!VkX3?C~C7`oZ_OaJe*_oHXi@-Bt!@mkcB13~u?<=BP-1wL zWnU(kAQ(#0O5jcf%1Mgq8F$U8I80zXa%k3f2djpyBuzKAq&ghm>l{7B2Oos11lBn* z5RIrY9s59rJUfg_2^lsL)EenghiMg}5RPO-y9tfnDjwljDTHy+zkOH>B?$9Be5P2% zJQ08)dw`1{#{_ReEV+rcC}S9w+Y7v1%u|NND858eN;8yi`KK|Oz3B%#6rYh9l$HZ@ zEAGu6`mhh3>$8W>^=H1TJqDMHg%)lA#p_Ic1prqXa7Zw9{Cf1ccu+ znxtTOSHG7el3xNi=0X`k28BkoRzHE7Ff$)_Fu=5q2-36tD1}6r$dwquXdVlJbV9Qr z3LfUcxu7dI0KA3$*s&H-uZ4c31@exyh@%DE5O!Pz-X-vHL>w*yxF?WGa5wV_A54w7 zxi>Z3gO}jtJYbj|>f#xudx6MGK(xE##;=C%E7$KU58qerwnv1I*FJJzx!ZnNer-y< zDIz!!EXY|xgc3f5VG0pSh!J86?iL7{i2JOJ0)`X}D;TMaT4N%tD14lsh@o8V5_}A! zJ}HD~6kU_AV42F1mt~{c>4MK=t@trlP%x6boY<>)A8(lm_#r3VOFuRH3Sz}9@c-Gp zrw&VJL7j+3>$a1$v6+MQ}Jn+4kXF&>=UU>e>3+2_+gtAk}{|1aMp&u~y=LwXB zd3xl$LgzN2Y)yichxvFa)!8egU`C$NOMCr4hoxx0vD4pMl zKEJxQT6yt>E9*{zZpItc-3o0N@@Y2YNmUMBp|7?aTvxZymC7C;L+k49>fCz$N?ETX zQnr5o-kDLo9)=zb3>{_*sLeFSiKtiiugCO^mceG4fmSuE{^#6~B50c-=!f_tlw0P} zvPPyi_;I_@iLW-+3QF;nV2-9NIPA&rK%V;nhGn@X;Yn!a=YbwHG^D33m|3 zij9W=I6B{P=XX!RJ9Nu>>)wUP&D7lWZ>nDr(f% z8pSt=*lZv^>UQCM9RwHXCs1!(M3uKD(V94z6e{%2P8>`rzgCj|IDAhOXN_y$8e&xu zws_0m1sgu;z5{$+G~bcnkN1;6k0bjbuP+tdYKPz#EFhe28ecm?A3^xRIFE1OpBcuR zOyJ9<_uS7b{@G<&z_ga2HJ`vd%YRMqZwW~KZd58KA5@*#Hu0Gdf-Fpa))3(bLJX5e zOMH>*lL-LND0=mV>H64L(XzRTf^`J#4Qt7dvi@(O%!wbdY$-4NUSdE-7kiMn@S!4% zQ#2^6PZgW^PSG$^r`(tnycXiM4{cvGg*hYA7+9?!W*%-khVfa|jF4*E@RWCgwKmO! z>abV4@Q)%_{usd<1iwu1KM1A)oZv3Mu93gO3@{V~>e4hf_-e&91cuG(b%P&G1bDEd zl4Zn@qK;87N10FohLU=Mr09OQ6P?%%#GM(HdJ_?aG;F~h2LqjJfOB+Fmp{dB{|v#$ z0QhBQ?dp9yQc~r42ba6~V1k)|+zB~@W*G!domS`BX>2^o9+W(2`Z)R5mfM_YS30=T0x%>Q$i#}2VO z6Nl-G11mP7?vy!O{x)CX45xmZDay<8vji6jlpOLIroKY3Pw)+bUnKZ80WDar-g2HP zN^MFsP^?Q$;gLmok}Z@R7`e37729p&S=9HLYXDRU81>JEP$}5ilE*EEN1uK#5QvS( d<|FyoiO6Im5gUmmVxNozBNrl(SS)t-e*yJ%6>kc)dF$g!~M_ziouUId=DeXbB^2!jg8>j@76e zvvESuQ}mRj(KGanWzi?-6E=ySqvtG-UZ5AOh(1N1vT5`g`i#w@m*^!c(dXA>xp@4x zq23rpcg9ko;*o5^v(Eg)zek(NQ<{wm#bM1|n%3+*tg$(&}J6!a- z@wj_P7oY7u{*C_pO8CRQ7&cm{!GXopt1q&HYY4}?vT(Gf!GoJHlKow{{3=~9R17bd z^Ha`wYZ}f^CFgfw!!S4UQ%vaXJ-IG0*_&R56bRGf9D?G?NawQ?^JX_AiS$^YLU_^F<) literal 0 HcmV?d00001 diff --git a/images/.ipynb_checkpoints/G2G_logo-checkpoint.png b/images/.ipynb_checkpoints/G2G_logo-checkpoint.png new file mode 100644 index 0000000000000000000000000000000000000000..3ad33804ec747716a208fede5d09e3944fa37da2 GIT binary patch literal 24635 zcmZ^}1yo$kvIaW1!{82syM>@Z26sZ_{mUEQ;)ry~^QB~TCv5CH%Hij<_N@<%)LaTmbDetch#Az%Oi$ov)}B8pNX zA|ORaJ5vj5697OmB1H{OU1ba>M=LQNY#snFiO_{GUy%&d1VEGr#>7LzQw@WGwG>f^ z)jFbmq1BPJtvKMNNLn16zAaoN`tf%4S_V61@UqXL*YV5IwBJnj(@chw+0is0&oz(& z!=&&OZ~%Wh$VLVc9U(P_Jdf!Qz$$~l1s~WuiA|oIzynNL-#a?HV5{=Ao0A;h8s2&$ zVdHWqU;q)J+$m`jQvr`4fIEj8~NlYm=Mz9PrRIqy;R_TO86ISW?wLpfP zHZsy4Cm{JNH<1G`;D?Ul%;<562Ptm;!E?oCe(P(`-AArP3L3S{YcAjLl`>ak%S=Wcm{HSMQzCt%@GLwVccnID$k-OGCJy_QO0L+b}6YNBZ~SV8=#c`45ry zjj;JH#XKO`Zo0X}l8LKoN8}z?$&|Yg2ASKQ>LR<{g-NpG9ew%al5vS?;xJjp4}!%e z_WRWQ;d>zi{2?f7ivoXvFyVP$vYvOdI;rsN{qc&R1Zq{xL%k@+HG{A4ks^C@f!9J8 zl05(o2GRqOw>0#sR|~j#2l_Noogl@<^#K;P^X) znPkvgJ~Kpg$ixnN~u5p&z1XI7_g0Me&MC7)pZifc|^M^=eX_ zG3ip~bmH{LXg9HsEBRpFo5@U%g*4B8gD3kavBSP1T>XX@niQ184w(NN<^AF*m-fa5 ze15WP4;cnLVxgj-5I>b5g?XI?6xu&+KbJI|hpZ2J0H&+=r3arJiJ5(XJIwj8YMIQe z&?UWyu_Fkr5OzCgfndCFNCyytY{;BhuLm6)qyu2(bn?lQ)glj2awJI->p(w_pYWRV^^-3tX>Sik8MIFz zQ(sgswJc>3rYVLr0(LNQut@NNJ<}b=7Dj|9?_A;u>armQTM~CvYjSJC3YbsmoLYbq zHBpjMCy{9^3`=Y;{zkI6=&p#rXun9NNO#_g%7lu7il$h<*qAC`xlTD`-nQ6CIxRVe z)`z?zX=+UOfc8_Iwxq87mb@*Q9_2%#pwegBR4h_yBC4c%e`Tri((ICr{7zL>RZgY* zuQ_>)ia9mf<(&#ol{Y?zX!M!%ob-$-WAqp)CiGvv$(L<>^^hx8W-5DMG`1vh!EZ|S zjP#6q)=ep~tQl85ET2}XQnvZpRrD+zQ)yZ~{59)`YLRNbEBJSTW)H8%rttUTX{Fo> z-C|$$xAx~eSh)cu3TX-nqw-OrcD^DU2zb%+k-K^Q@V4F=G4kIP=X5 z@(Z3#;hL$M)>X1qj#XWn-!-AuGS`^b5Y}MUzO6|#d7a>G22PilEu<|xEY!>i#(u&Z zA{ruXGXDBjCR4ceD|+dAdG{>&*XI-MpHF2IvncaZ^E&10MP6!X%MEi`d0Z-XNm~|O z?{k`kT@p?bX)#-Z}G>1!erQe8b-J0`i5z14i|``sjjuMcVUw3`%wh28=>| z#6MH$Q3T8p6z+@9;H@E3Q)DTPnHwSFoe_o+$V9ggxDoh$s{0(lMaOBu+GZuqcE>8m zu4A-j$*XT_%(-RVtnq!7$*I0A%p>%P_ty2+4JQ>R0w;My%jM(0 zBH4fPta#)&HQdVJYTj?&iB>1l22h}b;ceBPMaP}!*1tno7U5cOib z<35wRLcYpA_87bkO3PBqS@x~z>HOW<(COwSNNmOvD`@-D8YVz3K$Kf0z~7C$jkV3x z&F?$xOVmTq1NUC?&i9x2T_JsGQuc`CC<+h;aOlnVcVlE(B3g<7GqO_FGS;$7veOEf zi_CWEC++;%Y2OI|F2`$-(Fh~t@8&az)HI;1XuC3bfhPH_<<+%XrQNL`L$Bb_InnpR zQ?E}RYp*Ltwj(~rVTp5!1<<-Ko|bjYrOvSz_+^2!f&BTF{rh+$>8otN>l#-q>NGnvlX+hnAunzz8TI8)?psYcE%T_QA%3MQ&Z7UUP_W=V|}*x zU9>e;og_n}mu1LPtUumX?y1sHO7gvgGlu;)7?F&M&BSfoOdhK=ls$c1zDk~Wr@r;9 zb;jeplcQ6h8_f^7G6HRexm>M48zz5+cz;eJ2{ncSzl1^YKme) zW6E_vdsuWpgky==5Nxx6Qi>~kiL)9hEymB@XPv-7^YuAQ!{v;@^eQiIMiy_2 zOU|#(M31gpk)umZutt2-&Vymt-nSt;9BO83IyTx2y_>7j_}o+JZ!$y0%eWpaYn^y{ zjOrV0gC1kgRC*KcCdU)R;|$}1EQieee;f8^QUuYMWVDvrHf)|sum`dGv6oe!t2UaX zjlRwO2&)*WvDF?>S7|<)TC=d4`PTZ?T314YMq^n8Uum^-%la+yE|<&Lx>XZf@uy;y z-pBf~DZVu&@;<93E*o?++j6 zC${mrL?e-rs93Zd;P(^34Bs1rv&N>$auK9HXP5ZN#)jg!A&S%!0C%$!U6Acx`UfK_JGQMf@x~QGrM~E-uAr%Q~ z9W_IG9S%M>cR7TegbH@1+uj}wf0s(vZ!dut$(!GLv>X)v>Q0d^`F;M=dPmen>fW<2 zbRM*h)`pfVIPUYOt>u92&S40%kfurDdk)Djq>D38&R-grPII>kZ^eF=uNrUtr$c=` zuYXVR(z$0}lwTcBe$Fqq*SK{%cr5VZ@w;EzOs^|kj&GfHZ{PLpi@b~s#wiF)y~;jo zKMg(0Oe|m3xP>}~hKdrgNw{M5>I*H*1KgW10hSnm&+eglvWeS&jRQw7D?dTMW2SDS zo9w}Y`74V?+?Nml_kD(Jq>clcp22aqaAgg*lK9;fV;3pPWc^;fIHYv#KL)bUe1T8j zZ#!E3Dg%;#!n~^gMdA(0AT(_ z0|3$xg#S$|L(u+P2ND1Xvj9N4gYxJCpUL?w&!JGadUHHc4KF@ zb2MXNh=Wb5?rZhdr+8NzS27R~~n_*nig_Wwp{{0~fk?eph< zgZ`8IzY*&HC*nW3{~Mv;Xz?))2LFU6!1iws|H=EeJ|D|Jul_$$;lJ|iU)+yG6F}r+ z`R}9^K-B-@mk$7d0aBtuU)>>oe}}WDT-6xK6*s^V*I4uJsOV~17BGt4Vb{NUVqJkH zr4B2QtR}~;8!hh@Rl)j-9;rSUNS{aB>(4e!nMAb99nG@p*3!|U#1mIP`1j$?Te-r` z^m8G8@uqJ_)x|I0x2?0~?Y}+w-%hx~>kY%!G`Ajm=aJf070Kl-yK)F&$xMW~Oin z876^qtlx*x9gwHKf>2#|o$X*xV?EXp&QX8=jj+19Ch}8?x4Mqj&s*DI@c0nQ;bQq006f>%r~03fmya>a4r5 zC`~Orq^BY8Rvupp?c7%UlA8qs{syw^G9F_BDiSJD3fvk_O5Lie%1~5?n1;8~fw69t z{D!KD!5^@LKii(qV9OObL%>43VF+}I4 zZ<5KD)~GSRV+`C)DJ!chgJlTQ`mf(H^EHkalJ9$UAHbWhSmCmOJrOklENUud4l^xb zBcmd;ueZQ(Np;gYVH2(OfyEo^Rz6p%XoWaC&B`BiEK^zc;3j@WTt$78OfBFwsvasP z(n3DOf@Pm4mK~5BCrhIkx&T`vPjyLk5~Hbln>p}_pLR8d-pbJT(?(7w6TittEME@S zW+}g74M~}5Go9vTy=*l_QsHju;P+uu2uKs&d~A^`tD{nNgs2-8{lt)tJP~680;*3O z(st&YoB8GFeqvt%BQjP|2G+KW&NI#v+F2@F?VmeN8j5HZNXjzWLHAEXPxe-)s_IK+ zxi&T(g=)(wa4P4WjLJ?(PqiQ*1bntfDm0)~SaqrV-nxQO{^na_qwOf5JVu%QMeZ5d=APaNK{;pWGTOTaADNVSZ((FrmlB!-aX2hCARn z@Jy^)gz|KA<9IPOaoWgu=vk`*sJ1xgD8Ck*Y_392kdf~^yu*%=Rxxn4>>p$t z2bmOU^%@MDjmXhR2}nMF`5oYS2bRkN$r>3*kkI;g|*e2N~NUJQ&r6P3)i0mD*VFNfoI?OO!#S6KULy>9FLXgkSLooaG*tdVDP< z$Ji6>wI>GkAn}#9m9v@u15(Gz0R9G~n1Mg5#o?M*-NOdc#Qf#avB{gt>FKHTOD!_Y z;vikPf~AFJ9iUrIV#CxVlO})PIQ{YH5D^X75lx6cd(fcxy~^>bxV)VtJv|*hj{jcW z{q=feU|#u0Qp#CZCXblv))8ljivrWM^-_=k1e;eY>?xRIteBz|h5ag%zgF?E@$-*v zYYC1a4P7wdFF_@91pVTe7<$rJwXevP?WqG^11Bfu?tfplNj0(BUL8mu9P+!m-mW*PTjT?ZS%A5c{((Xu zxni$8YDGnBRW|8Cat@gmeM(zv<*+CMF(=M#PhF?_Lb|eqejO4#bUfNL+*Rs=-xs4H zI3aPBe$WwJf&ib$8$^k)baotfhIB-0SMiqUE;Gj(dRaykGLRX8{8cpcl3G-NfaOe9p; za_lpb?DsuHe(EL08s^Y!(UQJuBOKm~r^FX*horgkIeW2NURyb|u+SrMEh^}g9!5Nv zMOn_m){48UjTMtD72n;*5c|%D)}OhR$7OFNGsIY{*7wzW{$AJ2mas6QQ?C}PJLyWD z7`OGy1gh4?;?RBGYjkZsMk0}J-AehSGStKRmQ-xYTqgvnmEtk_8VBBDBlnRqtUZQ0 z&;G+7Klwc7PE9gX#UPA_qmvkWk3Z7sqY5FiGAfIq_2w(5^RO8y*zVX%#X-^2R;k-e zJX9S0Irw}|2NJ5V-?4dwJk;n)S3Hl4V?Tm{8Hjbl$e%{dVOO$X6r{TVFIqRcsx%DZ zETA;bu+XbY_XRw1%nHSqScekd(m@AuW@xQvYH{?pq$hTgQN5)D9DW=Z{oEy@eg{_| zLbdU@Qt$~DDCw%!hEhMtYez?!ZS3M%eHDLD+B(?#+FD>y%k&9{2y>^rMSb%NmIp0! z18}gvKhVLLTZ~>H#gEAa0_GcQ;Dg&;M0{BPuq5lA_D{(PBv30Fl5&zLU90qnrB=}& zL@Y`tz$x#JK@)g0A6;wNc;fdQ23M(Jc+v$6IK%f$qA@7pg#>w^Yqf3aN zg0#HadD0*Sp#@Cr$FykZ4}0D8lu96}`;~`@r+??(4>g3`{-_t+@+Mg58xVP^4AX~3_H`_F^Xub7Fe5mMT+$gH?2iL6CUc&M zOiJ|~b1o6S%x2+JX4f8&W{3>7z40iiQeDdM&uh!DT}Z+zTZB~~x7^pQB|YgTb6xb| z>;l)@%+;16m}PZ7GE2LpN@J#aP8ZFrEXrYr96x-2vctEV(G+{_nL)>#Y(YzeISWfm z`o?jh^@Rl^&n+dd&^K#49id&6IMH&Un2Q^R$$N{{Nd!66OavrIxY)*TwCdTAsE{$c zR|5{AiJKWyA@ABftpon(WW7?5{bvS)J@kUXQ_o2xN{pm;F$z|z(-{^ECQBi_6rSh; z+>JK2OpTVpDm|4t=>b}YUIxDKL`C^+eK>M>8AJ$Xak&-ly5TL?c@M}W_6V&*J3IQL zhq2Xot$IFfGV#a|&DOFH&J>-&X{TQLEh@fO1}N|oA^brriMlWh|d z47;M2dHtY9093frmrplN+N2#g@+jHN<;&JTJzHaI^J1c-(`}d6*9m9o=D&8s-6Lb^ zG7ZWB^rD;t1?VOoG1M`_(6gawz-_}V0VhpV${q;o4=0e7JV-VQlk({~{wmA}}mUD}m@|yc*UCW%&A6F^Efk{i`k0MAlj_7RNh$!yeUL!EZu;3ODzx#4ZG~OBL(QOid>(mh)LN7A5L#z1G+j!L5wNJe>UbVw1y@ z#_i^+w5IzB1wBM;1Os%6jS{9``s=ENoYHrJ!4oUA^25TAva(7#x+?Km;9fxmbbVD6 zR`in}#-Wff?a(R~8p%R)YQv!*>Op}&$%V<)dOajKhpharvfHmG7u|&{aq|u5YFZD_ zSitkiS7&#FKbo!l`~`7MwOdKfI%Dtn8WH3qwcjz*M=553+}!1?DBTtgC4Xg4u$KXh zQu;gl`ioo4+z~lq4e0s@q^teXLvLeDE5o$wsmn_Is?qOxa31c`Log)v!)7@i!vZ$; z_vfp#%gb)PRXhYgm$9*;=SOSzm2FwJeFZks+ikW>%@g|D3mxeTM2y#f_NmyZSJ`}H zq8(d)ukZGi5Aw&vME&E@Xd9g4G7P{^mc(-qbOoedIF>(-4UW^_YvFNg||!bCKSwtkyL5Mw%b)L0&9D1!Nb^Nt!YA zw9rhbNkg#~c!okI3Z^}l^}&Ya#9Rhx3v(Kla@AZq1A9jHnrXw5gtfAKf@*AanpIR> zeZE5=GOVg7@MGU-%3gH8T*$PEVs~k@a7~WyE9iLfPSzZnAf=^<1Z&V|I7SCn#w5QI z_tbp5r5*l*K1TTLN}|fBFtvx#)kgAyfF2Kkr!suDnQM;XSg9m>XH5d?!0J~{K*don zQpIap|1_;0@bZUyp&)$nV8!<;bPBI==&SS?H4Q}b5DZ!-~l z?mzGEe4sji__-CM>XojvqkVkMipK2;nH@=B=Z#p4^7wc+&<5ZbSvd2U!Szf@ISIAdZ&fj&LYC+iAeepHx&Tpr>lrJf+NWD+ zV;LxmL?&v}V?pu(9x2!(G=FsX!9tLXrJnM^Papdoro3PXeed->b+=qoNXry-^yVUv z%+T5hjLI~XZ(Xfcnkk~In=^sB+D{TyAWu212Dv$!mvCCI#={_{Li*^R0^F`q-@gxj zf89MaNxTPoEIq{8n-;de+t0X;VK(*@}~@7*K!L$iz_4&qH^^TX>+`0j6_3 zWvlUtT~gnCt+5n%NY3^h{YI05>wqX97&p+ZkRQJOvNU7sX{r%mB)cGy@jI;k@|F&? zqwA1O1<1&463dKXz%DQ@&)r@$I(Av9MB8v%->@e)X)xV3>kDRodq4>1?v_52!g9gG zXdjFr)gXO%@j)}-w5KS1qZOBF$aE&?4y*^`lzdDfqst{gWkI@ zSW+H@aD>~X>9papa*i7*GMtV9z?3f^YZI6={TJFQtdGSv(dms)jcx@pdjkY=7nxO4 znAk%Dhf1pMqTA+g3vPMg^U?>f8*-9_YKk?sZLH!Y2fjo^0&f* zrEodI%NnQ^ckXl)cZe&TDG^|!<^y#Hi$p6r{f$s-%ckr7=H_CILvq=C#-s4vd}20D zw@hRuUNcoPP!SP3?5+g%F-Y6qDt!oR0wh%f;EOZhH*5$5S^bSBtvRJAE9A5*?R*dryHFOB!Aki5kTcg(3IMm!N5 zE9lfxu?l+L7Cw7l+nfIkgqqqTSlrHQRwmg=?BVn1aKP@KX{nrRRv=%Hk=Lu25|glK zH2#EMC(nJ>eR0;!yps!B43gB~B=V@_BO#VJ-eE>I2Lzt^-KqJR?6lkO3O_#A{C&2+ zIT-u$d}=G`K;!cDZIB?$gdlRmG$c#R$3Ed_;A+z;xe-_8Xl6e~`jOyHXLy!UoqGM# z?i&r$L5JIcrn^wX;mt%tz7z!&3yJM)Z8ri>lg43L432i0$9k3H()TZ}O1$IvAvFw) z!h1iJq@wSqsRZ8l1-Afwv}k3!@TTvoJ^5~rb(e2M24sJ)(EjR!At@^%9a%~;!2#+J zyS3Oa0wI_+Nd`ZOD+%9*#8~wk<3w`|O;$NP>b-b9Medy|a0Ivw&PL^FUyTi@SR7zZ4u{>JYUBM*(-*{nvr1Ausm?sd<0 zhQNxDhD?ugQC)F~qtN||lX!R+<)j3h6=PtpV~B1*tcapPMWuSPWBW~$Rm9&yXxK}r zO6h-=IJ3PO8lQQZjK|=femJU`W3TA}Vqh@@*#NVHWI7IM4G#Dho6pz*JD;h>U&n#9^jMN%FgsTnw60L;K*wNQ6bB zk2a4#l?H?au(oYwR7{C3d;h3`?1tc3eh`8r2p@(}(0?u-x}>fZ!T}%Mo^DJiEU~OR zS5Xn~t;2;+a8H2MMgT;>5GWAU;n#eze*@Ya-PV?}%XZbV6;ud3$pUSKJ@QO!rpf4j zYWy~YOhc>W&+XD#Nmy3UhHF4b09XamwH&BhmT;U86ld>DNAt6ro1+(%F{<_^Ld< zDJ#=8+LEuj=EVrM0~^2PnGWMHW0ktAlmw!(0|5w9+lA0|tcmrLMOs$F*xGPHyH|Uf zM3e^#)HKF4rr_NX`NjA32UPOO((dbQ-}B{tFF-_Y0A1;1&(WEvd*yd_fcDyHuZeyC zVx(9EN8ZZ*ow!?G&Sx=xMRu2b6=y4Em+E2)*{VtN&&yPFs00mXLizV zQ)rZa0l&*>V-qntoethq`&q0YM^GSc%7VGCxP0~4S)`z4#2^u-@4?TDIU;DR^^`FO zCd`J)G|e0>Ntn+K7A>OnM)mMQ&Wt(3j2(R&{yrSPVoO&J5#Z=Vq18a)7eS=2`tI*f z$0+bXdeQK7A$ZPdM$WyE_71N`msR)}_DO{Fl35yS-*c~1hGr!+8vtJ;l%Z>z;8+GQ zsa1fF&zi&n?70`5`iBu(VP20&O6F0X4MxslIzDOJB|UvW-Pkn+0ibj2@PPg z6nCPBAK0N_;6B7t@bF;5=m56xQjxnzCfLp?r$8St(dGeBuQ7%|#1#u^A@0WF#*Cca zY*;URqA}Xj!?Z}O-1l=SoV*Y;4?r>#NRseWbU&p!RU|}VXvxJP5`UH0pa7q?7)}Vv zZ55XGsW014@=f#W8MbejOW`-Z#AN0s1hTreX3ho{A+`CgE z$x6Gt>l>=3-9gJ8jM95Ggc^K%*tUaGye^fSxhR2)L}TfojX+1B%Az{USadS^{5*-1 zb0Pk{1&DN5cc~JtzO>9yQ3cjc*QC&)NE|>o*Gg!eUtc4hren-T5V!+ClLJNXNdI1w z9yJPM6>I`Q1{l#-Kn4oIU?j4JdU!8UP<5R{oD&Qf6ApDyjvVbUo?F6~8`kU8XvSqc z3LL189b(KHKTE1zN_5fVaeGu!jKIIkE_llhq%#DVi+!2@x=pfzsW>Yd9{3G1jDy(O+E$j~@6_0Hkq7b~ zQ9=g;hS2DQ)z5SbdERcfFjNQW03Yw%bqSdpY?(!$y!sN#xmFH>+?S z4nv4$0a*Qa_;6Uc2O0F9q$>nrMA;(C-ZpXq`mb9>;di3o6`J;c+JUg^8%F#+HxAdlO&u`TT2 zj)_Tcm|1!Vdbrv9kf++s2b7GQ5d@R@A|fK*8(joUoLCk} zB!_()(t8zizh5;QIQG3bUV8K$S8@{s}kUH-ANh`4?LS|aH05NrsTY%Wk=g7=fHmbXksM-s6yzMNY@lt3Kfp z+`w`)d!8K&b`MtQs|{4;4xYD$V}AP+Fhrg3?4`$ySM(Rwde2u>us=KZ+}sx+cNQrW zH1%{X4biilooA8*cDPk3GcW|U3_z|lBXwe8K)DD5d zs{u4YYg4uApNXxxo~wHZfnirs4$(|=$T}2zw7x7h4`u;`indhRla~q)UfWP%kXX^f zqd(Qt6n~K6h5((QP}e16LE#1kO2|f>8u)ahAa_NVw9)y3!6Wo!uDc5 zjT?DD0HfVRw_O4K7*o^ zM7Gwn>$YTu1yk5Ck8Be7;LbnAEF23A{?gQ^bmG0JI5!Rc>jeT_ouEwv--w?x8t33X zuaFSjtL_$BQHl!xVONL-CWaxsEsMQWzGA?24tM(t6da7JPv-*n8s1^g6;jWvu}k=O zy8K`<{Y*;IW7Y|Bg53)LJi)5~G<5;SuW@`g;Shx@XSAhSit@e=JhC%~cVhsb?=HGd zjdxyBAPtTIRpZS-wa*dk#xQ}?OQv$6mPkQ5ZL=v!+%*0~&XG{r&6Iv#FNwW!@Auwc zekUE$_~A0Nth9YUmC{vM8n`n&=FY8TdCKA&bI!}t|1&I)RwvS!PM$D_?xQGv&8*kq zs-xA2jN}jQ?B{j4Kj-oIu8?jR8$JZ7&p4+n9Y?!_A1=kP2fNVVl!;ACI&>b0W3Fl* zI$a|ryJnBNgOOG%q|d#4d^?fJUQZ7AMfg+4@#;?U@>e{CQlwv9dm^#k%(g>dp2AZM zM1l5~(fti}9{uEY7G1R85AChFUXh!}ckU!Ch3Z@1p9e3ga?>WR>%|QUjO+Y=szshL~s&`d7rjr%0~!@MVR+e-yaB2oisl zGMpX>Z0%e5W8KQbMofK!$oQExeZCi{GE3FmV?7XdbFIASkuL?dNRxD7Tqclzuo~7? zE2WlAp&6B4Q)5=h=H|=IHS)KYAZy3N;m-~O!oG!?M_dP@_(bzVMer-2_m}i^S)cB$ z+*M@n0oM?q#uR|pgGlC0e^&*S`9fSsabCs55h$*Ux4T=J$=)`EWm}pMdW(8`Q4d`;lF!;M;;=lUpvMplxY& zORt+3!tH_sng=I^uz$iTbYU(tIUr}N?{y^}Q&CA8CNP-$oQVscMa>+E7VkZ5<}E&y zqt9z5H7LNzk7*mNKnPNuBm6_6VXiM9d7qtp{tDSU%k?-2p(=Xb+=t?9>ms&GR11xu zrd}vZsecis%gazy12BHG#dXi*(EQz46vyuz;`e&^9A)bN7<>7MO7RswRFw2-BE)?3 z%&XdI^5|(L39kB$jh*E;p5j#(}}-_k7Z4vId-XyuCwrV%_d#gAlCozPLa4 zEj^p!w&GUeP71nlfA9J465-PiH2xkaybltUWZ$;)+UY26gr@RAO4)TP%4z!TjXJ~l(@Za2p-%Yp(~7<`A+Sjt)efiF^z?857^3I3&{~E5L}EbQvsnzcKUI5;2yy zPv)n*hIisFV1!82HrpZhMY)dbT$C%Q@%_l2Rq->f?(3Lqdd|50nQ^J{$LY&g`TgL< z<7GO`xp0_#3zpyhIAuXXxaCkzf{8FQkr#bVt&bBtH(-%>??pHLyd4wPn_l!sbuwOv zdU8j$JOEKG&k8Y7fCnb`!wz#aHvm$nU|v*$*|LZpQpa-i^^ zOZ}cpZH&X>c_awQ+{L?Vo-%9dW}TvO{mF7h;rc*17%*3Eeh+TL#EPS2?%_Ywy}D3g zsggDRB+NWz&#>tPJV2QVg+6o#>i3*hbdQ5yoCF`7mP~tvd5~>;?zDQax2{6D@ccV{ zHYQv!yf|UOIdEbGh7V^%J@0olXnId6dKUhG=pk5%wTUjk&dmGyOn=AL9kZ9~h1u=B zC*cYy9_K9Q%wB%FAYkwB+xFj0U2bsiWTe0@OJpnzqq8JOL>hD&9vmsoZt<9inFGHZmSKkbs#nCA|2a!Z*Vk&~UOc77cb`73pV5#v5Pm5zyKfqJ#vF0BhB z40PY^-;d4D_9<+G-W?*FqeDz|_-oNboV}UiSp9*;wZlHd*hbNzcl~XT8NIt10FFhB<>|5(F16rAy;=Q!6y-Pl^!e zwRh(|^WYu#OAvrBpIo`UjUSArKWC=+ht%u5?rQ2|y1*tXKMKVfA$Fe$0k{|PptwkK=#y0It)wS?s$irk1g-RUQL;qsKk{dr^#)Y{ zhl56smp$ULY3!36xi1m+j){(*1)mEA>#cIaZUEOUelIOrUS);qZ@MHO%Z(r*212BO zi#>`hOv~ZQB@e4S1WxtIw$e@r{?Nf59yDBhq3ukd<>%v$DPB0S7S}qE6UR0Ni_)yTcBZZcr`!Ne&^voAF-bFK zqQGiD=@!T6kNge4)ybiIlnv6e8W2}&xs#0VhFJKZGRO?UO_j`Y4L1dsG;3nIu-5m< zp?@I)HbMl!6mpTH^sMp?#l2-;wjEstI=zO@!skk1L~QTB7`KgW>yjLLwY^Wj5*=@r z?jF`3^WF`h*ReWI@^I!5qCzMob?R=fj}4Oc48KwOIw4mL4c$E2yr3f15a|T|juhr| z-wlV_(%1bw{uz-fCd|v8_~RUwX=&P#%iu%0VT1`rt|I@Z-SRlrs_HonEoZEr;sT@g z%zMmFhHtvGQ8U;qXMA@{iF+&yxHfq#+%uuFZyLFG8d=c7I`Z&eex5z;xwNX|^eo>F z&HQ#gH>sC65W?8umOnst%r?5^<)*$)QDqbX1c;JCc@J79F8FVo(M-HvdtbLl@-v7L z!PA~limMX9kNAI?1&h;jRw{(crNh>V&+#w6KZ8Kc=z)1{1Wh{a|P{!O13dCkp$J^-U9p~ZF_@wcMCzEFvyK= z3V2MNg<5vF+F_RBi>A6VQGr2Qso$~T5eCe09v>;eKc!gW{lVjX&!W!O1&9pwS2-2T zTXs7`4RdJWNjcmSeznxerIZQW2~RG^tnVF*TW5dH#vI?g>wG%pV-pa4KmPaF+M?Mh zW?Z{|%KiJqiRXySG>A!JmXq>Id12r$>Up(2#z}P+^S3!m`ZYh>U4!WI3vC$|YwdA< zo!7ocv6DNHvy!n^i?84WU6#egR(WsLl`rNm;5d_>He7MBCXY=cI{w7~XOZ|a&oSgv ziN%JBBwS(oD;8us_W4R7EraKOF-CBdKmTftTVj7KBZ{vV24gMX6K~i3uB>&_E+Wa&f;EP6}<5XVvteO-?A5N zg>y7v;FC8Ee;0B?84iWu%t)^Ok(t*N$!!bQsPvV-eWx;o?+|==Y};!RJ5%;qcJoP# z;7->1asK!w-X(Cd(j;fIL?24B|44~DjVc(u1jzx;D8#2MJ+t-74)`c`M`9e&lRO* zY%D-k&fAqYJX?+K&?Zu%$#c!ReERdZIxaN*{QTFisz-|Ht*t1BLzuaoB7775LYA_8 zU*5#Kt_PnEXmNW;1cwKo{;8&5$;at_<^q|n74+VI>e=i`D>p`@X`JJMK_vS|y&0n~ zjqmd$36F$5*Mv|BQ;Q0#4u*ejBAenmlZSkU)~0z*Lq%BgFf=b3RT%|cAhf~Iw0jniVWu7W2~@6#(#=w0O*0VrL9PEeE} z0yIY5hSL@q4r13VBSdmI_i6P_t zmO7vyLu?8kUDtXmjGJ?O_T?X^LRWm?Jj+4-Io1z*C;3NYo+u&*K>Pat1m_(X;-#0X z2i*-5sE^$K9?(^;$X66EBYs@2%y>Qsm64jc7DVr&C{q^%N;{c*|6@P)W81jCw;@>v z10nQ4`JRU|9AwG-{+qnqhiK-05x>SBuE1n02U`4^*TcE`P+Yw93Xgp>KvZI6qM?`^ zu{`j>b7o1Q?8O^fVbMjiW#LL1*}D}Gn4iz9K8=J@3Lt30J{+VhK(MG-DCR{FM$Ujk z9_B@$m~*{F8*Og1(R6!tz-U9n!4%b=4k@g9Nwp*hgN^!p&I#qwFN~bTgc15sDtZ6C z`OR+*U;3qAx{dg`2jU^P5`uwU%uiNyE+Pc_@8qngeabjRkdk|HK`t*F;8Wa2B;%e-al?AcwbZ`6~j^hO7$3!-N z-}imrCgG9~u)OYduM5BWtG{ZNB&XN?kdPcOzrw-bkd#9^`OR@TU{F4TDgBj9 za9Xa@W^Z>g55H@BW5r6W@SHaswhjOov0$5+U*Qz>PS#l>_5rQDF{N^T0%2cC6-ty?d0pi>j(+x4vs5pJ!?Uo@snj@f7zZ6p+$ zxex?Zk~qKB)-Bj_&WX2K0jk%Ja}FN`W@D0{w;v_` zP(Bp|W&mYel&*#V6y(WHkANGKPMh*{;$Wrh@ z69X6>&?TH8>)i|pw7a7{oa)VmUc?&d4yHF~WKS|vh@UO`N@+pN+M=;-P4j!1iI>$& zJ<-)3`UO6e`<-6Ewt`ChpbR!*f8rt~FQ37L=u?uveBYNSZr7*d z-YJ9&xBu}U|M5-#lJ|q}OErQr$peS+yk4(=vyglJT&Z$uIe%cFz>Cc&@Z59HY6SR@ zg*_9t4C~yhL?@$g4`_|1GyPqmN8-~e7L7X~QGE*_60LR+PHMq|Q+-s|(-&W4lm?KefNLT{Z$x@W4zwfgLgmGRJqX6P?~J zdH;mgXu5E`KMYEwT4!#y0tTV$Y7!iRsi~`)H<6La@N9Q3TsYYudL%j#`cdzMc=;7y z@fBv}p=>8kZ1TVNd%tIjDShfh;soQr{hs)5KgYgQis$E;I{xk7{w;jVw|t9+P@$`s3yLZ}xtUz5kBy z?PQTZ2L=jPgy=Lz1oQ~^&K7BB-A+*-R-+Ehh&$Qe9bSBLAl!$jBR|EO2%EVP1xsw% zI?1!9#S1nBrkvjUx$drT;Y5Gv?bPSHOo87{5M>Li;Gz>34o$2%jL4khg<}->gFpC# z@P~i+hnpqE!NEx<5YNDMHVDqk_G_i~UIyYC^BP=aB0uI>BAI{UCw{_|WUt@rDYkcF z_4;UsZ?p?;asK>yziijgEq>~!e#)jSdzs!YGWv16Y>vt6;My#(#KD1qf@zPK#)xl$ zLm)OK6{HevuK6?_Z@>`8INsYG?iVmF49JNnptM5mm}E8jvz!f+knmZsam+ccHAqzI z5ExELT)J?qFASX+(CUByA_{t|l+Y@9n0Q%OP$0Yu7cOja49dVUln8Lx+9KTq7- zw|?ul+SCHhdAqSfaUSkS`CRks{+#T+&7Oya^gL9?uX)|^-}`QTq5}a0VF%2nBl;EF zvU3h^8LmtI1K?jyQ*k6BP>o?d7&X@j{QYarXRCtS*NZM zy3r@`XHjAlOYyVV0e*^TPK3&GP^i2%t%gp_6M=>i7^0DEjle>QV!F-BkE~afizqui zs}nCq36_pwwF}lYat| z@!lZfc%mQCBPiedG=9CFMrZ5hT>a4?rE@A7uioi);Cm|e&RCvB~IQcZkLgXzVlkYZlV&%X6%!~5R_mMq6>oeAi_y4Y&NXb{)VUL!nC~9 ztkkj2+D*JP-lZOv1q$#mcWG1~lvU_tUzaVL-=_Gba*nk|d96z&IuWZAZJ3c0aC~Mq zjB0`9IRTQmDrWdWzK{;UXcH&u7BB~8o$Bk>iiiRyQM){EMbBJli0Ac}2(cg@{o*hF zqAeIks5VM41X60$rVJm9AQTn>;FHQ<{^eh80howdNrLtIXoiEL+O`)SVRJ{@9bB6y zV+WKUua~yI`qi&CKi1#=?cavq{oUWS>A8TOXHAyHy(IQr<9-~RDDQ;3#Ld0luU`GW zEU-)`<8}S}zyEuaV|fX%e?I<*q!oOU$4T;c(FR#psu?%J*J^sTY5#Hg$LnnYxc zjawKuOcV-3oUs)*1k6^o<*fV^7fv0MzoPSw&Ma3y*JuM}O)Av<=ICU&Ha-<*H3F@w z;~UT^G?IEJN9|Rc#W@l?X2yqa#LPvw(DkYo=33 z{)eD1i@VHeV$lQl9S^6D4^V8yDK8Q2`p9^=DnN`2oa-X8j2v|>M%;yZ6Qx1I&@qHC ztY@8iVOU>GS_tO_j$XcJArY^do8ygyc!=Zn{0-P?%YS>Z9t z4q_5|1j-wV6`e~1*^_vn%1viQmDrSjt^fOHXtI>Am^7#-r0Qq>+&s4s-K(%CNqrNHc zy3rJJ>K5?>0zIC!!6I=B1tZoqXdqxU^D9$Y_e%MPR1O|?d>hp!f63hEZHyBc1i@A+ z#q-9?O6HGWqyG*f#oOTZ$Itly>(}CY^2PJUkK_6LxcWEYcUG{6ow0;E90FGMT6sIq zpFh9JF8x8^66HX_3x`8o)d+n{Bdj!61H_U>>M1RSIj_-s1*;EjvOgTa;`t2MlGyY@ z=#e~68z52uvZjujkk8?1iD3_l+jK|~FA5CREsC=`UKkz^*QLK`rgLa0=md<$v|~Z5 z4=$@bYlZToOW@Vc7Uo{S1;L$7c@+RwGg=Z}i#lh-ad=4s0%o-wgQxUV>_rzl=lPHF zKKWe9-?cv5o`3=(CVYd{I53%E%EL;6){m^qXv2eGE^`)-Xi+LHi0G~kj~PL(qC1G# zAjI1u(gngZbfd^L8VzCtKJ<)@SDToCFk_zRS|2l zgW&*hpc`Wo;e`nSkeZ}8Z>YTsvU<%3tgJ=Lj6Ig$j3-Isp-BfiH9dxc~qV6G=otR6PL&k(+#a2`+(XghGe? z7%wP}7$-c;XP6D)6s17H`7v>Wn-Vt=S(;j+~fetNb_g8L)9;jif`U z0pbt>gg6Y!x?mDnoRSp-5QCn$m7Z@w&w>8K0&KZlr-@dGYip`6uk(E9<-Wl_^JyfD zN`LQf57Z45jP@wiDAY_kVKl_9OpsW)S#-}m_nZNw)MHQ0YH?+aw5uYZCYf9joA?Zu z!jMGRHY}IAb=!ISri4aqbF_)TJDRoprU;yrk=%cCd?uVe)^Fh+oD(MGhtM;L8;r~? zh7E~Ktnsuc-C<7R808?cRU+;fCzci$lFyzPH08U( zqSI`3?t$tltIz;OQX(zk4(Jak(2V?;S^2gx-bP|!B1=!SqBzH#i?-%)<;rFAFPNe% zqA8XXVy4gyi5U=;{;d~cl8WapR>Iaf26qx+B5=5P+Ax4@DY*d8f&wKLHVS~IV@Uk* zmCNJzR&}iB;5&N&Y$L0ug9DOuetEqi%t=&CG%dQG{@u48s2UW+nqXZbG7~W1Vn>Na zTp|?M?C~hJdb7T{63uU{QIvrkji(!3#7(rto*G4#QCWYKq`HcDqOrYL?(*lNd!av- zkwlf1yLJDXz=yflxRjoU#5qJLi<-6N%D3`YbA+{O&43v-!z{U~A1;FOUU=AQicIs? zu?Kbp1q2NdL%_ta6Ket&fHPp7A&!Q)5_AWM4I=mC1kBzyzs4Q_0SGunDTOsI(ja>y zaxkwUem-7ixnq0_+-r62K-6#w{0Le|1}LgnLdf;E=3!iAsT?cIO5JKPxL=@78vwS> zr$_tS0|p9z(Kyq55(?gEz(C*?U|=~*lxaYMh(wGHfFS;!I25z66yBzD2p)xFxI%Pn zf9;q*=p&?ZoWcimL?;GwyuK`IJh!NC%n2aXB-&z=gmgc0e7OWySVnhHHqU7x-#gq_ zu1voM$l|7CFP{g56Q${3dtj5QAQ13<7L@<6zVN$1a>-<&?m1QyN%6bFJI z-?>Meiu@7eB>^<<_pMl0@C}Ig(1~)+8m{vT(^hB1jf&m5{Ty{RsQzq=rkrM3HwY0g zMu3Gj`CV46fJjRXklHiff{i+(E1Tr(x0T(Kn|9dq;&fCwo5{+gt~4EV4{QSk^KVg} z-OG$9#7iXR{=$U|CMLldn1I4H!Z(cA;xAWE#5s}^Y<%xxk`Y#cK;S&3CaAC`D-u0L zQVj&vAsc^(d=+y`lEU@d%eP94{GmN8WtnZx2-vMMukwY@7$9h?$unE$C^Js9kS}c{ z6a?#$ic^An#R!{IrIPoF1ixm5su1dqIJlW~osudnZS%JDp#D9u2?|~=;TR-9f$xD0 z;%C%HghHf34`5@&hCrPt90<-KIG@}oJATfW;79zT%wnlcMA#_h6NOh)r27>QC=m5) ziJC>tr?9UOY1H+UY!BzrB}9XvyR$=FhNTld_tu_O!sDlL)V1(@1!4x~=rgTy^zl85 zHJNNN0#ur8n9wscsd7H@5}BiucZ-V@Yg{s0`g>1%z(C=I%9=tXtV`$*#3~_9xs?bG z;d5ajWIy&j#7cr2P^NPQ0OMd`@7r(u#&4Jx9DfExQY`h#!+Tqy7R!v}3XyK@7B!GvrAaWtoP%Jt{ zTWecb$_k`Xh?!*vr>ZEaVGYt#+BFBHgN4#!T!ON#eEn%romF71qN>wO)%Cz8r-trO6Z2u&J6G{N@l4FWCbWY+-fg&m1H|ir^PsN)D0G!liABq9 z5S^-crwtJ_W3zLm-o9SDcUGYobMiAmiubemzbF)_!0ZYFSie4_^Fw6*CEX&`m5OG#5zk^)?8bPC7 z*SxNBWv18mt_O;OB1ynbgm3~(uP#!tcMBCDfyfcD00^dAd>M#fZ~^Y)yLEtt$wo|@ zB;U}`(55-J*kg7`Tw(3k$-<%p&4r*m&)Y(qM9h3&Vg*r8UT+eF8aW9u_af%`9b0?2 z%KG|hXp$rCzGJ=Hz9qp5A31Tb4z+zkIz+QX9unoWvb2r%%$Jx^gzMspd$j3edvLTKvAk4 zfW^#<3Gu;2F7|*0ZUIQ(Dgc4SwD5)_rVR`X7=e|l0}5hU8s;$UEm+k3gRP*Y@OFL5IE@cil4(oDI z7r}fkHX|Y&gU}Gb!s-f$Rq?#V$^>}*n#X)jM7Jp793vD$VpS3t#qLd>g9uIUP+6Ti zzI1v(W#2hR8-TE8RIl`h<6T-&PZOQi5yJ!mCtCJ!E#_&%F;MLFh)MvXU}z9{%8mOB z92o(rTjCkZ_h(C3;V3mt`_lt^0*X>ihXAl-G3NpijI}~h#4Abo2aaord-M3-9uJdJ zs@Fu2CdcnUc$8ld+s$9mAdb^1;GUD%#2kv`%7NY+)$8VyV`P0fasFBm^#yRusyJEZ@{?(pYVF>si$nIo1vkh@}g6s zZ`!0}pAiwi>^@nxG_p2|p!jl-ZHPmnd^SdFHves+k~k9tm=Pd)1*ZFt^@NLO^Z`nV zL~*exUM1%N;~-pWpj!(ni~3cLahiC>T^y@@)E+JOtdF^y6pp#yC^EXrOrLD zZ=3=J97UU{s+eK4x`_Ay>mFt?{tVuu;sa6T_(}-(x6Ko*YA43P? zlALDZ6|KrMm}?6|$NR%^nR<5;+h`kk0OM&*6(wr+D$CpGStK`ET~<{U4*?iReD$bV zKaR<7ai3N^?~y)HNqeHZ)4zM!1N#OF6l^@$aEV7BebiQ{b1PQ04+!yeC|u*A2QDg3 zD{l)Mj4^Fh(?*#sx2(~egf7x4F^#oh`=u}s$il>QPn6$k5*(vbqRquKCnRb$gv(m! zU`FCpgY*bYw~=sGd5KJ^#SDB};^0Mn1^~;KS=1^oBF(vnJy0EW?6M(PoiM@j%`%oS zWFg;j4!dfq>Ci~qDvQl2jsCWVs}03I9!Dlwz8{~<2`TP;@rb4W`IK=z`k_>t)AQB$z@CBv{{&ubJ|aRq6)Su2#hX%b zPSrPm|L=h7pq%$~DwJNLS-*6f87+Ck$6jXTz{|_~Z5A?t0h4iymgULGvXpDj+UEee zg9_#3LTlY@$Tt?U4sHg93LFSVb+CZDz_5ssSJnnGl~4mH33L$1)wQM^9Uu_CWC-*pX9U zZp9&onf0;99%2k3#apdhG^P>r{}?QJ$ehxAwpt z;uC8!5%T@S6HnN}0w~iDKKS6S78$r(J1u>#ySoSO0L6_PH^Sx1m(4?sx%K5Qf4PZF zcXv0WPnY(aD&?kM5BkxI?GFCxN4mdDKUFF7-X~{?f$L2*yN{`BFC^}zoFa7A%WfVtR}00000 LNkvXXu0mjfnX?c{ literal 0 HcmV?d00001 diff --git a/images/.ipynb_checkpoints/G2G_logo_new-checkpoint.png b/images/.ipynb_checkpoints/G2G_logo_new-checkpoint.png new file mode 100644 index 0000000000000000000000000000000000000000..f195544c695687ce1caf76037a148f81e76d796b GIT binary patch literal 74287 zcmce;V{oKx*Djn)oQbW8ZQHhO+qP|M(s3rXHL*Rhjfw4~_uTJW`>XHAe)jw4p50a5 zU3Hz+SFc*fI+o6LUQvqj67aA%upl5H@KTbZ${-+MY#<2Ig|4 zDe%r$)9`ahwwO!yYN*G_L`RtN-xraW7ngqG?ofv>SkGBUPi88BUjNsh^ygkm^ig6# z6d9t$1}ibbiuVPAKte+PS^GzVr9aYvfb{1-DA>UxN(SYDg5Z_^S^K}PtP3H4Ai;^w zIRjZt%5mmX*CdasC8dplHvu5XelTg;!&WXW;1-yIrbqU@GOzB~i;9^?iWJs0&D4gd z7c)(wG3E^JM@P_>bB+z-_c@Vp|1HVN|6h{GAOSECPG$K2L6ZM0ea;`v62cAs`UFk| z)st3;A-SG<2_q4r8rR9cSR50T)V5!3xNXi@XrG2WQ&6Ue$#Yv&W?2NO3Sjo1kSb8C zGLhSH&x9^pTUMTw)O$&MWwL_vc(zf)9d7TFmd9zg+^9jyGdBMN;~!1^yOIMoVFL-_ z!XgB^foI}19p7=?mV#@HkF8Hskpn(puu&TJHE_)MLDu zp4dVA^f9A!%28}E=&+j?fT4!WA$nZ78j(<8W$7k)Vd7=`=ZUrns9Ho{&O6>?<2*8$ z&z!|W3bPbPS7Rw}Hxss`N15TD^B}%D_U8)f$(OQP=(m*5h>24C1&DC?fUzLYZ%>H2 zeNWpxJLibo{JpCLJpn@O2Oq~Cj*l@4wY%p8l56cN$T4G@*vPgp{zj%nD_Tg(Ikviz zwg{Zx6JijFrlXN8#I=jqJ6P?|;OGP=vsrutli4f=^}wTCgfMs098Pu!m`)WJ^J#Gg z>W|15jws6`sFM>dN@=?DktL`-=T{;*^xFevCBV*Kmq z{zr?&;mgB-{Uib~z1<-i69Fq{A|XB*1iFG683cZUTJ$`}B3T$3K0n~rmjuS4*Gn2C zra(;}9ShMuT9}xIHD)O(7?=ucFU(8^u1xlYHE9SYN+3;R9AgThU<$KGfBWsJLvEg0 z4d%$$;b4dchaz~J)nqA!`9f*+%YJYeMD!en(47E;9xQSdKkzu-DW@;cPe}mBaIjrE zFVkmP35026eXm3RTY|jw1eq>7bCRzE_o+G@WYxI1EtRyXj&V~u5eg$C;~$Ov>ng+1 zI-5~s;FM_hd%ml6aD3cvRoFfMidfsfs<7P?+z9-Xpk>}S+81Syj`&*73Fj3C_LapL z6dU*JuWXj$MMK)Mi1EU~DS-nrHt5G51LzD#LVykp19eQ5je=j}qM+Oovfhz)>YVPX9hy?ma{eC})d-5yem zmmFCT=!Z8l@QLQwJ$^iS{NV;z{d8;^x)mrk>S=*lnGJRwwTZAcU0-n1XDp1{2lnkP z8+q~FOrjc*!Y=UT4s-hR0j$0d!S${q5Im=vxCYK5<~FK-)waqOZl(8xI!H+nD$wQz zm6@=ii&z@HhG!v)#|jR6N7p6XM-8um&XqF6>6=YBCK5jojwBn}pf>&offP-&{(&-@ z#*N6Nlg;z}zOwMfD~qY1?5}Umh9K*ZMQjLQ|MmI{He-yfbaR9LJF$^ZBY{wc&j*M% z{rU1lkwZ5+_0@uf$;4s$bBIp?_(WhVw31)|{TOf%W+rJfr>W#HY$~MPFdW^)8G9)3 ztH5kjW;GGxhm$i20vDJb{_71{&d2exL#T`?QhWxIXqA!(f3+Q2>=$j2dkFf$DlU?J zLzeOMIYsB`)c{q*f*NJwIqWvQurBEb0tBZ8mIKG^$-zh&NNgmpYKjISN&HerCDd_g zZaniOe+(QasG?nc3-Lj0XOSs<}lR5$>3y9pk=gyt4U1xck zkKds3@^G3piQA0C+4Qs%@5MAu&;?B_UszYZ@a4tH8?y*(?g=p4mCAg}$*riK<{y!= z^^`G)Xwq%epgFPee0$&GbET>aZjsnD*1^Q69{hrAOt#QS)dyxNC_yn#ByNf9Vq*?U z&*eNh_}r9lOMaZfnMX#LGqxUFU={n~G#QAU#$<`)F9U-CfAbTTlYctuu*)p^2UpB2 z)`h;rwSrlFW_cLh-HfQHDEVJeUpGQ9BU+fu4F%-Tn58dp#OW5sm5JUOM+%y-dc7NJ z&khy`sy_>Xu9fcF`0b%}WM?W@E-42n#8ND-B7+ zwsQO4Ae#!aMLITqOHDyhoUBL3VXV|U%tVegApiUA=@qt%XU1yUyK_>a z!k&2Q8Z=bwDIL?~Bnj2sq`SC9aqz4u^Ep`4f>ha(hb`isJ^lNdY%U*VpLGhkG zg^*^Pt4Z1HR`K2e)t45uf?f97a8q38;a|faVnk34VLh_oGzkZRtu?ps4?IVslMt}TzP zS~0xkHV_`UnI*MsFlKL`-DwI;#j}pMbl;J=Dvi(jHguhv%pOyuZIrR1LQyk_ZXV97j91cbn5IJ?84%4rSzLG z;-fFYv@8(NF9kzcj60ahwa0vPpz%$E1asc+`hKeLU{~r((-mya(;*(etX@Y)?Aghxh~wQ%>Zay(`ww4o-CH5l9?(N8=V7@ zg$ONrjJ$X5ZHHz-J3n2#7qIzBuMXzEOV*|Sb!U!q7Px?FMH8Xb<_#i-_T&+AFl}) zzAue^BDUT4ZpKqZ5b*h3U||2)@tA*m z#4&n5H{4( zgNd#6o*)V$IUHeF7%dbQB=1NmmLh8++cH_jf;8TOMf+%c*Ao@Bx}As|JXBfa4u)Qk zIzj7a4I&cr;s^7v8J9#i+iv)cOGP?M$>eVKXp@jf2PMlOg3=sjv5!(H!zy;B8ji_F zTKtopltCM?44@Ke;7TnhQ7hWa@~)jPMYi1ZMq8(iurqF?Ws=1dY4qZ$FBGT+2x;et zG#WF-9Vrq7#H*X*%7%<8Su2k^$XZ40aH6edCFY+hGpWyU&YYK6Oo4}&wn z)iK?#fas#L;Bs9b;1i`Gi%#%Z@p5NDf)_#Yi%OmDR)PUt5Q~>ITW&2M z&W~eF463G}y}2?i(yu0z(;dXH)KQK=z;~ic6ly`}Caqd(4CG@;6`}v|dshk%KvYqI zJK>aUL+VHU8}4B09~FnrQwJA5t3nWHoro-ksKdP=GhE7!g`SFqURvQcycBKX-`3w9 zzhhq`Y~kXyU>JU3m%tbp`0k$<%k}4z011!<5jO|H^;`|Y@PihI3Nf1i1h8r!%glB8 z*s^8`F{z)kdieNwv(azKsO2IE%4ZYcnIIDPWaIQW6B{9k%jfO>>}LNqZ@n;c!% zIf9eCg_-jmhW0lI?iH~NbylaqH~qF$gltRG!q8iMIoOHzC%;a95J7*eT0FMS=jIB_ zCkmZf_9{`|I}a2b6uJHEJ}>mD6I;>gYbP_6=9jHJ4Mw$mvsI?WN>j+=kDZP-7}EY+*EyXx^Zt1M;;Et$*Ch9^I2*Ztm=IPfpBhnN8pTPUZ0xz=)zR3OBc%v#eI8*R|REtdTJ60biv|`a( z#4+S!n*VT8Gm{~NU-wZHQRuYjm{mh-$dX&!x#1GNm}K?9UCT?krBM2sMK6+@g?N9` zvL;B^O0!)jrh&)>5-K0QA>`@1H&y62w@Z;l^-k*j-XP%h-Wfd9VCB^3wV%~En)Xv%9NP7CaRyx@VK7=xl zq9f3c@pcqL!pz2kY8qChEF0#l7%-AjzEaqY3vtV*(CQys3ZD61gIF_Op8k^n2yHQu1%8 z=F|vk-R!ftg9szZQCbi3;OXF_;Hq$NrOeS2V%hPViGGU~nP@AzC&{5_gBPdw$re)~ zjH%V_CktBBGUGJu^%ae`z`$M-Jr_UCn}a?acyRxrLg+;XD}iA*99=jCQ{DlXI1PQ6 zKNYk^y;SkkrGl6j*iQO2Ep-3$i6}o3Oz4@-=}GY_@EfCD#7J0bHrihQQB}LB{iyr+ z(ATf=jl7vFxv1upko_3-n@c#V$VKNJEAR~y=jtj>^kD*T*=e8HTxFjF=tQ9ef5ou% zzW%AL!;jxbDa!(o7Jxt-NFhU`wN2lx-8t+{a6n~Erd!o~gXFotejVz0q|Y zTxW)O8;}XnZ^P(2a9w!m3Ms}}e)2OW&s7G;a~M$JYrU#XkT}ZdYNG^#QrVpqc<}RW zB6|H9^C3Bccfv%m9Tm$n2XDx9Jk8s=ffwA9p?W9d8k9;JU#{-T>J|Kj2WI7S=);l* zl_gd*W}>hpu{u%H8z3gp#H1208%4lNP*Q<~TBi0d>BeeSDmfJrVu7+yxGeqdKfEyn z3hInATI@rf*X83+y^AmkM>GtHTSUA$@kRXYvg>O7mr#dn3=>@B}tg=7>+ zZB}FEzf72y3CYmihVkYdl8M|8yDbW{P>X-payvZ4t=Zypp25U$$WKUBa*lqg@9D-} z`SHDNnq2CtFBNcC8-@|)a`_i(_5*;>AHRbnMk5MF=~P04xbvYf2oW0%8U&5S0T3wZ z>Ho!9SuiOp11fiaI8%FP5o~(V2zX#fsC+=S7K*Q!4{(%Vy0+k(l{A4;qJ$wYqQNXe z#ub)k*_MM@2f;}WJko*-T&SitvyR9K9AKmU^yT%+2$CHj-`Xjs9m~)kRi5WV4NLd@ zseP8=7N&M}G4QIUqSM1cxf}}rMj4P!(SPvmF63dp`8;IrS15v?hG& zXkSCL$3T7!ase}~+ScV#ZQd;gnwZgtElt_w@c=1$j_OLflbB{%DLViFH&J{x%Jz7w84;WA*8KyrjVy$9_Vt*5RR)D-09I7x){asJu z?0`8z;4%S$@?-IrP%VggvowgEf=q8YHJ7^CxpZOL8s;NKVUt%Xxr7bzm+lB=`STu~ zzL)2lt#-7DFqzFq5JZMRI=xM#{I)Qv9<1jre(nhNo~|DGm-Stjal}nz%vwbU0nhqo z;!`|l$`+W{rsXY6p2Qcw4jqLTk+Z97_qo>%VZN{~)FaSDjh+$?tDaqCQ(5$Rkg2W+ zK_jITn~9_qOHsZ-RJn>KcP4Q!prsdCEYbAM#J1+b{(a*V1%!Cx8TPj-P7D)I^%Rkr zF~S~7O4c|7P_|Q7*TPBnR>1AYR)Reg{G!GI@9Ld-O09$hrpJkwV(^eRdYqu5;&jGL zgoUTOP2S_EH!`QkUU{7WHwG~bF^l`C&?J^-NUAgZh)7r=6m9OVH(NoL?r6VhRMHUIni!2Os65D0W0S5!?LE{66w;i5QN3RKwh z>wveBY;1vhyaD`imrNopFpkleHIFUYBzNY}3M;+WkHKfz?`*mTU?b$9JQkNesWG7o1l#nJ4S=xU6NAKx5ZGW8#(k<=e);K?pRGX-@cCJ*mite~e-ks;muqLq{nm5mC z;n!EaI!P@dUDz;yd7!Z|oJ;@Nm-l2rWc?8!Uiy6%^M67#hvQfJ|DFZ-KXGu_{@b6L z6oe2?8@RA^9fkqw;uk)lZMzyK-iHo?}hQZ-$)n1@db^Lq61a70Tzu}t}^ zVbieV$A{tMDpabx5C~6Xurdhc%jQ5&aZtFDLo}5AG%D#F6E6B3QYm&2y2L|jJn?ljCd`>L zmS>}Aod)#@@m_e@{TR}W{!d#Q@-yt)btR-Oh=M~#R?Q}{Q?UV2B*&PhkEgaUTp@9@ z;PofFHZ4;xFUevOH_I>O?LYZ+mzr<4@Fvw<9ne2U%P{m+!%7>$gxhB|_Gl}yHc{hB z#t@i9ZojW``LOvIQzY=Gz_6JWy<#JX2R1ad%*C-Eyk+_ zq6g%wZ!g8o#XK~HH*G~}j_xd!p}1My#77{_b6b)s*lwvZ7WNg>t&3QEYKTzm3az%U z4hJ0129Ah$Ron?WAKBB>rrDNiORGjKzIfKm6@%=xH>+C*gQCGk81kDJe)XFE2gh3q z7aR)39)2#!J@kE1n@rVt*pdzlx2^RZQMZt+7S86KAKJMCCt}9S{IuzD$CCaC`vG=5 z*-?K|{x1w7v_qZ28f<=|jN@eGI5LGmYdvgBjj;oUX~Z(bW0)=_YDQUVuR}rJyYrk8 z2*GHyjPa}j?d$x=RoN(H(mH{BD<MQ(4{n6>Ye-fFTg z7qRFay#@sMeslyBhpD3tA^rte7XW?M1Am&qBpG4-gtk1kXKw<_h9mWQ(lQ)tr0!aX zixebbp}D6jD0`Yo=YrQGz-^9z;iX{IHCNcDbKpX^Q|Qt%Ll-&2Xp;LCOAtlfH>|w{ zFqVeS9lAwJ!8k=LTk=jMpZ?!90`ROnoR{5IJnmXStt9&m@~}T_bGK_pY8A2v7M%y= zq$B;_ef1N1TDH6F0`thFAfwZfch?HC--^CiU|vG0ftfKINAmB?M1Lq)LDAilz`-l* zbmGTnD;N_)&HV)SA>#0^X22bmag0uoDf6_v53BsbYMf3|_!`G2*5K0(j*yv#gc&w+ z)P{=lT%sHbOA?DVnm7{$A+`gP!S%Ev&VxIeh0IF3O5q?YObbeyulw~U90Wt=;7GzS z?0o@hS&~#pJV|=>V4{u8cX_yEB)Uan3SauDSF)Q@oBO~Df8=2ckUGWe8e>u^S}|G@ z1>c6*d0pMv(}Z@Z5P~jbmJ~(}V~iLEJjy<#)oJ&AdmKqzdDW?&a(-m(_VpO$$cS9L^U=xK^&yB$U~t- zXyB*Wi2l-uG3V{ant6_}qbejXist*1<8AI}*UYNcVJhrVVM|Hg|nrZs8j6HRylU}awt!T@3ivL?7}2e1r04z zFR}}mY(s)|B0Q!iMJ?dY!4ONgSAs1el&-8-_~Koz?$kd3WiKY)r(VYn50R#Zq>mAh zlQKHJU1Z1e89R5CUipI?7Sk-U-w2Bkvv~9utc9s1CTjX(m9%n5wCGFUPa!gW?GXCz zZ0gwa{UN5Q4WAUp#OM&1Au)es1?3^gk|w0kJ>z=~MfXk~sM=_r_emv@ueIx8ORQ6W zgU$cBvv>gL`(p=4q8G(KtU%T7`$40CK!!nNqEJdICzL_5w_#7EwqOPOGfz81x~1jHA~s}2va|lN|47Q#w!B- z?RR};nHKeuUjx6UV&ZX|0Iw3v*Zf@EbYE%MtlFQCpO#C2*Y|Dr0+90+bUVk`Id>q` zJu~iOI@>0(Hc@~AEZ<>OW6mkZ)z>1Nn3?NJe!#_#Qp?k}@20rhr8c3c&))0W)D5#n zh5+zK*LD_~S3i^pA1D4FQ;j1-(LYRfU*dL*`QsW3eON^@Pynj*57tmi*PDGr`z6Q? zN5T!<8&J|~)2;-yZ$%129~p=bg-{JZ)LtO(;!Jg>HE9`sHxMVS20&ymmo%1ZO`nUe zM?r<7WY{DgpoT7&<~33fUv&-gR;%-H@jzCq``EJvyX`ll5)l8uq$^2(tA=-(v{+sD zoC!1Y{%bKO4fX+(Y%ll<>2;f!c%7-xN!2K(-Ct()%?xS?Qp9cJ+!vSv`rb2F0Z-)n zXAbkR?=qdCIB8j!CQX{cpp%fJYfwkBVLad}ssd@&o#~T+2zhfBA=UD-l$AZ6( z4z?x^W>zSNpFJ)1^6s>k^k>c+R{gZ$bZ#JXx1s7RB^Gr{%t@LFD(}Ad2S_E0c*$@R z%rI3RrqMaJGkX8lPD5X&X_VO(j(FVtw2H(%Ulc_uII|^P8j<2mYw}73`|@|unIl19m75_c=L62?(wjTSU@OPh$HC?lvsJ>g zkM&c#Zf2{)3&>yN(hA;_TCUHTnMwH06ZQ0uq*5$$$_tia)bb-VuB`U=?-&Ya4c#3L z$`;)2zO%>^7zY*YsU~E?))0&)2uAt3KbznAb9hBAt$Kvao9%Rd23@}#rX@)9dPF^* zPCFW7)oL-N#%)0?{%Yq52}K+CNyhuMB`zYjqTo)-7&0$fQbY67d2aENwiL+WSvD^6X(<`29QBUs8- z%}r=BG9RA?GHWqSe3UpzqQ=rbD_$(CLHE}3xT|uH256J}A2oRb`$8BO*tFS^lLg>! zy0*$*jcN>Qmu)hK9gF#wsLWBo107YJ-EM)Ecq;wGjN$PkR{2T9r34Gd?`mNh{G&uSA?`CF&*0dNIOC@2z~GuapgN4;;6oz2~c zGj*_hDor7}yIK&egEv^2Ho=Mzr+{fMQ`62`ueGV&$pOeRnc}sKj_k9mCslv#viQr^ zwJ88X=&cuPw}nRZG?8~b_p1K_>am(-04XVgvJ9<&2d%~ zMXr7Xu=6j&W1*k8sbe2nj$`UyUm6Lg0~G0iilRoJpYFq9n3^A&P(EYTAEeh>7@(Hs zi+)Ddonk~zoxK#t{!Ya6SX7Hl?Z5X5rg>8=Dsc6gni|4RTPk@(51{^iN*@4b{=qzq zL%`N5EV#+@VfGUkkRB1NtTKnwAypqS`SgQ_xI25^aAj)xAPSn`Y7--1IEHbSA;5gv z2=T(Y0@KALuw~P{LIAgJPrTtL+}JDr?<|NW2R-?9EI4J+F)_MM*9G@%! zg$L*Q1M%2>(zZLq%1pQ#v&L&2d0)4wL3Q7b(n@@ONTx zGxk-SE1}x<_fG0{S@Y`64lv541@n6M6RRXLcy}Kk+09IO?UgGhv0}cu)bH3D@a-My=Fv4q;M}z+=vf!>V4{-@hO>7H%x^?WlvoVUHC@ zcqky7Q}@3S+la1S-;fE0QyA%)hls~P%ccE}*xz)+K_sQg@^-aK|QUPl9krc{R zNduTAL%4{1!gc7Mw2enq z2wrIfQ4t7S)}_qm3>K8r^_;Qu6i3}W(Lw(8qhuc8tNipw&-*GxT9Uoj;*~%Rg95q{ z7qP_c&FXWbk?Q>cs%DcFT7A=(mg}o~42-e+nMU>3%Acq%EmsV#1hi{cW{GiMnK=kj zzgdk!b3twxLHFmF)!Fa}nR|q6_*`+b_JvVYiyvWZOrT=P%kPELJHn2} zBCZ^JaEyIBKJ4MH`GWkt?-f@ohpL3*FL>~C01x7y?E!JAul$gaA)i2jWAhx==fG&E zj^oO1CR&7H`N5{cUl5Um7&(F zAeRlE1Ng|wA03&GS3fP|$%N(am6QaB45u)W*a>w|J=(uG-GP+TLMOMo5$`$Mibo4M zlJ{XQAb?ka`eCfZqp9O5F~HZIIkb@T-P?3dXEw4;x*x|B+8RETDcnKlN7zQNE9Iss z(S6oLvkI+#5qWL1Pk|!o$V?iK?aJXxCUXrm3;aeixY>;-cr$F4TT#I)D*4SiXGNlj z7uyP3lB(~DY-Zg0pzcyeEN@3(_?AnS+y}g;1}69~(Z80zTKhNNu}0^$5PRw?sQ@51 zF`MKreppC))IwJ)5bSCl=G474z*^S@r>w}Pr@z=wyP~+mq6RkAJ!5vKq{l6u=@mTX zUCn;sSo)HuaH2H51M-7v>u2Bm+XKu6U08ZHeGhG4#saKkF|*!QLUd`x#gwF()}?m3 z$bIT9C4Ehom~JCK+0-sMx@Bw~1B9L3*=DYT%bRr~ViQocWThp>XJRzVYk6IKz!}TB zk50w=8ZJImPyL!Q!o)Vy)tl6gx&~f`Xlt$sd(Rus*liGV?e#!njWlFeL=u7xEPmtQ z`K{!nUGHh>X(>KiwUu3;%Xd^~u#4<5CNPPX-rwe4n8(3xj>VoMSWEsyW%rHW{Tdw5 z!XtjHpAoS4-LEW7+5gXBhJQOtKHuzw{U@aEvQ^$dva=t37>tc;tJa}0L}jjU0kN4a z&5Crw>dwhJwf%Bl-WoaL#5x@WO-51{Aq*Ti@D6nI3}zr$L1Ho#A~2++nRePlV9Z&m zN^6fHPKy{YF(PEp)D9f*=wIf1O;;0DP~VRf6OT^6u(K$8Q(Z2)rr2`)wiQ0vofHUooFDygC8$K%7T`~PcC&Nq(|M2a z-|Bx`WNpk+8{M>=hhSm`J;C^cPm%M*T+p|@8v<)oQ=tt3ema4Gib*lbpe=>P#uW?j;^tm*p()G#zlSz{D=e_c#w3IHnCF0sb^(=M`j ziYbFdu@k^7;5z~ikrsN~$<+_F)^=D&q1R}A;cyu#zimW`->PPHDx;C_*q8|NFs}0P z3UxZ}WcoGwjHwqct(t8E@u@y11cf|c3*irnlQj2G(aogh%v0e90fgaf(7U0bb6Qk7 zM^hp~Wki`qi>XOun}@ZLKMVI9SJ%%8`kTijq~wj5@d`_&>R1&Lqe@Ihxb?KzlT}_L zBwBNO=CXgh{DngH0MPwU`$`@ za$8l6ZhPfxKp9r-Fw;jCn)E~&ow`J>%FoJ7}qeJ0@*&rl?98P|an z$052goV%Ok(3)Ue3wf@0cV~$8`vWfOcSrL*t|A|W%Oxc*t47;&(wxmrEryo9Y1B+M zieNOujIs0ro=b?J3vIXj)lPdE<#6}RqpZ8h&z5suS7z2_f&jU}Fn1xM?_21?&k%ou z!T*i%#cXI;G^o7zqQ2v%l_Ovd`s^foc>oZ@c~e3WrrX9^lI1#=?x& z2Dp2eWWd+WSNl=fcW(UFO{R?^(6J)qf-b7-X{Ru5R}mf?`}KwB$CcM7oZ@2O0b%4!plc~bR=H1kTLRk&_NM0*^TTz-aP2S8-r zR>dWo5dl?MP+oL!9cGL0xEv)%BJ@<*CuF|lK`LB~2KRd-zh);{%e@>hUW0IfCiA)6mh6o$ zSJqFbCt6jzDAgC))Z9)hpUcf#`y3@yc#-Y;ib3{W5?~}LVOK-VU#BTflH{fb5BSRG|?VOIpf%2A;8iuJ-q{Jh(39rn|-3cM23SbWbt*L47Y<;Q7&yoPi ziSk|@1n%HnvkmM=xkM2X9rma%TH8>wDf%J=Ve_sP4ZHNS*fLRzUj53}#VREkV)&5; zX`%(y0UK=`%6GB5J#=rnspqN)z2APIDMdI>;GR*;w(Ox5_rBhZlMBNkh8M6}`Eg~^ z8o*+Vxy|&c^oGdMO-_eLChs^C`HXmuzS4nGQ=@*HHEgt~2tG}+`hnzZA6Mv^C*YcG zfy92NYHDDR=)u=2#AbpnE>}E9nVtFewt}*g%+dWSkeg04>@hOJ2-oxfH1jlcdJv1-A|j__fV>{ zT>?LQT0ZC3UCwIL!Q|V!dYN~7p{uE{8K85+$l_^X2oSZ4-_KOio>gvWr1Gpf!>BKp zk#?u5sRKCf(!`Xk4K~jSd6CXjJySpuG9zl`;uzKNN!hR~5 z>KR3JgJ57irlJA}wV9M}n}Kk-hZMequw{#@fI7>J~&%Z(v}dU`Di5~yv2-AT*XuHmeQOcAY9SqCBX(X zO%Ln^W+|6?Jdm}fXUbQbnY*_R&2F%CNE&Cu*jc`RfQKCz!jPF|sm%gw_)AbnMRr^T&|mQ4++yTB@g+;twDz^2@(+?4dGG zkmrBT0{nkx?T0t9t61T{jXXJmR??*%FbXKPfn;4TkimOWNF|z9CS+(H8mC6b-=`2% zyVy#pNLP*6(bjx@fiKq<$b^&Ymwd~uPaf^L29d08zle6;Bl|~f-oQd8slJPKy&fCU zP1uEr(Qv;U+uVWQ9oRF7Z+xDyEfZw;)i|VA>b5@8_$upuubw>hx=lVClI?BKqPX4K zW*Q$k+~Zz8^CzJ&iV}P+UKD^iRhM*eBr9_6uZAWUgJ2$m2;)xvvQ?ATAtWbn3Ic=4 zO5JzuV?Vm+N27n{&DCg}j6i|O${XeW11R$0K3Q`jib*Mr20rOT?kZ}cqqKKe_ zzsR9llVEJ~ENj#=22yY|=<(c?RJ`9`0Ram(c1AHLru!iKhb>QVcaDF!B*9qKW3w>OT79oNX zNXreS87@h7K{%SD3K9vM>$9j)-!b7I-{|5w&|)Uhx&PB_A?jNQ<6k8f7JWP`*dm z^fdPX6M!w>iQC6frf0N)dwJk&SUeRe{A{-~=`Q>VpM!nVtP}(Gbh5}|>m94oY@|H! zy}*<8S50FvT`*IslIOY~cX?NUQi#Wk>-m#MFhhXE2cg35?J4)t45o1mEt*6#$!1%& zbk%J{IpfxgPDVRYS0@@PdD!d9DvCjh;f~x6UE1r@xTIZ2$!R zF@+(3Kq|FRiS&ZKY-A89Okb;!={_z=nz$G%Tt0{xVHzntba|`B{{D2qb!<#01BifyJ29i4rTNvztDW8-AAx;y!TQqKY&Is?9M8$ zXL=+^KtIJ=wORNmX_l7dZ44hJ9;BR9r!1%yQgPw~_KU;(O=`y4s(hy>XJE1J&+l$d zUK+$YB5TeCiB5a6v1XA_2_(rY4TFV4zjBj@nu6m9Mv?L%^McUV@#oMcajO`;e+X%|WU z97l|6Bp0tBQ(0^IA)@h|sjx*aMouN96q0y&;&_5>rmq7^@{{qitk?bhDk#6t6f8z2 zq@JA~1=t`@=Lmpij(aH6q>jDhh!E(c%3y_HQm!C8yCsy$ilp2J6EoB{rM&ks%yvke%rp2)yZ$K|P z1>(GdpPvs&4`R6}#z&7+IIO>u9+iuav$A@!+BZsx*_fgaPTj8?_yco5o>->8F&4c6 znEF$#qlKl&2iPtoM-!^Mnml;vMk?(5oGJX7+%b!y7l}w1*#Z$qhBnVyG&q#wB0|4X zvb88DbM%9SLuJi3=dSgok&B7`Ng;vWfr93xH+tY?0HKPuxEmd6pq`=8M zyDrXu^wjtpu>-fNak>!l=>Xs7)#VqE0YhMWx1=^Wt;*qwo=QILgOlZNlKwKdCmI8v!%y62s*WBUwiBh9G|-H5c$D>;q zddZp?Avsqf`i*SwTAu(f55@Z{cGTQnCJxkWwh@gDG{VWisEm2C0(_e`yj(i06(ha< z*Ml*?z#ab?&j>U)UO10O-l|E8-0wAF;cYm?kM35;z&x#;~EVeY98Ae{Ev z+o+9}MZdNi7Wo#1PRfl8$1?P3;~d4N2$nZ?zwu*wbXrJ9fZF5fYiy%VF1SXEp`7W( zn_lOSa;yDvx2nRM>obm8WHPVMq6+Qnt{N6Rm;o)I~}IoX%b z>R$;1P$jr=;lIfKoJ-UX(T5F?C8&g8(7RM}UYV~x!dTl-9zCnX{XDwe)3y4(B;|(c zquRKt{CfW^`Wu|#O`fMfuC@9-k3@x+?5!8ZW*{d^+c^6-t0uDo3wFYrKmG~^V zbGcZaVDK+O?Nns68d_o^7xSE){wbbg1U?s?#txRNnfJ~+B(7r*V5Wc}tyBHe2L-wW zQ?_gnpD7!$e*$E~)-@yEFQl!ew8`JD#^5A4bTt=zbA(^4Mv_H^@pl!K?pcM*T9;cK zb1cgM^u<3_AXw2o0%8inr~QO>ZIZqbiIrl#67Vi&;x~(IuAtUwgX}vo2A4hR$q&h{ z!KFBlLElQ{EJpp@tCf>OfSDnAgt8ROk18CLO*746S!a7LS$Fjn6E@>Id=~#jx~1<5 zqC!V|Y*m$RKdyah`GsY0^>=YA2Z+*+O&I0_82=VR$U`)Kl3h2Suk>%Ige` zNJN~_o9){pD$!%RHJ zmiC=WF}ZG5JUc6I|BBPipOQ7jRl{2^CDS(pgpKOBmD&~9T&S`KTC>3I|AogI{0WTm z{eJ>uDO)Ora@{S>7a0!zU`Rhc%;9CG4)Qi<6F8-!D5jeTB$ObGdL%gdqZ}S9)K>1w z?~mr(lN;C=cZWFwy?Hm^SkB^W;wB{+xb;!$o09fua8;&#u_=zQR72ag&5vSxnW?gA zG|4gabW8LKQE{6zv0j@bzaATgYtA^bLTsuv1cuw%1(#{ESb%|W?-QNm_=4OH7kZh1 zU1jAwN`d=UtAdyxBn8_yg18(eJ{O$GxS}WNw44cEC1Em>Dkpv`f51iNRTSFFwitpBuKYs4g1th4cwAvT60Hl0yUd(wO?4sDt8NGwIz%saAdPgG` zAbxf0&wO)sWRMnh z?s3WA@80D5)=FK59pNsDmeSFTmuAP3C0!&JgkUFDobl&WZXr$ovL=Mis4I)C`FkH( zk(DwLdzhF`7L>Yis}Na$oSTaD--xRR0J%R74=e1-GFsT* z6c*B0(Ogoj*%8x7^ke$e0NM_A(0RQ9PKKB`l7p7*Od_3c0RpZh}9wZ@tlbIiGNF5?(39Jr5O z(=Y^iCX6}4sv*yJMT$V5kOmF)3Nu&5*a)bu76cLGG>F4OWZ?*QTQC>g4O_04xfw>kW|>T zArvWa#=3HYV52NL)UX-WJQ^sRj#U|jFtH3msyf1K3G>%Cz?V~}3gvo>3|?TD1nai8 z3z`;3NH%L;V5+AyMYCE_23@r~WHXdOesi*@rjn80 zGWMh1EQk+Yezi%)N0yugP{G>8B?b>K?aY;nDGqXzW#bo&m?V5_cPFuvEpWM7i6bd< zoFPj^zBP45^kfgG+c?_Wf#p)=WWp*CJ9U~Mh+bNJ#$Go3K53H(_5009Iz4KEn<7_4 z+ul%LI?GSmT~~LU%6fbTr7rKTQ0|9^EyE*EJ%?OM&GX2G8tSbDuIY0cD?FPPuZ@t&HI^=W1g2*RGf!SNpCa7zOVqJ*T$uI7s%i!6u?fg@$*uNe>*u2$x-eR5vDLj%$d ziAmlpVSq{|SqExGR69trbs|M^9>;2imvAhNqa|fbY+4-cXY=p+fo+{an#*)GBX7)+ zN>SS0xOD|ujj-{39IQ6_+k>;QK^GCQW;9`fVq)wG<}j_3X!O;+5z3w;FDajL*3T9B>Bv;W=mTQ%1xiA0ifqBs3z}W#)jqF)@Oz zhpy%C7YUb~$B@ah3c<-wk!61wfxObt9VY+Y;8*Zuf!wa+^Oij1<>yvickl;>VhCZc zRU{iQj$m0Tz68?Xp-f4Zi-*+N{$YdlFisB~+z8z|p9?~acFPplgz z#600PmfV%^yzVYTAbwtDB}G3uP3fZsQK(wRb}7-%(@*yG z^j3K7j+rQ15=j5Uju4&L*s{yPI{kPRJFkqnsoRm^kxP|8z;%P{Y;cm0L9_)!C`w_4 z-i`?wV;Fv~*Pmx$SiPv+EI5*eNrt@EZhBBj0zKgx7o^S|Jh~P~sO3vh)VOVCD|T4- zS!9SY@_%eq`tP!g|N777cX<)$8-?I#BH>1Xg3v@**f9}G2*O5%AtiBzx4JSk-e5M~ zpxUxDg3!=kWAi(E17MHf%d3-|=x!G-Rasl(_GXW!Y`eak$M831n~`{Cq+h zG4eF5VpliAGY@LVON8-rpT1!i$`%UlHT{R=GXj(g6v%+$-7N9ms8&EHn1b{Q#rY|g zbIUaM!_Up3dzB-l5l)Q-)v-^UiE?w60?l2G!zdjfBSn;lnv;?-j>Q4nFj!0As&b<2 zQZ<)TUSh&DvizFH#2*t@s$QVmZyGF&Iq|7J2@MlLqanU9}$aI!-7VINu~R(?2Ks-or+Hzx=nrS#&SYjtC( z;{oI}S)~~=W0}`M=$)i)dD+>txO4+_J%xo?Pv>m&)c*<#_EJRsg1|kITa;y8V$H_2 z?SbnLtYZU7dgURizw?Uwbaw@-*_*O;f&axAQc;#(L(N5hx31zt;TloPab zvp|&hy4%!9(DVa0O57hYYH+~3NL<}RD2^r?!0<`<6kwF-NASM&kv+z~5bF?=_Att& z752qkJ~oB?44UAcc~|H-d^cMrqlwNUHDN|8B|EK}Go1`jGdA9c?vahh3-8}s(~!{ogYJz*Sk^&&<4Y%_?-rAvnq0SR_11M2%|2J$4%Ern z&B)050T?fO3ADlywt3r7XE)lWI!x zP!u~RpnyYc<`EMI0zECN7+&L4rcVP!$*GWzF=7u5u8e{@S206G4e7Hfn}wH0nRfq~KxqHM^)PQCze65%$Kdtgj=b&_>i_6wRCRdGv=*YEKM@PNkaqm=s_+@-bF)_V+?D{UG&^vCTw zEtR1KDo7dLzzo0Dl#e3qna@RMB*xX)ebi>~ws_FT)TY&q(oei;oBA1cB6o+Vl}>LvkPJ&vA?KzRy?Hr+IXJHXf}1i zIsO}1yv@I|4M7ZlGZ$L=OkK6}bWjfnlDIki27E-2kC?*k1yU$3S0pNR!=UUG0$&2Zg>DVzoo;&nsW&;+xAUCueH#%#cTxcMn1B%;OgP@2+mdAkM zc%?n^cMx$wa1Q_z=o-6_dvqUlCb=*6`{+D1t%9W1aOL zksZz~$kC0!K2~4AkJHYSznYU!4)hQz-}^l5e##k)(x!okS+{Dj)v5%)?kW5FpwU}d z{Lq^6Z^SSx036v-jENzG5ZQUa9@&t>RK z@M2h=UU?b%><+&^kdYGA$WRV(R0rRg00&~0{U$S?xfim>Y9$7Xqc{RJao3A{WCmgM zG$Q*{y*CN;&AtAnPmLFQU`@j`kSnnG*?YCot@@%wXFpSb&ooiIox$AOw(Pa`B{!1( zcXY-a{FAlYjxMqQkrk|3r(n7))HMRB31qXWj6;8*tejDRkc1R6LMBZ{NeF#5b0k84 z%6&`M7OXm{ndY#|z0Q-L#Xo*${Jdyb7|}l7B|SBLOJ#whyexb{V}F=Op60C+?uiaf zKgxRNSRdi2%@)S&s@+uh6lqffmHO z7w+gHRr<=}?Vm8ByOjlm*@xXIyUoC>;hH;`H)Qw6Yfx4$A(~YY6Ik#j2)FloE9b@9 z@E2liIQF>bI8G?Nq=ILC`9`R9h{M1#w0ZRLdOCaZAZ$@R<|FCrsxkLI=dsSK=5snhLqpH$rM~| zkahTTmL#j^#>BVV!jehV3N|1>OEv|f!9m z@OffUK{9nhZK4HaE5fPdMTae;YmxEsc<+63QH4b-aPDfnq9TZZ;bEu5?kr1B9uNF2 z7cSsV$r@LXg7kv;Eihxz@>l-)X}ECH78*DaZe36h&DpJ>z>EdVa9K)X3R;t(g_;bM zrZ-ATl=iGA9*z`{CaT)(xW-MjS8zA2cLKnLDaW+xCX7jF`o3yK-@AE;okJC}*psAe zq^+I4eILC=Xhe;i;?JKc%4+-`YjZGDn+fgD3r@iQL&#A;btQQg?r^X5_cgR5wc3xNe7ftVLJ;TAHFCNPlUrb`N6;h2j+!n39}S^}NpeO*r3x$<>O z*R`c-?T#@3uf#*F7Qk7qPX!T3o2&kZfg*)}8-vM`FJ84+fPV%v0{a%?ORP8D{ruEr zOC0%rL)-?f;WHgw-RiKH=C zqiLzF$RFNpU$pik-#VMWK?QyB&#Jljx)QRoRF~AYb@w$V>q=zL*c3 zpBfzzZMqUlU{*5AoZ9+drE}X;HHNAMmW&KJGrHjK()s8!^W#(IH>F~r|2J6ah7bn1?M{EKEPe?R&%=NaxplvlrX{>_`zgmx5%;sA8tDw=2s$IzqY zlxSs!azXvR3McYmvtsbW+z$ITuwznos$Av(0E>^FO!2o~HS3}&RRHS9T=5SqWd|u` zCN$@40I{y_j#e&0qOk62IxlrKbxNywj5~Wjvj=mCnWi%?j!N4$;|JePSz&bQBkIU% zUtiDSIU^OAy5tDzy^w~!&{a?=4+&-elq0pIIPNOdPdPGz)K|a(qV2GPrHW=H z}Cvrjd(?Df#4$!M=HfZPpjf_YiLg0(R`DOE85i#dxQ&_XE` zM$!ocVz@Z@T3#>qHqSV~-nH&{oM{x<+JbAe4E<#)n7W{QNP(&NYAx(F7%5WOyY`4m zE?7^mRdv3{kCN7Dv?@{8M(_+|;008#2Q_r2`*ufPADAC$?!J&7>KjixPs_9*BP&y} zL40LlS*KY^ajF?Sq?+zf>3-gsy&6Wp{0;2F|K`X3A;2h;wA?V4Eufo}swSZGrYnHh z@Pt?pW9qN(D~@K@<}98X~&2(NYSS}*yRT?m65#Tn>eYfiKf$wtP(An*Vknu z%Oe7tgtj6Fvo@l(l-l zb)_5*h%IEtf-S;E8^9PI5gD%~-hSqJyC2}}E$lO=rc8M^6wzZcm#lH5Ug-}_D_xQ< zXNV7Xtc{NV<1Xr-(qc-zpFntCz--sG}2Ztf}dYi`X?v)m~IosPPF zV)z{u=%zZiTvT>v^@n-*_?@4npeW5wx%LaEfK9F~bU8ni5gtWLl1;FwVBX1m-P6<1 zX((7Cod|48upqWfD4v68*LyZE41jcAW`~{4Z>}9?m}!LJ?%lPj_ro))H;a%|-I`67 ztE$1WnyJ^I8XoUPd&*O3N73|-8Y&w2#LD-Pn*|{ctLL-0e@O;^qVx`V~&hl_8YO<>hVvu!oCmHW|K) zdVWoSwII>O?GngE3fI`XeSD?ejTcIE{^uOpdyajmd`gn|!a)djy1x`Z{0O~fOf{=` z0I|ELb@yzGH$0Qm*q5u{Gj>$HmTtlf@9h3!b4e2!JP36GMq7$R8Z}}h6Gt^)FW7R4 zs=jz$+9AC{3(0cYcWwQTs>Sgr{0E$GXP05f3)OY*NTF#=t2D?$iIRgJS!b1q91pQ( z;1V+MFqAF^^<;+0+afr1v^l#kfb$Ffi`lfVY*gV1^#A>+=AQr&Bj+!ngj)Yu9@x$d z&HljZyqon*A=HmvIWub{2!gIP8v{n$QZ8Fe=Qx9VT(~z9qNy7y$&|5ax-j_iN#SS8 zXx3v5zHn3*h#YfGcmBHfD0LQLK$L^pfbV0V5ooqG6YS;@41Ebo$Kt5cLoEJr4M#=9 zwi@!QSVo|(V~7EpHGW}J4^_jmy96(ORuXNTQX6|92)_iGLf~em~Xy3Htj-qGXbo{P<0$yKeoCF5JSLuKt#jz-Inx%?nePt+qiyiJB!>0I`GW5%58N)0rn%;_;dV z5xya6i78=?+4&ZTyNC}bfjeO}Hf^o(!v=W2`}&3Rzd<^osZGEi>~NZz%ttQEVFqsoQY~?Uu29&VUYBm8L5r7sa4u~Q$;9_YE*Rtm|H?GivN{N($f}gO zk~eF#+P_VS%d4Cp&4GiF%<065@s=r@_@nsq9)2h(`CWkko(-@gg@1G6BVtyX8bC=} z;*~T2E#XJ3#9pvm$fU*n05LV-sSAW)BjDngxgqCL_3z86mbl}V9V@a*W z50E=7jWV(KbGMLQ&V(QmB8>nPsmG$u(Xs$${Z}X<&f^omM!$!$m~eF6+@7v=WP(#k z<^zkXRH>$)qR+}_rz!o?U#PEh`@|r?7~o(=!!?(Vv^<*p<#-GBrn{HN z`8Ir*?e#azviK{R{;TZv1g=piCWIzQb222d^-O~W{qXY08{ImTsuYtF893q}Iq@^p z^vaE#8w?72U&YMtXqri<5mbNrh5J22Jvm7-NhUqLN__mJ3ma0J<=8hKH{&>C&iKX* z+1^4~fzqQufi1)^A#G3pZ6{#NUZdYq)gPU8mNP{GAp_#+IoDpuigj4UhK*FaX$$4k z8g)zyqw+wInWi+-*OWP>iPCFt{~e|YdOL9Bmn5^P1KRZ4gR)@U=ssbTkl8rv0fvH)M1AN=%Qys1BX5*W7oyS<4q30A)CMv+^R z&!ieHRxpIFDw{zRhODM!+997_z+kV`j4Clf2WorQYi}qY ziGilZS=s5(3dg2q$#mlx#7}!&`8xoj3|y_{#rFpeL!U9EE^fNTfI2zWL-e{^sY zVGN|MszKGDvrc8SG-0{ZxqnS?605E1{~=7`=jD8Kz=3vN1$t}Y;--`nrRHeBg} zltu_{N0D7XMi^D1j}Uh*p&qsSmhWB;oc%CJ-qU8SqKK>%?=Kkd<4df}Lfygh6c?gl zu%57!sOTt(Jt@i2&t_a&*hh&TA98G4|Qdy(B7~*^M@K*@3Thku zXePRq$NZ=gqh?i_H5WoqL6YQI?PS)v;Ay%dF)s>HhrQUkocs+A>9KA08PDIRgkGDz z>vkS4^0CnI42-9rC&V)oV)QfkkK677&xSl?6Ab6cc7!7>4!pSCkxWZ?sSbD5_^g)h zY$i7?V&%0p6poAVodvdmaV6b8%7bucju+)GFI5A;?p7d`J*Y;?7VwES9V#o46igW{ zQld~cOb1?NJt$C6F+x!Le)TkT!mB5FAs#H=8GXxKb}C|ua5pE-_rk=kWv(~B-Gy<(2@axAAmg+fBrdZl=|M+^R#1Vn5E-E7m zTBD|}uh2)MK*fkrO&(PNWeyW2l9s#~9lFS3V7dc$aIveFu{q??9qtN9wtYlLJtpRjS~1+oTQBO?9a!J=(k`se{neWYb(Y@|KQ1Ww%0q5c26P zTv%DUR7Z%-jW3g`az2isy4jr=E_?JL~N2;oyo z(NDJC6B)TlL&I^ir#@16I$cx`%y$0jogBRfD!LS4WffzKYE_qea%T9LOOAE9N6dyK zj(Ljyq>((WRv*lXsF-Xl&s1_-Dph|frHBS2EM_~!X zdVvFH^ioMcD zM#W0*08CR?)3v~)yH5Z=FQ2cS0e9B11I%Rc_E*!$+NJukg#@k# z1krn``qb75Le_#5^Uu1N`Oc*{s=ak+<)dL<*|kHv&`XJ505H&^%3ilUCh(+#&HC*7juYnG+VhA>* z)V7h3O#($(*lrbK3CB!F1+1QZ?Bj}OnXP=>R}n&k=;l-@6+)BqKy zom2^kvyE^+)5!AMS23d+NFL{%3>?N9qR44QrEOsLMIQWW{DcG9@`41$yYCB^XJtY%47nPI$T-)3)88}4fWI!}k zP{dMn#LIw4fcc2Sr1x*MF)*Ez-yF|&q@%+og$m9@{T6Ri=J;Env`-unC-TUC4>b4x zp()xkk$HUBTai+be>L95!7BCv1z0qQ4w^%A2-dSZU?#*8Z6a2C=LC~ymLKH*@2t__j){oXVjPjcCnFh5{ zj{1E@uz%Ne%t7}%&dE1`e?hMO*x?Uy6VV;a%>EMcOG99V1)N!Vj=LJLPjm_i+r*ND z;2M!ZnI2a5!xEHsah4!*S!H%!0$c%w6ZW5~`E!f;QS!3l4cuut<`Eqg_ew$(KBKoX z{dH2^RjT;fttI0{)Sn#J$)gaKm5aYdNERp801H1IHqt!n%r#Uzt+Etc#4WS zSLSGnysD>&5J8aa9R(C187sA0Wi5Y8fHEJ9^=`@Q52_UA5wm9jNrKZFPPYkcaU3z+ zO9v}|1Tb8^6j^{0N_Xl9ev5CYY~Y@umW+w7YzA-W+W<7BIeKfmH1QSC#LpGY`1i1% zy?-h7hUwKdNGVa2WX#a#M7PcnsD{4;PI|wm*lR;Yt~XFdCS##4IGZDIZF+6r*Wy3~ z?#R-QA7*t<)Ey3b^$z5XPvyDirWgWL=_BiJ&5K{@t)(;n5W}}7Gs6yZCo%DeY)oNLTHu)QF42cG2SnIYN z3bFPVV+=?nZ2^Bop|-FKKri2sQ;qUjs^2c1Lb;3czq9~NOFBE?I#Q*hMm6sMTUf9d zSs{c5LkdF1zPM#7V{b9rp1#C-c?m0P`1c))+L4l%s#aQVd$r4ReCC`ZTfe55{RG@e zdzu_{Qh!E7qsy@}KKa@IvQ z&=);Pxg{k@GkxKx{<)A?mDhH7%iKzn|4Sw1zo#X-oqxp}-RFhz@&1vZDG9TRjjki}>Cfm|H;QLr=GEmaVjp1U z(b77I3BjW1F;m&hO-ch9sw{!;H>YckF*uW`H<$YGKEL1Y9= zgm)j1R#T4Zt`+E75?^?~n{=UA`FU2^-DkmRJsW_&R1w##A5e&LhXd zxi~rbm^jY*%hGCW+gjaVrqK=Z&u|psD9x05uNeb$C9uasfr z*C8v=uE5%OiWg3?yq#dLzQPaCWgzRS{de$Lfaz4VtFSMQ8#P}Zm&9S5kGahi2T+Yz zzh-i%@;cdZh7G92nw|2R>>ha7;-((UT4c?Yhs(d!WLb1gYm0+tAQKH)Qkoram6ic@ zmed7$jcyB?`H#>>)JWzVzi&a8&nksYrHQ@@*Dl;jpZz}1h>7a{7CPe}!KazqfLK9u zfFA<|2?k$(un>PjaQH3p#Zus`osgtVbPN)-E;`V#*Aw57riKFJ671w;&kr+GWJkwq z!sjMltevmiW-l~9i|Oc}!S~IqNq{H{=NOZT+B7LbX(JbR>(Y>}>CC{xH9agvo{Fi3 zdp~A96qXcKiY2