diff --git a/bhm_generative_model.py b/bhm_generative_model.py
index 17afca8..b59af94 100644
--- a/bhm_generative_model.py
+++ b/bhm_generative_model.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-@author: Anonymous
+@author: Makan Arastuie
 """
 
 import numpy as np
diff --git a/bhm_local_search.py b/bhm_local_search.py
index 4c3f27a..74c6fd7 100644
--- a/bhm_local_search.py
+++ b/bhm_local_search.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-@author: Anonymous
+@author: Makan Arastuie
 """
 
 import time
diff --git a/bhm_model_fitting.py b/bhm_model_fitting.py
index 61307b0..4c5614e 100644
--- a/bhm_model_fitting.py
+++ b/bhm_model_fitting.py
@@ -1,12 +1,13 @@
 # -*- coding: utf-8 -*-
 """
-@author: Anonymous
+@author: Makan Arastuie
 """
 
 import time
 import pickle
 import numpy as np
 import dataset_utils
+from os.path import join
 import generative_model_utils as utils
 import model_fitting_utils as model_utils
 import bhm_parameter_estimation as estimate_utils
@@ -86,8 +87,9 @@ def fit_and_eval_block_hawkes(train_tuple, test_tuple, combined_tuple, nodes_not
     print(f"Test ll: {ll_per_event:.3f} - Took: {toc - tic:.2f}s")
 
     # Save results
-    result_file_path = f'{dataset_utils.get_script_path()}/storage/results/fb_bhm_fit'
-    with open(f'{result_file_path}/k{num_classes}-model-params.pckl', 'wb') as handle:
+    result_file_path = join(dataset_utils.get_script_path(), 'storage', 'results', 'fb_bhm_fit',
+                            f'k{num_classes}-model-params.pckl')
+    with open(result_file_path, 'wb') as handle:
         pickle.dump([train_node_membership, train_bp_mu, train_bp_alpha, train_bp_beta, train_block_pair_events],
                     handle, protocol=pickle.HIGHEST_PROTOCOL)
 
diff --git a/bhm_parameter_estimation.py b/bhm_parameter_estimation.py
index 0fbf3a2..d1800ac 100644
--- a/bhm_parameter_estimation.py
+++ b/bhm_parameter_estimation.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-@author: Anonymous
+@author: Makan Arastuie
 """
 
 import numpy as np
diff --git a/chip_generative_model.py b/chip_generative_model.py
index 05d5565..8632bc3 100644
--- a/chip_generative_model.py
+++ b/chip_generative_model.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-@author: Anonymous
+@author: Makan Arastuie
 """
 
 import time
diff --git a/chip_local_search.py b/chip_local_search.py
index 2d7367e..994f7c9 100644
--- a/chip_local_search.py
+++ b/chip_local_search.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-@author: Anonymous
+@author: Makan Arastuie
 """
 
 import time
diff --git a/chip_model_fitting.py b/chip_model_fitting.py
index 17be245..0b9a604 100644
--- a/chip_model_fitting.py
+++ b/chip_model_fitting.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-@author: Anonymous
+@author: Makan Arastuie
 """
 
 import time
diff --git a/dataset_utils.py b/dataset_utils.py
index 2721f6a..ee372ee 100644
--- a/dataset_utils.py
+++ b/dataset_utils.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-@author: Anonymous
+@author: Makan Arastuie
 """
 
 import os
@@ -8,6 +8,7 @@
 import urllib
 import numpy as np
 import networkx as nx
+from os.path import join
 from datetime import datetime
 import matplotlib.pyplot as plt
 import generative_model_utils as utils
@@ -22,18 +23,18 @@ def get_script_path():
 
 def load_reality_mining_test_train(remove_nodes_not_in_train=False):
     """
-    Loads Reality Mining dataset. 
+    Loads Reality Mining dataset.
 
-    :param remove_nodes_not_in_train: if True, removes the nodes that do not appear in the training set. 
+    :param remove_nodes_not_in_train: if True, removes the nodes that do not appear in the training set.
 
-    :return: Three tuples one for each train, test and combined datasets. Each Tuple contains:
-        ((dict) with (caller_id, receiver_id): [unix_timestamps] (event dict structure),
-        (int) number of nodes,
-        (float) duration)
-        (list) nodes_not_in_train
-    """
-    train_file_path = f'{get_script_path()}/storage/datasets/reality-mining/train_reality.csv'
-    test_file_path = f'{get_script_path()}/storage/datasets/reality-mining/test_reality.csv'
+    :return: Three tuples one for each train, test and combined datasets. Each Tuple contains:
+        ((dict) with (caller_id, receiver_id): [unix_timestamps] (event dict structure),
+        (int) number of nodes,
+        (float) duration)
+        (list) nodes_not_in_train
+    """
+    train_file_path = join(get_script_path(), 'storage', 'datasets', 'reality-mining', 'train_reality.csv')
+    test_file_path = join(get_script_path(), 'storage', 'datasets', 'reality-mining', 'test_reality.csv')
 
     # Timestamps are adjusted to start from 0 and go up to 1000.
     combined_duration = 1000.0
@@ -53,8 +54,8 @@ def load_enron_train_test(remove_nodes_not_in_train=False):
         (float) duration)
         (list) nodes_not_in_train
     """
-    train_file_path = f'{get_script_path()}/storage/datasets/enron/train_enron.csv'
-    test_file_path = f'{get_script_path()}/storage/datasets/enron/test_enron.csv'
+    train_file_path = join(get_script_path(), 'storage', 'datasets', 'enron', 'train_enron.csv')
+    test_file_path = join(get_script_path(), 'storage', 'datasets', 'enron', 'test_enron.csv')
 
     # Timestamps are adjusted to start from 0 and go up to 1000.
     combined_duration = 1000.0
@@ -74,8 +75,8 @@ def load_fb_train_test(remove_nodes_not_in_train=False):
         (float) duration)
         (list) nodes_not_in_train
     """
-    train_file_path = f'{get_script_path()}/storage/datasets/facebook-wallposts/train_FB_event_mat.csv'
-    test_file_path = f'{get_script_path()}/storage/datasets/facebook-wallposts/test_FB_event_mat.csv'
+    train_file_path = join(get_script_path(), 'storage', 'datasets', 'facebook-wallposts', 'train_FB_event_mat.csv')
+    test_file_path = join(get_script_path(), 'storage', 'datasets', 'facebook-wallposts', 'test_FB_event_mat.csv')
 
     # Timestamps are adjusted to start from 0 and go up to 8759.9.
     combined_duration = 8759.9
 
@@ -321,7 +322,7 @@ def load_facebook_wall(timestamp_max=1000, largest_connected_component_only=Fals
     """
     file_path = download_file_path
    if download_file_path is None:
-        file_path = f"{get_script_path()}/storage/datasets/facebook-wallposts/facebook-wallpost.txt.gz"
+        file_path = join(get_script_path(), 'storage', 'datasets', 'facebook-wallposts', 'facebook-wallpost.txt.gz')
 
     # Downloading the dataset it is not in the storage directory
     if not os.path.exists(file_path):
diff --git a/exploratory_analysis/enron_exploratory_analysis.ipynb b/examples/enron_exploratory_analysis.ipynb
similarity index 99%
rename from exploratory_analysis/enron_exploratory_analysis.ipynb
rename to examples/enron_exploratory_analysis.ipynb
index 805e80e..906ed01 100644
--- a/exploratory_analysis/enron_exploratory_analysis.ipynb
+++ b/examples/enron_exploratory_analysis.ipynb
@@ -28,8 +28,9 @@
     }
    ],
    "source": [
-    "import sys \n",
-    "sys.path.insert(0, '../.')\n",
+    "import sys\n",
+    "from os.path import join\n",
+    "sys.path.insert(0, join('..', '.'))\n",
     "\n",
     "import numpy as np\n",
     "import dataset_utils\n",
@@ -73,8 +74,9 @@
     }
    ],
    "source": [
-    "train_file_path = '../storage/datasets/enron/train_enron.csv'\n",
-    "test_file_path = '../storage/datasets/enron/test_enron.csv'\n",
+    "train_file_path = join('..', 'storage', 'datasets', 'enron', 'train_enron.csv')\n",
+    "test_file_path = join('..', 'storage', 'datasets', 'enron', 'test_enron.csv')\n",
+    "\n",
     "*_, enron_combined_tuple, _ = dataset_utils.load_train_test(train_file_path, test_file_path, \n",
     "                                                            combined_duration=1000, \n",
     "                                                            remove_nodes_not_in_train=False)\n",
@@ -552,7 +554,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.4"
+   "version": "3.7.1"
   },
   "toc": {
    "base_numbering": 1,
diff --git a/exploratory_analysis/facebook_wallposts_exploratory_analysis.ipynb b/examples/facebook_wallposts_exploratory_analysis.ipynb
similarity index 99%
rename from exploratory_analysis/facebook_wallposts_exploratory_analysis.ipynb
rename to examples/facebook_wallposts_exploratory_analysis.ipynb
index d032b85..2505705 100644
--- a/exploratory_analysis/facebook_wallposts_exploratory_analysis.ipynb
+++ b/examples/facebook_wallposts_exploratory_analysis.ipynb
@@ -29,8 +29,9 @@
     }
    ],
    "source": [
-    "import sys \n",
-    "sys.path.insert(0, '../.')\n",
+    "import sys\n",
+    "from os.path import join\n",
+    "sys.path.insert(0, join('..', '.'))\n",
     "\n",
     "import numpy as np\n",
     "import dataset_utils\n",
@@ -74,7 +75,8 @@
     }
    ],
    "source": [
-    "fb_data_path = \"../storage/datasets/facebook-wallposts/facebook-wallpost.txt.gz\"\n",
+    "fb_data_path = join('..', 'storage', 'datasets', 'facebook-wallposts', 'facebook-wallpost.txt.gz')\n",
+    "\n",
     "(data_event_dict, \n",
     " data_num_nodes, \n",
     " data_duration) = dataset_utils.load_facebook_wall(download_file_path=fb_data_path, timestamp_max=1000,\n",
@@ -664,7 +666,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.9"
+   "version": "3.7.1"
   },
   "toc": {
    "base_numbering": 1,
diff --git a/exploratory_analysis/generating_chip_networks.ipynb b/examples/generating_chip_networks.ipynb
similarity index 99%
rename from exploratory_analysis/generating_chip_networks.ipynb
rename to examples/generating_chip_networks.ipynb
index dfa1975..3bf8c49 100644
--- a/exploratory_analysis/generating_chip_networks.ipynb
+++ b/examples/generating_chip_networks.ipynb
@@ -21,7 +21,8 @@
    "outputs": [],
    "source": [
     "import sys \n",
-    "sys.path.insert(0, '../.')\n",
+    "from os.path import join\n",
+    "sys.path.insert(0, join('..', '.'))\n",
     "\n",
     "import numpy as np\n",
     "import matplotlib.pyplot as plt\n",
@@ -65,11 +66,11 @@
     "\n",
     "`class_prob:` (list) Probability of class memberships from class $0$ to $k - 1$. Make sure it sums up to 1\n",
     "\n",
-    "`bp_mu` $k \\times k$ matrix where entry $ij$ denotes the $\\mu$ of Hawkes process for block pair $(b_i, b_j)$\n",
+    "`bp_mu:` $k \\times k$ matrix where entry $ij$ denotes the $\\mu$ of Hawkes process for block pair $(b_i, b_j)$\n",
     "\n",
     "`bp_alpha:` $k \\times k$ matrix where entry $ij$ denotes the $\\alpha$ of Hawkes process for block pair $(b_i, b_j)$\n",
     "\n",
-    "`bp_beta` $k \\times k$ matrix where entry $ij$ denotes the $\\beta$ of Hawkes process for block pair $(b_i, b_j)$\n",
+    "`bp_beta:` $k \\times k$ matrix where entry $ij$ denotes the $\\beta$ of Hawkes process for block pair $(b_i, b_j)$\n",
     "\n",
     "`end_time:` (float) end_time of hawkes simulation\n",
     "\n",
diff --git a/generative_model_utils.py b/generative_model_utils.py
index 8624560..b05201d 100644
--- a/generative_model_utils.py
+++ b/generative_model_utils.py
@@ -1,12 +1,13 @@
 # -*- coding: utf-8 -*-
 """
-@author: Anonymous
+@author: Makan Arastuie
 """
 
 import os
 import pickle
 import numpy as np
 import dataset_utils
+from os.path import join
 import matplotlib.pyplot as plt
 import chip_generative_model as chip
 from tick.hawkes import SimuHawkesExpKernels
@@ -347,7 +348,7 @@ def simulate_community_hawkes(params=None, network_name=None, load_if_exists=Fal
 
     :return: event_dict, node_membership
     """
-    generated_network_path = f'{dataset_utils.get_script_path()}/storage/results/generated_networks/'
+    generated_network_path = join(dataset_utils.get_script_path(), 'storage', 'results', 'generated_networks')
 
     default_params = {'seed': None,
                       'number_of_nodes': 128,
@@ -365,8 +366,8 @@ def simulate_community_hawkes(params=None, network_name=None, load_if_exists=Fal
 
     # Load the network if existed
     if load_if_exists and network_name is not None:
-        if os.path.isfile(generated_network_path + network_name + ".pckl"):
-            with open(generated_network_path + network_name + ".pckl", 'rb') as handle:
+        if os.path.isfile(join(generated_network_path, f'{network_name}.pckl')):
+            with open(join(generated_network_path, f'{network_name}.pckl'), 'rb') as handle:
                 [event_dict, node_membership, params] = pickle.load(handle)
 
                 if verbose:
@@ -409,7 +410,7 @@ def simulate_community_hawkes(params=None, network_name=None, load_if_exists=Fal
         node_membership = one_hot_to_class_assignment(node_membership)
 
     if network_name is not None:
-        with open(generated_network_path + network_name + ".pckl", 'wb') as handle:
+        with open(join(generated_network_path, f'{network_name}.pckl'), 'wb') as handle:
             pickle.dump([event_dict, node_membership, default_params], handle, protocol=pickle.HIGHEST_PROTOCOL)
 
     return event_dict, node_membership
diff --git a/model_fitting_utils.py b/model_fitting_utils.py
index cac2ab8..4a157bc 100644
--- a/model_fitting_utils.py
+++ b/model_fitting_utils.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-@author: Anonymous
+@author: Makan Arastuie
 """
 
 import numpy as np
diff --git a/parameter_estimation.py b/parameter_estimation.py
index 8a6edbf..44d7a8d 100644
--- a/parameter_estimation.py
+++ b/parameter_estimation.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-@author: Anonymous
+@author: Makan Arastuie
 """
 
 import warnings
diff --git a/plotting_utils.py b/plotting_utils.py
index b6050b3..af22da6 100644
--- a/plotting_utils.py
+++ b/plotting_utils.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-@author: Anonymous
+@author: Makan Arastuie
 """
 
 import numpy as np
diff --git a/poisson_baseline_model_fitting.py b/poisson_baseline_model_fitting.py
index ae7349e..edcac89 100644
--- a/poisson_baseline_model_fitting.py
+++ b/poisson_baseline_model_fitting.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-@author: Anonymous
+@author: Makan Arastuie
 """
 
 import time
diff --git a/sim_AS2.py b/sim_AS2.py
index 6677599..43f42a4 100644
--- a/sim_AS2.py
+++ b/sim_AS2.py
@@ -10,11 +10,12 @@
 Expectation: We should see accuracy increase in both these cases. When mu_diag/mu_off_diag ratio is low, the algorithms
 will do poorly, but as the ratio increases there is more signal and the algorithm will do well and go all the way to 1.
 
-@author: Anonymous
+@author: Makan Arastuie
 """
 
 import pickle
 import numpy as np
+from os.path import join
 import matplotlib.pyplot as plt
 from joblib import Parallel, delayed
 import generative_model_utils as utils
@@ -23,7 +24,7 @@
 from spectral_clustering import spectral_cluster
 
 
-result_file_path = f'{get_script_path()}/storage/results/AS2'
+result_file_path = join(get_script_path(), 'storage', 'results', 'AS2')
 
 also_use_unweighted_adjacency = True
 
@@ -101,7 +102,7 @@ def test_spectral_clustering_on_generative_model(scalar):
         mean_proportion_ones_in_adj_err.append(2 * np.std(results[:, 2]) / np.sqrt(len(results[:, 2])))
 
         # Save results
-        with open(f'{result_file_path}/all_sims-{sim_type}-w-adj.pckl', 'wb') as handle:
+        with open(join(result_file_path, f'all_sims-{sim_type}-w-adj.pckl'), 'wb') as handle:
             pickle.dump([agg_adj_mean_sc_rand_scores, agg_adj_mean_sc_rand_scores_err,
                          adj_mean_sc_rand_scores, adj_mean_sc_rand_scores_err,
                          mean_proportion_ones_in_adj,
@@ -111,18 +112,18 @@ def test_spectral_clustering_on_generative_model(scalar):
         agg_adj_mean_sc_rand_scores_err.append(2 * np.std(results) / np.sqrt(len(results)))
 
         # Save results
-        with open(f'{result_file_path}/all_sims-{sim_type}.pckl', 'wb') as handle:
+        with open(join(result_file_path, f'all_sims-{sim_type}.pckl'), 'wb') as handle:
             pickle.dump([agg_adj_mean_sc_rand_scores, agg_adj_mean_sc_rand_scores_err],
                         handle, protocol=pickle.HIGHEST_PROTOCOL)
 
 if also_use_unweighted_adjacency:
-    with open(f'{result_file_path}/all_sims-{sim_type}-w-adj.pckl', 'rb') as handle:
+    with open(join(result_file_path, f'all_sims-{sim_type}-w-adj.pckl'), 'rb') as handle:
         [agg_adj_mean_sc_rand_scores, agg_adj_mean_sc_rand_scores_err,
          adj_mean_sc_rand_scores, adj_mean_sc_rand_scores_err,
         mean_proportion_ones_in_adj,
         mean_proportion_ones_in_adj_err] = pickle.load(handle)
 else:
-    with open(f'{result_file_path}/all_sims-{sim_type}.pckl', 'rb') as handle:
+    with open(join(result_file_path, f'all_sims-{sim_type}.pckl'), 'rb') as handle:
         [agg_adj_mean_sc_rand_scores, agg_adj_mean_sc_rand_scores_err] = pickle.load(handle)
 
 
@@ -157,7 +158,7 @@ def test_spectral_clustering_on_generative_model(scalar):
 
     ax.autoscale_view()
 
-    plt.savefig(result_file_path + "/plots/" + plot_name + ".pdf")
+    plt.savefig(join(result_file_path, 'plots', f'{plot_name}.pdf'))
     plt.show()
 else:
     w, h = plt.figaspect(.3)
@@ -190,5 +191,5 @@ def test_spectral_clustering_on_generative_model(scalar):
 
     ax.autoscale_view()
 
-    plt.savefig(f"{result_file_path}/plots/agg-vs-adj-density.pdf")
+    plt.savefig(join(result_file_path, 'plots', 'agg-vs-adj-density.pdf'))
     plt.show()
\ No newline at end of file
diff --git a/sim_AS3.py b/sim_AS3.py
index 7ef1d57..2bd0101 100644
--- a/sim_AS3.py
+++ b/sim_AS3.py
@@ -10,11 +10,12 @@
 (b) fix $T$, increasing $n$ and decreasing $k$.
 (c) fix $k$, increasing $n$ and increasing $T$.
 
-@author: Anonymous
+@author: Makan Arastuie
 """
 
 import pickle
 import numpy as np
+from os.path import join
 import matplotlib.pyplot as plt
 from plotting_utils import heatmap
 from joblib import Parallel, delayed
@@ -45,7 +46,7 @@ def test_spectral_clustering_on_generative_model(n, t, k):
 
     return agg_adj_sc_rand
 
 
-result_file_path = f'{get_script_path()}/storage/results/AS3'
+result_file_path = join(get_script_path(), 'storage', 'results', 'AS3')
 
 plot_only = False
 
@@ -121,10 +122,10 @@ def test_spectral_clustering_on_generative_model(n, t, k):
     mean_sc_rand_scores_err = np.reshape(mean_sc_rand_scores_err, (num_test_values, num_test_values))
 
     # Save results
-    with open(f'{result_file_path}/all_sims-fixed-{fixed_var}.pckl', 'wb') as handle:
+    with open(join(result_file_path, f'all_sims-fixed-{fixed_var}.pckl'), 'wb') as handle:
         pickle.dump([mean_sc_rand_scores, mean_sc_rand_scores_err], handle, protocol=pickle.HIGHEST_PROTOCOL)
 
-with open(f'{result_file_path}/all_sims-fixed-{fixed_var}.pckl', 'rb') as handle:
+with open(join(result_file_path, f'all_sims-fixed-{fixed_var}.pckl'), 'rb') as handle:
     [mean_sc_rand_scores, mean_sc_rand_scores_err] = pickle.load(handle)
 
 # Reverse results in order of k for fixed n and T
@@ -153,6 +154,6 @@ def test_spectral_clustering_on_generative_model(n, t, k):
     plt.xlabel(xlab, fontsize=16)
     # ax.set_title(f"CHIP SC AS3 Fixed {fixed_var.upper()}: {fixed_value}")
     fig.tight_layout()
-    plt.savefig(f"{result_file_path}/plots/as3-fixed-{fixed_var}.pdf", bbox_inches='tight')
+    plt.savefig(join(result_file_path, 'plots', f'as3-fixed-{fixed_var}.pdf'), bbox_inches='tight')
     # plt.show()
 
diff --git a/sim_count_based_estimates.py b/sim_count_based_estimates.py
index adbaf6c..5c18c25 100644
--- a/sim_count_based_estimates.py
+++ b/sim_count_based_estimates.py
@@ -4,11 +4,12 @@
 
 Empirically analyzing the consistency of the CHIP parameter estimators.
 
-@author: Anonymous
+@author: Makan Arastuie
 """
 
 import pickle
 import numpy as np
+from os.path import join
 import matplotlib.pyplot as plt
 from joblib import Parallel, delayed
 import generative_model_utils as utils
@@ -17,7 +18,7 @@
 from parameter_estimation import estimate_hawkes_from_counts
 
 
-result_file_path = f'{get_script_path()}/storage/results/count_based_estimate'
+result_file_path = join(get_script_path(), 'storage', 'results', 'count_based_estimate')
 
 estimate_alpha_beta = True
 plot_only = False
@@ -113,19 +114,19 @@ def calc_mean_and_error_of_count_estiamte(n_nodes):
     beta_mse_err.append(2 * np.std(beta_mse_temp) / np.sqrt(len(beta_mse_temp)))
 
 if estimate_alpha_beta:
-    with open(f'{result_file_path}/mses.pckl', 'wb') as handle:
+    with open(join(result_file_path, 'mses.pckl'), 'wb') as handle:
         pickle.dump([mu_mse, mu_mse_err, ratio_mse, ratio_mse_err,
                      alpha_mse, alpha_mse_err, beta_mse, beta_mse_err], handle, protocol=pickle.HIGHEST_PROTOCOL)
 else:
-    with open(f'{result_file_path}/mses_no_alpha.pckl', 'wb') as handle:
+    with open(join(result_file_path, 'mses_no_alpha.pckl'), 'wb') as handle:
         pickle.dump([mu_mse, mu_mse_err, ratio_mse, ratio_mse_err], handle, protocol=pickle.HIGHEST_PROTOCOL)
 
 if estimate_alpha_beta:
-    with open(f'{result_file_path}/mses.pckl', 'rb') as handle:
+    with open(join(result_file_path, 'mses.pckl'), 'rb') as handle:
         [mu_mse, mu_mse_err, ratio_mse, ratio_mse_err,
          alpha_mse, alpha_mse_err, beta_mse, beta_mse_err] = pickle.load(handle)
 else:
-    with open(f'{result_file_path}/mses_no_alpha.pckl', 'rb') as handle:
+    with open(join(result_file_path, 'mses_no_alpha.pckl'), 'rb') as handle:
         mu_mse, mu_mse_err, ratio_mse, ratio_mse_err = pickle.load(handle)
 
 
@@ -154,7 +155,7 @@ def calc_mean_and_error_of_count_estiamte(n_nodes):
 plt.tight_layout()
 #plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0), fontsize=16)
 plt.autoscale()
-plt.savefig(f"{result_file_path}/plots/consistent_mu_mse.pdf")
+plt.savefig(join(result_file_path, 'plots', 'consistent_mu_mse.pdf'))
 plt.show()
 plt.clf()
 
@@ -169,7 +170,7 @@ def calc_mean_and_error_of_count_estiamte(n_nodes):
 plt.tight_layout()
 #plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0), fontsize=16)
 plt.autoscale()
-plt.savefig(f"{result_file_path}/plots/consistent_m_mse.pdf")
+plt.savefig(join(result_file_path, 'plots', 'consistent_m_mse.pdf'))
 plt.show()
 plt.clf()
 
@@ -186,7 +187,7 @@ def calc_mean_and_error_of_count_estiamte(n_nodes):
 
     plt.tight_layout()
     #plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0), fontsize=16)
-    plt.savefig(f"{result_file_path}/plots/consistent_alpha_mse.pdf")
+    plt.savefig(join(result_file_path, 'plots', 'consistent_alpha_mse.pdf'))
     plt.show()
     plt.clf()
 
@@ -202,5 +203,5 @@ def calc_mean_and_error_of_count_estiamte(n_nodes):
 
     plt.tight_layout()
     #plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0), fontsize=16)
-    plt.savefig(f"{result_file_path}/plots/consistent_beta_mse.pdf")
+    plt.savefig(join(result_file_path, 'plots', 'consistent_beta_mse.pdf'))
     plt.show()
diff --git a/sim_end_to_end_count_based_estimates.py b/sim_end_to_end_count_based_estimates.py
index b4e30c3..c8a13cf 100644
--- a/sim_end_to_end_count_based_estimates.py
+++ b/sim_end_to_end_count_based_estimates.py
@@ -4,13 +4,14 @@
 
 Empirically analyzing the end-to-end consistency of the CHIP parameter estimators.
 
-@author: Anonymous
+@author: Makan Arastuie
 """
 
 import os
 import copy
 import pickle
 import numpy as np
+from os.path import join
 import matplotlib.pyplot as plt
 from joblib import Parallel, delayed
 import generative_model_utils as utils
@@ -21,7 +22,7 @@
 from sklearn.linear_model import LinearRegression
 from parameter_estimation import estimate_hawkes_from_counts
 
-result_file_path = f'{get_script_path()}/storage/results/end_to_end_count_based_estimate'
+result_file_path = join(get_script_path(), 'storage', 'results', 'end_to_end_count_based_estimate')
 
 run_analysis = True
 run_plotting = True
@@ -167,11 +168,11 @@ def calc_mean_and_error_of_count_estiamte(n_nodes):
         kce['beta_mse'].append(np.mean(beta_mse_temp))
         kce['beta_mse_err'].append(2 * np.std(beta_mse_temp) / np.sqrt(beta_mse_temp.size))
 
-    with open(f'{result_file_path}/mses.pckl', 'wb') as handle:
+    with open(join(result_file_path, 'mses.pckl'), 'wb') as handle:
         pickle.dump([ece, kce], handle, protocol=pickle.HIGHEST_PROTOCOL)
 
 
-with open(f'{result_file_path}/mses.pckl', 'rb') as handle:
+with open(join(result_file_path, 'mses.pckl'), 'rb') as handle:
     ece, kce = pickle.load(handle)
 
 
@@ -198,7 +199,7 @@ def calc_mean_and_error_of_count_estiamte(n_nodes):
     plt.xticks(range(len(num_nodes_to_test)), num_nodes_to_test)
     plt.tick_params(labelsize=12)
     plt.tight_layout()
-    plt.savefig(f"{result_file_path}/plots/estimated_consistent_rand_mean.pdf")
+    plt.savefig(join(result_file_path, 'plots', 'estimated_consistent_rand_mean.pdf'))
 
     for param, err in params.items():
         # estimated communities
@@ -211,7 +212,7 @@ def calc_mean_and_error_of_count_estiamte(n_nodes):
         plt.xticks(range(len(num_nodes_to_test)), num_nodes_to_test)
         plt.tick_params(labelsize=12)
         plt.tight_layout()
-        plt.savefig(f"{result_file_path}/plots/estimated_consistent_{param}_mse.pdf")
+        plt.savefig(join(result_file_path, 'plots', f'estimated_consistent_{param}_mse.pdf'))
 
         # known communities
         plt.ion()
@@ -222,8 +223,7 @@ def calc_mean_and_error_of_count_estiamte(n_nodes):
         plt.xticks(range(len(num_nodes_to_test)), num_nodes_to_test)
         plt.tick_params(labelsize=12)
         plt.tight_layout()
-        plt.savefig(f"{result_file_path}/plots/known_consistent_{param}_mse.pdf")
-
+        plt.savefig(join(result_file_path, 'plots', f'known_consistent_{param}_mse.pdf'))
 
 if run_regression:
     print("\nRegression: \n")
diff --git a/sim_growning_n_increase_sc_rand.py b/sim_growning_n_increase_sc_rand.py
index 32ee7f7..596cde0 100644
--- a/sim_growning_n_increase_sc_rand.py
+++ b/sim_growning_n_increase_sc_rand.py
@@ -2,11 +2,12 @@
 """
 "Spectral Clustering on Weighted vs. Unweighted Adjacency Matrix"
 
-@author: Anonymous
+@author: Makan Arastuie
 """
 
 import pickle
 import numpy as np
+from os.path import join
 import matplotlib.pyplot as plt
 from joblib import Parallel, delayed
 import generative_model_utils as utils
@@ -15,7 +16,7 @@
 from spectral_clustering import spectral_cluster
 
 
-result_file_path = f'{get_script_path()}/storage/results/growing_n_increase_sc_rand'
+result_file_path = join(get_script_path(), 'storage', 'results', 'growing_n_increase_sc_rand')
 
 agg_adj_should_fail = False
 plot_only = False
@@ -103,13 +104,13 @@ def test_spectral_clustering_on_generative_model(n_nodes):
     mean_agg_adj_sc_rand_scores.append(np.mean(results[:, 1]))
     mean_agg_adj_sc_rand_scores_err.append(2 * np.std(results[:, 1]) / np.sqrt(len(results[:, 1])))
 
-    with open(f'{result_file_path}/{file_name}', 'wb') as handle:
+    with open(join(result_file_path, file_name), 'wb') as handle:
         pickle.dump([mean_adj_sc_rand_scores, mean_adj_sc_rand_scores_err,
                      mean_agg_adj_sc_rand_scores, mean_agg_adj_sc_rand_scores_err],
                     handle, protocol=pickle.HIGHEST_PROTOCOL)
 
 
-with open(f'{result_file_path}/{file_name}', 'rb') as handle:
+with open(join(result_file_path, file_name), 'rb') as handle:
     [mean_adj_sc_rand_scores,
      mean_adj_sc_rand_scores_err,
     mean_agg_adj_sc_rand_scores,
@@ -149,5 +150,5 @@ def test_spectral_clustering_on_generative_model(n_nodes):
     ax.autoscale_view()
 
 plot_name = "agg_adj_fail_100_sim" if agg_adj_should_fail else "adj_fail_100_sim"
-plt.savefig(f"{result_file_path}/plots/{plot_name}.pdf", bbox_inches='tight')
+plt.savefig(join(result_file_path, 'plots', f'{plot_name}.pdf'), bbox_inches='tight')
 plt.show()
diff --git a/spectral_clustering.py b/spectral_clustering.py
index 4d7e8d6..898b4b9 100644
--- a/spectral_clustering.py
+++ b/spectral_clustering.py
@@ -1,9 +1,10 @@
 # -*- coding: utf-8 -*-
 """
-@author: Anonymous
+@author: Makan Arastuie
 """
 
 import numpy as np
+from os.path import join
 import matplotlib.pyplot as plt
 from sklearn.cluster import KMeans
 from scipy.sparse.linalg import svds
@@ -41,7 +42,7 @@ def spectral_cluster(adj, num_classes=2, n_kmeans_init=10, normalize_z=True, ver
         plt.grid(True)
         ax.tick_params(labelsize=20)
         plt.tight_layout()
-        plt.savefig(f'{plot_save_path}/singular_values.pdf')
+        plt.savefig(join(plot_save_path, 'singular_values.pdf'))
         plt.show()
 
     # Sort in decreasing order of magnitude