Commit: Clean up

arastuie committed Jul 17, 2020
1 parent 221e05c commit 93fc639

Showing 22 changed files with 98 additions and 84 deletions.
2 changes: 1 addition & 1 deletion bhm_generative_model.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import numpy as np
2 changes: 1 addition & 1 deletion bhm_local_search.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import time
8 changes: 5 additions & 3 deletions bhm_model_fitting.py
@@ -1,12 +1,13 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import time
import pickle
import numpy as np
import dataset_utils
from os.path import join
import generative_model_utils as utils
import model_fitting_utils as model_utils
import bhm_parameter_estimation as estimate_utils
@@ -86,8 +87,9 @@ def fit_and_eval_block_hawkes(train_tuple, test_tuple, combined_tuple, nodes_not
print(f"Test ll: {ll_per_event:.3f} - Took: {toc - tic:.2f}s")

# Save results
result_file_path = f'{dataset_utils.get_script_path()}/storage/results/fb_bhm_fit'
with open(f'{result_file_path}/k{num_classes}-model-params.pckl', 'wb') as handle:
result_file_path = join(dataset_utils.get_script_path(), 'storage', 'results', 'fb_bhm_fit',
f'k{num_classes}-model-params.pckl')
with open(result_file_path, 'wb') as handle:
pickle.dump([train_node_membership, train_bp_mu, train_bp_alpha, train_bp_beta, train_block_pair_events],
handle, protocol=pickle.HIGHEST_PROTOCOL)
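This same refactor recurs throughout the commit: f-string paths with hard-coded '/' separators become os.path.join calls, which select the correct separator for the host OS. A minimal before/after sketch of the pattern; script_path here is only a stand-in for whatever dataset_utils.get_script_path() returns, and num_classes is an arbitrary example value:

import os
from os.path import join

script_path = os.getcwd()  # stand-in for dataset_utils.get_script_path()
num_classes = 4            # arbitrary example value

# Before: separator hard-coded into the f-string (POSIX-only)
old_path = f'{script_path}/storage/results/fb_bhm_fit/k{num_classes}-model-params.pckl'

# After: os.path.join picks the correct separator for the platform
new_path = join(script_path, 'storage', 'results', 'fb_bhm_fit',
                f'k{num_classes}-model-params.pckl')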

2 changes: 1 addition & 1 deletion bhm_parameter_estimation.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import numpy as np
2 changes: 1 addition & 1 deletion chip_generative_model.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import time
2 changes: 1 addition & 1 deletion chip_local_search.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import time
2 changes: 1 addition & 1 deletion chip_model_fitting.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import time
33 changes: 17 additions & 16 deletions dataset_utils.py
@@ -1,13 +1,14 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import os
import sys
import urllib
import numpy as np
import networkx as nx
from os.path import join
from datetime import datetime
import matplotlib.pyplot as plt
import generative_model_utils as utils
@@ -22,18 +23,18 @@ def get_script_path():

def load_reality_mining_test_train(remove_nodes_not_in_train=False):
"""
Loads Reality Mining dataset.
:param remove_nodes_not_in_train: if True, removes the nodes that do not appear in the training set.
:return: Three tuples one for each train, test and combined datasets. Each Tuple contains:
((dict) with (caller_id, receiver_id): [unix_timestamps] (event dict structure),
(int) number of nodes,
(float) duration)
(list) nodes_not_in_train
"""
train_file_path = f'{get_script_path()}/storage/datasets/reality-mining/train_reality.csv'
test_file_path = f'{get_script_path()}/storage/datasets/reality-mining/test_reality.csv'
train_file_path = join(get_script_path(), 'storage', 'datasets', 'reality-mining', 'train_reality.csv')
test_file_path = join(get_script_path(), 'storage', 'datasets', 'reality-mining', 'test_reality.csv')

# Timestamps are adjusted to start from 0 and go up to 1000.
combined_duration = 1000.0
@@ -53,8 +54,8 @@ def load_enron_train_test(remove_nodes_not_in_train=False):
(float) duration)
(list) nodes_not_in_train
"""
train_file_path = f'{get_script_path()}/storage/datasets/enron/train_enron.csv'
test_file_path = f'{get_script_path()}/storage/datasets/enron/test_enron.csv'
train_file_path = join(get_script_path(), 'storage', 'datasets', 'enron', 'train_enron.csv')
test_file_path = join(get_script_path(), 'storage', 'datasets', 'enron', 'test_enron.csv')

# Timestamps are adjusted to start from 0 and go up to 1000.
combined_duration = 1000.0
@@ -74,8 +75,8 @@ def load_fb_train_test(remove_nodes_not_in_train=False):
(float) duration)
(list) nodes_not_in_train
"""
train_file_path = f'{get_script_path()}/storage/datasets/facebook-wallposts/train_FB_event_mat.csv'
test_file_path = f'{get_script_path()}/storage/datasets/facebook-wallposts/test_FB_event_mat.csv'
train_file_path = join(get_script_path(), 'storage', 'datasets', 'facebook-wallposts', 'train_FB_event_mat.csv')
test_file_path = join(get_script_path(), 'storage', 'datasets', 'facebook-wallposts', 'test_FB_event_mat.csv')

# Timestamps are adjusted to start from 0 and go up to 8759.9.
combined_duration = 8759.9
@@ -321,7 +322,7 @@ def load_facebook_wall(timestamp_max=1000, largest_connected_component_only=Fals
"""
file_path = download_file_path
if download_file_path is None:
file_path = f"{get_script_path()}/storage/datasets/facebook-wallposts/facebook-wallpost.txt.gz"
file_path = join(get_script_path(), 'storage', 'datasets', 'facebook-wallposts', 'facebook-wallpost.txt.gz')

# Download the dataset if it is not in the storage directory
if not os.path.exists(file_path):
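For context, a hedged usage sketch of these loaders, based only on the signatures and :return: docstrings visible in this diff; the exact unpacking order of the three tuples and the trailing node list below is an assumption from that description:

import dataset_utils

# Each loader returns train, test, and combined tuples of
# (event_dict, num_nodes, duration), plus the nodes absent from train.
train_tuple, test_tuple, combined_tuple, nodes_not_in_train = \
    dataset_utils.load_enron_train_test(remove_nodes_not_in_train=False)

train_event_dict, train_num_nodes, train_duration = train_tuple
print(f'{train_num_nodes} nodes, duration {train_duration}, '
      f'{len(nodes_not_in_train)} nodes unseen in train')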
Changes to a notebook that loads the Enron dataset (file name not captured in this view)
@@ -28,8 +29,9 @@
}
],
"source": [
"import sys \n",
"sys.path.insert(0, '../.')\n",
"import sys\n",
"from os.path import join\n",
"sys.path.insert(0, join('..', '.'))\n",
"\n",
"import numpy as np\n",
"import dataset_utils\n",
@@ -73,8 +74,9 @@
}
],
"source": [
"train_file_path = '../storage/datasets/enron/train_enron.csv'\n",
"test_file_path = '../storage/datasets/enron/test_enron.csv'\n",
"train_file_path = join('..', 'storage', 'datasets', 'enron', 'train_enron.csv')\n",
"test_file_path = join('..', 'storage', 'datasets', 'enron', 'test_enron.csv')\n",
"\n",
"*_, enron_combined_tuple, _ = dataset_utils.load_train_test(train_file_path, test_file_path, \n",
" combined_duration=1000, \n",
" remove_nodes_not_in_train=False)\n",
@@ -552,7 +554,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
"version": "3.7.1"
},
"toc": {
"base_numbering": 1,
Changes to a notebook that loads the Facebook wall-post dataset (file name not captured in this view)
@@ -29,8 +29,9 @@
}
],
"source": [
"import sys \n",
"sys.path.insert(0, '../.')\n",
"import sys\n",
"from os.path import join\n",
"sys.path.insert(0, join('..', '.'))\n",
"\n",
"import numpy as np\n",
"import dataset_utils\n",
@@ -74,7 +75,8 @@
}
],
"source": [
"fb_data_path = \"../storage/datasets/facebook-wallposts/facebook-wallpost.txt.gz\"\n",
"fb_data_path = join('..', 'storage', 'datasets', 'facebook-wallposts', 'facebook-wallpost.txt.gz')\n",
"\n",
"(data_event_dict, \n",
" data_num_nodes, \n",
" data_duration) = dataset_utils.load_facebook_wall(download_file_path=fb_data_path, timestamp_max=1000,\n",
@@ -664,7 +666,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.7.1"
},
"toc": {
"base_numbering": 1,
Changes to a notebook that simulates the generative model (file name not captured in this view)
@@ -21,7 +21,8 @@
"outputs": [],
"source": [
"import sys \n",
"sys.path.insert(0, '../.')\n",
"from os.path import join\n",
"sys.path.insert(0, join('..', '.'))\n",
"\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
@@ -65,11 +66,11 @@
"\n",
"`class_prob:` (list) Probability of class memberships from class $0$ to $k - 1$. Make sure it sums up to 1\n",
"\n",
"`bp_mu` $k \\times k$ matrix where entry $ij$ denotes the $\\mu$ of Hawkes process for block pair $(b_i, b_j)$\n",
"`bp_mu:` $k \\times k$ matrix where entry $ij$ denotes the $\\mu$ of Hawkes process for block pair $(b_i, b_j)$\n",
"\n",
"`bp_alpha:` $k \\times k$ matrix where entry $ij$ denotes the $\\alpha$ of Hawkes process for block pair $(b_i, b_j)$\n",
"\n",
"`bp_beta` $k \\times k$ matrix where entry $ij$ denotes the $\\beta$ of Hawkes process for block pair $(b_i, b_j)$\n",
"`bp_beta:` $k \\times k$ matrix where entry $ij$ denotes the $\\beta$ of Hawkes process for block pair $(b_i, b_j)$\n",
"\n",
"`end_time:` (float) end_time of hawkes simulation\n",
"\n",
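For illustration, a sketch of parameter construction for a k = 2 simulation following the conventions documented in this notebook; the numeric values are arbitrary, and the simulation call itself is omitted since its full signature is not shown in this diff:

import numpy as np

k = 2
class_prob = [0.5, 0.5]   # class membership probabilities; must sum to 1

# k x k Hawkes parameters: entry (i, j) applies to block pair (b_i, b_j).
# Larger diagonal mu means denser within-community event activity.
bp_mu = np.array([[0.02, 0.002],
                  [0.002, 0.02]])
bp_alpha = np.full((k, k), 0.7)   # excitation jump sizes
bp_beta = np.full((k, k), 1.0)    # exponential decay rates

end_time = 1000.0                 # end time of the Hawkes simulation
assert np.isclose(sum(class_prob), 1.0)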
11 changes: 6 additions & 5 deletions generative_model_utils.py
@@ -1,12 +1,13 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import os
import pickle
import numpy as np
import dataset_utils
from os.path import join
import matplotlib.pyplot as plt
import chip_generative_model as chip
from tick.hawkes import SimuHawkesExpKernels
@@ -347,7 +348,7 @@ def simulate_community_hawkes(params=None, network_name=None, load_if_exists=Fal
:return: event_dict, node_membership
"""
generated_network_path = f'{dataset_utils.get_script_path()}/storage/results/generated_networks/'
generated_network_path = join(dataset_utils.get_script_path(), 'storage', 'results', 'generated_networks')

default_params = {'seed': None,
'number_of_nodes': 128,
@@ -365,8 +366,8 @@

# Load the network if existed
if load_if_exists and network_name is not None:
if os.path.isfile(generated_network_path + network_name + ".pckl"):
with open(generated_network_path + network_name + ".pckl", 'rb') as handle:
if os.path.isfile(join(generated_network_path, f'{network_name}.pckl')):
with open(join(generated_network_path, f'{network_name}.pckl'), 'rb') as handle:
[event_dict, node_membership, params] = pickle.load(handle)

if verbose:
@@ -409,7 +410,7 @@ def simulate_community_hawkes(params=None, network_name=None, load_if_exists=Fal
node_membership = one_hot_to_class_assignment(node_membership)

if network_name is not None:
with open(generated_network_path + network_name + ".pckl", 'wb') as handle:
with open(join(generated_network_path, f'{network_name}.pckl'), 'wb') as handle:
pickle.dump([event_dict, node_membership, default_params], handle, protocol=pickle.HIGHEST_PROTOCOL)

return event_dict, node_membership
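The load-if-exists logic above is a generic pickle caching pattern; here is a self-contained sketch under the same conventions (cached_simulation and generate_fn are illustrative names, not part of this module's API):

import os
import pickle
from os.path import join

def cached_simulation(cache_dir, network_name, generate_fn, load_if_exists=True):
    """Return a cached (event_dict, node_membership, params) if present;
    otherwise generate, cache, and return it."""
    cache_file = join(cache_dir, f'{network_name}.pckl')
    if load_if_exists and os.path.isfile(cache_file):
        with open(cache_file, 'rb') as handle:
            event_dict, node_membership, params = pickle.load(handle)
        return event_dict, node_membership, params
    event_dict, node_membership, params = generate_fn()
    with open(cache_file, 'wb') as handle:
        pickle.dump([event_dict, node_membership, params], handle,
                    protocol=pickle.HIGHEST_PROTOCOL)
    return event_dict, node_membership, params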
2 changes: 1 addition & 1 deletion model_fitting_utils.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import numpy as np
2 changes: 1 addition & 1 deletion parameter_estimation.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import warnings
2 changes: 1 addition & 1 deletion plotting_utils.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import numpy as np
2 changes: 1 addition & 1 deletion poisson_baseline_model_fitting.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import time
17 changes: 9 additions & 8 deletions sim_AS2.py
@@ -10,11 +10,12 @@
Expectation: We should see accuracy increase in both these cases. When mu_diag/mu_off_diag ratio is low, the algorithms
will do poorly, but as the ratio increases there is more signal and the algorithm will do well and go all the way to 1.
@author: Anonymous
@author: Makan Arastuie
"""

import pickle
import numpy as np
from os.path import join
import matplotlib.pyplot as plt
from joblib import Parallel, delayed
import generative_model_utils as utils
@@ -23,7 +24,7 @@
from spectral_clustering import spectral_cluster


result_file_path = f'{get_script_path()}/storage/results/AS2'
result_file_path = join(get_script_path(), 'storage', 'results', 'AS2')

also_use_unweighted_adjacency = True

@@ -101,7 +102,7 @@ def test_spectral_clustering_on_generative_model(scalar):
mean_proportion_ones_in_adj_err.append(2 * np.std(results[:, 2]) / np.sqrt(len(results[:, 2])))

# Save results
with open(f'{result_file_path}/all_sims-{sim_type}-w-adj.pckl', 'wb') as handle:
with open(join(result_file_path, f'all_sims-{sim_type}-w-adj.pckl'), 'wb') as handle:
pickle.dump([agg_adj_mean_sc_rand_scores, agg_adj_mean_sc_rand_scores_err,
adj_mean_sc_rand_scores, adj_mean_sc_rand_scores_err,
mean_proportion_ones_in_adj,
@@ -111,18 +112,18 @@ def test_spectral_clustering_on_generative_model(scalar):
agg_adj_mean_sc_rand_scores_err.append(2 * np.std(results) / np.sqrt(len(results)))

# Save results
with open(f'{result_file_path}/all_sims-{sim_type}.pckl', 'wb') as handle:
with open(join(result_file_path, f'all_sims-{sim_type}.pckl'), 'wb') as handle:
pickle.dump([agg_adj_mean_sc_rand_scores,
agg_adj_mean_sc_rand_scores_err], handle, protocol=pickle.HIGHEST_PROTOCOL)


if also_use_unweighted_adjacency:
with open(f'{result_file_path}/all_sims-{sim_type}-w-adj.pckl', 'rb') as handle:
with open(join(result_file_path, f'all_sims-{sim_type}-w-adj.pckl'), 'rb') as handle:
[agg_adj_mean_sc_rand_scores, agg_adj_mean_sc_rand_scores_err,
adj_mean_sc_rand_scores, adj_mean_sc_rand_scores_err,
mean_proportion_ones_in_adj, mean_proportion_ones_in_adj_err] = pickle.load(handle)
else:
with open(f'{result_file_path}/all_sims-{sim_type}.pckl', 'rb') as handle:
with open(join(result_file_path, f'all_sims-{sim_type}.pckl'), 'rb') as handle:
[agg_adj_mean_sc_rand_scores, agg_adj_mean_sc_rand_scores_err] = pickle.load(handle)


@@ -157,7 +158,7 @@ def test_spectral_clustering_on_generative_model(scalar):

ax.autoscale_view()

plt.savefig(result_file_path + "/plots/" + plot_name + ".pdf")
plt.savefig(join(result_file_path, 'plots', f'{plot_name}.pdf'))
plt.show()
else:
w, h = plt.figaspect(.3)
@@ -190,5 +191,5 @@ def test_spectral_clustering_on_generative_model(scalar):

ax.autoscale_view()

plt.savefig(f"{result_file_path}/plots/agg-vs-adj-density.pdf")
plt.savefig(join(result_file_path, 'plots', 'agg-vs-adj-density.pdf'))
plt.show()
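The error values computed in this script are twice the standard error of the mean, roughly a 95% confidence interval under a normality assumption. A quick standalone check of the same expression (the sample values are synthetic):

import numpy as np

rng = np.random.default_rng(0)
results = rng.normal(loc=0.8, scale=0.05, size=30)  # e.g., Rand scores from 30 simulations

mean_score = np.mean(results)
# Same expression used in sim_AS2.py for the error bars
err = 2 * np.std(results) / np.sqrt(len(results))
print(f'{mean_score:.3f} +/- {err:.3f}')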