Commit d4e8aac

Merge with ICML 2020 supp and update the rest of the documents

arastuie committed Jun 11, 2020
2 parents 83a930d + b5f45d7 commit d4e8aac
Showing 46 changed files with 144,474 additions and 1,594 deletions.
6 changes: 5 additions & 1 deletion .gitignore
@@ -119,4 +119,8 @@ ipython_config.py
# Remove previous ipynb_checkpoints
# git rm -r .ipynb_checkpoints/

# End of https://www.gitignore.io/api/jupyternotebooks
<<<<<<< HEAD
# End of https://www.gitignore.io/api/jupyternotebooks
=======
# End of https://www.gitignore.io/api/jupyternotebooks
>>>>>>> icml2020-supp
134 changes: 0 additions & 134 deletions R/hawkes_sim.R

This file was deleted.

11 changes: 11 additions & 0 deletions README.md
@@ -0,0 +1,11 @@
# Community Hawkes Independent Pairs (CHIP) Network Model



This repo is the Python implementation of the CHIP network model, provided as part of the supplementary
material for the paper submitted to NeurIPS 2020, titled "Scalable and Consistent Estimation in Continuous-time Networks of Relational Events".

All datasets used in this repo are either available in the `storage/datasets` directory or
will be automatically downloaded by the preprocessing script.

The code was tested with Python 3.6.8.
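
For quick orientation, here is a minimal sketch of fitting CHIP to the Facebook wall-post data, mirroring the `__main__` example in `chip_model_fitting.py` from this commit; the import layout and the choice `k_values_to_test=[9]` are illustrative assumptions, not part of the README.

```python
import dataset_utils
from chip_model_fitting import fit_and_eval_community_hawkes

# Load the Facebook wall-post data with an 80/20 train/test split.
train_tuple, test_tuple, combined_tuple, nodes_not_in_train = \
    dataset_utils.load_facebook_wall(timestamp_max=1000,
                                     largest_connected_component_only=True,
                                     train_percentage=0.8)

# Fit CHIP for each candidate number of communities K and report
# train/test log-likelihood per event.
fit_and_eval_community_hawkes(train_tuple, test_tuple, combined_tuple,
                              nodes_not_in_train, k_values_to_test=[9],
                              plot_fitted_hist=False, verbose=False)
```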
2 changes: 1 addition & 1 deletion bhm_generative_model.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Makan Arastuie
@author: Anonymous
"""

import numpy as np
2 changes: 1 addition & 1 deletion bhm_local_search.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Makan Arastuie
@author: Anonymous
"""

import time
16 changes: 3 additions & 13 deletions bhm_model_fitting.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Makan Arastuie
@author: Anonymous
"""

import time
@@ -86,7 +86,7 @@ def fit_and_eval_block_hawkes(train_tuple, test_tuple, combined_tuple, nodes_not
print(f"Test ll: {ll_per_event:.3f} - Took: {toc - tic:.2f}s")

# Save results
result_file_path = '/shared/Results/CommunityHawkes/fb'
result_file_path = f'{dataset_utils.get_script_path()}/storage/results/fb_bhm_fit'
with open(f'{result_file_path}/k{num_classes}-model-params.pckl', 'wb') as handle:
pickle.dump([train_node_membership, train_bp_mu, train_bp_alpha, train_bp_beta, train_block_pair_events],
handle, protocol=pickle.HIGHEST_PROTOCOL)
@@ -111,17 +111,7 @@ def fit_and_eval_block_hawkes(train_tuple, test_tuple, combined_tuple, nodes_not
dataset_utils.load_facebook_wall(timestamp_max=1000, largest_connected_component_only=True, train_percentage=0.8)
fit_and_eval_block_hawkes(fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train,
local_search_max_iter=500, local_search_n_cores=25,
k_values_to_test=[1],
plot_fitted_hist=False, verbose=False)

# # Facebook Dataset
# print("Facebook wall-post dataset")
# fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train = \
# dataset_utils.load_fb_train_test(remove_nodes_not_in_train=True)
# fit_and_eval_block_hawkes(fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train,
# local_search_max_iter=500, local_search_n_cores=25,
# k_values_to_test=[1, 2, 3],
# plot_fitted_hist=False, verbose=False)
k_values_to_test=[1], plot_fitted_hist=False, verbose=False)

# # Enron Dataset
# print("Enron dataset")
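The pickled results above can be read back in the same variable order as the `pickle.dump` call; a minimal sketch, assuming the same `result_file_path` and a fitted K of 1 as in this script:

import pickle
import dataset_utils  # provides get_script_path(), as used in the diff above

num_classes = 1  # the K value used when fitting
result_file_path = f'{dataset_utils.get_script_path()}/storage/results/fb_bhm_fit'
with open(f'{result_file_path}/k{num_classes}-model-params.pckl', 'rb') as handle:
    node_membership, bp_mu, bp_alpha, bp_beta, block_pair_events = pickle.load(handle)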
2 changes: 1 addition & 1 deletion bhm_parameter_estimation.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Makan Arastuie
@author: Anonymous
"""

import numpy as np
75 changes: 4 additions & 71 deletions chip_generative_model.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Makan Arastuie
@author: Anonymous
"""

import time
@@ -248,80 +248,13 @@ def degree_corrected_community_generative_model(num_nodes, class_prob,

tic = time.time()
node_membership, event_dicts = community_generative_model(number_of_nodes,
class_probabilities,
bp_mu, bp_alpha, bp_beta,
burnin, end_time, n_cores=-1, seed=seed)
class_probabilities,
bp_mu, bp_alpha, bp_beta,
burnin, end_time, n_cores=-1, seed=seed)
toc = time.time()
print(toc - tic)

node_membership = utils.one_hot_to_class_assignment(node_membership)

block_pair_events = utils.event_dict_to_block_pair_events(event_dicts, node_membership, num_of_classes)
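# block_pair_events presumably groups the event timestamps by (sender block, receiver block) pair, judging from the helper's name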
print(block_pair_events)

# theta = utils.generate_theta_params_for_degree_corrected_community(number_of_nodes, dist='dirichlet',
# norm_sum_to='n')
#
# node_membership, event_dicts = degree_corrected_community_generative_model(number_of_nodes,
# class_probabilities,
# bp_mu, bp_alpha, bp_beta,
# theta,
# burnin, end_time, seed=seed)
#
# dataset_utils.plot_event_count_hist(event_dicts, number_of_nodes, "DC Community Hawkes")

# Check if the theoretical mean gets closer to empirical by scaling T and Mu

# for s in [1, 2, 3, 4]:
for s in [1]:
print("scalar", s)
end_time = 150 * s
burnin = 100

# bp_mu, bp_alpha, bp_beta = utils.generate_random_hawkes_params(num_of_classes,
# mu_range=(0.1, 0.3),
# alpha_range=(0.2, 0.4),
# beta_range=(0.5, 1),
# seed=seed)

bp_alpha = np.ones((num_of_classes, num_of_classes), dtype=np.float) * 7500
bp_beta = np.ones((num_of_classes, num_of_classes), dtype=np.float) * 8000
bp_mu = np.ones((num_of_classes, num_of_classes), dtype=np.float) * 0.6 / s
np.fill_diagonal(bp_mu, 1.8 / s)

bp_mu = utils.scale_parameteres_by_block_pair_size(bp_mu, 128, class_probabilities)
bp_alpha = utils.scale_parameteres_by_block_pair_size(bp_alpha, 128, class_probabilities)
bp_beta = utils.scale_parameteres_by_block_pair_size(bp_beta, 128, class_probabilities)

# print(bp_mu)
# print(bp_alpha)
# print(bp_beta)
#
# m = (bp_mu * end_time) / (1 - (bp_alpha/bp_beta))
#
# print(m)
# print(np.mean(m))
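# Note: for a Hawkes process with exponential kernel and alpha < beta, the
# expected event count on [0, T] is E[N(T)] = mu * T / (1 - alpha / beta),
# which is the commented quantity m above; the loop below estimates the
# empirical mean count to compare against it.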

event_count_means = []

for i in range(100):
node_membership, event_dicts = community_generative_model(number_of_nodes,
class_probabilities,
bp_mu, bp_alpha, bp_beta,
burnin, end_time, seed=seed)

# dataset_utils.plot_event_count_hist(event_dicts, number_of_nodes, "Community Hawkes")
event_agg_adj = utils.event_dict_to_aggregated_adjacency(number_of_nodes, event_dicts, dtype=np.int)

# np.savetxt(f"community-hawkes-{i}.txt", event_agg_adj, delimiter=' ', fmt='%d')

num_events = np.reshape(event_agg_adj, number_of_nodes**2)

event_count_means.append(np.mean(num_events))

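# report the Monte Carlo mean and an approximate 95% confidence half-width (two standard errors of the mean)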
print("mean:", np.mean(event_count_means))
print("95% Error:", 2 * np.std(event_count_means) / np.sqrt(len(event_count_means)))

# print(node_membership, event_dicts.keys())
# print(utils.event_dict_to_adjacency(number_of_nodes, event_dicts))
# print(utils.event_dict_to_aggregated_adjacency(number_of_nodes, event_dicts))
2 changes: 1 addition & 1 deletion chip_local_search.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Makan Arastuie
@author: Anonymous
"""

import time
18 changes: 5 additions & 13 deletions chip_model_fitting.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Makan Arastuie
@author: Anonymous
"""

import time
@@ -81,7 +81,6 @@ def fit_and_eval_community_hawkes(train_tuple, test_tuple, combined_tuple, nodes
print(f"K: {num_classes} - Train ll: {train_log_likelihood / train_n_events:.4f}", end=' - ')
print(f"Test ll: {ll_per_event:.3f} - Took: {toc - tic:.2f}s")


if plot_fitted_hist:
model_utils.generate_fit_community_hawkes(train_event_dict, train_node_membership,
train_bp_mu, train_bp_alpha, train_bp_beta,
@@ -98,21 +97,14 @@ def fit_and_eval_community_hawkes(train_tuple, test_tuple, combined_tuple, nodes
# Examples of fitting CHIP to Facebook, Enron, Reality Mining and simulated data.
if __name__ == "__main__":
# Entire Facebook Dataset
print("Entire Facebook wall-post dataset")
print("Facebook wall-post dataset")
fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train = \
dataset_utils.load_facebook_wall(timestamp_max=1000, largest_connected_component_only=True, train_percentage=0.8)
dataset_utils.load_facebook_wall(timestamp_max=1000, largest_connected_component_only=True,
train_percentage=0.8)
fit_and_eval_community_hawkes(fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train,
k_values_to_test=[9],
k_values_to_test=np.arange(1, 201),
plot_fitted_hist=False, verbose=False)
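# with np.arange(1, 201), the sweep covers K = 1 through 200, printing train/test log-likelihood per event for each fit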

# # Facebook Dataset
# print("Facebook wall-post dataset")
# fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train = \
# dataset_utils.load_fb_train_test(remove_nodes_not_in_train=False)
# fit_and_eval_community_hawkes(fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train,
# k_values_to_test=[6],
# plot_fitted_hist=False, verbose=False)

# # Enron Dataset
# print("Enron dataset")
# enron_train_tuple, enron_test_tuple, enron_combined_tuple, enron_nodes_not_in_train = \
0 comments on commit d4e8aac