Commit: Clean up

arastuie committed Jul 17, 2020
1 parent 221e05c commit 93fc639

Showing 22 changed files with 98 additions and 84 deletions.
2 changes: 1 addition & 1 deletion bhm_generative_model.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import numpy as np
2 changes: 1 addition & 1 deletion bhm_local_search.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import time
8 changes: 5 additions & 3 deletions bhm_model_fitting.py
@@ -1,12 +1,13 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import time
import pickle
import numpy as np
import dataset_utils
from os.path import join
import generative_model_utils as utils
import model_fitting_utils as model_utils
import bhm_parameter_estimation as estimate_utils
@@ -86,8 +87,9 @@ def fit_and_eval_block_hawkes(train_tuple, test_tuple, combined_tuple, nodes_not
print(f"Test ll: {ll_per_event:.3f} - Took: {toc - tic:.2f}s")

# Save results
result_file_path = f'{dataset_utils.get_script_path()}/storage/results/fb_bhm_fit'
with open(f'{result_file_path}/k{num_classes}-model-params.pckl', 'wb') as handle:
result_file_path = join(dataset_utils.get_script_path(), 'storage', 'results', 'fb_bhm_fit',
f'k{num_classes}-model-params.pckl')
with open(result_file_path, 'wb') as handle:
pickle.dump([train_node_membership, train_bp_mu, train_bp_alpha, train_bp_beta, train_block_pair_events],
handle, protocol=pickle.HIGHEST_PROTOCOL)
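This same refactor recurs throughout the commit: f-string paths with hard-coded '/' separators become os.path.join calls, which select the correct separator for the host OS. A minimal before/after sketch of the pattern; script_path here is only a stand-in for whatever dataset_utils.get_script_path() returns, and num_classes is an arbitrary example value:

import os
from os.path import join

script_path = os.getcwd()  # stand-in for dataset_utils.get_script_path()
num_classes = 4            # arbitrary example value

# Before: separator hard-coded into the f-string (POSIX-only)
old_path = f'{script_path}/storage/results/fb_bhm_fit/k{num_classes}-model-params.pckl'

# After: os.path.join picks the correct separator for the platform
new_path = join(script_path, 'storage', 'results', 'fb_bhm_fit',
                f'k{num_classes}-model-params.pckl')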

2 changes: 1 addition & 1 deletion bhm_parameter_estimation.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import numpy as np
2 changes: 1 addition & 1 deletion chip_generative_model.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import time
2 changes: 1 addition & 1 deletion chip_local_search.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import time
2 changes: 1 addition & 1 deletion chip_model_fitting.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import time
33 changes: 17 additions & 16 deletions dataset_utils.py
@@ -1,13 +1,14 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import os
import sys
import urllib
import numpy as np
import networkx as nx
from os.path import join
from datetime import datetime
import matplotlib.pyplot as plt
import generative_model_utils as utils
@@ -22,18 +23,18 @@ def get_script_path():

def load_reality_mining_test_train(remove_nodes_not_in_train=False):
"""
Loads Reality Mining dataset.
:param remove_nodes_not_in_train: if True, removes the nodes that do not appear in the training set.
:return: Three tuples one for each train, test and combined datasets. Each Tuple contains:
((dict) with (caller_id, receiver_id): [unix_timestamps] (event dict structure),
(int) number of nodes,
(float) duration)
(list) nodes_not_in_train
"""
train_file_path = f'{get_script_path()}/storage/datasets/reality-mining/train_reality.csv'
test_file_path = f'{get_script_path()}/storage/datasets/reality-mining/test_reality.csv'
train_file_path = join(get_script_path(), 'storage', 'datasets', 'reality-mining', 'train_reality.csv')
test_file_path = join(get_script_path(), 'storage', 'datasets', 'reality-mining', 'test_reality.csv')

# Timestamps are adjusted to start from 0 and go up to 1000.
combined_duration = 1000.0
@@ -53,8 +54,8 @@ def load_enron_train_test(remove_nodes_not_in_train=False):
(float) duration)
(list) nodes_not_in_train
"""
train_file_path = f'{get_script_path()}/storage/datasets/enron/train_enron.csv'
test_file_path = f'{get_script_path()}/storage/datasets/enron/test_enron.csv'
train_file_path = join(get_script_path(), 'storage', 'datasets', 'enron', 'train_enron.csv')
test_file_path = join(get_script_path(), 'storage', 'datasets', 'enron', 'test_enron.csv')

# Timestamps are adjusted to start from 0 and go up to 1000.
combined_duration = 1000.0
@@ -74,8 +75,8 @@ def load_fb_train_test(remove_nodes_not_in_train=False):
(float) duration)
(list) nodes_not_in_train
"""
train_file_path = f'{get_script_path()}/storage/datasets/facebook-wallposts/train_FB_event_mat.csv'
test_file_path = f'{get_script_path()}/storage/datasets/facebook-wallposts/test_FB_event_mat.csv'
train_file_path = join(get_script_path(), 'storage', 'datasets', 'facebook-wallposts', 'train_FB_event_mat.csv')
test_file_path = join(get_script_path(), 'storage', 'datasets', 'facebook-wallposts', 'test_FB_event_mat.csv')

# Timestamps are adjusted to start from 0 and go up to 8759.9.
combined_duration = 8759.9
@@ -321,7 +322,7 @@ def load_facebook_wall(timestamp_max=1000, largest_connected_component_only=Fals
"""
file_path = download_file_path
if download_file_path is None:
file_path = f"{get_script_path()}/storage/datasets/facebook-wallposts/facebook-wallpost.txt.gz"
file_path = join(get_script_path(), 'storage', 'datasets', 'facebook-wallposts', 'facebook-wallpost.txt.gz')

# Download the dataset if it is not in the storage directory
if not os.path.exists(file_path):
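For context, a hedged usage sketch of these loaders, based only on the signatures and :return: docstrings visible in this diff; the exact unpacking order of the three tuples and the trailing node list below is an assumption from that description:

import dataset_utils

# Each loader returns train, test, and combined tuples of
# (event_dict, num_nodes, duration), plus the nodes absent from train.
train_tuple, test_tuple, combined_tuple, nodes_not_in_train = \
    dataset_utils.load_enron_train_test(remove_nodes_not_in_train=False)

train_event_dict, train_num_nodes, train_duration = train_tuple
print(f'{train_num_nodes} nodes, duration {train_duration}, '
      f'{len(nodes_not_in_train)} nodes unseen in train')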
Changes to a notebook that loads the Enron dataset (file name not captured in this view)
@@ -28,8 +29,9 @@
}
],
"source": [
"import sys \n",
"sys.path.insert(0, '../.')\n",
"import sys\n",
"from os.path import join\n",
"sys.path.insert(0, join('..', '.'))\n",
"\n",
"import numpy as np\n",
"import dataset_utils\n",
@@ -73,8 +74,9 @@
}
],
"source": [
"train_file_path = '../storage/datasets/enron/train_enron.csv'\n",
"test_file_path = '../storage/datasets/enron/test_enron.csv'\n",
"train_file_path = join('..', 'storage', 'datasets', 'enron', 'train_enron.csv')\n",
"test_file_path = join('..', 'storage', 'datasets', 'enron', 'test_enron.csv')\n",
"\n",
"*_, enron_combined_tuple, _ = dataset_utils.load_train_test(train_file_path, test_file_path, \n",
" combined_duration=1000, \n",
" remove_nodes_not_in_train=False)\n",
@@ -552,7 +554,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
"version": "3.7.1"
},
"toc": {
"base_numbering": 1,
Changes to a notebook that loads the Facebook wall-post dataset (file name not captured in this view)
@@ -29,8 +29,9 @@
}
],
"source": [
"import sys \n",
"sys.path.insert(0, '../.')\n",
"import sys\n",
"from os.path import join\n",
"sys.path.insert(0, join('..', '.'))\n",
"\n",
"import numpy as np\n",
"import dataset_utils\n",
@@ -74,7 +75,8 @@
}
],
"source": [
"fb_data_path = \"../storage/datasets/facebook-wallposts/facebook-wallpost.txt.gz\"\n",
"fb_data_path = join('..', 'storage', 'datasets', 'facebook-wallposts', 'facebook-wallpost.txt.gz')\n",
"\n",
"(data_event_dict, \n",
" data_num_nodes, \n",
" data_duration) = dataset_utils.load_facebook_wall(download_file_path=fb_data_path, timestamp_max=1000,\n",
@@ -664,7 +666,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.7.1"
},
"toc": {
"base_numbering": 1,
Changes to a notebook that simulates the generative model (file name not captured in this view)
@@ -21,7 +21,8 @@
"outputs": [],
"source": [
"import sys \n",
"sys.path.insert(0, '../.')\n",
"from os.path import join\n",
"sys.path.insert(0, join('..', '.'))\n",
"\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
@@ -65,11 +66,11 @@
"\n",
"`class_prob:` (list) Probability of class memberships from class $0$ to $k - 1$. Make sure it sums up to 1\n",
"\n",
"`bp_mu` $k \\times k$ matrix where entry $ij$ denotes the $\\mu$ of Hawkes process for block pair $(b_i, b_j)$\n",
"`bp_mu:` $k \\times k$ matrix where entry $ij$ denotes the $\\mu$ of Hawkes process for block pair $(b_i, b_j)$\n",
"\n",
"`bp_alpha:` $k \\times k$ matrix where entry $ij$ denotes the $\\alpha$ of Hawkes process for block pair $(b_i, b_j)$\n",
"\n",
"`bp_beta` $k \\times k$ matrix where entry $ij$ denotes the $\\beta$ of Hawkes process for block pair $(b_i, b_j)$\n",
"`bp_beta:` $k \\times k$ matrix where entry $ij$ denotes the $\\beta$ of Hawkes process for block pair $(b_i, b_j)$\n",
"\n",
"`end_time:` (float) end_time of hawkes simulation\n",
"\n",
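For illustration, a sketch of parameter construction for a k = 2 simulation following the conventions documented in this notebook; the numeric values are arbitrary, and the simulation call itself is omitted since its full signature is not shown in this diff:

import numpy as np

k = 2
class_prob = [0.5, 0.5]   # class membership probabilities; must sum to 1

# k x k Hawkes parameters: entry (i, j) applies to block pair (b_i, b_j).
# Larger diagonal mu means denser within-community event activity.
bp_mu = np.array([[0.02, 0.002],
                  [0.002, 0.02]])
bp_alpha = np.full((k, k), 0.7)   # excitation jump sizes
bp_beta = np.full((k, k), 1.0)    # exponential decay rates

end_time = 1000.0                 # end time of the Hawkes simulation
assert np.isclose(sum(class_prob), 1.0)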
11 changes: 6 additions & 5 deletions generative_model_utils.py
@@ -1,12 +1,13 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import os
import pickle
import numpy as np
import dataset_utils
from os.path import join
import matplotlib.pyplot as plt
import chip_generative_model as chip
from tick.hawkes import SimuHawkesExpKernels
@@ -347,7 +348,7 @@ def simulate_community_hawkes(params=None, network_name=None, load_if_exists=Fal
:return: event_dict, node_membership
"""
generated_network_path = f'{dataset_utils.get_script_path()}/storage/results/generated_networks/'
generated_network_path = join(dataset_utils.get_script_path(), 'storage', 'results', 'generated_networks')

default_params = {'seed': None,
'number_of_nodes': 128,
@@ -365,8 +366,8 @@

# Load the network if existed
if load_if_exists and network_name is not None:
if os.path.isfile(generated_network_path + network_name + ".pckl"):
with open(generated_network_path + network_name + ".pckl", 'rb') as handle:
if os.path.isfile(join(generated_network_path, f'{network_name}.pckl')):
with open(join(generated_network_path, f'{network_name}.pckl'), 'rb') as handle:
[event_dict, node_membership, params] = pickle.load(handle)

if verbose:
@@ -409,7 +410,7 @@ def simulate_community_hawkes(params=None, network_name=None, load_if_exists=Fal
node_membership = one_hot_to_class_assignment(node_membership)

if network_name is not None:
with open(generated_network_path + network_name + ".pckl", 'wb') as handle:
with open(join(generated_network_path, f'{network_name}.pckl'), 'wb') as handle:
pickle.dump([event_dict, node_membership, default_params], handle, protocol=pickle.HIGHEST_PROTOCOL)

return event_dict, node_membership
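The load-if-exists logic above is a generic pickle caching pattern; here is a self-contained sketch under the same conventions (cached_simulation and generate_fn are illustrative names, not part of this module's API):

import os
import pickle
from os.path import join

def cached_simulation(cache_dir, network_name, generate_fn, load_if_exists=True):
    """Return a cached (event_dict, node_membership, params) if present;
    otherwise generate, cache, and return it."""
    cache_file = join(cache_dir, f'{network_name}.pckl')
    if load_if_exists and os.path.isfile(cache_file):
        with open(cache_file, 'rb') as handle:
            event_dict, node_membership, params = pickle.load(handle)
        return event_dict, node_membership, params
    event_dict, node_membership, params = generate_fn()
    with open(cache_file, 'wb') as handle:
        pickle.dump([event_dict, node_membership, params], handle,
                    protocol=pickle.HIGHEST_PROTOCOL)
    return event_dict, node_membership, params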
2 changes: 1 addition & 1 deletion model_fitting_utils.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import numpy as np
2 changes: 1 addition & 1 deletion parameter_estimation.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import warnings
2 changes: 1 addition & 1 deletion plotting_utils.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import numpy as np
2 changes: 1 addition & 1 deletion poisson_baseline_model_fitting.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
@author: Anonymous
@author: Makan Arastuie
"""

import time
17 changes: 9 additions & 8 deletions sim_AS2.py
@@ -10,11 +10,12 @@
Expectation: We should see accuracy increase in both these cases. When mu_diag/mu_off_diag ratio is low, the algorithms
will do poorly, but as the ratio increases there is more signal and the algorithm will do well and go all the way to 1.
@author: Anonymous
@author: Makan Arastuie
"""

import pickle
import numpy as np
from os.path import join
import matplotlib.pyplot as plt
from joblib import Parallel, delayed
import generative_model_utils as utils
@@ -23,7 +24,7 @@
from spectral_clustering import spectral_cluster


result_file_path = f'{get_script_path()}/storage/results/AS2'
result_file_path = join(get_script_path(), 'storage', 'results', 'AS2')

also_use_unweighted_adjacency = True

@@ -101,7 +102,7 @@ def test_spectral_clustering_on_generative_model(scalar):
mean_proportion_ones_in_adj_err.append(2 * np.std(results[:, 2]) / np.sqrt(len(results[:, 2])))

# Save results
with open(f'{result_file_path}/all_sims-{sim_type}-w-adj.pckl', 'wb') as handle:
with open(join(result_file_path, f'all_sims-{sim_type}-w-adj.pckl'), 'wb') as handle:
pickle.dump([agg_adj_mean_sc_rand_scores, agg_adj_mean_sc_rand_scores_err,
adj_mean_sc_rand_scores, adj_mean_sc_rand_scores_err,
mean_proportion_ones_in_adj,
@@ -111,18 +112,18 @@ def test_spectral_clustering_on_generative_model(scalar):
agg_adj_mean_sc_rand_scores_err.append(2 * np.std(results) / np.sqrt(len(results)))

# Save results
with open(f'{result_file_path}/all_sims-{sim_type}.pckl', 'wb') as handle:
with open(join(result_file_path, f'all_sims-{sim_type}.pckl'), 'wb') as handle:
pickle.dump([agg_adj_mean_sc_rand_scores,
agg_adj_mean_sc_rand_scores_err], handle, protocol=pickle.HIGHEST_PROTOCOL)


if also_use_unweighted_adjacency:
with open(f'{result_file_path}/all_sims-{sim_type}-w-adj.pckl', 'rb') as handle:
with open(join(result_file_path, f'all_sims-{sim_type}-w-adj.pckl'), 'rb') as handle:
[agg_adj_mean_sc_rand_scores, agg_adj_mean_sc_rand_scores_err,
adj_mean_sc_rand_scores, adj_mean_sc_rand_scores_err,
mean_proportion_ones_in_adj, mean_proportion_ones_in_adj_err] = pickle.load(handle)
else:
with open(f'{result_file_path}/all_sims-{sim_type}.pckl', 'rb') as handle:
with open(join(result_file_path, f'all_sims-{sim_type}.pckl'), 'rb') as handle:
[agg_adj_mean_sc_rand_scores, agg_adj_mean_sc_rand_scores_err] = pickle.load(handle)


@@ -157,7 +158,7 @@ def test_spectral_clustering_on_generative_model(scalar):

ax.autoscale_view()

plt.savefig(result_file_path + "/plots/" + plot_name + ".pdf")
plt.savefig(join(result_file_path, 'plots', f'{plot_name}.pdf'))
plt.show()
else:
w, h = plt.figaspect(.3)
@@ -190,5 +191,5 @@ def test_spectral_clustering_on_generative_model(scalar):

ax.autoscale_view()

plt.savefig(f"{result_file_path}/plots/agg-vs-adj-density.pdf")
plt.savefig(join(result_file_path, 'plots', 'agg-vs-adj-density.pdf'))
plt.show()
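The error values computed in this script are twice the standard error of the mean, roughly a 95% confidence interval under a normality assumption. A quick standalone check of the same expression (the sample values are synthetic):

import numpy as np

rng = np.random.default_rng(0)
results = rng.normal(loc=0.8, scale=0.05, size=30)  # e.g., Rand scores from 30 simulations

mean_score = np.mean(results)
# Same expression used in sim_AS2.py for the error bars
err = 2 * np.std(results) / np.sqrt(len(results))
print(f'{mean_score:.3f} +/- {err:.3f}')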