Commit
Before code clean up
arastuie committed Jun 11, 2020
1 parent 8ae5df1 commit f56bbcf
Showing 11 changed files with 640 additions and 33 deletions.
31 changes: 21 additions & 10 deletions bhm_model_fitting.py
@@ -105,22 +105,33 @@ def fit_and_eval_block_hawkes(train_tuple, test_tuple, combined_tuple, nodes_not

# Running Block Hawkes model on Facebook, Enron, Reality Mining, and simulated data
if __name__ == "__main__":
# Entire Facebook Dataset
print("Entire Facebook wall-post dataset")
fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train = \
dataset_utils.load_facebook_wall(timestamp_max=1000, largest_connected_component_only=True, train_percentage=0.8)
fit_and_eval_block_hawkes(fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train,
local_search_max_iter=500, local_search_n_cores=25,
k_values_to_test=[1],
plot_fitted_hist=False, verbose=False)
pass
# # Entire Facebook Dataset
# print("Entire Facebook wall-post dataset 2")
# fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train = \
# dataset_utils.load_facebook_wall_2(timestamp_max=1000, largest_connected_component_only=True,
# train_percentage=0.8, remove_nodes_not_in_train=True)
# fit_and_eval_block_hawkes(fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train,
# local_search_max_iter=500, local_search_n_cores=25,
# k_values_to_test=[1],
# plot_fitted_hist=False, verbose=False)

# # Entire Facebook Dataset
# print("Facebook wall-post dataset")
# fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train = \
# dataset_utils.load_facebook_wall(timestamp_max=1000, largest_connected_component_only=True, train_percentage=0.8)
# fit_and_eval_block_hawkes(fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train,
# local_search_max_iter=500, local_search_n_cores=25,
# k_values_to_test=[1],
# plot_fitted_hist=False, verbose=False)

# # Facebook Dataset
# print("Facebook wall-post dataset")
# fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train = \
-    #     dataset_utils.load_fb_train_test(remove_nodes_not_in_train=False)
+    #     dataset_utils.load_fb_train_test(remove_nodes_not_in_train=True)
# fit_and_eval_block_hawkes(fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train,
# local_search_max_iter=500, local_search_n_cores=25,
-    #                           k_values_to_test=[3],
+    #                           k_values_to_test=[1, 2, 3],
# plot_fitted_hist=False, verbose=False)

# # Enron Dataset
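For context, these drivers expect a chronological train/test split of the event list. Below is a minimal sketch of the split semantics assumed here, with split_event_list_sketch as a hypothetical stand-in for dataset_utils.split_event_list_to_train_test (the real function also builds event dicts and node maps):

import numpy as np

def split_event_list_sketch(data, train_percentage=0.8):
    # data: (n, 3) array of [receiver_id, sender_id, timestamp] rows.
    # Sort chronologically and put the first train_percentage of events in train.
    data = data[data[:, 2].argsort()]
    split_idx = int(data.shape[0] * train_percentage)
    train, test = data[:split_idx], data[split_idx:]
    # Nodes that only ever appear in the test period.
    train_nodes = set(train[:, 0].astype(int)) | set(train[:, 1].astype(int))
    test_nodes = set(test[:, 0].astype(int)) | set(test[:, 1].astype(int))
    nodes_not_in_train = test_nodes - train_nodes
    return train, test, data, nodes_not_in_train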
21 changes: 15 additions & 6 deletions chip_model_fitting.py
@@ -81,6 +81,7 @@ def fit_and_eval_community_hawkes(train_tuple, test_tuple, combined_tuple, nodes
print(f"K: {num_classes} - Train ll: {train_log_likelihood / train_n_events:.4f}", end=' - ')
print(f"Test ll: {ll_per_event:.3f} - Took: {toc - tic:.2f}s")


if plot_fitted_hist:
model_utils.generate_fit_community_hawkes(train_event_dict, train_node_membership,
train_bp_mu, train_bp_alpha, train_bp_beta,
@@ -96,21 +97,29 @@

# Examples of fitting CHIP to Facebook, Enron, Reality Mining and simulated data.
if __name__ == "__main__":

-    # Entire Facebook Dataset
-    print("Facebook wall-post dataset")
+    # # Entire Facebook Dataset
+    print("Entire Facebook wall-post dataset 2")
fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train = \
-        dataset_utils.load_facebook_wall(timestamp_max=1000, largest_connected_component_only=True, train_percentage=0.8)
+        dataset_utils.load_facebook_wall_2(timestamp_max=1000, largest_connected_component_only=True,
+                                           train_percentage=0.8, remove_nodes_not_in_train=True)
fit_and_eval_community_hawkes(fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train,
-                                  k_values_to_test=[20, 50, 100, 200, 500],
+                                  k_values_to_test=np.arange(51, 101),
plot_fitted_hist=False, verbose=False)

# # # Entire Facebook Dataset
# print("Entire Facebook wall-post dataset")
# fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train = \
# dataset_utils.load_facebook_wall(timestamp_max=1000, largest_connected_component_only=True, train_percentage=0.8)
# fit_and_eval_community_hawkes(fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train,
# k_values_to_test=[9],
# plot_fitted_hist=False, verbose=False)

# # Facebook Dataset
# print("Facebook wall-post dataset")
# fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train = \
# dataset_utils.load_fb_train_test(remove_nodes_not_in_train=False)
# fit_and_eval_community_hawkes(fb_train_tuple, fb_test_tuple, fb_combined_tuple, fb_nodes_not_in_train,
-    #                               k_values_to_test=np.arange(1, 11),
+    #                               k_values_to_test=[6],
# plot_fitted_hist=False, verbose=False)

# # Enron Dataset
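The scan over k_values_to_test only prints one train/test log-likelihood pair per K. A minimal sketch of then picking K from such a scan, assuming the printed per-event test log-likelihoods were collected into an array (the values below are dummy placeholders; the driver itself does not return them):

import numpy as np

k_values = np.arange(51, 101)
# Hypothetical per-event test log-likelihoods, one per scanned K.
test_ll_per_event = np.random.default_rng(0).normal(-5.0, 0.1, size=k_values.size)
best_k = k_values[np.argmax(test_ll_per_event)]
print(f"Best K by held-out per-event log-likelihood: {best_k}")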
77 changes: 76 additions & 1 deletion dataset_utils.py
@@ -420,9 +420,10 @@ def load_facebook_wall(timestamp_max=1000, largest_connected_component_only=Fals
# Scale timestamps to 0 to timestamp_max
data[:, 2] = (data[:, 2] - min(data[:, 2])) / (max(data[:, 2]) - min(data[:, 2])) * timestamp_max


if train_percentage is not None:
return split_event_list_to_train_test(data, train_percentage=train_percentage)

duration = data[-1, 2]

node_set = set(data[:, 0].astype(np.int)).union(data[:, 1].astype(np.int))
@@ -441,6 +442,80 @@ def load_facebook_wall(timestamp_max=1000, largest_connected_component_only=Fals
return event_dict, len(node_set), duration


def load_facebook_wall_2(timestamp_max=1000, largest_connected_component_only=False, train_percentage=None,
plot_growth=False, remove_nodes_not_in_train=False):
"""
:param timestamp_max: The time unit of the last timestamp. Used to scale all other timestamps.
:param largest_connected_component_only: if True, only the largest connected component will be loaded.
:param train_percentage: If None, returns the entire dataset as a single dataset, else returns a train/test/combined
dataset based on the train_percentage.
"""

file_path = '/shared/DataSets/FacebookViswanath2009/raw/facebook-wall.txt'

# receiver_id sender_id unix_timestamp
data = np.loadtxt(file_path, np.float)

    # sort by time, scale timestamps to [0, timestamp_max], then keep only events in the [500, 900] window
    # (drops the first half of the data and the last 10% of the time range)
data = data[data[:, 2].argsort()]
data[:, 2] = data[:, 2] - data[0, 2]
data[:, 2] = (data[:, 2] - min(data[:, 2])) / (max(data[:, 2]) - min(data[:, 2])) * timestamp_max

data = data[np.where(data[:, 2] >= 500)[0], :]
data = data[np.where(data[:, 2] <= 900)[0], :]

# remove self-edges
data = data[np.where(data[:, 0] != data[:, 1])[0], :]

if largest_connected_component_only:
# finding the nodes in the largest connected component
fb_net = nx.Graph()
for i in range(data.shape[0]):
fb_net.add_edge(data[i, 1], data[i, 0])

largest_cc = max(nx.connected_components(fb_net), key=len)
edge_idx_in_largest_cc = np.array([node_id in largest_cc for node_id in data[:, 0]])
data = data[edge_idx_in_largest_cc, :]

# Sorting by unix_timestamp and adjusting first timestamp to start from 0
data = data[data[:, 2].argsort()]
data[:, 2] = data[:, 2] - data[0, 2]

if timestamp_max is not None:
# Scale timestamps to 0 to timestamp_max
data[:, 2] = (data[:, 2] - min(data[:, 2])) / (max(data[:, 2]) - min(data[:, 2])) * timestamp_max

duration = data[-1, 2]

if plot_growth:
cum_event_count = [np.sum(data[:, 2] < t) for t in range(int(duration) + 1)]
plt.plot(np.arange(int(duration) + 1), cum_event_count)
plt.ylabel('Cumulative Event Count')
plt.xlabel('Duration')
# plt.savefig(f"/shared/Results/CommunityHawkes/pickles/full_fb_fit/plots/fb_growth.pdf")
plt.show()

if train_percentage is not None:
return split_event_list_to_train_test(data, train_percentage=train_percentage,
remove_nodes_not_in_train=remove_nodes_not_in_train)


node_set = set(data[:, 0].astype(np.int)).union(data[:, 1].astype(np.int))
node_id_map = get_node_map(node_set)

event_dict = {}
for i in range(data.shape[0]):
receiver_id = node_id_map[np.int(data[i, 0])]
sender_id = node_id_map[np.int(data[i, 1])]

if (sender_id, receiver_id) not in event_dict:
event_dict[(sender_id, receiver_id)] = []

event_dict[(sender_id, receiver_id)].append(data[i, 2])

return event_dict, len(node_set), duration


# Various examples of loading datasets
if __name__ == '__main__':

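A hypothetical usage sketch of the new loader, assuming the hard-coded facebook-wall.txt path is available. The returned event_dict maps each directed (sender_id, receiver_id) pair to the list of scaled timestamps of wall posts on that edge:

import dataset_utils

event_dict, num_nodes, duration = dataset_utils.load_facebook_wall_2(
    timestamp_max=1000, largest_connected_component_only=True)

# Inspect one edge and its event times.
(sender_id, receiver_id), timestamps = next(iter(event_dict.items()))
print(num_nodes, duration, len(timestamps))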
39 changes: 35 additions & 4 deletions full_fb_wall_post_model_fitting.py
@@ -26,6 +26,7 @@
plot_hawkes_params = False
plot_node_membership = False
plot_num_events = False
plot_community_structure = True
simulate_chip = False
get_confidence_intervals = False
verbose = False
@@ -199,9 +200,9 @@
labels = np.arange(1, num_classes + 1)
im, _ = heatmap(hawkes_params[param], labels, labels, ax=ax, cmap="Greys", cbarlabel=cbar_label[param])

-        # ax.set_title(f"Full Facebook wall-posts {param.capitalize()}")
+        ax.set_title(f"Full Facebook wall-posts {param.capitalize()}")
fig.tight_layout()
-        plt.savefig(f"{result_file_path}/plots/{param}-k-{num_classes}.pdf")
+        # plt.savefig(f"{result_file_path}/plots/{param}-k-{num_classes}.pdf")
plt.show()

# plot m
@@ -332,5 +333,35 @@
# Total computation time: 168.6s




if plot_community_structure:
# adj = utils.event_dict_to_adjacency(fb_num_node, fb_event_dict, dtype=np.int)
num_nodes = len(node_membership)
community_membership = utils.node_membership_to_community_membership(node_membership, num_classes)
community_size = [len(community) for community in community_membership]
node_ids = np.concatenate(community_membership)
sorting_map = {}
for i in range(node_ids.shape[0]):
sorting_map[node_ids[i]] = i

sorted_adj = np.zeros((num_nodes, num_nodes), dtype=np.int)

for (u, v), event_times in fb_event_dict.items():
if len(event_times) != 0:
sorted_adj[sorting_map[u], sorting_map[v]] = 1

    # Plot adjacency matrix in toned-down black and white
plt.spy(sorted_adj, marker='.', markersize=0.1, precision=0)
cumulative_community_size = 0
for com_size in community_size:
cumulative_community_size += com_size
plt.axhline(cumulative_community_size, color='black', linewidth=1)
plt.axvline(cumulative_community_size, color='black', linewidth=1)

# plt.xticks(rotation=45)
ticks = np.arange(0, num_nodes, 5000)
plt.yticks(ticks, [f'{int(t / 1000)}{"K" if t >= 1000 else ""}' for t in ticks], fontsize=13)
plt.xticks(ticks, [f'{int(t / 1000)}{"K" if t >= 1000 else ""}' for t in ticks], fontsize=13)
plt.tight_layout()
# plt.show()
plt.savefig(f"{result_file_path}/plots/community-structure-k-{num_classes}.png", format='png', dpi=200)
# plt.savefig(f"{result_file_path}/plots/community-structure-k-{num_classes}.pdf", format='pdf')
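The block above relies on utils.node_membership_to_community_membership to group node ids by community before permuting the adjacency matrix. A minimal sketch of what that helper is assumed to do (the actual implementation in utils.py may differ):

import numpy as np

def node_membership_to_community_membership_sketch(node_membership, num_classes):
    # node_membership[i] is the community label of node i; return, per
    # community, the array of node ids assigned to it.
    node_membership = np.asarray(node_membership)
    return [np.where(node_membership == c)[0] for c in range(num_classes)]

Concatenating these per-community arrays yields the node_ids ordering used above to build sorting_map and the block-sorted adjacency matrix.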
1 change: 0 additions & 1 deletion full_fb_wall_post_prediction.py
@@ -48,7 +48,6 @@
print("Train: ", "Num Nodes:", train_num_nodes, "Duration:", train_duration, "Num Edges:", train_num_events)
print("Test: ", "Num Nodes:", test_num_nodes, "Duration:", test_duration, "Num Edges:", test_num_events)


# fit Facebook Wall-posts
if fit_chip:
tic = time.time()