Skip to content

Commit

Permalink
Update end to end sim with log N values and add both known and estima…
Browse files Browse the repository at this point in the history
…ted comm
  • Loading branch information
arastuie committed Jun 1, 2020
1 parent cdbe8ab commit dcf92ee
Showing 1 changed file with 152 additions and 160 deletions.
312 changes: 152 additions & 160 deletions sim_end_to_end_count_based_estimates.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,28 @@
@author: Makan Arastuie
"""

import os
import copy
import pickle
import numpy as np
from os.path import join
from pathlib import Path
import matplotlib.pyplot as plt
from joblib import Parallel, delayed
import generative_model_utils as utils
import model_fitting_utils as model_utils
from sklearn.metrics import adjusted_rand_score
from spectral_clustering import spectral_cluster
from parameter_estimation import estimate_hawkes_from_counts
from sklearn.linear_model import LinearRegression

result_file_path = join(os.sep, '/shared', 'Results', 'CommunityHawkes', 'pickles',
'end_to_end_count_based_estimate_2', '7_values')
Path(join(result_file_path, 'plots')).mkdir(mode=0o777, parents=True, exist_ok=True)

result_file_path = '/shared/Results/CommunityHawkes/pickles/end_to_end_count_based_estimate'

estimate_alpha_beta = True
plot_only = False

no_alpha_name = "_no_alpha" if not estimate_alpha_beta else ""
run_analysis = True
run_plotting = True
run_regression = True

# sim params
end_time = 100
Expand All @@ -37,9 +42,11 @@
beta_off_diag = 0.95

class_probs = [0.25, 0.25, 0.25, 0.25]
num_nodes_to_test = [8, 16, 32, 64, 128, 256]
# num_nodes_to_test = [8, 16, 32, 64, 128, 256]
# num_nodes_to_test = [16, 32, 64, 128, 256, 512]
num_simulations = 100
num_nodes_to_test = np.logspace(4, 7, num=7, dtype=np.int32, base=2)
# num_nodes_to_test = np.logspace(4, 5, num=4, dtype=np.int32, base=2)
num_simulations = 6
n_cores = 6
n_classes = len(class_probs)

Expand All @@ -64,20 +71,18 @@ def calc_mean_and_error_of_count_estiamte(n_nodes):
sc_rand = adjusted_rand_score(true_node_membership, node_membership)
sc_rand = np.zeros((n_classes, n_classes)) + sc_rand # match the shape of other params to retrieve easily

if estimate_alpha_beta:
bp_mu, bp_alpha, bp_beta, bp_alpha_beta_ratio = model_utils.estimate_bp_hawkes_params(event_dict,
node_membership,
end_time,
n_classes)
return bp_mu, bp_alpha_beta_ratio, bp_alpha, bp_beta, sc_rand

agg_adj = utils.event_dict_to_aggregated_adjacency(n_nodes, event_dict)
bp_mu, bp_alpha_beta_ratio = estimate_hawkes_from_counts(agg_adj, node_membership, end_time, 1e-10 / end_time)
# param estimation with estimated communities
bp_mu, bp_alpha, bp_beta, bp_alpha_beta_ratio = model_utils.estimate_bp_hawkes_params(event_dict,
node_membership,
end_time,
n_classes)
# param estimation with known communities. k_ is for known_
k_bp_mu, k_bp_alpha, k_bp_beta, k_bp_alpha_beta_ratio = model_utils.estimate_bp_hawkes_params(event_dict,
true_node_membership,
end_time,
n_classes)
return bp_mu, bp_alpha_beta_ratio, bp_alpha, bp_beta, sc_rand, k_bp_mu, k_bp_alpha_beta_ratio, k_bp_alpha, k_bp_beta

return bp_mu, bp_alpha_beta_ratio, sc_rand


no_alpha_name = "_no_alpha" if not estimate_alpha_beta else ""

true_mu = np.zeros((n_classes, n_classes)) + mu_off_diag
np.fill_diagonal(true_mu, mu_diag)
Expand All @@ -90,18 +95,25 @@ def calc_mean_and_error_of_count_estiamte(n_nodes):

true_ratio = true_alpha / true_beta

rand_mean = []
rand_mean_err = []
mu_mse = []
mu_mse_err = []
ratio_mse = []
ratio_mse_err = []

alpha_mse = []
alpha_mse_err = []
beta_mse = []
beta_mse_err = []
if not plot_only:
params = {
'mu': ['mu_mse', 'mu_mse_err'],
'm': ['ratio_mse', 'ratio_mse_err'],
'alpha': ['alpha_mse', 'alpha_mse_err'],
'beta': ['beta_mse', 'beta_mse_err']
}

# known_communities_error
kce = {}
for param, err in params.items():
kce[err[0]] = []
kce[err[1]] = []

# estimated_communities_error
ece = copy.deepcopy(kce)
ece['rand_mean'] = []
ece['rand_mean_err'] = []

if run_analysis:
for j, n_nodes in enumerate(num_nodes_to_test):
results = Parallel(n_jobs=n_cores)(delayed(calc_mean_and_error_of_count_estiamte)
(n_nodes) for i in range(num_simulations))
Expand All @@ -110,146 +122,126 @@ def calc_mean_and_error_of_count_estiamte(n_nodes):

results = np.asarray(results, dtype=np.float)

if estimate_alpha_beta:
results = np.reshape(results, (num_simulations, 5, n_classes, n_classes))
else:
results = np.reshape(results, (num_simulations, 3, n_classes, n_classes))
results = np.reshape(results, (num_simulations, 9, n_classes, n_classes))

# estimated communities
mu_mse_temp = np.power(results[:, 0, :, :] - true_mu, 2)
mu_mse.append(np.mean(mu_mse_temp))
mu_mse_err.append(2 * np.std(mu_mse_temp) / np.sqrt(len(mu_mse_temp)))
ece['mu_mse'].append(np.mean(mu_mse_temp))
ece['mu_mse_err'].append(2 * np.std(mu_mse_temp) / np.sqrt(mu_mse_temp.size))

ratio_mse_temp = np.power(results[:, 1, :, :] - true_ratio, 2)
ratio_mse.append(np.mean(ratio_mse_temp))
ratio_mse_err.append(2 * np.std(ratio_mse_temp) / np.sqrt(ratio_mse_temp.size))
ece['ratio_mse'].append(np.mean(ratio_mse_temp))
ece['ratio_mse_err'].append(2 * np.std(ratio_mse_temp) / np.sqrt(ratio_mse_temp.size))

alpha_mse_temp = np.power(results[:, 2, :, :] - true_alpha, 2)
ece['alpha_mse'].append(np.mean(alpha_mse_temp))
ece['alpha_mse_err'].append(2 * np.std(alpha_mse_temp) / np.sqrt(alpha_mse_temp.size))

beta_mse_temp = np.power(results[:, 3, :, :] - true_beta, 2)
ece['beta_mse'].append(np.mean(beta_mse_temp))
ece['beta_mse_err'].append(2 * np.std(beta_mse_temp) / np.sqrt(beta_mse_temp.size))

rand_mean_temp = np.mean(results[:, 4, 0, 0])
rand_mean.append(rand_mean_temp)
rand_mean_err.append(2 * np.std(results[:, 4, 0, 0]) / np.sqrt(results[:, 4, 0, 0].size))

if estimate_alpha_beta:
alpha_mse_temp = np.power(results[:, 2, :, :] - true_alpha, 2)
alpha_mse.append(np.mean(alpha_mse_temp))
alpha_mse_err.append(2 * np.std(alpha_mse_temp) / np.sqrt(alpha_mse_temp.size))

beta_mse_temp = np.power(results[:, 3, :, :] - true_beta, 2)
beta_mse.append(np.mean(beta_mse_temp))
beta_mse_err.append(2 * np.std(beta_mse_temp) / np.sqrt(beta_mse_temp.size))

if estimate_alpha_beta:
with open(f'{result_file_path}/mses.pckl', 'wb') as handle:
pickle.dump([rand_mean, rand_mean_err, mu_mse, mu_mse_err, ratio_mse, ratio_mse_err,
alpha_mse, alpha_mse_err, beta_mse, beta_mse_err], handle, protocol=pickle.HIGHEST_PROTOCOL)
else:
with open(f'{result_file_path}/mses_no_alpha.pckl', 'wb') as handle:
pickle.dump([rand_mean, rand_mean_err, mu_mse, mu_mse_err,
ratio_mse, ratio_mse_err], handle, protocol=pickle.HIGHEST_PROTOCOL)


if estimate_alpha_beta:
with open(f'{result_file_path}/mses.pckl', 'rb') as handle:
[rand_mean, rand_mean_err, mu_mse, mu_mse_err, ratio_mse, ratio_mse_err,
alpha_mse, alpha_mse_err, beta_mse, beta_mse_err] = pickle.load(handle)
else:
with open(f'{result_file_path}/mses_no_alpha.pckl', 'rb') as handle:
rand_mean, rand_mean_err, mu_mse, mu_mse_err, ratio_mse, ratio_mse_err = pickle.load(handle)


print("Rand Mean:")
print(rand_mean)

print("Mu MSE:")
print(mu_mse)

print("\nRatio MSE:")
print(ratio_mse)

if estimate_alpha_beta:
print("\nAlpha MSE:")
print(alpha_mse)

print("\nBeta MSE:")
print(beta_mse)


# plt.title("MSE of Mu estimate using count matrix")
plt.ion()
plt.subplots(figsize=(3.8, 3))
plt.bar(range(len(num_nodes_to_test)), mu_mse, yerr=mu_mse_err, log=True)
plt.xlabel("Number of Nodes", fontsize=16)
plt.ylabel("Mean-squared Error", fontsize=16)
plt.xticks(range(len(num_nodes_to_test)), num_nodes_to_test)
plt.tick_params(labelsize=12)
plt.tight_layout()
#plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0), fontsize=16)
#plt.autoscale()
plt.savefig(f"{result_file_path}/plots/consistent_mu_mse.pdf")
#plt.show()

#plt.clf()

# plt.title("MSE of m estimate using count matrix")
plt.subplots(figsize=(3.8, 3))
plt.bar(range(len(num_nodes_to_test)), ratio_mse, yerr=ratio_mse_err, log=True)
plt.xlabel("Number of Nodes", fontsize=16)
plt.ylabel("Mean-squared Error", fontsize=16)
plt.xticks(range(len(num_nodes_to_test)), num_nodes_to_test)
plt.tick_params(labelsize=12)
plt.tight_layout()
#plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0), fontsize=16)
#plt.autoscale()
plt.savefig(f"{result_file_path}/plots/consistent_m_mse.pdf")
#plt.show()

#plt.clf()


plt.subplots(figsize=(3.8, 3))
plt.bar(range(len(num_nodes_to_test)), rand_mean, yerr=rand_mean_err)
# plt.title("MSE of beta estimate using count matrix")
plt.xlabel("Number of Nodes", fontsize=16)
plt.ylabel("Mean Adjusted Rand Score", fontsize=16)

plt.xticks(range(len(num_nodes_to_test)), num_nodes_to_test)
plt.tick_params(labelsize=12)
plt.tight_layout()
# plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0), fontsize=16)

plt.savefig(f"{result_file_path}/plots/consistent_rand_mean.pdf")
# plt.show()


if estimate_alpha_beta:
plt.subplots(figsize=(3.8, 3))
plt.bar(range(len(num_nodes_to_test)), alpha_mse, yerr=alpha_mse_err, log=True)
# plt.title("MSE of alpha estimate using count matrix")
plt.xlabel("Number of Nodes", fontsize=16)
plt.ylabel("Mean-squared Error", fontsize=16)
ece['rand_mean'].append(rand_mean_temp)
ece['rand_mean_err'].append(2 * np.std(results[:, 4, 0, 0]) / np.sqrt(results[:, 4, 0, 0].size))

plt.xticks(range(len(num_nodes_to_test)), num_nodes_to_test)
plt.tick_params(labelsize=12)
plt.tight_layout()
#plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0), fontsize=16)
# known communities
mu_mse_temp = np.power(results[:, 5, :, :] - true_mu, 2)
kce['mu_mse'].append(np.mean(mu_mse_temp))
kce['mu_mse_err'].append(2 * np.std(mu_mse_temp) / np.sqrt(mu_mse_temp.size))

plt.savefig(f"{result_file_path}/plots/consistent_alpha_mse.pdf")
#plt.show()
ratio_mse_temp = np.power(results[:, 6, :, :] - true_ratio, 2)
kce['ratio_mse'].append(np.mean(ratio_mse_temp))
kce['ratio_mse_err'].append(2 * np.std(ratio_mse_temp) / np.sqrt(ratio_mse_temp.size))

#plt.clf()
alpha_mse_temp = np.power(results[:, 7, :, :] - true_alpha, 2)
kce['alpha_mse'].append(np.mean(alpha_mse_temp))
kce['alpha_mse_err'].append(2 * np.std(alpha_mse_temp) / np.sqrt(alpha_mse_temp.size))

beta_mse_temp = np.power(results[:, 8, :, :] - true_beta, 2)
kce['beta_mse'].append(np.mean(beta_mse_temp))
kce['beta_mse_err'].append(2 * np.std(beta_mse_temp) / np.sqrt(beta_mse_temp.size))

with open(f'{result_file_path}/mses.pckl', 'wb') as handle:
pickle.dump([ece, kce], handle, protocol=pickle.HIGHEST_PROTOCOL)


with open(f'{result_file_path}/mses.pckl', 'rb') as handle:
ece, kce = pickle.load(handle)


print("Estimated communities:")
print('Rand mean:')
print(ece['rand_mean'])
for param, err in params.items():
print(param, "MSE:")
print(ece[err[0]])

print("\nKnown communities:")
for param, err in params.items():
print(param, "MSE:")
print(kce[err[0]])


if run_plotting:
# rand index for estimated communities
plt.ion()
plt.subplots(figsize=(3.8, 3))
plt.bar(range(len(num_nodes_to_test)), beta_mse, yerr=beta_mse_err, log=True)
# plt.title("MSE of beta estimate using count matrix")
plt.bar(range(len(num_nodes_to_test)), ece['rand_mean'], yerr=ece['rand_mean_err'], log=True)
plt.xlabel("Number of Nodes", fontsize=16)
plt.ylabel("Mean-squared Error", fontsize=16)

plt.xticks(range(len(num_nodes_to_test)), num_nodes_to_test)
plt.tick_params(labelsize=12)
plt.tight_layout()
#plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0), fontsize=16)

plt.savefig(f"{result_file_path}/plots/consistent_beta_mse.pdf")
#plt.show()



# [0.6320759231955095, 0.9677787392115078, 1.0, 1.0, 1.0, 1.0]
plt.savefig(f"{result_file_path}/plots/known_consistent_rand_mean.pdf")

for param, err in params.items():
# estimated communities
mse, mse_err = err[0], err[1]
plt.ion()
plt.subplots(figsize=(3.8, 3))
plt.bar(range(len(num_nodes_to_test)), ece[mse], yerr=ece[mse_err], log=True)
plt.xlabel("Number of Nodes", fontsize=16)
plt.ylabel("Mean-squared Error", fontsize=16)
plt.xticks(range(len(num_nodes_to_test)), num_nodes_to_test)
plt.tick_params(labelsize=12)
plt.tight_layout()
plt.savefig(f"{result_file_path}/plots/estimated_consistent_{param}_mse.pdf")

# known communities
plt.ion()
plt.subplots(figsize=(3.8, 3))
plt.bar(range(len(num_nodes_to_test)), kce[mse], yerr=kce[mse_err], log=True)
plt.xlabel("Number of Nodes", fontsize=16)
plt.ylabel("Mean-squared Error", fontsize=16)
plt.xticks(range(len(num_nodes_to_test)), num_nodes_to_test)
plt.tick_params(labelsize=12)
plt.tight_layout()
plt.savefig(f"{result_file_path}/plots/known_consistent_{param}_mse.pdf")


if run_regression:
print("\nRegression: \n")
print("Estimated Communities:\n")
for param, err in params.items():
print(param, '(estimated communities)')
x = np.log(num_nodes_to_test).reshape(len(num_nodes_to_test), 1)
y = np.log(ece[err[0]]).reshape(len(ece[err[0]]), 1)

reg = LinearRegression().fit(x, y)

print("R^2:", reg.score(x, y))
print("coef:", reg.coef_)
print('intercept:', reg.intercept_)
print()

print(param, '(known communities)')
x = np.log(num_nodes_to_test).reshape(len(num_nodes_to_test), 1)
y = np.log(kce[err[0]]).reshape(len(kce[err[0]]), 1)

reg = LinearRegression().fit(x, y)

print("R^2:", reg.score(x, y))
print("coef:", reg.coef_)
print('intercept:', reg.intercept_)
print()

0 comments on commit dcf92ee

Please sign in to comment.