Skip to content

Commit

Permalink
Exploring data about the singular values
Browse files Browse the repository at this point in the history
  • Loading branch information
RylanSchaeffer committed Sep 29, 2023
1 parent d6ce353 commit fdd661e
Show file tree
Hide file tree
Showing 30 changed files with 75 additions and 42 deletions.
117 changes: 75 additions & 42 deletions linear_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
ideal_regr.fit(X_all, y_all)

dataset_loss_list = []
singular_modes_data_list = []
for repeat_idx in range(num_repeats):
# subset_sizes = np.arange(10, X_train.shape[0], X_train.shape[0] // 20)
subset_sizes = np.arange(1, 50, 1)
Expand Down Expand Up @@ -104,6 +105,7 @@
"Smallest Non-Zero Singular Value": min_singular_value,
"Fraction Outside": fraction_outside,
}
dataset_loss_list.append(dataset_loss_results)

U, S, Vh = np.linalg.svd(X_train, full_matrices=False)
# Shape: (num test data, num singular modes)
Expand All @@ -114,16 +116,19 @@
term_three_average_per_mode = np.abs(np.mean(term_three, axis=1))

for mode_idx in range(term_two_average_per_mode.shape[0]):
dataset_loss_results[
f"Term Two Mode={mode_idx+1}"
] = term_two_average_per_mode[mode_idx]
dataset_loss_results[
f"Term Three Mode={mode_idx+1}"
] = term_three_average_per_mode[mode_idx]

dataset_loss_list.append(dataset_loss_results)
# One long-format record per (dataset, subset size, repeat, singular mode),
# so the per-mode terms can be plotted directly with a `hue` column instead
# of being melted out of wide "Term ... Mode=k" columns afterwards.
singular_modes_data = {
    "Dataset": dataset_name,
    "Subset Size": subset_size,
    "Repeat Index": repeat_idx,
    # 1-based position counted from the leading mode (mode_idx == 0 -> 1).
    "Singular Index": mode_idx + 1,
    # 1-based position counted from the other end of S, so the last mode
    # (mode_idx == len(S) - 1) is labelled 1 and the leading mode len(S).
    # The previous `len(S) - mode_idx + 1` was off by one: it ran from
    # len(S) + 1 down to 2 and never produced the label 1.
    "Singular Index From Smallest": len(S) - mode_idx,
    "Term Two": term_two_average_per_mode[mode_idx],
    "Term Three": term_three_average_per_mode[mode_idx],
}
singular_modes_data_list.append(singular_modes_data)

dataset_loss_df = pd.DataFrame(dataset_loss_list)
singular_modes_df = pd.DataFrame(singular_modes_data_list)

plt.close()
plt.figure(figsize=(6, 5))
Expand Down Expand Up @@ -159,12 +164,13 @@
)
plt.ylim(bottom=ymin, top=ymax)
plt.legend()
plt.tight_layout()
plt.savefig(
os.path.join(dataset_results_dir, f"double_descent_dataset"),
bbox_inches="tight",
dpi=300,
)
plt.show()
# plt.show()

plt.close()
plt.figure(figsize=(6, 5))
Expand All @@ -182,6 +188,7 @@
plt.title(title)
plt.yscale("log")
plt.legend()
plt.tight_layout()
plt.savefig(
os.path.join(
dataset_results_dir,
Expand All @@ -190,8 +197,7 @@
bbox_inches="tight",
dpi=300,
)
plt.show()
plt.close()
# plt.show()

plt.close()
# Set figure size
Expand All @@ -212,6 +218,7 @@
)
plt.title(title)
plt.yscale("log")
plt.tight_layout()
plt.savefig(
os.path.join(
dataset_results_dir,
Expand All @@ -220,27 +227,40 @@
bbox_inches="tight",
dpi=300,
)
plt.show()
term_two_columns = [
col for col in dataset_loss_df.columns.values if col.startswith("Term Two")
]
term_two_df = dataset_loss_df[
["Dataset", "Subset Size", "Repeat Index"] + term_two_columns
]
term_two_melted_df = term_two_df.melt(
id_vars=["Dataset", "Subset Size", "Repeat Index"],
var_name="Singular Mode",
value_name="Term Two",
# plt.show()

plt.close()
g = sns.lineplot(
data=singular_modes_df,
x="Subset Size",
y="Term Two",
hue="Singular Index",
)
term_two_melted_df["Singular Mode"] = term_two_melted_df["Singular Mode"].apply(
lambda x: int(x.split("=")[1])
sns.move_legend(g, "upper left", bbox_to_anchor=(1, 1))
plt.xlabel("Num. Training Samples")
plt.ylabel(r"$|\; \vec{x}_{test} \cdot \vec{v}_r \; |$")
plt.axvline(
x=X_all.shape[1], color="black", linestyle="--", label="Interpolation Threshold"
)
plt.yscale("log")
plt.title(title)
plt.tight_layout()
plt.savefig(
os.path.join(
dataset_results_dir,
f"term_two_singular_mode_contributions_indexed_from_leading",
),
bbox_inches="tight",
dpi=300,
)
# plt.show()

plt.close()
g = sns.lineplot(
data=term_two_melted_df,
data=singular_modes_df,
x="Subset Size",
y="Term Two",
hue="Singular Mode",
hue="Singular Index From Smallest",
)
sns.move_legend(g, "upper left", bbox_to_anchor=(1, 1))
plt.xlabel("Num. Training Samples")
Expand All @@ -250,36 +270,48 @@
)
plt.yscale("log")
plt.title(title)
plt.tight_layout()
plt.savefig(
os.path.join(
dataset_results_dir,
f"term_two_singular_mode_contributions",
f"term_two_singular_mode_contributions_indexed_from_smallest",
),
bbox_inches="tight",
dpi=300,
)
# plt.show()

term_three_columns = [
col for col in dataset_loss_df.columns.values if col.startswith("Term Three")
]
term_three_df = dataset_loss_df[
["Dataset", "Subset Size", "Repeat Index"] + term_three_columns
]
term_three_melted_df = term_three_df.melt(
id_vars=["Dataset", "Subset Size", "Repeat Index"],
var_name="Singular Mode",
value_name="Term Three",
plt.close()
g = sns.lineplot(
data=singular_modes_df,
x="Subset Size",
y="Term Three",
hue="Singular Index",
)
term_three_melted_df["Singular Mode"] = term_three_melted_df["Singular Mode"].apply(
lambda x: int(x.split("=")[1])
sns.move_legend(g, "upper left", bbox_to_anchor=(1, 1))
plt.xlabel("Num. Training Samples")
plt.ylabel(r"$|\; \vec{u}_R \cdot E \; |$")
plt.axvline(
x=X_all.shape[1], color="black", linestyle="--", label="Interpolation Threshold"
)
plt.title(title)
plt.tight_layout()
plt.savefig(
os.path.join(
dataset_results_dir,
f"term_three_singular_mode_contributions_indexed_from_leading",
),
bbox_inches="tight",
dpi=300,
)
# plt.show()

plt.close()
g = sns.lineplot(
data=term_three_melted_df,
data=singular_modes_df,
x="Subset Size",
y="Term Three",
hue="Singular Mode",
hue="Singular Index From Smallest",
)
sns.move_legend(g, "upper left", bbox_to_anchor=(1, 1))
plt.xlabel("Num. Training Samples")
Expand All @@ -288,10 +320,11 @@
x=X_all.shape[1], color="black", linestyle="--", label="Interpolation Threshold"
)
plt.title(title)
plt.tight_layout()
plt.savefig(
os.path.join(
dataset_results_dir,
f"term_three_singular_mode_contributions",
f"term_three_singular_mode_contributions_indexed_from_smallest",
),
bbox_inches="tight",
dpi=300,
Expand Down
Binary file modified results/real_data/California Housing/double_descent_dataset.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified results/real_data/Diabetes/double_descent_dataset.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified results/real_data/Student-Teacher/double_descent_dataset.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit fdd661e

Please sign in to comment.