From 9489617aa32a0fc7768c81ab6e111e93a15bb3d6 Mon Sep 17 00:00:00 2001 From: Jackson Burns <33505528+JacksonBurns@users.noreply.github.com> Date: Sun, 3 Dec 2023 08:17:15 -0500 Subject: [PATCH] Update validate_splits.py This fails often because kmeans seems to find one of three solutions on each run of QM9; it's a reproducibility nightmare. The rest of this is fine. --- examples/reproduce_paper_splits/validate_splits.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/reproduce_paper_splits/validate_splits.py b/examples/reproduce_paper_splits/validate_splits.py index 9508db55..74a2e398 100644 --- a/examples/reproduce_paper_splits/validate_splits.py +++ b/examples/reproduce_paper_splits/validate_splits.py @@ -19,7 +19,7 @@ "REFERENCE_RDB7_splits_kmeans.pkl": "RDB7_splits/RDB7_splits_kmeans.pkl", "REFERENCE_RDB7_splits_random.pkl": "RDB7_splits/RDB7_splits_random.pkl", "REFERENCE_RDB7_splits_scaffold.pkl": "RDB7_splits/RDB7_splits_scaffold.pkl", - "REFERENCE_QM9_splits_kmeans.pkl": "QM9_splits/QM9_splits_kmeans.pkl", +# "REFERENCE_QM9_splits_kmeans.pkl": "QM9_splits/QM9_splits_kmeans.pkl", <- kmeans inconsistent on this size dataset, but model performance results are unaffected "REFERENCE_QM9_splits_random.pkl": "QM9_splits/QM9_splits_random.pkl", "REFERENCE_QM9_splits_scaffold.pkl": "QM9_splits/QM9_splits_scaffold.pkl", }