diff --git a/docs/docs/datasets/eternabench.md b/docs/docs/datasets/eternabench.md
new file mode 100644
index 00000000..833d4b2f
--- /dev/null
+++ b/docs/docs/datasets/eternabench.md
@@ -0,0 +1,9 @@
+---
+authors:
+ - Zhiyuan Chen
+date: 2024-05-04
+---
+
+# EternaBench
+
+--8<-- "multimolecule/datasets/eternabench/README.md:21:"
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
index 5cef7518..e6bd4a30 100644
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -23,6 +23,7 @@ nav:
- bpRNA-spot: datasets/bprna-spot.md
- bpRNA-new: datasets/bprna-new.md
- RYOS: datasets/ryos.md
+ - EternaBench: datasets/eternabench.md
- module:
- module/index.md
- heads: module/heads.md
diff --git a/multimolecule/datasets/eternabench_cm/README.md b/multimolecule/datasets/eternabench_cm/README.md
new file mode 100644
index 00000000..7cc71faa
--- /dev/null
+++ b/multimolecule/datasets/eternabench_cm/README.md
@@ -0,0 +1,110 @@
+---
+language: rna
+tags:
+ - Biology
+ - RNA
+license:
+ - agpl-3.0
+size_categories:
+ - 1K.
+
+from __future__ import annotations
+
+import os
+
+import danling as dl
+import pandas as pd
+import torch
+
+from multimolecule.datasets.conversion_utils import ConvertConfig as ConvertConfig_
+from multimolecule.datasets.conversion_utils import save_dataset
+
+torch.manual_seed(1016)  # seed torch's global RNG when this module is executed
+
+cols = [  # columns, in this order, that convert_dataset_ keeps in its result
+ "id",
+ "design",
+ "sequence",
+ "secondary_structure",
+ "reactivity",
+ "errors",
+ "signal_to_noise",
+]
+
+
+def convert_dataset_(df: pd.DataFrame):
+ df.signal_to_noise = df.signal_to_noise.str.split(":").str[-1].astype(float)
+ df = df.rename(columns={"ID": "id", "design_name": "design", "structure": "secondary_structure"})
+ df = df.sort_values("id")
+ df = df[cols]
+ return df
+
+
+def convert_dataset(convert_config):
+ train = dl.load_pandas(convert_config.train_path)
+ test = dl.load_pandas(convert_config.test_path)
+ save_dataset(convert_config, {"train": convert_dataset_(train), "test": convert_dataset_(test)})
+
+
+class ConvertConfig(ConvertConfig_):
+ root: str = os.path.dirname(__file__)
+ output_path: str = os.path.basename(os.path.dirname(__file__)).replace("_", "-")
+
+
+if __name__ == "__main__":  # script entry point: build the config, parse it, run the conversion
+ config = ConvertConfig()
+ config.parse() # type: ignore[attr-defined]
+ convert_dataset(config)
diff --git a/multimolecule/datasets/eternabench_switch/README.md b/multimolecule/datasets/eternabench_switch/README.md
new file mode 100644
index 00000000..eb3def04
--- /dev/null
+++ b/multimolecule/datasets/eternabench_switch/README.md
@@ -0,0 +1,139 @@
+---
+language: rna
+tags:
+ - Biology
+ - RNA
+license:
+ - agpl-3.0
+size_categories:
+ - 1K.
+
+from __future__ import annotations
+
+import os
+
+import danling as dl
+import pandas as pd
+import torch
+
+from multimolecule.datasets.conversion_utils import ConvertConfig as ConvertConfig_
+from multimolecule.datasets.conversion_utils import save_dataset
+
+torch.manual_seed(1016)  # seed torch's global RNG when this module is executed
+
+cols = [  # columns, in this order, that convert_dataset_ keeps in its result
+ "id",
+ "design",
+ "sequence",
+ "activation_ratio",
+ "ligand",
+ "switch",
+ "kd_off",
+ "kd_on",
+ "kd_fmn",
+ "kd_no_fmn",
+ "min_kd_val",
+ "ms2_aptamer",
+ "lig_aptamer",
+ "ms2_lig_aptamer",
+ "log_kd_nolig",
+ "log_kd_lig",
+ "log_kd_nolig_scaled",
+ "log_kd_lig_scaled",
+ "log_AR",
+ "folding_subscore",
+ "num_clusters",
+]
+
+
+def convert_dataset_(df: pd.DataFrame):
+ df = df.rename(
+ columns={
+ "index": "id",
+ "Design": "design",
+ "Activation Ratio": "activation_ratio",
+ "Folding_Subscore": "folding_subscore",
+ "KDOFF": "kd_off",
+ "KDON": "kd_on",
+ "KDFMN": "kd_fmn",
+ "KDnoFMN": "kd_no_fmn",
+ "NumberOfClusters": "num_clusters",
+ "logkd_nolig": "log_kd_nolig",
+ "logkd_lig": "log_kd_lig",
+ "logkd_nolig_scaled": "log_kd_nolig_scaled",
+ "logkd_lig_scaled": "log_kd_lig_scaled",
+ "MS2_aptamer": "ms2_aptamer",
+ "MS2_lig_aptamer": "ms2_lig_aptamer",
+ }
+ )
+ df = df.sort_values("id")
+ df = df[cols]
+ return df
+
+
+def convert_dataset(convert_config):
+ train = dl.load_pandas(convert_config.train_path)
+ test = dl.load_pandas(convert_config.test_path)
+ save_dataset(convert_config, {"train": convert_dataset_(train), "test": convert_dataset_(test)})
+
+
+class ConvertConfig(ConvertConfig_):
+ root: str = os.path.dirname(__file__)
+ output_path: str = os.path.basename(os.path.dirname(__file__)).replace("_", "-")
+
+
+if __name__ == "__main__":  # script entry point: build the config, parse it, run the conversion
+ config = ConvertConfig()
+ config.parse() # type: ignore[attr-defined]
+ convert_dataset(config)