From 0ac78812f09c56f5d93f6e933ce08e726f2ea66e Mon Sep 17 00:00:00 2001 From: denkle Date: Sat, 31 Dec 2022 00:56:12 +0100 Subject: [PATCH] Add 20 datasets from UCI repository (#103) * Add new datasets * [github-action] formatting fixes * Add tqdm dependency Co-authored-by: github-actions[bot] Co-authored-by: mikeheddes --- docs/datasets.rst | 19 +++++++- docs/requirements.txt | 1 + torchhd/datasets/__init__.py | 36 ++++++++++++++ torchhd/datasets/acute_inflammation.py | 31 ++++++++++++ torchhd/datasets/acute_nephritis.py | 31 ++++++++++++ torchhd/datasets/annealing.py | 30 ++++++++++++ torchhd/datasets/arrhythmia.py | 42 +++++++++++++++++ torchhd/datasets/audiology_std.py | 43 +++++++++++++++++ torchhd/datasets/balance_scale.py | 32 +++++++++++++ torchhd/datasets/balloons.py | 31 ++++++++++++ torchhd/datasets/bank.py | 31 ++++++++++++ torchhd/datasets/blood.py | 31 ++++++++++++ torchhd/datasets/breast_cancer.py | 31 ++++++++++++ torchhd/datasets/breast_cancer_wisc.py | 31 ++++++++++++ torchhd/datasets/breast_cancer_wisc_diag.py | 31 ++++++++++++ torchhd/datasets/breast_cancer_wisc_prog.py | 31 ++++++++++++ torchhd/datasets/breast_tissue.py | 35 ++++++++++++++ torchhd/datasets/car.py | 33 +++++++++++++ torchhd/datasets/cardiotocography_10clases.py | 39 +++++++++++++++ torchhd/datasets/cardiotocography_3clases.py | 32 +++++++++++++ torchhd/datasets/chess_krvk.py | 47 +++++++++++++++++++ 21 files changed, 667 insertions(+), 1 deletion(-) create mode 100644 torchhd/datasets/acute_inflammation.py create mode 100644 torchhd/datasets/acute_nephritis.py create mode 100644 torchhd/datasets/annealing.py create mode 100644 torchhd/datasets/arrhythmia.py create mode 100644 torchhd/datasets/audiology_std.py create mode 100644 torchhd/datasets/balance_scale.py create mode 100644 torchhd/datasets/balloons.py create mode 100644 torchhd/datasets/bank.py create mode 100644 torchhd/datasets/blood.py create mode 100644 torchhd/datasets/breast_cancer.py create mode 100644 
torchhd/datasets/breast_cancer_wisc.py create mode 100644 torchhd/datasets/breast_cancer_wisc_diag.py create mode 100644 torchhd/datasets/breast_cancer_wisc_prog.py create mode 100644 torchhd/datasets/breast_tissue.py create mode 100644 torchhd/datasets/car.py create mode 100644 torchhd/datasets/cardiotocography_10clases.py create mode 100644 torchhd/datasets/cardiotocography_3clases.py create mode 100644 torchhd/datasets/chess_krvk.py diff --git a/docs/datasets.rst b/docs/datasets.rst index 40e7276c..4c81dce2 100644 --- a/docs/datasets.rst +++ b/docs/datasets.rst @@ -21,7 +21,24 @@ The Torchhd library provides many popular built-in datasets to work with. CyclePowerPlant Abalone Adult - + AcuteInflammation + AcuteNephritis + Annealing + Arrhythmia + AudiologyStd + BalanceScale + Balloons + Bank + Blood + BreastCancer + BreastCancerWisc + BreastCancerWiscDiag + BreastCancerWiscProg + BreastTissue + Car + Cardiotocography3Clases + Cardiotocography10Clases + ChessKrvk Base classes ------------------------ diff --git a/docs/requirements.txt b/docs/requirements.txt index 0598df95..29fb1327 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,6 +2,7 @@ torch torchvision pandas requests +tqdm numpy sphinx sphinx-rtd-theme \ No newline at end of file diff --git a/torchhd/datasets/__init__.py b/torchhd/datasets/__init__.py index 773f4f79..59d12d5a 100644 --- a/torchhd/datasets/__init__.py +++ b/torchhd/datasets/__init__.py @@ -11,6 +11,24 @@ from torchhd.datasets.dataset import DatasetTrainTest from torchhd.datasets.abalone import Abalone from torchhd.datasets.adult import Adult +from torchhd.datasets.acute_inflammation import AcuteInflammation +from torchhd.datasets.acute_nephritis import AcuteNephritis +from torchhd.datasets.annealing import Annealing +from torchhd.datasets.arrhythmia import Arrhythmia +from torchhd.datasets.audiology_std import AudiologyStd +from torchhd.datasets.balance_scale import BalanceScale +from torchhd.datasets.balloons import 
Balloons +from torchhd.datasets.bank import Bank +from torchhd.datasets.blood import Blood +from torchhd.datasets.breast_cancer import BreastCancer +from torchhd.datasets.breast_cancer_wisc import BreastCancerWisc +from torchhd.datasets.breast_cancer_wisc_diag import BreastCancerWiscDiag +from torchhd.datasets.breast_cancer_wisc_prog import BreastCancerWiscProg +from torchhd.datasets.breast_tissue import BreastTissue +from torchhd.datasets.car import Car +from torchhd.datasets.cardiotocography_3clases import Cardiotocography3Clases +from torchhd.datasets.cardiotocography_10clases import Cardiotocography10Clases +from torchhd.datasets.chess_krvk import ChessKrvk __all__ = [ @@ -27,4 +45,22 @@ "DatasetTrainTest", "Abalone", "Adult", + "AcuteInflammation", + "AcuteNephritis", + "Annealing", + "Arrhythmia", + "AudiologyStd", + "BalanceScale", + "Balloons", + "Bank", + "Blood", + "BreastCancer", + "BreastCancerWisc", + "BreastCancerWiscDiag", + "BreastCancerWiscProg", + "BreastTissue", + "Car", + "Cardiotocography3Clases", + "Cardiotocography10Clases", + "ChessKrvk", ] diff --git a/torchhd/datasets/acute_inflammation.py b/torchhd/datasets/acute_inflammation.py new file mode 100644 index 00000000..96fea897 --- /dev/null +++ b/torchhd/datasets/acute_inflammation.py @@ -0,0 +1,31 @@ +from typing import List +from torchhd.datasets import DatasetFourFold + + +class AcuteInflammation(DatasetFourFold): + """`Acute Inflammation of urinary bladder `_ dataset. + + Args: + root (string): Root directory containing the files of the dataset. + train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables. + Otherwise returns a subset of train dataset if hypersearch is performed (``hyper_search = True``) if not (``hyper_search = False``) returns a subset of training dataset + as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error. 
+ fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. + Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``. + Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. + hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) + while the second row corresponds to test indices (used if ``train = False``). + transform (callable, optional): A function/transform that takes in a torch.FloatTensor + and returns a transformed version. + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + """ + + name = "acute-inflammation" + classes: List[str] = [ + "yes", + "no", + ] diff --git a/torchhd/datasets/acute_nephritis.py b/torchhd/datasets/acute_nephritis.py new file mode 100644 index 00000000..e18f9453 --- /dev/null +++ b/torchhd/datasets/acute_nephritis.py @@ -0,0 +1,31 @@ +from typing import List +from torchhd.datasets import DatasetFourFold + + +class AcuteNephritis(DatasetFourFold): + """`Acute Nephritis of renal pelvis origin `_ dataset. + + Args: + root (string): Root directory containing the files of the dataset. + train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables. 
+ Otherwise returns a subset of train dataset if hypersearch is performed (``hyper_search = True``) if not (``hyper_search = False``) returns a subset of training dataset + as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error. + fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. + Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``. + Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. + hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) + while the second row corresponds to test indices (used if ``train = False``). + transform (callable, optional): A function/transform that takes in a torch.FloatTensor + and returns a transformed version. + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + """ + + name = "acute-nephritis" + classes: List[str] = [ + "yes", + "no", + ] diff --git a/torchhd/datasets/annealing.py b/torchhd/datasets/annealing.py new file mode 100644 index 00000000..1354b5e2 --- /dev/null +++ b/torchhd/datasets/annealing.py @@ -0,0 +1,30 @@ +from typing import List +from torchhd.datasets import DatasetTrainTest + + +class Annealing(DatasetTrainTest): + """`Annealing `_ dataset. + + Args: + root (string): Root directory containing the files of the dataset. 
+ train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by hyper_search variable. + Otherwise returns a subset of train dataset if hyperparameter search is performed (``hyper_search = True``) if not (``hyper_search = False``) returns test set. + hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) + while the second row corresponds to test indices (used if ``train = False``). + transform (callable, optional): A function/transform that takes in a torch.FloatTensor + and returns a transformed version. + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + """ + + name = "annealing" + classes: List[str] = [ + "1", + "2", + "3", + "4", + "5", + ] diff --git a/torchhd/datasets/arrhythmia.py b/torchhd/datasets/arrhythmia.py new file mode 100644 index 00000000..a5ca1820 --- /dev/null +++ b/torchhd/datasets/arrhythmia.py @@ -0,0 +1,42 @@ +from typing import List +from torchhd.datasets import DatasetFourFold + + +class Arrhythmia(DatasetFourFold): + """`Arrhythmia `_ dataset. + + Args: + root (string): Root directory containing the files of the dataset. + train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables. + Otherwise returns a subset of train dataset if hypersearch is performed (``hyper_search = True``) if not (``hyper_search = False``) returns a subset of training dataset + as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error. + fold (int, optional): Specifies which fold number to use. 
The default value of -1 returns all the training data from the corresponding file. + Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``. + Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. + hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) + while the second row corresponds to test indices (used if ``train = False``). + transform (callable, optional): A function/transform that takes in a torch.FloatTensor + and returns a transformed version. + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + """ + + name = "arrhythmia" + classes: List[str] = [ + "1 - normal", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "14", + "15", + "16 - unclassified", + ] diff --git a/torchhd/datasets/audiology_std.py b/torchhd/datasets/audiology_std.py new file mode 100644 index 00000000..5dcfff39 --- /dev/null +++ b/torchhd/datasets/audiology_std.py @@ -0,0 +1,43 @@ +from typing import List +from torchhd.datasets import DatasetTrainTest + + +class AudiologyStd(DatasetTrainTest): + """`Audiology (Standardized) `_ dataset. + + Args: + root (string): Root directory containing the files of the dataset. + train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by hyper_search variable. 
+ Otherwise returns a subset of train dataset if hyperparameter search is performed (``hyper_search = True``) if not (``hyper_search = False``) returns test set. + hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) + while the second row corresponds to test indices (used if ``train = False``). + transform (callable, optional): A function/transform that takes in a torch.FloatTensor + and returns a transformed version. + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + """ + + name = "audiology-std" + classes: List[str] = [ + "cochlear_age", + "cochlear_age_and_noise", + "cochlear_noise_and_heredity", + "cochlear_poss_noise", + "cochlear_unknown", + "conductive_discontinuity", + "conductive_fixation", + "mixed_cochlear_age_otitis_media", + "mixed_cochlear_age_s_om", + "mixed_cochlear_unk_discontinuity", + "mixed_cochlear_unk_fixation", + "mixed_cochlear_unk_ser_om", + "mixed_poss_noise_om", + "normal_ear", + "otitis_media", + "possible_brainstem_disorder", + "possible_menieres", + "retrocochlear_unknown", + ] diff --git a/torchhd/datasets/balance_scale.py b/torchhd/datasets/balance_scale.py new file mode 100644 index 00000000..5b370a74 --- /dev/null +++ b/torchhd/datasets/balance_scale.py @@ -0,0 +1,32 @@ +from typing import List +from torchhd.datasets import DatasetFourFold + + +class BalanceScale(DatasetFourFold): + """`Balance Scale `_ dataset. + + Args: + root (string): Root directory containing the files of the dataset. + train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables. 
+ Otherwise returns a subset of train dataset if hypersearch is performed (``hyper_search = True``) if not (``hyper_search = False``) returns a subset of training dataset + as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error. + fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. + Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``. + Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. + hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) + while the second row corresponds to test indices (used if ``train = False``). + transform (callable, optional): A function/transform that takes in a torch.FloatTensor + and returns a transformed version. + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + """ + + name = "balance-scale" + classes: List[str] = [ + "B", + "L", + "R", + ] diff --git a/torchhd/datasets/balloons.py b/torchhd/datasets/balloons.py new file mode 100644 index 00000000..a603e8c6 --- /dev/null +++ b/torchhd/datasets/balloons.py @@ -0,0 +1,31 @@ +from typing import List +from torchhd.datasets import DatasetFourFold + + +class Balloons(DatasetFourFold): + """`Balloons `_ dataset. + + Args: + root (string): Root directory containing the files of the dataset. 
+ train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables. + Otherwise returns a subset of train dataset if hypersearch is performed (``hyper_search = True``) if not (``hyper_search = False``) returns a subset of training dataset + as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error. + fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. + Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``. + Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. + hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) + while the second row corresponds to test indices (used if ``train = False``). + transform (callable, optional): A function/transform that takes in a torch.FloatTensor + and returns a transformed version. + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + """ + + name = "balloons" + classes: List[str] = [ + "inflated - F", + "inflated - T", + ] diff --git a/torchhd/datasets/bank.py b/torchhd/datasets/bank.py new file mode 100644 index 00000000..f243333f --- /dev/null +++ b/torchhd/datasets/bank.py @@ -0,0 +1,31 @@ +from typing import List +from torchhd.datasets import DatasetFourFold + + +class Bank(DatasetFourFold): + """`Bank Marketing `_ dataset. 
+ + Args: + root (string): Root directory containing the files of the dataset. + train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables. + Otherwise returns a subset of train dataset if hypersearch is performed (``hyper_search = True``) if not (``hyper_search = False``) returns a subset of training dataset + as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error. + fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. + Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``. + Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. + hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) + while the second row corresponds to test indices (used if ``train = False``). + transform (callable, optional): A function/transform that takes in a torch.FloatTensor + and returns a transformed version. + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. 
+ """ + + name = "bank" + classes: List[str] = [ + "no", + "yes", + ] diff --git a/torchhd/datasets/blood.py b/torchhd/datasets/blood.py new file mode 100644 index 00000000..a8aba28b --- /dev/null +++ b/torchhd/datasets/blood.py @@ -0,0 +1,31 @@ +from typing import List +from torchhd.datasets import DatasetFourFold + + +class Blood(DatasetFourFold): + """`Blood Transfusion Service Center `_ dataset. + + Args: + root (string): Root directory containing the files of the dataset. + train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables. + Otherwise returns a subset of train dataset if hypersearch is performed (``hyper_search = True``) if not (``hyper_search = False``) returns a subset of training dataset + as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error. + fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. + Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``. + Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. + hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) + while the second row corresponds to test indices (used if ``train = False``). + transform (callable, optional): A function/transform that takes in a torch.FloatTensor + and returns a transformed version. + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. 
If dataset is already downloaded, it is not + downloaded again. + """ + + name = "blood" + classes: List[str] = [ + "Not donated blood in March 2007", + "Donated blood in March 2007", + ] diff --git a/torchhd/datasets/breast_cancer.py b/torchhd/datasets/breast_cancer.py new file mode 100644 index 00000000..190d1e25 --- /dev/null +++ b/torchhd/datasets/breast_cancer.py @@ -0,0 +1,31 @@ +from typing import List +from torchhd.datasets import DatasetFourFold + + +class BreastCancer(DatasetFourFold): + """`Breast Cancer `_ dataset. + + Args: + root (string): Root directory containing the files of the dataset. + train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables. + Otherwise returns a subset of train dataset if hypersearch is performed (``hyper_search = True``) if not (``hyper_search = False``) returns a subset of training dataset + as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error. + fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. + Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``. + Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. + hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) + while the second row corresponds to test indices (used if ``train = False``). + transform (callable, optional): A function/transform that takes in a torch.FloatTensor + and returns a transformed version. + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. 
+ download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + """ + + name = "breast-cancer" + classes: List[str] = [ + "Irradiat - no", + "Irradiat - yes", + ] diff --git a/torchhd/datasets/breast_cancer_wisc.py b/torchhd/datasets/breast_cancer_wisc.py new file mode 100644 index 00000000..f0c83162 --- /dev/null +++ b/torchhd/datasets/breast_cancer_wisc.py @@ -0,0 +1,31 @@ +from typing import List +from torchhd.datasets import DatasetFourFold + + +class BreastCancerWisc(DatasetFourFold): + """`Breast Cancer Wisconsin (Original) `_ dataset. + + Args: + root (string): Root directory containing the files of the dataset. + train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables. + Otherwise returns a subset of train dataset if hypersearch is performed (``hyper_search = True``) if not (``hyper_search = False``) returns a subset of training dataset + as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error. + fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. + Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``. + Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. + hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) + while the second row corresponds to test indices (used if ``train = False``). 
+ transform (callable, optional): A function/transform that takes in a torch.FloatTensor + and returns a transformed version. + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + """ + + name = "breast-cancer-wisc" + classes: List[str] = [ + "Benign", + "Malignant", + ] diff --git a/torchhd/datasets/breast_cancer_wisc_diag.py b/torchhd/datasets/breast_cancer_wisc_diag.py new file mode 100644 index 00000000..72c8467a --- /dev/null +++ b/torchhd/datasets/breast_cancer_wisc_diag.py @@ -0,0 +1,31 @@ +from typing import List +from torchhd.datasets import DatasetFourFold + + +class BreastCancerWiscDiag(DatasetFourFold): + """`Breast Cancer Wisconsin (Diagnostic) `_ dataset. + + Args: + root (string): Root directory containing the files of the dataset. + train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables. + Otherwise returns a subset of train dataset if hypersearch is performed (``hyper_search = True``) if not (``hyper_search = False``) returns a subset of training dataset + as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error. + fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. + Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``. + Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. + hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. 
The first row corresponds to train indices (used if ``train = True``) + while the second row corresponds to test indices (used if ``train = False``). + transform (callable, optional): A function/transform that takes in a torch.FloatTensor + and returns a transformed version. + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + """ + + name = "breast-cancer-wisc-diag" + classes: List[str] = [ + "Benign", + "Malignant", + ] diff --git a/torchhd/datasets/breast_cancer_wisc_prog.py b/torchhd/datasets/breast_cancer_wisc_prog.py new file mode 100644 index 00000000..2c4973a4 --- /dev/null +++ b/torchhd/datasets/breast_cancer_wisc_prog.py @@ -0,0 +1,31 @@ +from typing import List +from torchhd.datasets import DatasetFourFold + + +class BreastCancerWiscProg(DatasetFourFold): + """`Breast Cancer Wisconsin (Prognostic) `_ dataset. + + Args: + root (string): Root directory containing the files of the dataset. + train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables. + Otherwise returns a subset of train dataset if hypersearch is performed (``hyper_search = True``) if not (``hyper_search = False``) returns a subset of training dataset + as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error. + fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. + Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``. 
+ Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. + hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) + while the second row corresponds to test indices (used if ``train = False``). + transform (callable, optional): A function/transform that takes in a torch.FloatTensor + and returns a transformed version. + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + """ + + name = "breast-cancer-wisc-prog" + classes: List[str] = [ + "Nonrecur", + "Recur", + ] diff --git a/torchhd/datasets/breast_tissue.py b/torchhd/datasets/breast_tissue.py new file mode 100644 index 00000000..d9ef60e3 --- /dev/null +++ b/torchhd/datasets/breast_tissue.py @@ -0,0 +1,35 @@ +from typing import List +from torchhd.datasets import DatasetFourFold + + +class BreastTissue(DatasetFourFold): + """`Breast Tissue `_ dataset. + + Args: + root (string): Root directory containing the files of the dataset. + train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables. + Otherwise returns a subset of train dataset if hypersearch is performed (``hyper_search = True``) if not (``hyper_search = False``) returns a subset of training dataset + as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error. + fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. 
+ Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``. + Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. + hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) + while the second row corresponds to test indices (used if ``train = False``). + transform (callable, optional): A function/transform that takes in a torch.FloatTensor + and returns a transformed version. + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + """ + + name = "breast-tissue" + classes: List[str] = [ + "carcinoma", + "fibro-adenoma", + "mastopathy", + "glandular", + "connective", + "adipose", + ] diff --git a/torchhd/datasets/car.py b/torchhd/datasets/car.py new file mode 100644 index 00000000..8784156d --- /dev/null +++ b/torchhd/datasets/car.py @@ -0,0 +1,33 @@ +from typing import List +from torchhd.datasets import DatasetFourFold + + +class Car(DatasetFourFold): + """`Car Evaluation `_ dataset. + + Args: + root (string): Root directory containing the files of the dataset. + train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables. + Otherwise returns a subset of train dataset if hypersearch is performed (``hyper_search = True``) if not (``hyper_search = False``) returns a subset of training dataset + as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
+ fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. + Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``. + Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. + hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) + while the second row corresponds to test indices (used if ``train = False``). + transform (callable, optional): A function/transform that takes in a torch.FloatTensor + and returns a transformed version. + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + """ + + name = "car" + classes: List[str] = [ + "unacc", + "acc", + "good", + "vgood", + ] diff --git a/torchhd/datasets/cardiotocography_10clases.py b/torchhd/datasets/cardiotocography_10clases.py new file mode 100644 index 00000000..c50a0a9b --- /dev/null +++ b/torchhd/datasets/cardiotocography_10clases.py @@ -0,0 +1,39 @@ +from typing import List +from torchhd.datasets import DatasetFourFold + + +class Cardiotocography10Clases(DatasetFourFold): + """`Cardiotocography `_ dataset. + + Args: + root (string): Root directory containing the files of the dataset. + train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
+ Otherwise returns a subset of train dataset if hypersearch is performed (``hyper_search = True``) if not (``hyper_search = False``) returns a subset of training dataset + as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error. + fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. + Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``. + Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. + hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) + while the second row corresponds to test indices (used if ``train = False``). + transform (callable, optional): A function/transform that takes in a torch.FloatTensor + and returns a transformed version. + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + """ + + name = "cardiotocography-10clases" + classes: List[str] = [ + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + ] diff --git a/torchhd/datasets/cardiotocography_3clases.py b/torchhd/datasets/cardiotocography_3clases.py new file mode 100644 index 00000000..27e70077 --- /dev/null +++ b/torchhd/datasets/cardiotocography_3clases.py @@ -0,0 +1,32 @@ +from typing import List +from torchhd.datasets import DatasetFourFold + + +class Cardiotocography3Clases(DatasetFourFold): + """`Cardiotocography `_ dataset.
+ + Args: + root (string): Root directory containing the files of the dataset. + train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables. + Otherwise returns a subset of train dataset if hypersearch is performed (``hyper_search = True``) if not (``hyper_search = False``) returns a subset of training dataset + as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error. + fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. + Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``. + Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. + hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) + while the second row corresponds to test indices (used if ``train = False``). + transform (callable, optional): A function/transform that takes in a torch.FloatTensor + and returns a transformed version. + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again.
+ """ + + name = "cardiotocography-3clases" + classes: List[str] = [ + "normal", + "suspect", + "pathologic", + ] diff --git a/torchhd/datasets/chess_krvk.py b/torchhd/datasets/chess_krvk.py new file mode 100644 index 00000000..9825c990 --- /dev/null +++ b/torchhd/datasets/chess_krvk.py @@ -0,0 +1,47 @@ +from typing import List +from torchhd.datasets import DatasetFourFold + + +class ChessKrvk(DatasetFourFold): + """`Chess (King-Rook vs. King) `_ dataset. + + Args: + root (string): Root directory containing the files of the dataset. + train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables. + Otherwise returns a subset of train dataset if hypersearch is performed (``hyper_search = True``) if not (``hyper_search = False``) returns a subset of training dataset + as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error. + fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. + Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``. + Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. + hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) + while the second row corresponds to test indices (used if ``train = False``). + transform (callable, optional): A function/transform that takes in a torch.FloatTensor + and returns a transformed version. + target_transform (callable, optional): A function/transform that takes in the + target and transforms it.
+ download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + """ + + name = "chess-krvk" + classes: List[str] = [ + "draw", + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + ]