-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add 20 datasets from UCI repository (#103)
* Add new datasets * [github-action] formatting fixes * Add tqdm dependency Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: mikeheddes <[email protected]>
- Loading branch information
1 parent
996a71d
commit 0ac7881
Showing
21 changed files
with
667 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ torch | |
torchvision | ||
pandas | ||
requests | ||
tqdm | ||
numpy | ||
sphinx | ||
sphinx-rtd-theme |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
from typing import List | ||
from torchhd.datasets import DatasetFourFold | ||
|
||
|
||
class AcuteInflammation(DatasetFourFold): | ||
"""`Acute Inflammation of urinary bladder <https://archive.ics.uci.edu/ml/datasets/Acute+Inflammations>`_ dataset. | ||
Args: | ||
root (string): Root directory containing the files of the dataset. | ||
train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``fold`` and ``hyper_search`` arguments. | ||
Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset | ||
as specified in ``conxuntos_kfold.dat`` if the fold number is valid; otherwise, an error is issued. | ||
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. | ||
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if ``hyper_search`` is set to False and ``0 <= fold <= 3``. | ||
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. | ||
hyper_search (bool, optional): If True, creates the dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) | ||
while the second row corresponds to test indices (used if ``train = False``). | ||
transform (callable, optional): A function/transform that takes in a torch.FloatTensor | ||
and returns a transformed version. | ||
target_transform (callable, optional): A function/transform that takes in the | ||
target and transforms it. | ||
download (bool, optional): If True, downloads the dataset from the internet and | ||
puts it in root directory. If dataset is already downloaded, it is not | ||
downloaded again. | ||
""" | ||
|
||
name = "acute-inflammation" | ||
classes: List[str] = [ | ||
"yes", | ||
"no", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
from typing import List | ||
from torchhd.datasets import DatasetFourFold | ||
|
||
|
||
class AcuteNephritis(DatasetFourFold): | ||
"""`Acute Nephritis of renal pelvis origin <https://archive.ics.uci.edu/ml/datasets/Acute+Inflammations>`_ dataset. | ||
Args: | ||
root (string): Root directory containing the files of the dataset. | ||
train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``fold`` and ``hyper_search`` arguments. | ||
Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset | ||
as specified in ``conxuntos_kfold.dat`` if the fold number is valid; otherwise, an error is issued. | ||
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. | ||
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if ``hyper_search`` is set to False and ``0 <= fold <= 3``. | ||
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. | ||
hyper_search (bool, optional): If True, creates the dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) | ||
while the second row corresponds to test indices (used if ``train = False``). | ||
transform (callable, optional): A function/transform that takes in a torch.FloatTensor | ||
and returns a transformed version. | ||
target_transform (callable, optional): A function/transform that takes in the | ||
target and transforms it. | ||
download (bool, optional): If True, downloads the dataset from the internet and | ||
puts it in root directory. If dataset is already downloaded, it is not | ||
downloaded again. | ||
""" | ||
|
||
name = "acute-nephritis" | ||
classes: List[str] = [ | ||
"yes", | ||
"no", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
from typing import List | ||
from torchhd.datasets import DatasetTrainTest | ||
|
||
|
||
class Annealing(DatasetTrainTest): | ||
"""`Annealing <https://archive.ics.uci.edu/ml/datasets/Annealing>`_ dataset. | ||
Args: | ||
root (string): Root directory containing the files of the dataset. | ||
train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``hyper_search`` argument. | ||
Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns the test set. | ||
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) | ||
while the second row corresponds to test indices (used if ``train = False``). | ||
transform (callable, optional): A function/transform that takes in a torch.FloatTensor | ||
and returns a transformed version. | ||
target_transform (callable, optional): A function/transform that takes in the | ||
target and transforms it. | ||
download (bool, optional): If True, downloads the dataset from the internet and | ||
puts it in root directory. If dataset is already downloaded, it is not | ||
downloaded again. | ||
""" | ||
|
||
name = "annealing" | ||
classes: List[str] = [ | ||
"1", | ||
"2", | ||
"3", | ||
"4", | ||
"5", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
from typing import List | ||
from torchhd.datasets import DatasetFourFold | ||
|
||
|
||
class Arrhythmia(DatasetFourFold): | ||
"""`Arrhythmia <https://archive.ics.uci.edu/ml/datasets/arrhythmia>`_ dataset. | ||
Args: | ||
root (string): Root directory containing the files of the dataset. | ||
train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``fold`` and ``hyper_search`` arguments. | ||
Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset | ||
as specified in ``conxuntos_kfold.dat`` if the fold number is valid; otherwise, an error is issued. | ||
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. | ||
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if ``hyper_search`` is set to False and ``0 <= fold <= 3``. | ||
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. | ||
hyper_search (bool, optional): If True, creates the dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) | ||
while the second row corresponds to test indices (used if ``train = False``). | ||
transform (callable, optional): A function/transform that takes in a torch.FloatTensor | ||
and returns a transformed version. | ||
target_transform (callable, optional): A function/transform that takes in the | ||
target and transforms it. | ||
download (bool, optional): If True, downloads the dataset from the internet and | ||
puts it in root directory. If dataset is already downloaded, it is not | ||
downloaded again. | ||
""" | ||
|
||
name = "arrhythmia" | ||
classes: List[str] = [ | ||
"1 - normal", | ||
"2", | ||
"3", | ||
"4", | ||
"5", | ||
"6", | ||
"7", | ||
"8", | ||
"9", | ||
"10", | ||
"14", | ||
"15", | ||
"16 - unclassified", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from typing import List | ||
from torchhd.datasets import DatasetTrainTest | ||
|
||
|
||
class AudiologyStd(DatasetTrainTest): | ||
"""`Audiology (Standardized) <https://archive.ics.uci.edu/ml/datasets/Audiology+%28Standardized%29>`_ dataset. | ||
Args: | ||
root (string): Root directory containing the files of the dataset. | ||
train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``hyper_search`` argument. | ||
Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns the test set. | ||
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) | ||
while the second row corresponds to test indices (used if ``train = False``). | ||
transform (callable, optional): A function/transform that takes in a torch.FloatTensor | ||
and returns a transformed version. | ||
target_transform (callable, optional): A function/transform that takes in the | ||
target and transforms it. | ||
download (bool, optional): If True, downloads the dataset from the internet and | ||
puts it in root directory. If dataset is already downloaded, it is not | ||
downloaded again. | ||
""" | ||
|
||
name = "audiology-std" | ||
classes: List[str] = [ | ||
"cochlear_age", | ||
"cochlear_age_and_noise", | ||
"cochlear_noise_and_heredity", | ||
"cochlear_poss_noise", | ||
"cochlear_unknown", | ||
"conductive_discontinuity", | ||
"conductive_fixation", | ||
"mixed_cochlear_age_otitis_media", | ||
"mixed_cochlear_age_s_om", | ||
"mixed_cochlear_unk_discontinuity", | ||
"mixed_cochlear_unk_fixation", | ||
"mixed_cochlear_unk_ser_om", | ||
"mixed_poss_noise_om", | ||
"normal_ear", | ||
"otitis_media", | ||
"possible_brainstem_disorder", | ||
"possible_menieres", | ||
"retrocochlear_unknown", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
from typing import List | ||
from torchhd.datasets import DatasetFourFold | ||
|
||
|
||
class BalanceScale(DatasetFourFold): | ||
"""`Balance Scale <https://archive.ics.uci.edu/ml/datasets/balance+scale>`_ dataset. | ||
Args: | ||
root (string): Root directory containing the files of the dataset. | ||
train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``fold`` and ``hyper_search`` arguments. | ||
Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset | ||
as specified in ``conxuntos_kfold.dat`` if the fold number is valid; otherwise, an error is issued. | ||
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. | ||
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if ``hyper_search`` is set to False and ``0 <= fold <= 3``. | ||
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. | ||
hyper_search (bool, optional): If True, creates the dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) | ||
while the second row corresponds to test indices (used if ``train = False``). | ||
transform (callable, optional): A function/transform that takes in a torch.FloatTensor | ||
and returns a transformed version. | ||
target_transform (callable, optional): A function/transform that takes in the | ||
target and transforms it. | ||
download (bool, optional): If True, downloads the dataset from the internet and | ||
puts it in root directory. If dataset is already downloaded, it is not | ||
downloaded again. | ||
""" | ||
|
||
name = "balance-scale" | ||
classes: List[str] = [ | ||
"B", | ||
"L", | ||
"R", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
from typing import List | ||
from torchhd.datasets import DatasetFourFold | ||
|
||
|
||
class Balloons(DatasetFourFold): | ||
"""`Balloons <https://archive.ics.uci.edu/ml/datasets/balloons>`_ dataset. | ||
Args: | ||
root (string): Root directory containing the files of the dataset. | ||
train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``fold`` and ``hyper_search`` arguments. | ||
Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset | ||
as specified in ``conxuntos_kfold.dat`` if the fold number is valid; otherwise, an error is issued. | ||
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. | ||
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if ``hyper_search`` is set to False and ``0 <= fold <= 3``. | ||
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. | ||
hyper_search (bool, optional): If True, creates the dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) | ||
while the second row corresponds to test indices (used if ``train = False``). | ||
transform (callable, optional): A function/transform that takes in a torch.FloatTensor | ||
and returns a transformed version. | ||
target_transform (callable, optional): A function/transform that takes in the | ||
target and transforms it. | ||
download (bool, optional): If True, downloads the dataset from the internet and | ||
puts it in root directory. If dataset is already downloaded, it is not | ||
downloaded again. | ||
""" | ||
|
||
name = "balloons" | ||
classes: List[str] = [ | ||
"inflated - F", | ||
"inflated - T", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
from typing import List | ||
from torchhd.datasets import DatasetFourFold | ||
|
||
|
||
class Bank(DatasetFourFold): | ||
"""`Bank Marketing <https://archive.ics.uci.edu/ml/datasets/Bank+Marketing>`_ dataset. | ||
Args: | ||
root (string): Root directory containing the files of the dataset. | ||
train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``fold`` and ``hyper_search`` arguments. | ||
Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset | ||
as specified in ``conxuntos_kfold.dat`` if the fold number is valid; otherwise, an error is issued. | ||
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file. | ||
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if ``hyper_search`` is set to False and ``0 <= fold <= 3``. | ||
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets. | ||
hyper_search (bool, optional): If True, creates the dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``) | ||
while the second row corresponds to test indices (used if ``train = False``). | ||
transform (callable, optional): A function/transform that takes in a torch.FloatTensor | ||
and returns a transformed version. | ||
target_transform (callable, optional): A function/transform that takes in the | ||
target and transforms it. | ||
download (bool, optional): If True, downloads the dataset from the internet and | ||
puts it in root directory. If dataset is already downloaded, it is not | ||
downloaded again. | ||
""" | ||
|
||
name = "bank" | ||
classes: List[str] = [ | ||
"no", | ||
"yes", | ||
] |
Oops, something went wrong.