diff --git a/dev-requirements.txt b/dev-requirements.txt
index f242a5cd..cb8699a8 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -9,4 +9,5 @@ pytest
black
tqdm
openpyxl
-coverage
\ No newline at end of file
+coverage
+gdown
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 61565201..13b260f5 100644
--- a/setup.py
+++ b/setup.py
@@ -3,6 +3,7 @@
https://packaging.python.org/guides/distributing-packages-using-setuptools/
https://github.com/pypa/sampleproject
"""
+
from setuptools import setup, find_packages
# Read the version without importing any dependencies
diff --git a/torchhd/datasets/utils.py b/torchhd/datasets/utils.py
index 05898e84..0b7be1f8 100644
--- a/torchhd/datasets/utils.py
+++ b/torchhd/datasets/utils.py
@@ -23,13 +23,8 @@
#
import zipfile
import requests
-import re
import tqdm
-# Code adapted from:
-# https://github.com/wkentaro/gdown/blob/941200a9a1f4fd7ab903fb595baa5cad34a30a45/gdown/download.py
-# https://stackoverflow.com/questions/38511444/python-download-files-from-google-drive-using-url
-
def download_file(url, destination):
response = requests.get(url, allow_redirects=True, stream=True)
@@ -37,79 +32,15 @@ def download_file(url, destination):
def download_file_from_google_drive(file_id, destination):
- URL = "https://docs.google.com/uc"
- params = dict(id=file_id, export="download")
-
- with requests.Session() as session:
- response = session.get(URL, params=params, stream=True)
-
- # downloads right away
- if "Content-Disposition" in response.headers:
- write_response_to_disk(response, destination)
- return
-
- # try to find a confirmation token
- token = get_google_drive_confirm_token(response)
-
- if token:
- params = dict(id=id, confirm=token)
- response = session.get(URL, params=params, stream=True)
-
- # download if confirmation token worked
- if "Content-Disposition" in response.headers:
- write_response_to_disk(response, destination)
- return
-
- # extract download url from confirmation page
- url = get_url_from_gdrive_confirmation(response.text)
- response = session.get(url, stream=True)
-
- write_response_to_disk(response, destination)
-
-
-def get_google_drive_confirm_token(response):
- for key, value in response.cookies.items():
- if key.startswith("download_warning"):
- return value
-
- return None
-
-
-def get_url_from_gdrive_confirmation(contents):
- url = ""
- for line in contents.splitlines():
- m = re.search(r'href="(\/uc\?export=download[^"]+)', line)
- if m:
- url = "https://docs.google.com" + m.groups()[0]
- url = url.replace("&amp;", "&")
- break
- m = re.search('id="downloadForm" action="(.+?)"', line)
- if m:
- url = m.groups()[0]
- url = url.replace("&amp;", "&")
- break
- m = re.search('id="download-form" action="(.+?)"', line)
- if m:
- url = m.groups()[0]
- url = url.replace("&amp;", "&")
- break
- m = re.search('"downloadUrl":"([^"]+)', line)
- if m:
- url = m.groups()[0]
- url = url.replace("\\u003d", "=")
- url = url.replace("\\u0026", "&")
- break
- m = re.search('<p class="uc-error-subcaption">(.*)</p>', line)
- if m:
- error = m.groups()[0]
- raise RuntimeError(error)
- if not url:
- raise RuntimeError(
- "Cannot retrieve the public link of the file. "
- "You may need to change the permission to "
- "'Anyone with the link', or have had many accesses."
+ try:
+ import gdown
+ except ImportError:
+ raise ImportError(
+ "Downloading files from Google drive requires gdown to be installed, see: https://github.com/wkentaro/gdown"
)
- return url
+
+ url = f"https://drive.google.com/uc?id={file_id}"
+ gdown.download(url, destination)
def get_download_progress_bar(response):
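For reference, a minimal sketch of how the rewritten helper is used once gdown is available; the file id and destination are placeholders, not values from this patch:

    from torchhd.datasets.utils import download_file_from_google_drive

    # delegates to gdown.download("https://drive.google.com/uc?id=FILE_ID", destination)
    download_file_from_google_drive("FILE_ID", "/tmp/archive.zip")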
diff --git a/torchhd/models.py b/torchhd/models.py
index 0f7de6dc..af6d7b2b 100644
--- a/torchhd/models.py
+++ b/torchhd/models.py
@@ -28,12 +28,8 @@
from torch import Tensor
from torch.nn.parameter import Parameter
import torch.nn.init as init
-import torch.utils.data as data
-from tqdm import tqdm
-
import torchhd.functional as functional
-import torchhd.datasets as datasets
import torchhd.embeddings as embeddings
@@ -71,6 +67,7 @@ class Centroid(nn.Module):
>>> output.size()
torch.Size([128, 30])
"""
+
__constants__ = ["in_features", "out_features"]
in_features: int
out_features: int
@@ -108,6 +105,30 @@ def add(self, input: Tensor, target: Tensor, lr: float = 1.0) -> None:
"""Adds the input vectors scaled by the lr to the target prototype vectors."""
self.weight.index_add_(0, target, input, alpha=lr)
+ @torch.no_grad()
+ def add_adapt(self, input: Tensor, target: Tensor, lr: float = 1.0) -> None:
+ r"""Only updates the prototype vectors on wrongly predicted inputs.
+
+ Implements the iterative training method as described in `AdaptHD: Adaptive Efficient Training for Brain-Inspired Hyperdimensional Computing `_.
+
+ Subtracts the input from the mispredicted class prototype scaled by the learning rate
+ and adds the input to the target prototype scaled by the learning rate.
+ """
+ logit = self(input)
+ pred = logit.argmax(1)
+ is_wrong = target != pred
+
+ # cancel update if all predictions were correct
+ if is_wrong.sum().item() == 0:
+ return
+
+ input = input[is_wrong]
+ target = target[is_wrong]
+ pred = pred[is_wrong]
+
+ self.weight.index_add_(0, target, input, alpha=lr)
+ self.weight.index_add_(0, pred, input, alpha=-lr)
+
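A minimal usage sketch (not part of the patch) of iterative AdaptHD-style training with the new method; the random encodings stand in for a real encoder:

    import torch
    from torchhd.models import Centroid

    model = Centroid(10000, 3)                       # 3 classes, 10,000-dim prototypes
    for _ in range(5):                               # a few refinement passes
        encodings = torch.randn(128, 10000)          # placeholder for encoded samples
        targets = torch.randint(0, 3, (128,))
        model.add_adapt(encodings, targets, lr=0.5)  # only misclassified samples update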
@torch.no_grad()
def add_online(self, input: Tensor, target: Tensor, lr: float = 1.0) -> None:
r"""Only updates the prototype vectors on wrongly predicted inputs.
@@ -137,10 +158,9 @@ def add_online(self, input: Tensor, target: Tensor, lr: float = 1.0) -> None:
alpha1 = 1.0 - logit.gather(1, target.unsqueeze(1))
alpha2 = logit.gather(1, pred.unsqueeze(1)) - 1.0
- self.weight.index_add_(0, target, lr * alpha1 * input)
- self.weight.index_add_(0, pred, lr * alpha2 * input)
+ self.weight.index_add_(0, target, alpha1 * input, alpha=lr)
+ self.weight.index_add_(0, pred, alpha2 * input, alpha=lr)
- @torch.no_grad()
def normalize(self, eps=1e-12) -> None:
"""Transforms all the class prototype vectors into unit vectors.
@@ -148,12 +168,20 @@ def normalize(self, eps=1e-12) -> None:
Training further after calling this method is not advised.
"""
norms = self.weight.norm(dim=1, keepdim=True)
+
+ if torch.isclose(norms, torch.zeros_like(norms), equal_nan=True).any():
+ import warnings
+
+ warnings.warn(
+ "The norm of a prototype vector is nearly zero upon normalizing, this could indicate a bug."
+ )
+
norms.clamp_(min=eps)
self.weight.div_(norms)
def extra_repr(self) -> str:
return "in_features={}, out_features={}".format(
- self.in_features, self.out_features is not None
+ self.in_features, self.out_features
)
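To see when the new normalize() warning fires, a sketch assuming Centroid's default zero initialization, under which every untrained prototype has norm zero:

    from torchhd.models import Centroid

    model = Centroid(100, 5)  # untrained: all prototype vectors are zero
    model.normalize()         # each class triggers the near-zero-norm warning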
diff --git a/torchhd/structures.py b/torchhd/structures.py
index ec1f96bf..c381d76e 100644
--- a/torchhd/structures.py
+++ b/torchhd/structures.py
@@ -186,12 +186,10 @@ class Multiset:
@overload
def __init__(
self, dimensions: int, vsa: VSAOptions = "MAP", *, device=None, dtype=None
- ):
- ...
+ ): ...
@overload
- def __init__(self, input: VSATensor, *, size=0):
- ...
+ def __init__(self, input: VSATensor, *, size=0): ...
def __init__(self, dim_or_input: Any, vsa: VSAOptions = "MAP", **kwargs):
self.size = kwargs.get("size", 0)
@@ -334,12 +332,10 @@ class HashTable:
@overload
def __init__(
self, dimensions: int, vsa: VSAOptions = "MAP", *, device=None, dtype=None
- ):
- ...
+ ): ...
@overload
- def __init__(self, input: VSATensor, *, size=0):
- ...
+ def __init__(self, input: VSATensor, *, size=0): ...
def __init__(self, dim_or_input: int, vsa: VSAOptions = "MAP", **kwargs):
self.size = kwargs.get("size", 0)
@@ -501,12 +497,10 @@ class BundleSequence:
@overload
def __init__(
self, dimensions: int, vsa: VSAOptions = "MAP", *, device=None, dtype=None
- ):
- ...
+ ): ...
@overload
- def __init__(self, input: VSATensor, *, size=0):
- ...
+ def __init__(self, input: VSATensor, *, size=0): ...
def __init__(self, dim_or_input: int, vsa: VSAOptions = "MAP", **kwargs):
self.size = kwargs.get("size", 0)
@@ -693,12 +687,10 @@ class BindSequence:
@overload
def __init__(
self, dimensions: int, vsa: VSAOptions = "MAP", *, device=None, dtype=None
- ):
- ...
+ ): ...
@overload
- def __init__(self, input: VSATensor, *, size=0):
- ...
+ def __init__(self, input: VSATensor, *, size=0): ...
def __init__(self, dim_or_input: int, vsa: VSAOptions = "MAP", **kwargs):
self.size = kwargs.get("size", 0)
@@ -861,12 +853,10 @@ def __init__(
directed=False,
device=None,
dtype=None
- ):
- ...
+ ): ...
@overload
- def __init__(self, input: VSATensor, *, directed=False):
- ...
+ def __init__(self, input: VSATensor, *, directed=False): ...
def __init__(self, dim_or_input: int, vsa: VSAOptions = "MAP", **kwargs):
self.is_directed = kwargs.get("directed", False)
diff --git a/torchhd/tensors/bsbc.py b/torchhd/tensors/bsbc.py
index e2c1688e..3f79d0bc 100644
--- a/torchhd/tensors/bsbc.py
+++ b/torchhd/tensors/bsbc.py
@@ -36,6 +36,7 @@ class BSBCTensor(VSATensor):
Because the vectors are sparse and have a fixed magnitude, we only represent the index of the non-zero value.
"""
+
block_size: int
supported_dtypes: Set[torch.dtype] = {
torch.float32,
diff --git a/torchhd/tensors/fhrr.py b/torchhd/tensors/fhrr.py
index 7f8d0fa7..55d0ddf5 100644
--- a/torchhd/tensors/fhrr.py
+++ b/torchhd/tensors/fhrr.py
@@ -395,5 +395,12 @@ def cosine_similarity(self, others: "FHRRTensor", *, eps=1e-08) -> Tensor:
else:
magnitude = self_mag * others_mag
+ if torch.isclose(magnitude, torch.zeros_like(magnitude), equal_nan=True).any():
+ import warnings
+
+ warnings.warn(
+ "The norm of a vector is nearly zero, this could indicate a bug."
+ )
+
magnitude = torch.clamp(magnitude, min=eps)
return self.dot_similarity(others) / magnitude
diff --git a/torchhd/tensors/hrr.py b/torchhd/tensors/hrr.py
index 9fd08815..34ffca4f 100644
--- a/torchhd/tensors/hrr.py
+++ b/torchhd/tensors/hrr.py
@@ -382,5 +382,12 @@ def cosine_similarity(self, others: "HRRTensor", *, eps=1e-08) -> Tensor:
else:
magnitude = self_mag * others_mag
+ if torch.isclose(magnitude, torch.zeros_like(magnitude), equal_nan=True).any():
+ import warnings
+
+ warnings.warn(
+ "The norm of a vector is nearly zero, this could indicate a bug."
+ )
+
magnitude = torch.clamp(magnitude, min=eps)
return self.dot_similarity(others) / magnitude
diff --git a/torchhd/tensors/map.py b/torchhd/tensors/map.py
index 60e8e3ac..b93c4a54 100644
--- a/torchhd/tensors/map.py
+++ b/torchhd/tensors/map.py
@@ -368,5 +368,12 @@ def cosine_similarity(
else:
magnitude = self_mag * others_mag
+ if torch.isclose(magnitude, torch.zeros_like(magnitude), equal_nan=True).any():
+ import warnings
+
+ warnings.warn(
+ "The norm of a vector is nearly zero, this could indicate a bug."
+ )
+
magnitude = torch.clamp(magnitude, min=eps)
return self.dot_similarity(others, dtype=dtype) / magnitude
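The same guard is added to FHRRTensor and HRRTensor above and to VTBTensor below; a sketch of what now triggers it, using MAP for simplicity:

    import torch
    from torchhd.tensors.map import MAPTensor

    a = torch.randn(2, 1000).as_subclass(MAPTensor)
    zero = torch.zeros(2, 1000).as_subclass(MAPTensor)
    zero.cosine_similarity(a)  # warns that a vector norm is nearly zero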
diff --git a/torchhd/tensors/vtb.py b/torchhd/tensors/vtb.py
index f7bd84de..8329bb86 100644
--- a/torchhd/tensors/vtb.py
+++ b/torchhd/tensors/vtb.py
@@ -411,5 +411,12 @@ def cosine_similarity(self, others: "VTBTensor", *, eps=1e-08) -> Tensor:
else:
magnitude = self_mag * others_mag
+ if torch.isclose(magnitude, torch.zeros_like(magnitude), equal_nan=True).any():
+ import warnings
+
+ warnings.warn(
+ "The norm of a vector is nearly zero, this could indicate a bug."
+ )
+
magnitude = torch.clamp(magnitude, min=eps)
return self.dot_similarity(others) / magnitude
diff --git a/torchhd/tests/test_embeddings.py b/torchhd/tests/test_embeddings.py
index a9abda34..17b6362f 100644
--- a/torchhd/tests/test_embeddings.py
+++ b/torchhd/tests/test_embeddings.py
@@ -74,7 +74,7 @@ def test_dtype(self, vsa):
if vsa == "BSC":
assert emb(idx).dtype == torch.bool
elif vsa == "MAP" or vsa == "HRR":
- assert emb(idx).dtype == torch.float
+ assert emb(idx).dtype == torch.get_default_dtype()
elif vsa == "FHRR":
assert (
emb(idx).dtype == torch.complex64 or emb(idx).dtype == torch.complex32
@@ -142,7 +142,7 @@ def test_dtype(self, vsa):
if vsa == "BSC":
assert emb(idx).dtype == torch.bool
elif vsa in {"MAP", "HRR", "VTB"}:
- assert emb(idx).dtype == torch.float
+ assert emb(idx).dtype == torch.get_default_dtype()
elif vsa == "FHRR":
assert emb(idx).dtype in {torch.complex64, torch.complex32}
@@ -244,7 +244,7 @@ def test_dtype(self, vsa):
if vsa == "BSC":
assert emb(idx).dtype == torch.bool
elif vsa in {"MAP", "HRR", "VTB"}:
- assert emb(idx).dtype == torch.float
+ assert emb(idx).dtype == torch.get_default_dtype()
elif vsa == "FHRR":
assert emb(idx).dtype in {torch.complex64, torch.complex32}
@@ -295,7 +295,7 @@ def test_dtype(self, vsa):
if vsa == "BSC":
assert emb(idx).dtype == torch.bool
elif vsa in {"MAP", "HRR", "VTB"}:
- assert emb(idx).dtype == torch.float
+ assert emb(idx).dtype == torch.get_default_dtype()
elif vsa == "FHRR":
assert emb(idx).dtype in {torch.complex64, torch.complex32}
@@ -365,7 +365,7 @@ def test_dtype(self, vsa):
if vsa == "BSC":
assert emb(angle).dtype == torch.bool
elif vsa == "MAP":
- assert emb(angle).dtype == torch.float
+ assert emb(angle).dtype == torch.get_default_dtype()
elif vsa == "FHRR":
assert (
emb(angle).dtype == torch.complex64
@@ -441,7 +441,7 @@ def test_dtype(self, vsa):
if vsa == "BSC":
assert emb(angle).dtype == torch.bool
elif vsa == "MAP":
- assert emb(angle).dtype == torch.float
+ assert emb(angle).dtype == torch.get_default_dtype()
elif vsa == "FHRR":
assert (
emb(angle).dtype == torch.complex64
@@ -504,7 +504,7 @@ def test_dtype(self, vsa):
emb = embeddings.Projection(in_features, out_features, vsa=vsa)
x = torch.randn(1, in_features)
if vsa == "MAP" or vsa == "HRR":
- assert emb(x).dtype == torch.float
+ assert emb(x).dtype == torch.get_default_dtype()
else:
return
@@ -549,7 +549,7 @@ def test_dtype(self, vsa):
emb = embeddings.Sinusoid(in_features, out_features, vsa=vsa)
x = torch.randn(1, in_features)
if vsa == "MAP" or vsa == "HRR":
- assert emb(x).dtype == torch.float
+ assert emb(x).dtype == torch.get_default_dtype()
else:
return
@@ -611,7 +611,7 @@ def test_dtype(self, vsa):
if vsa == "BSC":
assert emb(x).dtype == torch.bool
elif vsa == "MAP":
- assert emb(x).dtype == torch.float
+ assert emb(x).dtype == torch.get_default_dtype()
elif vsa == "FHRR":
assert emb(x).dtype == torch.complex64 or emb(x).dtype == torch.complex32
else:
@@ -664,9 +664,9 @@ def test_default_dtype(self, vsa):
assert y.shape == (2, dimensions)
if vsa == "HRR":
- assert y.dtype == torch.float32
+ assert y.dtype == torch.get_default_dtype()
elif vsa == "FHRR":
- assert y.dtype == torch.complex64
+ assert fhrr_type_conversion[y.dtype] == torch.get_default_dtype()
else:
return
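The FHRR branch leans on a fhrr_type_conversion helper defined elsewhere in the test module; presumably it maps a complex dtype to its real component dtype, along the lines of:

    # hypothetical shape of the helper, not shown in this patch
    fhrr_type_conversion = {
        torch.complex64: torch.float32,
        torch.complex128: torch.float64,
    }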
diff --git a/torchhd/tests/test_encodings.py b/torchhd/tests/test_encodings.py
index 927993b3..af205bb9 100644
--- a/torchhd/tests/test_encodings.py
+++ b/torchhd/tests/test_encodings.py
@@ -141,10 +141,6 @@ def test_dtype(self, dtype):
hv = torch.zeros(23, 1000, dtype=dtype).as_subclass(MAPTensor)
if dtype in {torch.float16}:
- # torch.product is not implemented on CPU for these dtypes
- with pytest.raises(RuntimeError):
- functional.multibind(hv)
-
return
res = functional.multibind(hv)
@@ -288,10 +284,6 @@ def test_dtype(self, dtype):
hv = torch.zeros(23, 1000, dtype=dtype).as_subclass(MAPTensor)
if dtype in {torch.float16}:
- # torch.product is not implemented on CPU for these dtypes
- with pytest.raises(RuntimeError):
- functional.multibind(hv)
-
return
res = functional.bind_sequence(hv)
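Presumably these expectations were dropped because recent PyTorch builds implement the product reduction for float16 on CPU, so the call simply succeeds there; under that assumption (and reusing the test module's imports):

    hv = torch.zeros(23, 1000, dtype=torch.float16).as_subclass(MAPTensor)
    functional.multibind(hv)  # no longer expected to raise on recent PyTorch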
diff --git a/torchhd/tests/test_models.py b/torchhd/tests/test_models.py
index e4226f50..a721a31f 100644
--- a/torchhd/tests/test_models.py
+++ b/torchhd/tests/test_models.py
@@ -82,6 +82,16 @@ def test_add_online(self):
logits = model(samples)
assert logits.shape == (10, 3)
+ def test_add_adapt(self):
+ samples = torch.randn(10, 12)
+ targets = torch.randint(0, 3, (10,))
+
+ model = models.Centroid(12, 3)
+ model.add_adapt(samples, targets)
+
+ logits = model(samples)
+ assert logits.shape == (10, 3)
+
class TestIntRVFL:
@pytest.mark.parametrize("dtype", torch_dtypes)
@@ -103,7 +113,9 @@ def test_initialization(self, dtype):
assert model.weight.device.type == device.type
def test_fit_ridge_regression(self):
- samples = torch.eye(10, 12)
+ a = torch.randn(10)
+ b = torch.randn(12)
+ samples = torch.outer(a, b)
targets = torch.arange(10)
model = models.IntRVFL(12, 1245, 10)
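The outer product makes the fixture rank-1, so the normal matrix in ridge regression is singular without its regularization term, a stricter exercise than the full-rank torch.eye(10, 12) it replaces:

    import torch

    samples = torch.outer(torch.randn(10), torch.randn(12))
    torch.linalg.matrix_rank(samples)  # tensor(1): singular but for the ridge term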
diff --git a/torchhd/tests/test_similarities.py b/torchhd/tests/test_similarities.py
index eb104885..96f28717 100644
--- a/torchhd/tests/test_similarities.py
+++ b/torchhd/tests/test_similarities.py
@@ -118,7 +118,7 @@ def test_value(self, vsa, dtype):
).as_subclass(BSCTensor)
res = functional.dot_similarity(hv, hv)
- exp = torch.tensor([[10, 4], [4, 10]], dtype=torch.long)
+ exp = torch.tensor([[10, 4], [4, 10]], dtype=res.dtype)
assert torch.all(res == exp).item()
elif vsa == "FHRR":
@@ -339,7 +339,7 @@ def test_value(self, vsa, dtype):
).as_subclass(BSCTensor)
res = functional.cosine_similarity(hv, hv)
- exp = torch.tensor([[1, 0.4], [0.4, 1]], dtype=torch.float)
+ exp = torch.tensor([[1, 0.4], [0.4, 1]], dtype=res.dtype)
assert torch.allclose(res, exp)
elif vsa == "FHRR":
@@ -388,7 +388,7 @@ def test_value(self, vsa, dtype):
).as_subclass(MAPTensor)
res = functional.cosine_similarity(hv, hv)
- exp = torch.tensor([[1, -0.4], [-0.4, 1]], dtype=torch.float)
+ exp = torch.tensor([[1, -0.4], [-0.4, 1]], dtype=res.dtype)
assert torch.allclose(res, exp)
@pytest.mark.parametrize("vsa", vsa_tensors)
@@ -529,7 +529,7 @@ def test_value(self, vsa, dtype):
).as_subclass(BSCTensor)
res = functional.hamming_similarity(hv, hv)
- exp = torch.tensor([[10, 7], [7, 10]], dtype=torch.long)
+ exp = torch.tensor([[10, 7], [7, 10]], dtype=res.dtype)
assert torch.all(res == exp).item()
elif vsa == "FHRR":