diff --git a/equilib/__init__.py b/equilib/__init__.py
index 4ea94725..d4b79f69 100644
--- a/equilib/__init__.py
+++ b/equilib/__init__.py
@@ -4,6 +4,8 @@
 from equilib.equi2cube.base import Equi2Cube, equi2cube
 from equilib.equi2equi.base import Equi2Equi, equi2equi
 from equilib.equi2pers.base import Equi2Pers, equi2pers
+from equilib.equi2ico.base import Equi2Ico, equi2ico
+from equilib.ico2equi.base import Ico2Equi, ico2equi
 from equilib.info import __version__  # noqa
 
 __all__ = [
@@ -11,8 +13,12 @@
     "Equi2Cube",
     "Equi2Equi",
     "Equi2Pers",
+    "Equi2Ico",
+    "Ico2Equi",
     "cube2equi",
     "equi2cube",
     "equi2equi",
     "equi2pers",
+    "equi2ico",
+    "ico2equi",
 ]
diff --git a/equilib/equi2ico/__init__.py b/equilib/equi2ico/__init__.py
new file mode 100644
index 00000000..5f7ce86a
--- /dev/null
+++ b/equilib/equi2ico/__init__.py
@@ -0,0 +1 @@
+#!/usr/bin/env python3
\ No newline at end of file
diff --git a/equilib/equi2ico/base.py b/equilib/equi2ico/base.py
new file mode 100644
index 00000000..e6fd67ac
--- /dev/null
+++ b/equilib/equi2ico/base.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+
+from typing import Dict, List, Union
+
+import numpy as np
+
+import torch
+
+from .numpy import (
+    run as run_numpy,
+)
+from .torch import (
+    run as run_torch,
+)
+
+__all__ = ["Equi2Ico", "equi2ico"]
+
+ArrayLike = Union[np.ndarray, torch.Tensor]
+SubLvl = Union[int, List[int]]
+IcoMaps = Union[
+    # single
+    np.ndarray,
+    torch.Tensor,
+    # single 'list'
+    List[np.ndarray],
+    List[torch.Tensor],
+    # batch 'list'
+    List[List[np.ndarray]],
+    List[List[torch.Tensor]],
+    # single 'dict'
+    Dict[int, np.ndarray],
+    Dict[int, torch.Tensor],
+    # batch 'dict'
+    List[Dict[int, np.ndarray]],
+    List[Dict[int, torch.Tensor]],
+]
+
+
+class Equi2Ico(object):
+    """
+    params:
+    - w_face (int): icosahedron face width
+    - fov_x (float): fov of horizontal axis in degrees
+    - sub_level (int, list[int]): icosahedron subdivision level
+    - ico_format (str): ("dict", "list")
+    - mode (str)
+
+    inputs:
+    - equi (np.ndarray, torch.Tensor)
+
+    returns:
+    - ico_faces (np.ndarray, torch.Tensor, list, dict)
+    """
+
+    def __init__(
+        self,
+        w_face: int,
+        fov_x: float,
+        sub_level: SubLvl,
+        ico_format: str,
+        mode: str = "bilinear",
+    ) -> None:
+        self.w_face = w_face
+        self.fov_x = fov_x
+        self.sub_level = sub_level
+        self.ico_format = ico_format
+        self.mode = mode
+
+    def __call__(
+        self,
+        equi: ArrayLike,
+    ) -> IcoMaps:
+        return equi2ico(
+            equi=equi,
+            w_face=self.w_face,
+            fov_x=self.fov_x,
+            sub_level=self.sub_level,
+            ico_format=self.ico_format,
+            mode=self.mode,
+        )
+
+
+def equi2ico(
+    equi: ArrayLike,
+    w_face: int,
+    fov_x: float,
+    sub_level: SubLvl,
+    ico_format: str,
+    mode: str = "bilinear",
+    **kwargs,
+) -> IcoMaps:
+    """
+    params:
+    - equi (np.ndarray, torch.Tensor)
+    - w_face (int): icosahedron face width
+    - fov_x (float): fov of horizontal axis in degrees
+    - sub_level (int, list[int]): icosahedron subdivision level
+    - ico_format (str): ("dict", "list")
+    - mode (str)
+
+    returns:
+    - ico_faces (np.ndarray, torch.Tensor, dict, list)
+    """
+
+    _type = None
+    if isinstance(equi, np.ndarray):
+        _type = "numpy"
+    elif torch.is_tensor(equi):
+        _type = "torch"
+    else:
+        raise ValueError
+
+    is_single = False
+    if len(equi.shape) == 3 and isinstance(sub_level, int):
+        # probably the input was a single image
+        equi = equi[None, ...]
+        sub_level = [sub_level]
+        is_single = True
+    elif len(equi.shape) == 3:
+        # probably a batch of grayscale images
+        equi = equi[:, None, ...]
+
+    assert isinstance(sub_level, list), "ERR: sub_level is not a list"
+    if _type == "numpy":
+        out = run_numpy(
+            equi=equi,
+            sub_level=sub_level,
+            w_face=w_face,
+            fov_x=fov_x,
+            ico_format=ico_format,
+            mode=mode,
+            **kwargs,
+        )
+    elif _type == "torch":
+        out = run_torch(
+            equi=equi,
+            sub_level=sub_level,
+            w_face=w_face,
+            fov_x=fov_x,
+            ico_format=ico_format,
+            mode=mode,
+            **kwargs,
+        )
+    else:
+        raise ValueError
+
+    # make sure that the output batch dim is removed if it's only a single icomap
+    if is_single:
+        out = out[0]
+
+    return out
\ No newline at end of file
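
The `equi2ico` entry point above dispatches on the input type and batches single inputs automatically. A minimal usage sketch (a random array stands in for a real 2:1 equirectangular image; `w_face`, `fov_x`, and the subdivision level are illustrative values):

import numpy as np

from equilib import equi2ico

equi = np.random.rand(3, 512, 1024).astype(np.float32)  # (c, h, w), values in [0, 1]

# project onto the 20 faces of a subdivision-level-0 icosahedron
faces = equi2ico(
    equi,
    w_face=256,
    fov_x=45.0,
    sub_level=0,        # 20 * 4**0 = 20 faces
    ico_format="dict",  # {face_index: (c, w_face, w_face) array}
    mode="bilinear",
)
print(len(faces), faces[0].shape)  # 20 (3, 256, 256)
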
diff --git a/equilib/equi2ico/numpy.py b/equilib/equi2ico/numpy.py
new file mode 100644
index 00000000..3c7fa9f3
--- /dev/null
+++ b/equilib/equi2ico/numpy.py
@@ -0,0 +1,288 @@
+#!/usr/bin/env python3
+
+from functools import lru_cache
+from typing import Any, Callable, Dict, List, Tuple, Optional, Union
+
+import numpy as np
+
+from equilib.grid_sample import numpy_grid_sample
+from equilib.numpy_utils import (
+    create_global2camera_rotation_matrix,
+    create_grid,
+    create_intrinsic_matrix,
+    create_rotation_matrices,
+    calculate_tangent_rots,
+)
+
+
+def ico2dict(icos: np.ndarray) -> Dict[int, np.ndarray]:
+    ico_dict = {}
+    for i, ico in enumerate(icos):
+        ico_dict[i] = ico
+    return ico_dict
+
+
+@lru_cache(maxsize=128)
+def create_cam2global_matrix(
+    height: int,
+    width: int,
+    fov_x: float,
+    skew: float = 0.0,
+    dtype: np.dtype = np.dtype(np.float32),
+) -> np.ndarray:
+
+    K = create_intrinsic_matrix(
+        height=height,
+        width=width,
+        fov_x=fov_x,
+        skew=skew,
+        dtype=dtype,
+    )
+    g2c_rot = create_global2camera_rotation_matrix(
+        dtype=dtype,
+    )
+
+    # FIXME: change to a faster inverse
+    K_inv = np.linalg.inv(K)
+
+    return g2c_rot @ K_inv
+
+
+def prep_matrices(
+    height: int,
+    width: int,
+    batch: int,
+    fov_x: float,
+    skew: float = 0.0,
+    dtype: np.dtype = np.dtype(np.float32),
+) -> Tuple[np.ndarray, np.ndarray]:
+
+    m = create_grid(
+        height=height,
+        width=width,
+        batch=batch,
+        dtype=dtype,
+    )
+    m = m[..., np.newaxis]
+    G = create_cam2global_matrix(
+        height=height,
+        width=width,
+        fov_x=fov_x,
+        skew=skew,
+        dtype=dtype,
+    )
+
+    return m, G
+
+
+def matmul(
+    m: np.ndarray,
+    G: np.ndarray,
+    R: np.ndarray,
+    method: str = "faster",
+) -> np.ndarray:
+
+    if method == "robust":
+        # when the target image size is small, `matmul` might be faster,
+        # but not by much
+        M = np.matmul(np.matmul(R, G)[:, np.newaxis, np.newaxis, ...], m)
+    elif method == "faster":
+        # `einsum` is probably the fastest, but it might not be as accurate:
+        # it is really close for float64 but loses precision for float32,
+        # so this is a trade-off between precision and speed;
+        # around x3 ~ x10 faster (faster when the batch size is high)
+        batch_size = m.shape[0]
+        M = np.empty_like(m)
+        C = np.einsum("bik,kj->bij", R, G, optimize=True)
+        for b in range(batch_size):
+            M[b, ...] = np.einsum(
+                "ik,...kj->...ij", C[b, ...], m[b, ...], optimize=True
+            )
+    else:
+        raise ValueError(f"ERR: {method} is not supported")
+
+    M = M.squeeze(-1)
+    return M
+
+
+def convert_grid(
+    M: np.ndarray,
+    h_equi: int,
+    w_equi: int,
+    method: str = "robust",
+) -> np.ndarray:
+
+    # convert to rotation
+    phi = np.arcsin(M[..., 2] / np.linalg.norm(M, axis=-1))
+    theta = np.arctan2(M[..., 1], M[..., 0])
+
+    if method == "robust":
+        # convert to pixel
+        # doing it all at once was tried, but it was faster separately
+        ui = (theta - np.pi) * w_equi / (2 * np.pi)
+        uj = (phi - np.pi / 2) * h_equi / np.pi
+        ui += 0.5
+        uj += 0.5
+        ui %= w_equi
+        uj %= h_equi
+    elif method == "faster":
+        # NOTE: this asserts that theta and phi are in range:
+        # the range of theta is -pi ~ pi
+        # the range of phi is -pi/2 ~ pi/2
+        # this means that if the input `rots` have rotations that are
+        # out of range, it will not work with `faster`
+        ui = (theta - np.pi) * w_equi / (2 * np.pi)
+        uj = (phi - np.pi / 2) * h_equi / np.pi
+        ui += 0.5
+        uj += 0.5
+        ui = np.where(ui < 0, ui + w_equi, ui)
+        ui = np.where(ui >= w_equi, ui - w_equi, ui)
+        uj = np.where(uj < 0, uj + h_equi, uj)
+        uj = np.where(uj >= h_equi, uj - h_equi, uj)
+    else:
+        raise ValueError(f"ERR: {method} is not supported")
+
+    # stack the pixel maps into a grid
+    grid = np.stack((uj, ui), axis=-3)
+
+    return grid
+ + """ + + # NOTE: Assume that the inputs `equi` and `rots` are already batched up + assert ( + len(equi.shape) == 4 + ), f"ERR: input `equi` should be 4-dim (b, c, h, w), but got {len(equi.shape)}" + assert len(equi) == len( + sub_level + ), f"ERR: batch size of equi and rot differs: {len(equi)} vs {len(sub_level)}" + + equi_dtype = equi.dtype + assert equi_dtype in (np.uint8, np.float32, np.float64), ( + f"ERR: input equirectangular image has dtype of {equi_dtype}\n" + f"which is incompatible: try {(np.uint8, np.float32, np.float64)}" + ) + + # NOTE: we don't want to use uint8 as output array's dtype yet since + # floating point calculations (matmul, etc) are faster + # NOTE: we are also assuming that uint8 is in range of 0-255 (obviously) + # and float is in range of 0.0-1.0; later we will refine it + # NOTE: for the sake of consistency, we will try to use the same dtype as equi + dtype = ( + np.dtype(np.float32) if equi_dtype == np.dtype(np.uint8) else equi_dtype + ) + assert dtype in (np.float32, np.float64), ( + f"ERR: argument `dtype` is {dtype} which is incompatible:\n" + f"try {(np.float32, np.float64)}" + ) + + rots = calculate_tangent_rots( + subdivision_level=sub_level + ) + skew = 0.0 + z_down = False + + bs, c, h_equi, w_equi = equi.shape + + out_batch = [None for _ in range(bs)] + + # iterate over each input image + for bn, (rot, img) in enumerate(zip(rots, equi)): + # number of icosahedron faces + fn = len(rot) + # initialize output array + out = np.empty((fn, c, w_face, w_face), dtype=dtype) + + # create grid and transfrom matrix + m, G = prep_matrices( + height=w_face, + width=w_face, + batch=fn, + fov_x=fov_x, + skew=skew, + dtype=dtype, + ) + + # create batched rotation matrices + R = create_rotation_matrices( + rots=rot, + z_down=z_down, + dtype=dtype, + ) + + # rotate and transform the grid + M = matmul(m, G, R) + + # create a pixel map grid + grid = convert_grid( + M=M, + h_equi=h_equi, + w_equi=w_equi, + method="robust", + ) + + # grid sample + func = ( + override_func + if override_func is not None + else numpy_grid_sample + ) + + # iterate image transformation over all grids + for i, grid in enumerate(grid): + img_b = img[None, ...] + grid = grid[None, ...] 
+            out[i] = func(  # type: ignore
+                img=img_b,
+                grid=grid,
+                out=out[None, i, ...],
+                mode=mode,
+            ).squeeze()
+
+        out = (
+            out.astype(equi_dtype)
+            if equi_dtype == np.dtype(np.uint8)
+            else np.clip(out, 0.0, 1.0)
+        )
+
+        # reformat the output
+        if ico_format == 'dict':
+            out = ico2dict(out)
+
+        out_batch[bn] = out
+
+    return out_batch
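
The numpy runner accepts an `override_func` (forwarded through `equi2ico`'s `**kwargs`) in place of `numpy_grid_sample`; its call signature has to match the `img`/`grid`/`out`/`mode` keywords used in the sampling loop above. A sketch with a hand-rolled nearest-neighbor sampler (the helper below is hypothetical, not part of the library):

import numpy as np

from equilib import equi2ico

def nearest_sample(img: np.ndarray, grid: np.ndarray, out: np.ndarray, mode: str) -> np.ndarray:
    # `grid` stacks (uj, ui) pixel coordinates along axis 1; round and gather
    uj = np.rint(grid[:, 0, ...]).astype(int) % img.shape[-2]
    ui = np.rint(grid[:, 1, ...]).astype(int) % img.shape[-1]
    for b in range(grid.shape[0]):
        out[b] = img[b][:, uj[b], ui[b]]
    return out

equi = np.random.rand(3, 512, 1024).astype(np.float32)
faces = equi2ico(
    equi,
    w_face=128,
    fov_x=45.0,
    sub_level=0,
    ico_format="list",
    mode="bilinear",  # ignored by the override above
    override_func=nearest_sample,
)
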
diff --git a/equilib/equi2ico/torch.py b/equilib/equi2ico/torch.py
new file mode 100644
index 00000000..e4c1889b
--- /dev/null
+++ b/equilib/equi2ico/torch.py
@@ -0,0 +1,329 @@
+
+from functools import lru_cache
+from typing import Dict, List, Union, Tuple
+
+import torch
+
+from equilib.grid_sample import torch_grid_sample
+from equilib.torch_utils import (
+    create_global2camera_rotation_matrix,
+    create_grid,
+    create_intrinsic_matrix,
+    create_rotation_matrices,
+    get_device,
+    calculate_tangent_rots,
+)
+
+
+def ico2dict(icos: torch.Tensor) -> Dict[int, torch.Tensor]:
+    ico_dict = {}
+    for i, ico in enumerate(icos):
+        ico_dict[i] = ico
+    return ico_dict
+
+
+@lru_cache(maxsize=128)
+def create_cam2global_matrix(
+    height: int,
+    width: int,
+    fov_x: float,
+    skew: float = 0.0,
+    dtype: torch.dtype = torch.float32,
+    device: torch.device = torch.device("cpu"),
+) -> torch.Tensor:
+
+    K = create_intrinsic_matrix(
+        height=height,
+        width=width,
+        fov_x=fov_x,
+        skew=skew,
+        dtype=dtype,
+        device=device,
+    )
+    g2c_rot = create_global2camera_rotation_matrix(
+        dtype=dtype,
+        device=device,
+    )
+
+    return g2c_rot @ K.inverse()
+
+
+def prep_matrices(
+    height: int,
+    width: int,
+    batch: int,
+    fov_x: float,
+    skew: float = 0.0,
+    dtype: torch.dtype = torch.float32,
+    device: torch.device = torch.device("cpu"),
+) -> Tuple[torch.Tensor, torch.Tensor]:
+
+    m = create_grid(
+        height=height,
+        width=width,
+        batch=batch,
+        dtype=dtype,
+        device=device,
+    )
+    m = m.unsqueeze(-1)
+    G = create_cam2global_matrix(
+        height=height,
+        width=width,
+        fov_x=fov_x,
+        skew=skew,
+        dtype=dtype,
+        device=device,
+    )
+
+    return m, G
+
+
+def matmul(
+    m: torch.Tensor,
+    G: torch.Tensor,
+    R: torch.Tensor,
+) -> torch.Tensor:
+
+    M = torch.matmul(torch.matmul(R, G)[:, None, None, ...], m)
+    M = M.squeeze(-1)
+
+    return M
+
+
+def convert_grid(
+    M: torch.Tensor,
+    h_equi: int,
+    w_equi: int,
+    method: str = "robust",
+    device: torch.device = torch.device("cpu"),
+) -> torch.Tensor:
+
+    # convert to rotation
+    phi = torch.asin(M[..., 2] / torch.norm(M, dim=-1))
+    theta = torch.atan2(M[..., 1], M[..., 0])
+    pi = torch.Tensor([3.14159265358979323846]).to(device=device)
+
+    if method == "robust":
+        ui = (theta - pi) * w_equi / (2 * pi)
+        uj = (phi - pi / 2) * h_equi / pi
+        ui += 0.5
+        uj += 0.5
+        ui %= w_equi
+        uj %= h_equi
+    elif method == "faster":
+        ui = (theta - pi) * w_equi / (2 * pi)
+        uj = (phi - pi / 2) * h_equi / pi
+        ui += 0.5
+        uj += 0.5
+        ui = torch.where(ui < 0, ui + w_equi, ui)
+        ui = torch.where(ui >= w_equi, ui - w_equi, ui)
+        uj = torch.where(uj < 0, uj + h_equi, uj)
+        uj = torch.where(uj >= h_equi, uj - h_equi, uj)
+    else:
+        raise ValueError(f"ERR: {method} is not supported")
+
+    # stack the pixel maps into a grid
+    grid = torch.stack((uj, ui), dim=-3)
+
+    return grid
+
+
+def run(
+    equi: torch.Tensor,
+    sub_level: List[int],
+    w_face: int,
+    fov_x: float,
+    ico_format: str,
+    mode: str,
+    backend: str = "native",
+) -> Union[List[torch.Tensor], List[Dict[int, torch.Tensor]]]:
+    """Run Equi2Ico
+
+    params:
+    - equi (torch.Tensor): 4 dims (b, c, h, w)
+    - sub_level (List[int]): list of subdivision levels
+    - w_face (int): icosahedron face width
+    - fov_x (float): fov of horizontal axis in degrees
+    - ico_format (str): ("dict", "list")
+    - mode (str): sampling mode for grid_sample
+    - backend (str): backend for `torch_grid_sample`
+
+    return:
+    - out (List[torch.Tensor], List[dict])
+
+    NOTE: acceptable dtypes for `equi` are currently uint8, float16, float32,
+    and float64. Floats are preferred since the calculations are done with floats.
+
+    NOTE: the output tensor has the same dtype as `equi`
+    """
+
+    # NOTE: Assume that the inputs `equi` and `sub_level` are already batched up
+    assert (
+        len(equi.shape) == 4
+    ), f"ERR: input `equi` should be 4-dim (b, c, h, w), but got {len(equi.shape)}"
+    assert len(equi) == len(
+        sub_level
+    ), f"ERR: batch size of equi and sub_level differs: {len(equi)} vs {len(sub_level)}"
+
+    equi_dtype = equi.dtype
+    assert equi_dtype in (
+        torch.uint8,
+        torch.float16,
+        torch.float32,
+        torch.float64,
+    ), (
+        f"ERR: input equirectangular image has dtype of {equi_dtype} which is\n"
+        f"incompatible: try {(torch.uint8, torch.float16, torch.float32, torch.float64)}"
+    )
+
+    # NOTE: we don't want to use uint8 as the output array's dtype yet since
+    # floating point calculations (matmul, etc) are faster
+    # NOTE: we are also assuming that uint8 is in range of 0-255 (obviously)
+    # and float is in range of 0.0-1.0; later we will refine it
+    # NOTE: for the sake of consistency, we will try to use the same dtype as equi
+    if equi.device.type == "cuda":
+        dtype = torch.float32 if equi_dtype == torch.uint8 else equi_dtype
+        assert dtype in (torch.float16, torch.float32, torch.float64), (
+            f"ERR: argument `dtype` is {dtype} which is incompatible:\n"
+            f"try {(torch.float16, torch.float32, torch.float64)}"
+        )
+    else:
+        # NOTE: for cpu, it can't use half-precision
+        dtype = torch.float32 if equi_dtype == torch.uint8 else equi_dtype
+        assert dtype in (torch.float32, torch.float64), (
+            f"ERR: argument `dtype` is {dtype} which is incompatible:\n"
+            f"try {(torch.float32, torch.float64)}"
+        )
+    if backend == "native" and equi_dtype == torch.uint8:
+        # FIXME: hacky way of dealing with images that are uint8 when using
+        # torch.grid_sample
+        equi = equi.type(torch.float32)
+
+    bs, c, h_equi, w_equi = equi.shape
+    img_device = get_device(equi)
+    cpu_device = torch.device("cpu")
+
+    # FIXME: for now, calculate the grid on the cpu;
+    # performance when the grid is created on cuda still needs benchmarking
+    if equi.device.type == "cuda" and dtype == torch.float16:
+        tmp_dtype = torch.float32
+    else:
+        tmp_dtype = dtype
+
+    rots = calculate_tangent_rots(
+        subdivision_level=sub_level,
+        device=img_device,
+    )
+    skew = 0.0
+    z_down = False
+
+    out_batch = [None for _ in range(bs)]
+
+    for bn, (rot, img) in enumerate(zip(rots, equi)):
+        # number of icosahedron faces
+        fn = len(rot)
+
+        # switch device in case the whole batch can't be allocated on the gpu;
+        # added to avoid out-of-memory errors at high subdivision levels
+        device = img_device
+        if img_device.type == "cuda":
+            # size of one element in bytes
+            mem_dtype_size = 4 if tmp_dtype == torch.float32 else 8
+            mem_reserved = torch.cuda.memory_reserved(img_device)
+            mem_allocated = torch.cuda.memory_allocated(img_device)
+            mem_available = mem_reserved - mem_allocated
+            batch_mem_alloc_size = fn * c * w_face * w_face * mem_dtype_size
+            if mem_available <= batch_mem_alloc_size:
+                device = cpu_device
+
+        out = torch.empty(
+            (fn, c, w_face, w_face), dtype=dtype, device=device
+        )
+
+        # create the grid and transform matrix
+        m, G = prep_matrices(
+            height=w_face,
+            width=w_face,
+            batch=fn,
+            fov_x=fov_x,
+            skew=skew,
+            dtype=tmp_dtype,
+            device=device,
+        )
+
+        # create batched rotation matrices
+        R = create_rotation_matrices(
+            rots=rot,
+            z_down=z_down,
+            dtype=tmp_dtype,
+            device=device,
+        )
+
+        # NOTE: when using the gpu, `matmul` can fail with a CUDA
+        # out-of-memory error, e.g.:
+        #
+        # RuntimeError: CUDA out of memory.
+        # Tried to allocate 720.00 MiB
+        # (GPU 0; 2.00 GiB total capacity;
+        # 486.01 MiB already allocated;
+        # 632.93 MiB free;
+        # 502.00 MiB reserved
+        # in total by PyTorch)
+
+        # m = m.to(cpu_device)
+        # G = G.to(cpu_device)
+        # R = R.to(cpu_device)
+
+        # rotate and transform the grid
+        M = matmul(m, G, R)  # .to(device)
+
+        # create a pixel map grid
+        grids = convert_grid(
+            M=M,
+            h_equi=h_equi,
+            w_equi=w_equi,
+            method="robust",
+            device=device,
+        )
+
+        # if backend == "native":
+        #     grids = grids.to(img_device)
+        # FIXME: putting `grid` on the device since `pure`'s bilinear interpolation requires it
+        # FIXME: a better way of forcing `grid` to be the same dtype?
+        if equi.dtype != grids.dtype:
+            grids = grids.type(equi.dtype)
+        if equi.device != grids.device:
+            grids = grids.to(equi.device)
+
+        # iterate the image transformation over all grids
+        for i, grid in enumerate(grids):
+            img_b = img[None, ...]
+            grid = grid[None, ...]
+            out[i] = torch.squeeze(
+                torch_grid_sample(  # type: ignore
+                    img=img_b,
+                    grid=grid,
+                    out=out[None, i, ...],
+                    mode=mode,
+                    backend=backend,
+                )
+            )
+
+        out = (
+            out.type(equi_dtype)
+            if equi_dtype == torch.uint8
+            else torch.clip(out, 0.0, 1.0)
+        )
+
+        # reformat the output
+        if ico_format == 'dict':
+            out = ico2dict(out)
+
+        out_batch[bn] = out.to(cpu_device)
+        # torch.cuda.empty_cache()
+
+    return out_batch
\ No newline at end of file
diff --git a/equilib/ico2equi/__init__.py b/equilib/ico2equi/__init__.py
new file mode 100644
index 00000000..5f7ce86a
--- /dev/null
+++ b/equilib/ico2equi/__init__.py
@@ -0,0 +1 @@
+#!/usr/bin/env python3
\ No newline at end of file
diff --git a/equilib/ico2equi/base.py b/equilib/ico2equi/base.py
new file mode 100644
index 00000000..a0df6720
--- /dev/null
+++ b/equilib/ico2equi/base.py
@@ -0,0 +1,165 @@
+#!/usr/bin/env python3
+
+from typing import Dict, List, Union
+
+import numpy as np
+import torch
+
+from .numpy import (
+    run as run_numpy,
+    dict2list as dict2list_numpy,
+)
+from .torch import (
+    run as run_torch,
+    dict2list as dict2list_torch,
+)
+
+__all__ = ["Ico2Equi", "ico2equi"]
+
+ArrayLike = Union[np.ndarray, torch.Tensor]
+IcoMaps = Union[
+    # single
+    np.ndarray,
+    torch.Tensor,
+    # single 'list'
+    List[np.ndarray],
+    List[torch.Tensor],
+    # batch 'list'
+    List[List[np.ndarray]],
+    List[List[torch.Tensor]],
+    # single 'dict'
+    Dict[int, np.ndarray],
+    Dict[int, torch.Tensor],
+    # batch 'dict'
+    List[Dict[int, np.ndarray]],
+    List[Dict[int, torch.Tensor]],
+]
+
+
+class Ico2Equi(object):
+    """
+    params:
+    - height, width (int): equirectangular image size
+    - fov_x (float): fov of horizontal axis in degrees
+    - ico_format (str): input icosahedron format ("dict", "list")
+    - mode (str): interpolation mode, defaults to "bilinear"
+
+    inputs:
+    - icomap (dict, list)
+
+    returns:
+    - equi (np.ndarray, torch.Tensor)
+    """
+
+    def __init__(
+        self,
+        height: int,
+        width: int,
+        fov_x: float,
+        ico_format: str,
+        mode: str = "bilinear",
+    ) -> None:
+        self.height = height
+        self.width = width
+        self.fov_x = fov_x
+        self.ico_format = ico_format
+        self.mode = mode
+
+    def __call__(
+        self,
+        icomap: IcoMaps,
+        **kwargs,
+    ) -> ArrayLike:
+        return ico2equi(
+            icomap=icomap,
+            ico_format=self.ico_format,
+            width=self.width,
+            height=self.height,
+            fov_x=self.fov_x,
+            mode=self.mode,
+            **kwargs,
+        )
+
+
+def ico2equi(
+    icomap: IcoMaps,
+    ico_format: str,
+    height: int,
+    width: int,
+    fov_x: float,
+    mode: str = "bilinear",
+    **kwargs,
+) -> ArrayLike:
+    """
+    params:
+    - icomap
+    - ico_format (str): ("dict", "list")
+    - height, width (int): output size
+    - fov_x (float): fov of horizontal axis in degrees
+    - mode (str): "bilinear"
+
+    return:
+    - equi (np.ndarray, torch.Tensor)
+    """
+
+    assert width % 8 == 0 and height % 8 == 0
+
+    # Try and detect which type it is ("numpy" or "torch")
+    # FIXME: any cleaner way of detecting?
+    _type = None
+    if ico_format == "dict":
+        if isinstance(icomap, dict):
+            if isinstance(icomap[0], np.ndarray):
+                _type = "numpy"
+            elif isinstance(icomap[0], torch.Tensor):
+                _type = "torch"
+        elif isinstance(icomap, list):
+            assert isinstance(icomap[0], dict)
+            if isinstance(icomap[0][0], np.ndarray):
+                _type = "numpy"
+            elif isinstance(icomap[0][0], torch.Tensor):
+                _type = "torch"
+    elif ico_format == "list":
+        assert isinstance(icomap, list)
+        if isinstance(icomap[0], list):
+            if isinstance(icomap[0][0], np.ndarray):
+                _type = "numpy"
+            elif isinstance(icomap[0][0], torch.Tensor):
+                _type = "torch"
+        else:
+            if isinstance(icomap[0], np.ndarray):
+                _type = "numpy"
+            elif isinstance(icomap[0], torch.Tensor):
+                _type = "torch"
+    assert _type is not None, "ERR: input type is not numpy or torch"
+    if _type == "numpy":
+        # TODO: add in the future
+        # icomaps_batch = convert2batches(icomap, ico_format)
+        if ico_format == "dict":
+            icomap = dict2list_numpy(icomap)
+        out = run_numpy(
+            icomaps=icomap,
+            height=height,
+            width=width,
+            fov_x=fov_x,
+            mode=mode,
+            **kwargs,
+        )
+    elif _type == "torch":
+        if ico_format == "dict":
+            icomap = dict2list_torch(icomap)
+        out = run_torch(
+            icomaps=icomap,
+            height=height,
+            width=width,
+            fov_x=fov_x,
+            mode=mode,
+            **kwargs,
+        )
+    else:
+        raise ValueError("ERR: failed to dispatch ico2equi")
+
+    if out.shape[0] == 1:
+        out = out.squeeze(0)
+
+    return out
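
A minimal sketch of the `Ico2Equi` class added above, using the "list" format. It assumes the input is a batch (a list) of stacked face arrays of shape (fn, c, h, w), the layout `equi2ico` produces per batch item; sizes are illustrative, and note that `height` and `width` must be multiples of 8:

import numpy as np

from equilib import Ico2Equi

ico2equi_fn = Ico2Equi(
    height=512,
    width=1024,
    fov_x=45.0,
    ico_format="list",
    mode="bilinear",
)

faces = np.random.rand(20, 3, 256, 256).astype(np.float32)  # level-0 face stack
equi = ico2equi_fn([faces])  # batch of one
print(equi.shape)  # (3, 512, 1024)
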
diff --git a/equilib/ico2equi/numpy.py b/equilib/ico2equi/numpy.py
new file mode 100644
index 00000000..bdcb9ccf
--- /dev/null
+++ b/equilib/ico2equi/numpy.py
@@ -0,0 +1,233 @@
+#!/usr/bin/env python3
+
+from typing import Any, Callable, Dict, List, Optional, Union
+
+import collections
+
+import numpy as np
+
+from equilib.numpy_utils import calculate_tangent_angles
+
+from equilib.grid_sample import numpy_grid_sample
+
+__all__ = ["run"]
+
+
+def dict2list(batch: List[Dict[int, np.ndarray]]):
+    output = []
+    for d in batch:
+        od = collections.OrderedDict(sorted(d.items()))
+        output.extend(list(od.values()))
+    return output
+
+
+def ceil_max(a: np.ndarray):
+    """Round digits away from zero
+
+    For a > 0 returns the next higher integer, e.g. a = 1.1 -> output = 2.
+    For a < 0 returns the next lower integer, e.g. a = -0.1 -> output = -1.
+    """
+    return np.sign(a) * np.ceil(np.abs(a))
+
+
+def rodrigues(rot_vector: np.ndarray):
+    """Rodrigues transformation
+
+    https://docs.opencv.org/4.5.3/d9/d0c/group__calib3d.html#ga61585db663d9da06b68e70cfbf6a1eac
+    """
+    if rot_vector.shape == (3,):
+        # the input is a rotation vector
+        theta = np.linalg.norm(rot_vector)
+        i = np.eye(3)
+        if theta < 1e-9:
+            return i
+        r = rot_vector / theta
+        rr = np.tile(r, (3, 1)) * np.tile(r, (3, 1)).T
+        rmap = np.array([
+            [0, -r[2], r[1]],
+            [r[2], 0, -r[0]],
+            [-r[1], r[0], 0],
+        ])
+        return (
+            np.cos(theta) * i
+            + (1 - np.cos(theta)) * rr
+            + np.sin(theta) * rmap
+        ).astype(np.float32)
+    else:
+        # the input is a rotation matrix
+        R = rot_vector
+        r = np.array([R[2, 1] - R[1, 2], R[0, 2] - R[2, 0], R[1, 0] - R[0, 1]])
+        s = np.sqrt(np.sum(r ** 2) / 4)
+        c = (np.sum(np.eye(3) * R) - 1) / 2
+        c = np.clip(c, -1, 1)
+        theta_ = np.arccos(c)
+
+        if s < 1e-5:
+            if c > 0:
+                return np.zeros(3, np.float32)
+            t = (R[0, 0] + 1) / 2
+            r[0] = np.sqrt(np.max([t, 0]))
+            t = (R[1, 1] + 1) / 2
+            r[1] = np.sqrt(np.max([t, 0])) * ceil_max(R[0, 1])
+            t = (R[2, 2] + 1) / 2
+            r[2] = np.sqrt(np.max([t, 0])) * ceil_max(R[0, 2])
+            abs_r = np.abs(r)
+            abs_r -= abs_r[0]
+            if (abs_r[1] > 0) and (abs_r[2] > 0) and ((R[1, 2] > 0) != (r[1] * r[2] > 0)):
+                r[2] = -r[2]
+            theta_ /= np.linalg.norm(r)
+            r *= theta_
+        else:
+            vth = 1 / (2 * s) * theta_
+            r *= vth
+
+        return r.reshape(3, 1).astype(np.float32)
+
+
+def get_equirec(
+    img: np.ndarray,
+    fov_x: float,
+    theta: int,
+    phi: int,
+    height: int,
+    width: int,
+    mode: str,
+):
+    _img = img
+    _height, _width = _img.shape[1:]
+    wFOV = fov_x
+    THETA = theta
+    PHI = phi
+    hFOV = float(_height) / _width * fov_x
+
+    w_len = np.tan(np.radians(wFOV / 2.0))
+    h_len = np.tan(np.radians(hFOV / 2.0))
+
+    #
+    # THETA is the left/right angle, PHI is the up/down angle, both in degrees
+    #
+
+    x, y = np.meshgrid(np.linspace(-180, 180, width), np.linspace(90, -90, height))
+
+    x_map = np.cos(np.radians(x)) * np.cos(np.radians(y))
+    y_map = np.sin(np.radians(x)) * np.cos(np.radians(y))
+    z_map = np.sin(np.radians(y))
+
+    xyz = np.stack((x_map, y_map, z_map), axis=2)
+
+    y_axis = np.array([0.0, 1.0, 0.0], np.float32)
+    z_axis = np.array([0.0, 0.0, 1.0], np.float32)
+    R1 = rodrigues(z_axis * np.radians(THETA))
+    R2 = rodrigues(np.dot(R1, y_axis) * np.radians(-PHI))
+
+    R1 = np.linalg.inv(R1)
+    R2 = np.linalg.inv(R2)
+
+    xyz = xyz.reshape([height * width, 3]).T
+    xyz = np.dot(R2, xyz)
+    xyz = np.dot(R1, xyz).T
+
+    xyz = xyz.reshape([height, width, 3])
+    inverse_mask = np.where(xyz[:, :, 0] > 0, 1, 0)
+
+    xyz[:, :] = xyz[:, :] / np.repeat(xyz[:, :, 0][:, :, np.newaxis], 3, axis=2)
+
+    lon_map = np.where(
+        (-w_len < xyz[:, :, 1]) & (xyz[:, :, 1] < w_len)
+        & (-h_len < xyz[:, :, 2]) & (xyz[:, :, 2] < h_len),
+        (xyz[:, :, 1] + w_len) / 2 / w_len * _width,
+        0,
+    )
+    lat_map = np.where(
+        (-w_len < xyz[:, :, 1]) & (xyz[:, :, 1] < w_len)
+        & (-h_len < xyz[:, :, 2]) & (xyz[:, :, 2] < h_len),
+        (-xyz[:, :, 2] + h_len) / 2 / h_len * _height,
+        0,
+    )
+    mask = np.where(
+        (-w_len < xyz[:, :, 1]) & (xyz[:, :, 1] < w_len)
+        & (-h_len < xyz[:, :, 2]) & (xyz[:, :, 2] < h_len),
+        1,
+        0,
+    )
+
+    # sample the perspective face onto the equirectangular grid
+    grid = np.stack((lat_map, lon_map), axis=0)
+    out = np.empty((_img.shape[0], height, width), dtype=_img.dtype)
+    persp = numpy_grid_sample(
+        img=_img[None, ...],
+        grid=grid[None, ...],
+        out=out[None, ...],
+        mode=mode,
+    ).squeeze(0)
+
+    mask = mask * inverse_mask
+    mask = np.repeat(mask[np.newaxis, ...], _img.shape[0], axis=0)
+    persp = persp * mask
+
+    return persp, mask
+
+
+def run(
+    icomaps: List[np.ndarray],
+    height: int,
+    width: int,
+    fov_x: float,
+    mode: str,
+) -> np.ndarray:
+    """Run Ico2Equi
+
+    params:
+    - icomaps (List[np.ndarray])
+    - height, width (int): the output equirectangular image's size
+    - fov_x (float): fov of horizontal axis in degrees
+    - mode (str)
+
+    returns:
+    - equi (np.ndarray)
+
+    NOTE: we assume that each entry of `icomaps` is a 4-dim array
+    """
+
+    assert (
+        len(icomaps) >= 1 and len(icomaps[0].shape) == 4
+    ), f"ERR: `icomaps` entries should be 4-dim (fn, c, h, w), but got {icomaps[0].shape}"
+
+    icomaps_dtype = icomaps[0].dtype
+    assert icomaps_dtype in (np.uint8, np.float32, np.float64), (
+        f"ERR: input icomaps have dtype of {icomaps_dtype}\n"
+        f"which is incompatible: try {(np.uint8, np.float32, np.float64)}"
+    )
+
+    # NOTE: we don't want to use uint8 as the output array's dtype yet since
+    # floating point calculations (matmul, etc) are faster
+    # NOTE: we are also assuming that uint8 is in range of 0-255 (obviously)
+    # and float is in range of 0.0-1.0; later we will refine it
+    # NOTE: for the sake of consistency, we will try to use the same dtype as the input
+    dtype = (
+        np.dtype(np.float32)
+        if icomaps_dtype == np.dtype(np.uint8)
+        else icomaps_dtype
+    )
+    assert dtype in (np.float32, np.float64), (
+        f"ERR: argument `dtype` is {dtype} which is incompatible:\n"
+        f"try {(np.float32, np.float64)}"
+    )
+
+    bs, c = len(icomaps), icomaps[0].shape[1]
+
+    # initialize the output equi
+    out_batch = np.empty((bs, c, height, width), dtype=icomaps_dtype)
+    # calculate the subdivision level of the input batches
+    subdivision_levels = [int(np.log(icomap.shape[0] / 20) / np.log(4)) for icomap in icomaps]
+    # calculate the angles for the target subdivision levels
+    angles = calculate_tangent_angles(subdivision_levels)
+
+    for bn, (imgs, angle) in enumerate(zip(icomaps, angles)):
+        angle *= -1 * 180 / np.pi
+        out = np.empty((c, height, width), dtype=dtype)
+        # merge_image is the sum of the reconstructed images
+        # merge_mask is the sum of the latitude-longitude masks
+        merge_image = np.zeros((c, height, width))
+        merge_mask = np.zeros((c, height, width))
+        for img, [T, P] in zip(imgs, angle):
+            img, mask = get_equirec(img, fov_x, T, P, height, width, mode=mode)
+            merge_image += img
+            merge_mask += mask
+        # the resulting image is the sum of the images divided by the sum of the masks
+        merge_mask = np.where(merge_mask == 0, 1, merge_mask)
+        out = np.divide(merge_image, merge_mask)
+
+        out = (
+            out.astype(icomaps_dtype)
+            if icomaps_dtype == np.dtype(np.uint8)
+            else np.clip(out, 0.0, 1.0)
+        )
+        out_batch[bn] = out
+
+    return out_batch
diff --git a/equilib/ico2equi/torch.py b/equilib/ico2equi/torch.py
new file mode 100644
index 00000000..42635d7e
--- /dev/null
+++ b/equilib/ico2equi/torch.py
@@ -0,0 +1,253 @@
+#!/usr/bin/env python3
+
+from typing import Any, Callable, Dict, List, Optional, Union
+import collections
+
+import numpy as np
+import torch
+from torch import tensor
+from torch import cos, sin, tan, arccos, sqrt, sum
+from torch import deg2rad as radians
+
+from equilib.grid_sample import torch_grid_sample
+from equilib.torch_utils import (
+    get_device,
+    pi,
+    calculate_tangent_angles,
+)
+
+__all__ = ["dict2list", "run"]
+
+
+def dict2list(batch: List[Dict[int, torch.Tensor]]):
+    output = []
+    for d in batch:
+        od = collections.OrderedDict(sorted(d.items()))
+        output.extend(list(od.values()))
+    return output
+
+
+def ceil_max(a: torch.Tensor):
+    """Round digits away from zero"""
+    return torch.sign(a) * torch.ceil(torch.abs(a))
+
+
+def rodrigues(
+    rot_vector: torch.Tensor,
+    device: torch.device = torch.device("cpu"),
+):
+    """Rodrigues transformation (see the numpy implementation)"""
+    if len(rot_vector.shape) == 1:
+        # the input is a rotation vector
+        theta = torch.linalg.norm(rot_vector)
+        i = torch.eye(3, device=device)
+        if theta < 1e-9:
+            return i
+        r = rot_vector / theta
+        rr = r.tile((3, 1)) * r.tile((3, 1)).T
+        rmap = tensor([
+            [0, -r[2], r[1]],
+            [r[2], 0, -r[0]],
+            [-r[1], r[0], 0],
+        ], device=device)
+        return cos(theta) * i + (1 - cos(theta)) * rr + sin(theta) * rmap
+    else:
+        # the input is a rotation matrix
+        R = torch.clone(rot_vector)
+        r = tensor([R[2, 1] - R[1, 2], R[0, 2] - R[2, 0], R[1, 0] - R[0, 1]], device=device)
+        s = sqrt(sum(r ** 2) / 4)
+        c = (sum(torch.eye(3, device=device) * R) - 1) / 2
+        c = torch.clip(c, -1, 1)
+        theta_ = arccos(c)
+
+        if s < 1e-5:
+            if c > 0:
+                return torch.zeros(3, dtype=torch.float32, device=device)
+            t = (R[0, 0] + 1) / 2
+            r[0] = sqrt(torch.clamp(t, min=0))
+            t = (R[1, 1] + 1) / 2
+            r[1] = sqrt(torch.clamp(t, min=0)) * ceil_max(R[0, 1])
+            t = (R[2, 2] + 1) / 2
+            r[2] = sqrt(torch.clamp(t, min=0)) * ceil_max(R[0, 2])
+            abs_r = torch.abs(r)
+            abs_r -= abs_r[0]
+            if (abs_r[1] > 0) and (abs_r[2] > 0) and ((R[1, 2] > 0) != (r[1] * r[2] > 0)):
+                r[2] = -r[2]
+            theta_ /= torch.linalg.norm(r)
+            r *= theta_
+        else:
+            vth = 1 / (2 * s) * theta_
+            r *= vth
+
+        return r.reshape(3, 1)
+
+
+def get_equirec(
+    img: torch.Tensor,
+    fov_x: float,
+    theta: int,
+    phi: int,
+    height: int,
+    width: int,
+    device: torch.device = torch.device("cpu"),
+):
+    device = img.device
+    _img = img
+    _height = tensor(_img.shape[1], dtype=torch.float32, device=device)
+    _width = tensor(_img.shape[2], dtype=torch.float32, device=device)
+    wFOV = tensor(fov_x, dtype=torch.float32, device=device)
+    hFOV = _height / _width * fov_x
+
+    w_len = tan(radians(wFOV / 2.0))
+    h_len = tan(radians(hFOV / 2.0))
+
+    #
+    # THETA is the left/right angle, PHI is the up/down angle, both in degrees
+    #
+
+    x, y = torch.meshgrid(
+        torch.linspace(-180, 180, width, device=device),
+        torch.linspace(90, -90, height, device=device),
+    )
+
+    x_map = cos(radians(x)) * cos(radians(y))
+    y_map = sin(radians(x)) * cos(radians(y))
+    z_map = sin(radians(y))
+
+    xyz = torch.stack((x_map, y_map, z_map))
+    xyz = torch.permute(xyz, (2, 1, 0))
+
+    y_axis = tensor([0.0, 1.0, 0.0], dtype=torch.float32, device=device)
+    z_axis = tensor([0.0, 0.0, 1.0], dtype=torch.float32, device=device)
+    R1 = rodrigues(z_axis * radians(theta), device=device)
+    R2 = rodrigues(torch.mv(R1, y_axis) * radians(-phi), device=device)
+
+    R1 = torch.linalg.inv(R1)
+    R2 = torch.linalg.inv(R2)
+
+    xyz = xyz.reshape([height * width, 3]).T
+    xyz = torch.mm(R2, xyz)
+    xyz = torch.mm(R1, xyz).T
+
+    xyz = xyz.reshape([height, width, 3])
+    inverse_mask = torch.where(xyz[:, :, 0] > 0, 1, 0)
+
+    xyz[:, :] = xyz[:, :] / xyz[:, :, 0].unsqueeze(2).repeat(1, 1, 3)
+
+    lon_map = torch.where(
+        (-w_len < xyz[:, :, 1]) & (xyz[:, :, 1] < w_len)
+        & (-h_len < xyz[:, :, 2]) & (xyz[:, :, 2] < h_len),
+        (xyz[:, :, 1] + w_len) / 2 / w_len * _width,
+        torch.zeros_like(xyz[:, :, 1]),
+    )
+    lat_map = torch.where(
+        (-w_len < xyz[:, :, 1]) & (xyz[:, :, 1] < w_len)
+        & (-h_len < xyz[:, :, 2]) & (xyz[:, :, 2] < h_len),
+        (-xyz[:, :, 2] + h_len) / 2 / h_len * _height,
+        torch.zeros_like(xyz[:, :, 2]),
+    )
+    mask = torch.where(
+        (-w_len < xyz[:, :, 1]) & (xyz[:, :, 1] < w_len)
+        & (-h_len < xyz[:, :, 2]) & (xyz[:, :, 2] < h_len),
+        torch.ones_like(xyz[:, :, 1]),
+        torch.zeros_like(xyz[:, :, 1]),
+    )
+
+    # sample the perspective face onto the equirectangular grid
+    grid = torch.stack((lat_map, lon_map), dim=0)
+    out = torch.empty((_img.shape[0], height, width), dtype=_img.dtype, device=device)
+    persp = torch_grid_sample(
+        img=_img[None, ...],
+        grid=grid[None, ...],
+        out=out[None, ...],
+        mode="bilinear",
+        backend="native",
+    ).squeeze(0)
+
+    mask = mask * inverse_mask
+    mask = mask.unsqueeze(0).repeat(_img.shape[0], 1, 1)
+    persp = persp * mask
+
+    return persp, mask
+
+
+def run(
+    icomaps: List[torch.Tensor],
+    height: int,
+    width: int,
+    fov_x: float,
+    mode: str,
+    backend: str = "native",
+) -> torch.Tensor:
+    """Run Ico2Equi
+
+    params:
+    - icomaps (List[torch.Tensor])
+    - height, width (int): the output equirectangular image's size
+    - fov_x (float): fov of horizontal axis in degrees
+    - mode (str)
+    - backend (str): backend for `torch_grid_sample`
+
+    returns:
+    - equi (torch.Tensor)
+
+    NOTE: we assume that each entry of `icomaps` is a 4-dim tensor
+    """
+
+    assert (
+        len(icomaps[0].shape) == 4
+    ), f"ERR: `icomaps` entries should be 4-dim (fn, c, h, w), but got {icomaps[0].shape}"
+
+    icomaps_dtype = icomaps[0].dtype
+    assert icomaps_dtype in (
+        torch.uint8,
+        torch.float16,
+        torch.float32,
+        torch.float64,
+    ), (
+        f"ERR: input icomaps have dtype of {icomaps_dtype} which is\n"
+        f"incompatible: try {(torch.uint8, torch.float16, torch.float32, torch.float64)}"
+    )
+
+    # NOTE: we don't want to use uint8 as the output array's dtype yet since
+    # floating point calculations (matmul, etc) are faster
+    # NOTE: we are also assuming that uint8 is in range of 0-255 (obviously)
+    # and float is in range of 0.0-1.0; later we will refine it
+    # NOTE: for the sake of consistency, we will try to use the same dtype as the input
+    if icomaps[0].device.type == "cuda":
+        dtype = torch.float32 if icomaps_dtype == torch.uint8 else icomaps_dtype
+        assert dtype in (torch.float16, torch.float32, torch.float64), (
+            f"ERR: argument `dtype` is {dtype} which is incompatible:\n"
+            f"try {(torch.float16, torch.float32, torch.float64)}"
+        )
+    else:
+        # NOTE: for cpu, it can't use half-precision
+        dtype = torch.float32 if icomaps_dtype == torch.uint8 else icomaps_dtype
+        assert dtype in (torch.float32, torch.float64), (
+            f"ERR: argument `dtype` is {dtype} which is incompatible:\n"
+            f"try {(torch.float32, torch.float64)}"
+        )
+    if backend == "native" and icomaps_dtype == torch.uint8:
+        # FIXME: hacky way of dealing with images that are uint8 when using
+        # torch.grid_sample
+        for i, ico in enumerate(icomaps):
+            icomaps[i] = ico.type(torch.float32)
+
+    bs, c = len(icomaps), icomaps[0].shape[1]
+    device = get_device(icomaps[0])
+    cpu_device = torch.device("cpu")
+
+    # initialize the output equi
+    out_batch = torch.empty((bs, c, height, width), dtype=icomaps_dtype, device=device)
+    # calculate the subdivision level of the input batches
+    subdivision_levels = [int(np.log(icomap.shape[0] / 20) / np.log(4)) for icomap in icomaps]
+    # calculate the angles for the target subdivision levels
+    angles = calculate_tangent_angles(subdivision_levels, device=device)
+    # torch routine
+    zero = tensor(0, dtype=dtype, device=device)
+    one = tensor(1, dtype=dtype, device=device)
+
+    for bn, (imgs, angle) in enumerate(zip(icomaps, angles)):
+        angle *= -1 * 180 / torch.clone(pi).to(dtype=dtype, device=device)
+        out = torch.empty((c, height, width), dtype=dtype, device=device)
+        # merge_image is the sum of the reconstructed images
+        # merge_mask is the sum of the latitude-longitude masks
+        merge_image = torch.zeros((c, height, width), dtype=dtype, device=device)
+        merge_mask = torch.zeros((c, height, width), dtype=dtype, device=device)
+        for img, [T, P] in zip(imgs, angle):
+            img, mask = get_equirec(img, fov_x, T, P, height, width, device=device)
+            merge_image += img
+            merge_mask += mask
+        # the resulting image is the sum of the images divided by the sum of the masks
+        merge_mask = torch.where(merge_mask == zero, one, merge_mask)
+        out = torch.divide(merge_image, merge_mask)
+
+        out = (
+            out.type(icomaps_dtype)
+            if icomaps_dtype == torch.uint8
+            else torch.clip(out, 0.0, 1.0)
+        )
+        out_batch[bn] = out
+
+    return out_batch
diff --git a/equilib/numpy_utils/__init__.py b/equilib/numpy_utils/__init__.py
index 140ea301..5e4f028c 100644
--- a/equilib/numpy_utils/__init__.py
+++ b/equilib/numpy_utils/__init__.py
@@ -12,6 +12,10 @@
     create_rotation_matrix,
     create_rotation_matrix_at_once,
 )
+from .icosahedron import (
+    calculate_tangent_rots,
+    calculate_tangent_angles,
+)
 
 __all__ = [
     "create_grid",
@@ -22,4 +26,6 @@
     "create_rotation_matrix",
     "create_rotation_matrix_at_once",
     "create_xyz_grid",
+    "calculate_tangent_rots",
+    "calculate_tangent_angles",
 ]
diff --git a/equilib/numpy_utils/icosahedron.py b/equilib/numpy_utils/icosahedron.py
new file mode 100644
index 00000000..0ec606b7
--- /dev/null
+++ b/equilib/numpy_utils/icosahedron.py
@@ -0,0 +1,209 @@
+from typing import List, Dict
+
+import numpy as np
+
+
+def calculate_tangent_rots(
+    subdivision_level: List[int],
+) -> List[List[Dict[str, float]]]:
+    """Calculate rotation parameters for the transformation
+
+    params:
+    - subdivision_level (List[int]): sets the number of faces, which equals
+      20 * (4 ** b), where b is the subdivision level
+
+    returns:
+    - rots (List[List[dict]]): list of lists of dicts with rotation parameters
+    """
+    rots = [[] for _ in subdivision_level]
+    radius = 10
+    for i, sub_lvl in enumerate(subdivision_level):
+        faces = init_20_faces(radius)
+        for _ in range(0, sub_lvl):
+            faces = subdivide_faces(faces)
+        angles = calculate_angles(faces)
+        for ang in angles:
+            rots[i].append(
+                {
+                    'roll': 0.,
+                    'pitch': ang[1],
+                    'yaw': ang[0],
+                }
+            )
+    return rots
+
+
+def calculate_tangent_angles(
+    subdivision_level: List[int],
+) -> List[np.ndarray]:
+    """Calculate the angles to the icosahedron faces
+
+    params:
+    - subdivision_level (List[int]): sets the number of faces, which equals
+      20 * (4 ** b), where b is the subdivision level
+
+    returns:
+    - angles (List[np.ndarray]): list of angle arrays
+ + """ + angles = [[] for _ in subdivision_level] + radius = 10 + for i, sub_lvl in enumerate(subdivision_level): + faces = init_20_faces(radius) + for _ in range(0, sub_lvl): + faces = subdivide_faces(faces) + angles[i] = calculate_angles(faces) + return angles + +def calculate_angles(faces:np.ndarray) -> np.ndarray: + """ Calculate angles from (0,0,0) point to centroid of input faces + + params: + faces: array of triplets with coordinates of icosahedron faces. Each face + represented as array of shape [n,3,3], where n - number of faces. + + returns: + angles: array of longtitude and latitude angles to input faces. Returns + array of shape [n,2] + + """ + centroids = calculate_centroids(faces) + angles = np.zeros((centroids.shape[0], 2)) + for i, c in enumerate(centroids): + lat = calculate_lat(c, [c[0], c[1], 0]) + lon = calculate_lon(c[:2], [0,1]) + angles[i] = [lon, lat] + angles[:, 1] *= np.sign(centroids[:, 2]) + return angles * -1 + +def subdivide_faces(faces: np.ndarray): + """ Subdivide faces of icosahedron on next level. + + params: + faces: array of triplets with coordinates of icosahedron faces. Each face + represented as array of shape [n,3,3], where n - number of faces. + + returns: + new_faces: array of subdivided faces. Returns array of shape [n*4,3,3] + + """ + new_faces = np.zeros((faces.shape[0]*4,3,3)) + for i, face in enumerate(faces): + v1,v2,v3 = face + + v1_new = __computeHalfVertex(v1, v2) + v2_new = __computeHalfVertex(v2, v3) + v3_new = __computeHalfVertex(v1, v3) + + # add 4 new triangles to vertex array + new_faces[i*4] = np.array([v1, v1_new, v3_new]) + new_faces[i*4+1] = np.array([v1_new, v2, v2_new]) + new_faces[i*4+2] = np.array([v1_new, v2_new, v3_new]) + new_faces[i*4+3] = np.array([v3_new, v2_new, v3]) + return new_faces + +def init_icosahedron_vertices(radius:int=10) -> np.ndarray: + """ Calculate icosahedron vertices at zero subdivision_level. + Used for icosahedron initialization + + params: + radius: radius of icosahedron sphere. Can be removed with constant in all methods. + + returns: + vertices: points of initialized icosahedron, array of shape [20,3] + + """ + PI, r = np.pi, radius + H_ANGLE, V_ANGLE = PI / 180 * 72, np.arctan(1.0 / 2) + vertices = np.zeros((12,3)) + hAngle1, hAngle2 = -PI / 2 - H_ANGLE / 2, -PI / 2 + + vertices[0] = (0,0,r) + vertices[-1] = (0,0,-r) + + xy,z = np.ones((10,2)), np.ones((10,1)) + xy *= r*np.cos(V_ANGLE) + z *= r*np.sin(V_ANGLE) + h1 = (np.arange(0,5) * H_ANGLE) + hAngle1 + h2 = (np.arange(0,5) * H_ANGLE) + hAngle2 + xy[:5,0] = xy[:5,0] * np.cos(h1) + xy[:5,1] = xy[:5,1] * np.sin(h1) + xy[5:,0] = xy[5:,0] * np.cos(h2) + xy[5:,1] = xy[5:,1] * np.sin(h2) + z[5:] = -z[5:] + xyz = np.hstack([xy,z]) + + vertices[1:-1] = xyz + return vertices + +def init_20_faces(radius:int=10): + """ Calculate icosahedron faces at zero subdivision level. + Used for icosahedron initialization + + params: + radius: radius of icosahedron sphere. Can be removed with constant in all methods. 
+
+    returns:
+    - faces: coordinates of the icosahedron faces, array of shape [20,3,3]
+    """
+    vertices = init_icosahedron_vertices(radius)
+    faces = np.zeros((20, 3, 3))
+    upper_pent = vertices[1:6]
+    down_pent = vertices[-6:]
+    for i in range(1, 6):
+        faces[i - 1] = np.array([vertices[0], upper_pent[i % 5], upper_pent[i - 1]])
+        faces[-i] = np.array([vertices[-1], down_pent[i % 5], down_pent[i - 1]])
+    # head, middle and tail pointers for the triangles
+    ptr_h, ptr_m, ptr_t = -1, 0, 0
+    for i in range(1, 11):
+        if i % 2 != 0:
+            ptr_h += 1
+            ptr_t += 1
+            faces[i + 4] = np.array([upper_pent[ptr_h], down_pent[ptr_m % 5], upper_pent[ptr_t % 5]])
+        if i % 2 == 0:
+            ptr_m += 1
+            faces[i + 4] = np.array([down_pent[ptr_h], upper_pent[ptr_m % 5], down_pent[ptr_t % 5]])
+    return faces
+
+
+def calculate_centroids(faces: np.ndarray):
+    """Calculate the centroids of the faces
+
+    params:
+    - faces: array of faces
+
+    returns:
+    - centroids: array of centroid coordinates for the input faces, of
+      shape [n,3], where n is the number of input faces
+    """
+    centroids = np.zeros((faces.shape[0], 3))
+    for i, face in enumerate(faces):
+        centroids[i] = face.sum(axis=0) / 3
+    return centroids
+
+
+def __computeHalfVertex(v1, v2, radius: int = 10):
+    """Compute the half vertex between two icosahedron vertices
+
+    params:
+    - v1, v2 (np.ndarray): two vertex points
+    - radius: radius of the icosahedron sphere; could be replaced with a
+      constant in all methods
+
+    returns:
+    - new_v (np.ndarray): coordinates of the half vertex scaled onto the sphere
+    """
+    new_v = v1 + v2
+    scale = radius / np.sqrt(np.sum(new_v ** 2))
+    return new_v * scale
+
+
+def calculate_lat(vec1: np.ndarray, vec2: np.ndarray):
+    """Calculate the latitude angle between two vectors"""
+    unit_vector_1 = vec1 / np.linalg.norm(vec1)
+    unit_vector_2 = vec2 / np.linalg.norm(vec2)
+    dot_product = np.dot(unit_vector_1, unit_vector_2)
+    return np.arccos(dot_product)
+
+
+def calculate_lon(vec1: np.ndarray, vec2: np.ndarray):
+    """Calculate the longitude angle between two vectors"""
+    dot = np.dot(vec1, vec2)
+    det = np.linalg.det(np.vstack([vec1, vec2]))
+    return np.arctan2(det, dot)
\ No newline at end of file
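
A small sketch of what the icosahedron helpers above return (the shapes follow from the code: subdivision level b yields 20 * 4**b faces, and all angles are in radians):

from equilib.numpy_utils import calculate_tangent_rots, calculate_tangent_angles

rots = calculate_tangent_rots(subdivision_level=[0])
print(len(rots), len(rots[0]))  # 1 batch entry, 20 faces at level 0
print(sorted(rots[0][0]))       # ['pitch', 'roll', 'yaw']

angles = calculate_tangent_angles([1])
print(angles[0].shape)          # (80, 2): [longitude, latitude] per face
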
diff --git a/equilib/torch_utils/__init__.py b/equilib/torch_utils/__init__.py
index 07fd071b..52f34fa9 100644
--- a/equilib/torch_utils/__init__.py
+++ b/equilib/torch_utils/__init__.py
@@ -13,6 +13,10 @@
     create_rotation_matrix_at_once,
 )
 from .func import get_device, sizeof
+from .icosahedron import (
+    calculate_tangent_rots,
+    calculate_tangent_angles,
+)
 
 __all__ = [
     "create_global2camera_rotation_matrix",
@@ -26,4 +30,6 @@
     "get_device",
     "sizeof",
     "pi",
+    "calculate_tangent_rots",
+    "calculate_tangent_angles",
 ]
diff --git a/equilib/torch_utils/grid.py b/equilib/torch_utils/grid.py
index cb4070cd..38cba691 100644
--- a/equilib/torch_utils/grid.py
+++ b/equilib/torch_utils/grid.py
@@ -4,7 +4,7 @@
 
 import torch
 
-from equilib.torch_utils.intrinsic import pi
+from .intrinsic import pi
 
 
 def create_grid(
diff --git a/equilib/torch_utils/icosahedron.py b/equilib/torch_utils/icosahedron.py
new file mode 100644
index 00000000..dae1549b
--- /dev/null
+++ b/equilib/torch_utils/icosahedron.py
@@ -0,0 +1,238 @@
+from typing import List, Dict
+
+import torch
+from torch import tensor
+
+from .intrinsic import pi
+
+
+def calculate_tangent_rots(
+    subdivision_level: List[int],
+    device: torch.device = torch.device("cpu"),
+) -> List[List[Dict[str, float]]]:
+    """Calculate rotation parameters for the transformation
+
+    params:
+    - subdivision_level (List[int]): sets the number of faces, which equals
+      20 * (4 ** b), where b is the subdivision level
+
+    returns:
+    - rots (List[List[dict]]): list of lists of dicts with rotation parameters
+    """
+    rots = [[] for _ in subdivision_level]
+    radius = 10
+    for i, sub_lvl in enumerate(subdivision_level):
+        faces = init_20_faces(radius, device=device)
+        for _ in range(0, sub_lvl):
+            faces = subdivide_faces(faces, radius, device=device)
+        angles = calculate_angles(faces, device=device)
+        for ang in angles:
+            rots[i].append(
+                {
+                    'roll': 0.,
+                    'pitch': ang[1],
+                    'yaw': ang[0],
+                }
+            )
+    return rots
+
+
+def calculate_tangent_angles(
+    subdivision_level: List[int],
+    device: torch.device = torch.device("cpu"),
+) -> List[torch.Tensor]:
+    """Calculate the angles to the icosahedron faces
+
+    params:
+    - subdivision_level (List[int]): sets the number of faces, which equals
+      20 * (4 ** b), where b is the subdivision level
+
+    returns:
+    - angles (List[torch.Tensor]): list of angle tensors
+    """
+    angles = [[] for _ in subdivision_level]
+    radius = 10
+    for i, sub_lvl in enumerate(subdivision_level):
+        faces = init_20_faces(radius, device=device)
+        for _ in range(0, sub_lvl):
+            faces = subdivide_faces(faces, radius, device=device)
+        angles[i] = calculate_angles(faces, device=device)
+    return angles
+
+
+def calculate_angles(
+    faces: torch.Tensor,
+    device: torch.device = torch.device("cpu"),
+) -> torch.Tensor:
+    """Calculate the angles from the (0,0,0) point to the centroid of each face
+
+    params:
+    - faces: tensor of triplets with the coordinates of the icosahedron faces;
+      each face is represented in a tensor of shape [n,3,3], where n is the
+      number of faces
+
+    returns:
+    - angles: tensor of longitude and latitude angles to the input faces,
+      of shape [n,2]
+    """
+    centroids = calculate_centroids(faces, device)
+    angles = torch.zeros(centroids.shape[0], 2, device=device)
+    for i, c in enumerate(centroids):
+        lat = calculate_lat(c, tensor([c[0], c[1], 0], dtype=torch.float32, device=device))
+        lon = calculate_lon(c[:2], tensor([0, 1], dtype=torch.float32, device=device))
+        angles[i] = tensor([lon, lat])
+    angles[:, 1] *= torch.sign(centroids[:, 2])
+    return angles * -1
+
+
+def subdivide_faces(
+    faces: torch.Tensor,
+    radius: int = 10,
+    device: torch.device = torch.device("cpu"),
+):
+    """Subdivide the faces of the icosahedron to the next level
+
+    params:
+    - faces: tensor of triplets with the coordinates of the icosahedron faces;
+      each face is represented in a tensor of shape [n,3,3], where n is the
+      number of faces
+
+    returns:
+    - new_faces: tensor of subdivided faces, of shape [n*4,3,3]
+    """
+    new_faces = torch.zeros(faces.shape[0] * 4, 3, 3, device=device)
+    for i, face in enumerate(faces):
+        v1, v2, v3 = face
+
+        v1_new = __computeHalfVertex(v1, v2, radius)
+        v2_new = __computeHalfVertex(v2, v3, radius)
+        v3_new = __computeHalfVertex(v1, v3, radius)
+
+        # add 4 new triangles to the vertex array
+        new_faces[i * 4] = torch.vstack([v1, v1_new, v3_new])
+        new_faces[i * 4 + 1] = torch.vstack([v1_new, v2, v2_new])
+        new_faces[i * 4 + 2] = torch.vstack([v1_new, v2_new, v3_new])
+        new_faces[i * 4 + 3] = torch.vstack([v3_new, v2_new, v3])
+    return new_faces
+
+
+def init_icosahedron_vertices(
+    radius: int = 10,
+    device: torch.device = torch.device("cpu"),
+) -> torch.Tensor:
+    """Calculate the icosahedron vertices at subdivision level zero.
+    Used for icosahedron initialization.
+
+    params:
+    - radius: radius of the icosahedron sphere; could be replaced with a
+      constant in all methods
+
+    returns:
+    - vertices: points of the initialized icosahedron, tensor of shape [12,3]
+    """
+    PI, r = pi.to(device), radius
+    H_ANGLE = torch.clone(PI) / 180 * 72
+    V_ANGLE = torch.arctan(tensor(1.0 / 2, device=device))
+    vertices = torch.zeros(12, 3, device=device)
+    hAngle1, hAngle2 = -PI / 2 - H_ANGLE / 2, -PI / 2
+
+    vertices[0] = tensor([0, 0, r], device=device)
+    vertices[-1] = tensor([0, 0, -r], device=device)
+
+    xy, z = torch.ones(10, 2, device=device), torch.ones(10, 1, device=device)
+    xy *= r * torch.cos(V_ANGLE)
+    z *= r * torch.sin(V_ANGLE)
+    h1 = (torch.arange(0, 5, device=device) * H_ANGLE) + hAngle1
+    h2 = (torch.arange(0, 5, device=device) * H_ANGLE) + hAngle2
+    xy[:5, 0] = xy[:5, 0] * torch.cos(h1)
+    xy[:5, 1] = xy[:5, 1] * torch.sin(h1)
+    xy[5:, 0] = xy[5:, 0] * torch.cos(h2)
+    xy[5:, 1] = xy[5:, 1] * torch.sin(h2)
+    z[5:] = -z[5:]
+    xyz = torch.hstack([xy, z])
+
+    vertices[1:-1] = xyz
+    return vertices
+
+
+def init_20_faces(
+    radius: int = 10,
+    device: torch.device = torch.device("cpu"),
+):
+    """Calculate the icosahedron faces at subdivision level zero.
+    Used for icosahedron initialization.
+
+    params:
+    - radius: radius of the icosahedron sphere; could be replaced with a
+      constant in all methods
+
+    returns:
+    - faces: coordinates of the icosahedron faces, tensor of shape [20,3,3]
+    """
+    vertices = init_icosahedron_vertices(radius, device)
+    faces = torch.zeros(20, 3, 3, device=device)
+    upper_pent = vertices[1:6]
+    down_pent = vertices[-6:]
+    for i in range(1, 6):
+        faces[i - 1] = torch.vstack([vertices[0], upper_pent[i % 5], upper_pent[i - 1]])
+        faces[-i] = torch.vstack([vertices[-1], down_pent[i % 5], down_pent[i - 1]])
+    # head, middle and tail pointers for the triangles
+    ptr_h, ptr_m, ptr_t = -1, 0, 0
+    for i in range(1, 11):
+        if i % 2 != 0:
+            ptr_h += 1
+            ptr_t += 1
+            faces[i + 4] = torch.vstack([upper_pent[ptr_h], down_pent[ptr_m % 5], upper_pent[ptr_t % 5]])
+        if i % 2 == 0:
+            ptr_m += 1
+            faces[i + 4] = torch.vstack([down_pent[ptr_h], upper_pent[ptr_m % 5], down_pent[ptr_t % 5]])
+    return faces
+
+
+def calculate_centroids(
+    faces: torch.Tensor,
+    device: torch.device = torch.device("cpu"),
+):
+    """Calculate the centroids of the faces
+
+    params:
+    - faces: tensor of faces
+
+    returns:
+    - centroids: tensor of centroid coordinates for the input faces, of
+      shape [n,3], where n is the number of input faces
+    """
+    centroids = torch.zeros((faces.shape[0], 3), device=device)
+    for i, face in enumerate(faces):
+        centroids[i] = face.sum(axis=0) / 3
+    return centroids
+
+
+def __computeHalfVertex(v1, v2, radius: int = 10):
+    """Compute the half vertex between two icosahedron vertices
+
+    params:
+    - v1, v2 (torch.Tensor): two vertex points
+    - radius: radius of the icosahedron sphere; could be replaced with a
+      constant in all methods
+
+    returns:
+    - new_v (torch.Tensor): coordinates of the half vertex scaled onto the sphere
+    """
+    new_v = v1 + v2
+    scale = radius / torch.sqrt(torch.sum(new_v ** 2))
+    return new_v * scale
+
+
+def calculate_lat(
+    vec1: torch.Tensor,
+    vec2: torch.Tensor,
+    device: torch.device = torch.device("cpu"),
+):
+    """Calculate the latitude angle between two vectors"""
+    unit_vector_1 = (vec1 / torch.linalg.norm(vec1)).to(device)
+    unit_vector_2 = (vec2 / torch.linalg.norm(vec2)).to(device)
+    dot_product = torch.dot(unit_vector_1, unit_vector_2)
+    return torch.arccos(dot_product)
+
+
+def calculate_lon(
+    vec1: torch.Tensor,
+    vec2: torch.Tensor,
+):
+    """Calculate the longitude angle between two vectors"""
+    dot = torch.dot(vec1, vec2)
+    det = torch.linalg.det(torch.vstack([vec1, vec2]))
+    return torch.atan2(det, dot)
\ No newline at end of file
diff --git a/equilib/torch_utils/rotation.py b/equilib/torch_utils/rotation.py
index 7230f67b..a3568990 100644
--- a/equilib/torch_utils/rotation.py
+++ b/equilib/torch_utils/rotation.py
@@ -7,6 +7,13 @@
 
 import torch
 
+def sin(digit):
+    return torch.sin(torch.tensor([digit])).item()
+
+def cos(digit):
+    return torch.cos(torch.tensor([digit])).item()
+
+
 def create_global2camera_rotation_matrix(
     dtype: torch.dtype = torch.float32,
     device: torch.device = torch.device("cpu"),
@@ -55,8 +62,8 @@
     R_x = torch.tensor(
         [
             [1.0, 0.0, 0.0],
-            [0.0, np.cos(roll), -np.sin(roll)],
-            [0.0, np.sin(roll), np.cos(roll)],
+            [0.0, cos(roll), -sin(roll)],
+            [0.0, sin(roll), cos(roll)],
         ],
         dtype=dtype,
     )
@@ -65,9 +72,9 @@
     pitch = -pitch
     R_y = torch.tensor(
         [
-            [np.cos(pitch), 0.0, np.sin(pitch)],
+            [cos(pitch), 0.0, sin(pitch)],
             [0.0, 1.0, 0.0],
-            [-np.sin(pitch), 0.0, np.cos(pitch)],
+            [-sin(pitch), 0.0, cos(pitch)],
         ],
         dtype=dtype,
     )
@@ -76,8 +83,8 @@
     yaw = -yaw
     R_z = torch.tensor(
         [
-            [np.cos(yaw), -np.sin(yaw), 0.0],
-            [np.sin(yaw), np.cos(yaw), 0.0],
+            [cos(yaw), -sin(yaw), 0.0],
+            [sin(yaw), cos(yaw), 0.0],
             [0.0, 0.0, 1.0],
         ],
         dtype=dtype,
    )
@@ -115,23 +122,23 @@
     return torch.tensor(
         [
             [
-                np.cos(yaw) * np.cos(pitch),
-                np.cos(yaw) * np.sin(pitch) * np.sin(roll)
-                - np.sin(yaw) * np.cos(roll),
-                np.cos(yaw) * np.sin(pitch) * np.cos(roll)
-                + np.sin(yaw) * np.sin(roll),
+                cos(yaw) * cos(pitch),
+                cos(yaw) * sin(pitch) * sin(roll)
+                - sin(yaw) * cos(roll),
+                cos(yaw) * sin(pitch) * cos(roll)
+                + sin(yaw) * sin(roll),
             ],
             [
-                np.sin(yaw) * np.cos(pitch),
-                np.sin(yaw) * np.sin(yaw) * np.sin(pitch) * np.sin(roll)
-                + np.cos(yaw) * np.cos(roll),
-                np.sin(yaw) * np.sin(pitch) * np.cos(roll)
-                - np.cos(yaw) * np.sin(roll),
+                sin(yaw) * cos(pitch),
+                sin(yaw) * sin(yaw) * sin(pitch) * sin(roll)
+                + cos(yaw) * cos(roll),
+                sin(yaw) * sin(pitch) * cos(roll)
+                - cos(yaw) * sin(roll),
             ],
             [
-                -np.sin(pitch),
-                np.cos(pitch) * np.sin(roll),
-                np.cos(pitch) * np.cos(roll),
+                -sin(pitch),
+                cos(pitch) * sin(roll),
+                cos(pitch) * cos(roll),
             ],
         ],
         dtype=dtype,
@@ -181,8 +188,8 @@
     R_x = torch.tensor(
         [
             [1.0, 0.0, 0.0],
-            [0.0, np.cos(x), -np.sin(x)],
-            [0.0, np.sin(x), np.cos(x)],
+            [0.0, cos(x), -sin(x)],
+            [0.0, sin(x), cos(x)],
         ],
         dtype=dtype,
    )
@@ -191,9 +198,9 @@
     y = -y
     R_y = torch.tensor(
         [
-            [np.cos(y), 0.0, -np.sin(y)],
+            [cos(y), 0.0, -sin(y)],
             [0.0, 1.0, 0.0],
-            [np.sin(y), 0.0, np.cos(y)],
+            [sin(y), 0.0, cos(y)],
         ],
         dtype=dtype,
    )
@@ -202,8 +209,8 @@
     z = -z
     R_z = torch.tensor(
         [
-            [np.cos(z), np.sin(z), 0.0],
-            [-np.sin(z), np.cos(z), 0.0],
+            [cos(z), sin(z), 0.0],
+            [-sin(z), cos(z), 0.0],
             [0.0, 0.0, 1.0],
         ],
         dtype=dtype,