Skip to content

Commit

Permalink
Updated base to return np.array instead of memmap
Browse files: browse the repository at this point in the history
  • Loading branch information
shenoynikhil committed Sep 26, 2023
1 parent bd2e008 commit 77f1efe
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 5 deletions.
14 changes: 10 additions & 4 deletions src/openqdc/datasets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import numpy as np
import torch
from loguru import logger
from sklearn.utils import Bunch
from tqdm import tqdm

Expand Down Expand Up @@ -124,6 +125,7 @@ def collate_list(self, list_entries):

def save_preprocess(self, data_dict):
# save memmaps
logger.info("Preprocessing data and saving it to cache.")
for key in self.data_keys:
local_path = p_join(self.preprocess_path, f"{key}.mmap")
out = np.memmap(local_path, mode="w+", dtype=data_dict[key].dtype, shape=data_dict[key].shape)
Expand All @@ -140,6 +142,7 @@ def save_preprocess(self, data_dict):
push_remote(local_path)

def read_preprocess(self):
logger.info("Reading preprocessed data")
self.data = {}
for key in self.data_keys:
filename = p_join(self.preprocess_path, f"{key}.mmap")
Expand Down Expand Up @@ -172,14 +175,17 @@ def __len__(self):
def __getitem__(self, idx: int):
p_start, p_end = self.data["position_idx_range"][idx]
input = self.data["atomic_inputs"][p_start:p_end]
z, c, positions = input[:, 0], input[:, 1], input[:, -3:]
z, c = z.astype(np.int32), c.astype(np.int32)
energies = self.data["energies"][idx]
z, c, positions, energies = (
np.array(input[:, 0], dtype=np.int32),
np.array(input[:, 1], dtype=np.int32),
np.array(input[:, -3:], dtype=np.float32),
np.array(self.data["energies"][idx], dtype=np.float32),
)
name = self.data["name"]["uniques"][self.data["name"]["inv_indices"][idx]]
subset = self.data["subset"]["uniques"][self.data["subset"]["inv_indices"][idx]]

if "forces" in self.data:
forces = self.data["forces"][p_start:p_end]
forces = np.array(self.data["forces"][p_start:p_end], dtype=np.float32)
else:
forces = None

Expand Down
2 changes: 1 addition & 1 deletion src/openqdc/utils/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def push_remote(local_path, overwrite=True):
return remote_path


def pull_locally(local_path, overwrite=True):
def pull_locally(local_path, overwrite=False):
remote_path = local_path.replace(get_local_cache(), get_remote_cache())
os.makedirs(os.path.dirname(local_path), exist_ok=True)
if not os.path.exists(local_path) or overwrite:
Expand Down

0 comments on commit 77f1efe

Please sign in to comment.