Skip to content

Commit

Permalink
alternative scratch
Browse files Browse the repository at this point in the history
  • Loading branch information
hczhai committed Dec 9, 2024
1 parent ac5badc commit 5cc88cd
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 14 deletions.
23 changes: 23 additions & 0 deletions pyblock2/driver/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,8 @@ def __init__(
restart_dir=None,
restart_dir_per_sweep=None,
mps_dir=None,
scratch_quota=None,
alt_scratch=None,
n_threads=None,
n_mkl_threads=1,
symm_type=SymmetryTypes.SU2,
Expand Down Expand Up @@ -596,6 +598,11 @@ def __init__(
mps_dir : None or str
If not None, MPS will be stored in the given directory instead of the scratch directory.
Default is None (MPS will be stored in the scratch directory).
scratch_quota : None or int
If not None, will save intermediates to "alt_scratch" when the intermediate size in "scratch" is
above or equal to this number (in bytes). Default is None (no disk quota).
alt_scratch : None or str
Alternative scratch directory. Default is None.
n_threads : None or int
Number of threads. When MPI is used, this is the number of threads for each MPI processor.
Default is None, and the max number of threads available on this node will be used.
Expand Down Expand Up @@ -636,6 +643,8 @@ def __init__(
self._mps_dir = mps_dir
self._restart_dir = restart_dir
self._restart_dir_per_sweep = restart_dir_per_sweep
self._scratch_quota = 0 if scratch_quota is None else scratch_quota
self._alt_scratch = alt_scratch
self.stack_mem = stack_mem
self.stack_mem_ratio = stack_mem_ratio
self.fp_codec_cutoff = fp_codec_cutoff
Expand Down Expand Up @@ -770,6 +779,7 @@ def set_symm_type(self, symm_type, reset_frame=True):
self.frame.use_main_stack = False
self.frame.compressed_sparse_tensor_storage = self.compressed_mps_storage
self.frame.minimal_memory_usage = self.min_mpo_mem
self.frame.save_dir_quota = self._scratch_quota

if self.mpi:
self.mpi = bw.brs.MPICommunicator()
Expand Down Expand Up @@ -797,6 +807,19 @@ def set_symm_type(self, symm_type, reset_frame=True):
self.mpi.barrier()
self.frame.mps_dir = self.mps_dir

if self._alt_scratch is not None:
import os

if self.mpi is None or self.mpi.rank == self.mpi.root:
if not os.path.isdir(self._alt_scratch):
os.makedirs(self._alt_scratch)
if self.mpi is not None:
self.mpi.barrier()
if not os.path.isdir(self._alt_scratch):
os.makedirs(self._alt_scratch)
self.mpi.barrier()
self.frame.alt_save_dir = self._alt_scratch

if self.restart_dir_per_sweep is not None:
self.frame.restart_dir_per_sweep = self.restart_dir_per_sweep

Expand Down
2 changes: 2 additions & 0 deletions src/core/allocator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,8 @@ template <typename FL> struct DataFrame {
string restart_dir_optimal_mps_per_sweep =
""; //!< If not empty, save the optimal MPS from each sweep to this dir
//!< with sweep index as suffix.
size_t save_dir_quota = 0; //!< Disk quota for save_dir (in bytes).
string alt_save_dir = ""; //!< Alternative scartch folder.
string prefix = "F", //!< Filename prefix for common scratch files (such as
//!< MPS tensors).
prefix_distri =
Expand Down
8 changes: 6 additions & 2 deletions src/core/fp_codec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,9 @@ struct FPCodec {
mutable size_t
ndata =
0, //!< Length of the array of the data that has been compressed.
ncpsd = 0; //!< Length of the array for the compressed data.
ncpsd = 0; //!< Length of the array for the compressed data.
mutable size_t ncpsd_last =
0; //!< Length of the array written in the last call.
size_t chunk_size = 4096; //!< Length of the array elements that should be
//!< processed at one time.
size_t n_parallel_chunks =
Expand Down Expand Up @@ -269,6 +271,7 @@ struct FPCodec {
T *pdata = new T[(chunk_size + 1) * min(nchunk, n_parallel_chunks)];
vector<size_t> cplens(n_parallel_chunks);
int ntg = threading->activate_global();
ncpsd_last = 0;
#pragma omp parallel num_threads(ntg)
for (size_t ib = 0; ib < nbatch; ib++) {
size_t n_this_chunk =
Expand All @@ -288,9 +291,10 @@ struct FPCodec {
size_t cplen = cplens[ic];
ofs.write((char *)&cplen, sizeof(cplen));
ofs.write((char *)(pdata + offset + ic), sizeof(T) * cplen);
ncpsd += cplen;
ncpsd_last += cplen;
}
}
ncpsd += ncpsd_last;
delete[] pdata;
threading->activate_normal();
ofs.write((char *)tail.c_str(), 4);
Expand Down
75 changes: 67 additions & 8 deletions src/dmrg/moving_environment.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ template <typename S, typename FL, typename FLS> struct MovingEnvironment {
int fuse_center;
// Set this to false for non-propagate expectation
bool save_environments = true;
mutable map<int, pair<string, size_t>> left_part_files;
mutable map<int, pair<string, size_t>> right_part_files;
MovingEnvironment(const shared_ptr<MPO<S, FL>> &mpo,
const shared_ptr<MPS<S, FLS>> &bra,
const shared_ptr<MPS<S, FLS>> &ket,
Expand Down Expand Up @@ -425,6 +427,11 @@ template <typename S, typename FL, typename FLS> struct MovingEnvironment {
Partition<S, FL>::deallocate_op_infos_notrunc(left_op_infos_notrunc);
if (save_environments) {
frame_<FP>()->save_data(1, get_left_partition_filename(i));
left_part_files[i] = make_pair(get_left_partition_filename(i),
renormal_mem * sizeof(FL));
if (frame_<FP>()->fp_codec != nullptr)
left_part_files[i].second =
frame_<FPS>()->fp_codec->ncpsd_last * sizeof(FP);
if (save_partition_info) {
frame_<FP>()->activate(1);
envs[i]->save_data(true, get_left_partition_filename(i, true));
Expand Down Expand Up @@ -648,6 +655,11 @@ template <typename S, typename FL, typename FLS> struct MovingEnvironment {
Partition<S, FL>::deallocate_op_infos_notrunc(right_op_infos_notrunc);
if (save_environments) {
frame_<FP>()->save_data(1, get_right_partition_filename(i));
right_part_files[i] = make_pair(get_right_partition_filename(i),
renormal_mem * sizeof(FL));
if (frame_<FP>()->fp_codec != nullptr)
right_part_files[i].second =
frame_<FPS>()->fp_codec->ncpsd_last * sizeof(FP);
if (save_partition_info) {
frame_<FP>()->activate(1);
envs[i]->save_data(false,
Expand Down Expand Up @@ -831,17 +843,39 @@ template <typename S, typename FL, typename FLS> struct MovingEnvironment {
<< ".AR." << tag << ".RIGHT." << Parsing::to_string(i);
return ss.str();
}
size_t get_used_save_dir_size() const {
size_t used = 0;
const string xdir = frame_<FP>()->save_dir + "/";
for (const auto &p : left_part_files)
if (p.second.first.rfind(xdir, 0) == 0)
used += p.second.second;
for (const auto &p : right_part_files)
if (p.second.first.rfind(xdir, 0) == 0)
used += p.second.second;
return used;
}
string get_left_partition_filename(int i, bool info = false) const {
stringstream ss;
ss << frame_<FP>()->save_dir << "/" << frame_<FP>()->prefix_distri
<< ".PART." << (info ? "INFO." : "") << tag << ".LEFT."
<< Parsing::to_string(i);
string xdir = frame_<FP>()->save_dir;
if (!info && left_part_files.count(i))
return left_part_files.at(i).first;
else if (!info && frame_<FP>()->save_dir_quota != 0 &&
get_used_save_dir_size() >= frame_<FP>()->save_dir_quota)
xdir = frame_<FP>()->alt_save_dir;
ss << xdir << "/" << frame_<FP>()->prefix_distri << ".PART."
<< (info ? "INFO." : "") << tag << ".LEFT." << Parsing::to_string(i);
return ss.str();
}
string get_right_partition_filename(int i, bool info = false) const {
stringstream ss;
ss << frame_<FP>()->save_dir << "/" << frame_<FP>()->prefix_distri
<< ".PART." << (info ? "INFO." : "") << tag << ".RIGHT."
string xdir = frame_<FP>()->save_dir;
if (!info && right_part_files.count(i))
return right_part_files.at(i).first;
else if (!info && frame_<FP>()->save_dir_quota != 0 &&
get_used_save_dir_size() >= frame_<FP>()->save_dir_quota)
xdir = frame_<FP>()->alt_save_dir;
ss << xdir << "/" << frame_<FP>()->prefix_distri << ".PART."
<< (info ? "INFO." : "") << tag << ".RIGHT."
<< Parsing::to_string(i);
return ss.str();
}
Expand All @@ -856,19 +890,31 @@ template <typename S, typename FL, typename FLS> struct MovingEnvironment {
me->envs[i] = make_shared<Partition<S, FL>>(*envs[i]);
me->envs[i]->left_op_infos = envs[i]->left_op_infos;
me->envs[i]->right_op_infos = envs[i]->right_op_infos;
if (envs[i]->left != nullptr)
if (envs[i]->left != nullptr) {
Parsing::link_file(get_left_partition_filename(i),
me->get_left_partition_filename(i));
if (envs[i]->right != nullptr)
if (left_part_files.count(i))
me->left_part_files[i] =
make_pair(me->get_left_partition_filename(i),
left_part_files[i].second);
}
if (envs[i]->right != nullptr) {
Parsing::link_file(get_right_partition_filename(i),
me->get_right_partition_filename(i));
if (right_part_files.count(i))
me->right_part_files[i] =
make_pair(me->get_right_partition_filename(i),
right_part_files[i].second);
}
}
}
virtual shared_ptr<MovingEnvironment>
shallow_copy(const string &new_tag) const {
shared_ptr<MovingEnvironment> me =
make_shared<MovingEnvironment>(*this);
me->tag = new_tag;
me->left_part_files.clear();
me->right_part_files.clear();
shallow_copy_to(me);
return me;
}
Expand Down Expand Up @@ -1417,10 +1463,15 @@ template <typename S, typename FL, typename FLS> struct MovingEnvironment {
left_contract_rotate(center);
}
for (int i = n_sites - 1; i >= center; i--)
if (envs[i]->right != nullptr)
if (envs[i]->right != nullptr) {
frame_<FP>()->rename_data(
get_right_partition_filename(i),
get_right_partition_filename(i + 1));
if (right_part_files.count(i))
right_part_files[i + 1] =
make_pair(get_right_partition_filename(i + 1),
right_part_files[i].second);
}
for (int i = n_sites - 1; i >= 0; i--) {
envs[i]->middle.resize(1);
if (i > start_site) {
Expand Down Expand Up @@ -1470,9 +1521,13 @@ template <typename S, typename FL, typename FLS> struct MovingEnvironment {
string left_data_name = get_left_partition_filename(i, info);
if (Parsing::file_exists(left_data_name))
Parsing::remove_file(left_data_name);
if (info == 0 && left_part_files.count(i))
left_part_files.erase(i);
string right_data_name = get_right_partition_filename(i, info);
if (Parsing::file_exists(right_data_name))
Parsing::remove_file(right_data_name);
if (info == 0 && right_part_files.count(i))
right_part_files.erase(i);
}
}
// Move the center site by one
Expand Down Expand Up @@ -1503,6 +1558,8 @@ template <typename S, typename FL, typename FLS> struct MovingEnvironment {
string old_data_name = get_right_partition_filename(center - 1);
if (Parsing::file_exists(old_data_name))
Parsing::remove_file(old_data_name);
if (right_part_files.count(center - 1))
right_part_files.erase(center - 1);
}
} else if (i < center) {
if (envs[center]->right != nullptr &&
Expand All @@ -1519,6 +1576,8 @@ template <typename S, typename FL, typename FLS> struct MovingEnvironment {
string old_data_name = get_left_partition_filename(center + 1);
if (Parsing::file_exists(old_data_name))
Parsing::remove_file(old_data_name);
if (left_part_files.count(center + 1))
left_part_files.erase(center + 1);
}
}
if (para_rule != nullptr)
Expand Down
20 changes: 16 additions & 4 deletions src/dmrg/sweep_algorithm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3178,17 +3178,21 @@ template <typename S, typename FL, typename FLS> struct DMRG {
<< " | Twrite = " << frame_<FPS>()->twrite
<< " | Tfpread = " << frame_<FPS>()->fpread
<< " | Tfpwrite = " << frame_<FPS>()->fpwrite
<< " | Tmporead = " << me->mpo->tread
<< " | Tasync = " << frame_<FPS>()->tasync << endl;
<< " | Tmporead = " << me->mpo->tread << endl;
if (frame_<FPS>()->fp_codec != nullptr)
sout
<< " | data = "
<< Parsing::to_size_string(
frame_<FPS>()->fp_codec->ndata * sizeof(FPS))
<< " | cpsd = "
<< Parsing::to_size_string(
frame_<FPS>()->fp_codec->ncpsd * sizeof(FPS))
<< endl;
frame_<FPS>()->fp_codec->ncpsd *
sizeof(FPS));
if (frame_<FPS>()->save_dir_quota != 0)
sout << " | quota-used = "
<< Parsing::to_size_string(
me->get_used_save_dir_size());
sout << " | Tasync = " << frame_<FPS>()->tasync << endl;
sout << " | Trot = " << me->trot << " | Tctr = " << me->tctr
<< " | Tint = " << me->tint << " | Tmid = " << me->tmid
<< " | Tdctr = " << me->tdctr
Expand Down Expand Up @@ -5181,6 +5185,10 @@ template <typename S, typename FL, typename FLS> struct Linear {
<< Parsing::to_size_string(
frame_<FPS>()->fp_codec->ncpsd *
sizeof(FPS));
if (frame_<FPS>()->save_dir_quota != 0)
cout << " | quota-used = "
<< Parsing::to_size_string(
lme->get_used_save_dir_size());
cout << " | Tasync = " << frame_<FPS>()->tasync << endl;
if (lme != nullptr)
cout << " | Trot = " << lme->trot
Expand Down Expand Up @@ -6506,6 +6514,10 @@ struct Expect {
<< " | cpsd = "
<< Parsing::to_size_string(
frame_<FPS>()->fp_codec->ncpsd * sizeof(FPS));
if (frame_<FPS>()->save_dir_quota != 0)
cout << " | quota-used = "
<< Parsing::to_size_string(
me->get_used_save_dir_size());
cout << " | Tasync = " << frame_<FPS>()->tasync << endl;
if (me != nullptr)
cout << " | Trot = " << me->trot << " | Tctr = " << me->tctr
Expand Down
5 changes: 5 additions & 0 deletions src/pybind/pybind_core.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ PYBIND11_MAKE_OPAQUE(vector<pair<long long int, long long int>>);
PYBIND11_MAKE_OPAQUE(unordered_map<int, int>);
PYBIND11_MAKE_OPAQUE(vector<unordered_map<int, int>>);
PYBIND11_MAKE_OPAQUE(unordered_map<int, pair<int, int>>);
PYBIND11_MAKE_OPAQUE(map<int, pair<string, size_t>>);
PYBIND11_MAKE_OPAQUE(vector<unordered_map<int, pair<int, int>>>);
PYBIND11_MAKE_OPAQUE(vector<pair<SpinOperator, uint16_t>>);
PYBIND11_MAKE_OPAQUE(vector<SpinPermTerm>);
Expand Down Expand Up @@ -1503,6 +1504,7 @@ template <typename S = void> void bind_data(py::module &m) {
py::bind_map<unordered_map<int, int>>(m, "MapIntInt");
py::bind_vector<vector<unordered_map<int, int>>>(m, "VectorMapIntInt");
py::bind_map<unordered_map<int, pair<int, int>>>(m, "MapIntPIntInt");
py::bind_map<map<int, pair<string, size_t>>>(m, "MapIntPStrULLInt");
py::bind_vector<vector<unordered_map<int, pair<int, int>>>>(
m, "VectorMapIntPIntInt");
py::bind_vector<vector<pair<double, string>>>(m, "VectorPDoubleStr");
Expand Down Expand Up @@ -2668,6 +2670,7 @@ template <typename FL> void bind_fl_io(py::module &m, const string &name) {
.def(py::init<FL, size_t>())
.def_readwrite("ndata", &FPCodec<FL>::ndata)
.def_readwrite("ncpsd", &FPCodec<FL>::ncpsd)
.def_readwrite("ncpsd_last", &FPCodec<FL>::ncpsd_last)
.def("encode",
[](FPCodec<FL> *self, py::array_t<FL> arr) {
FL *tmp = new FL[arr.size() + 2];
Expand Down Expand Up @@ -2761,6 +2764,8 @@ template <typename FL> void bind_fl_io(py::module &m, const string &name) {
&DataFrame<FL>::restart_dir_optimal_mps)
.def_readwrite("restart_dir_optimal_mps_per_sweep",
&DataFrame<FL>::restart_dir_optimal_mps_per_sweep)
.def_readwrite("save_dir_quota", &DataFrame<FL>::save_dir_quota)
.def_readwrite("alt_save_dir", &DataFrame<FL>::alt_save_dir)
.def_readwrite("prefix", &DataFrame<FL>::prefix)
.def_readwrite("prefix_distri", &DataFrame<FL>::prefix_distri)
.def_readwrite("prefix_can_write", &DataFrame<FL>::prefix_can_write)
Expand Down
6 changes: 6 additions & 0 deletions src/pybind/pybind_dmrg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,10 @@ void bind_fl_moving_environment(py::module &m, const string &name) {
&MovingEnvironment<S, FL, FLS>::lowmem_numerical_transform)
.def_readwrite("save_environments",
&MovingEnvironment<S, FL, FLS>::save_environments)
.def_readwrite("left_part_files",
&MovingEnvironment<S, FL, FLS>::left_part_files)
.def_readwrite("right_part_files",
&MovingEnvironment<S, FL, FLS>::right_part_files)
.def("left_contract_rotate",
&MovingEnvironment<S, FL, FLS>::left_contract_rotate)
.def("right_contract_rotate",
Expand Down Expand Up @@ -879,6 +883,8 @@ void bind_fl_moving_environment(py::module &m, const string &name) {
&MovingEnvironment<S, FL, FLS>::get_middle_archive_filename)
.def("get_right_archive_filename",
&MovingEnvironment<S, FL, FLS>::get_right_archive_filename)
.def("get_used_save_dir_size",
&MovingEnvironment<S, FL, FLS>::get_used_save_dir_size)
.def("get_left_partition_filename",
&MovingEnvironment<S, FL, FLS>::get_left_partition_filename)
.def("get_right_partition_filename",
Expand Down

0 comments on commit 5cc88cd

Please sign in to comment.