From 372f4175c602913b2e50fa117feee24a438c85a7 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 7 Oct 2024 17:26:31 +0800 Subject: [PATCH 01/46] create ilp_tmd_sw cuda files --- src/force/ilp_tmd_sw.cu | 0 src/force/ilp_tmd_sw.cuh | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/force/ilp_tmd_sw.cu create mode 100644 src/force/ilp_tmd_sw.cuh diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu new file mode 100644 index 000000000..e69de29bb diff --git a/src/force/ilp_tmd_sw.cuh b/src/force/ilp_tmd_sw.cuh new file mode 100644 index 000000000..e69de29bb From bec67a563c8bf9f47ba609ef3e568afdfe85d46e Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 7 Oct 2024 17:44:14 +0800 Subject: [PATCH 02/46] add ilp_tmd_sw_para in cuh --- src/force/ilp_tmd_sw.cuh | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/force/ilp_tmd_sw.cuh b/src/force/ilp_tmd_sw.cuh index e69de29bb..106f793f0 100644 --- a/src/force/ilp_tmd_sw.cuh +++ b/src/force/ilp_tmd_sw.cuh @@ -0,0 +1,39 @@ +/* + Copyright 2017 Zheyong Fan and GPUMD development team + This file is part of GPUMD. + GPUMD is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + GPUMD is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with GPUMD. If not, see . +*/ + +#pragma once +#include "potential.cuh" +#include +#include + +// MoS2 MoSe2 WSe2 MoSe2 +#define MAX_TYPE_ILP_TMD_SW 4 +#define CUDA_MAX_NL_TMD 2048 +#define MAX_ILP_NEIGHBOR_TMD 6 +#define MAX_BIG_ILP_NEIGHBOR_TMD 128 + +struct ILP_TMD_SW_Para { + float rcutsq_ilp[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; + float d[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; + float d_Seff[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; // d / S_R / r_eff + float C_6[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; + float z0[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; // beta + float lambda[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; // alpha / beta + float epsilon[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; + float C[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; + float delta2inv[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; // 1 / delta ^ 2 + float S[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; // scale + float rcut_global[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; // scale +}; \ No newline at end of file From 5053a0f1911a3ed2839c7510f6aa0a583a262b22 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 7 Oct 2024 17:45:16 +0800 Subject: [PATCH 03/46] add ilp_tmd_sw_data in cuh --- src/force/ilp_tmd_sw.cuh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/force/ilp_tmd_sw.cuh b/src/force/ilp_tmd_sw.cuh index 106f793f0..6a2bb17d7 100644 --- a/src/force/ilp_tmd_sw.cuh +++ b/src/force/ilp_tmd_sw.cuh @@ -36,4 +36,20 @@ struct ILP_TMD_SW_Para { float delta2inv[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; // 1 / delta ^ 2 float S[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; // scale float rcut_global[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; // scale +}; + +struct ILP_TMD_SW_Data { + GPU_Vector NN, NL; + GPU_Vector reduce_NL; + GPU_Vector big_ilp_NN, big_ilp_NL; + GPU_Vector ilp_NN, ilp_NL; + GPU_Vector cell_count; + GPU_Vector cell_count_sum; + GPU_Vector cell_contents; + GPU_Vector f12x; + GPU_Vector f12y; + GPU_Vector f12z; + GPU_Vector f12x_ilp_neigh; + GPU_Vector f12y_ilp_neigh; + GPU_Vector f12z_ilp_neigh; }; \ No newline at end of file From 8cbacb83cf5cdeb604c96f44dff9b287f84dcc06 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 7 Oct 2024 17:48:25 +0800 Subject: [PATCH 04/46] add ilp_tmd_sw class in cuh, but notes that there is no SW para and data --- src/force/ilp_tmd_sw.cuh | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/force/ilp_tmd_sw.cuh b/src/force/ilp_tmd_sw.cuh index 6a2bb17d7..f88e01b4c 100644 --- a/src/force/ilp_tmd_sw.cuh +++ b/src/force/ilp_tmd_sw.cuh @@ -52,4 +52,31 @@ struct ILP_TMD_SW_Data { GPU_Vector f12x_ilp_neigh; GPU_Vector f12y_ilp_neigh; GPU_Vector f12z_ilp_neigh; +}; + +class ILP_TMD_SW : public Potential +{ +public: + ILP_TMD_SW(FILE*, int, int); + virtual ~ILP_TMD_SW(void); + virtual void compute( + Box& box, + const GPU_Vector& type, + const GPU_Vector& position, + GPU_Vector& potential, + GPU_Vector& force, + GPU_Vector& virial); + + virtual void compute( + Box& box, + const GPU_Vector& type, + const GPU_Vector& position, + GPU_Vector& potential, + GPU_Vector& force, + GPU_Vector& virial, + std::vector &group); + +protected: + ILP_TMD_SW_Para ilp_para; + ILP_TMD_SW_Data ilp_data; }; \ No newline at end of file From d4b67f0ef84ca9e0d7682d0968ed4efc35431bd1 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 7 Oct 2024 18:21:31 +0800 Subject: [PATCH 05/46] add a new overloaded func: compute because ILPs need group message --- src/force/potential.cuh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/force/potential.cuh b/src/force/potential.cuh index 2d3e81efc..e7df273fb 100644 --- a/src/force/potential.cuh +++ b/src/force/potential.cuh @@ -46,6 +46,16 @@ public: GPU_Vector& force, GPU_Vector& virial){}; + // add group message for ILP + virtual void compute( + Box& box, + const GPU_Vector& type, + const GPU_Vector& position, + GPU_Vector& potential, + GPU_Vector& force, + GPU_Vector& virial, + std::vector& group); + protected: void find_properties_many_body( Box& box, From 205b135abe9d1c4b583bb0cfc4d655e3ff611ae8 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 7 Oct 2024 18:30:24 +0800 Subject: [PATCH 06/46] add ilp flag to check if computing ilps potential --- src/force/potential.cuh | 1 + 1 file changed, 1 insertion(+) diff --git a/src/force/potential.cuh b/src/force/potential.cuh index e7df273fb..e7fc10c0d 100644 --- a/src/force/potential.cuh +++ b/src/force/potential.cuh @@ -26,6 +26,7 @@ public: double rc; // maximum cutoff distance int nep_model_type = -1; // -1 for non_nep, 0 for potential, 1 for dipole, 2 for polarizability, 3 for temperature + int ilp_flag = 0; // 0 for non_ilp, 1 for ilp Potential(void); virtual ~Potential(void); From b9750288d5a247e95768fcc07887f9119c7be531 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 7 Oct 2024 18:39:59 +0800 Subject: [PATCH 07/46] add ilp_tmd_sw to force.cu and identify ilps by ilp_flag --- src/force/force.cu | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/force/force.cu b/src/force/force.cu index 8901fa81c..5e2382b1b 100644 --- a/src/force/force.cu +++ b/src/force/force.cu @@ -25,6 +25,7 @@ The driver class calculating force and related quantities. #include "tersoff1988.cuh" #include "tersoff1989.cuh" #include "tersoff_mini.cuh" +#include "ilp_tmd_sw.cuh" #include "utilities/common.cuh" #include "utilities/error.cuh" #include "utilities/read_file.cuh" @@ -131,6 +132,8 @@ void Force::parse_potential( check_types(param[1]); } else if (strcmp(potential_name, "lj") == 0) { potential.reset(new LJ(fid_potential, num_types, number_of_atoms)); + } else if (strcmp(potential_name, "ilp_tmd_sw") == 0) { + potential.reset(new ILP_TMD_SW(fid_potential, num_types, number_of_atoms)); } else { PRINT_INPUT_ERROR("illegal potential model.\n"); } @@ -477,6 +480,10 @@ void Force::compute( potential_per_atom, force_per_atom, virial_per_atom); + } else if (1 == potentials[0]->ilp_flag) { + // compute the potential with ILP + potentials[0]->compute( + box, type, position_per_atom, potential_per_atom, force_per_atom, virial_per_atom, group); } else { potentials[0]->compute( box, type, position_per_atom, potential_per_atom, force_per_atom, virial_per_atom); @@ -494,6 +501,10 @@ void Force::compute( potential_per_atom, force_per_atom, virial_per_atom); + } else if (1 == potentials[i]->ilp_flag) { + // compute the potential with ILP + potentials[i]->compute( + box, type, position_per_atom, potential_per_atom, force_per_atom, virial_per_atom, group); } else { potentials[i]->compute( box, type, position_per_atom, potential_per_atom, force_per_atom, virial_per_atom); @@ -764,6 +775,10 @@ void Force::compute( potential_per_atom, force_per_atom, virial_per_atom); + } else if (1 == potentials[0]->ilp_flag) { + // compute the potential with ILP + potentials[0]->compute( + box, type, position_per_atom, potential_per_atom, force_per_atom, virial_per_atom, group); } else { potentials[0]->compute( box, type, position_per_atom, potential_per_atom, force_per_atom, virial_per_atom); @@ -781,6 +796,10 @@ void Force::compute( potential_per_atom, force_per_atom, virial_per_atom); + } else if (1 == potentials[i]->ilp_flag) { + // compute the potential with ILP + potentials[i]->compute( + box, type, position_per_atom, potential_per_atom, force_per_atom, virial_per_atom, group); } else { potentials[i]->compute( box, type, position_per_atom, potential_per_atom, force_per_atom, virial_per_atom); From d0b9d7ada1cd6d5d66f035f366c3f8564300f38c Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 7 Oct 2024 18:41:17 +0800 Subject: [PATCH 08/46] add tapper func for ilp_tmd_sw --- src/force/ilp_tmd_sw.cuh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/force/ilp_tmd_sw.cuh b/src/force/ilp_tmd_sw.cuh index f88e01b4c..84f2ef468 100644 --- a/src/force/ilp_tmd_sw.cuh +++ b/src/force/ilp_tmd_sw.cuh @@ -79,4 +79,6 @@ public: protected: ILP_TMD_SW_Para ilp_para; ILP_TMD_SW_Data ilp_data; -}; \ No newline at end of file +}; + +static __constant__ float Tap_coeff_tmd[8]; \ No newline at end of file From 8d1b3dbcdfca9dde66981c33afba138344107c1e Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 7 Oct 2024 18:53:21 +0800 Subject: [PATCH 09/46] add construct func for ilp_tmd_sw but no sw para --- src/force/ilp_tmd_sw.cu | 103 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index e69de29bb..a84efc7a0 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -0,0 +1,103 @@ +/* + Copyright 2017 Zheyong Fan and GPUMD development team + This file is part of GPUMD. + GPUMD is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + GPUMD is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with GPUMD. If not, see . +*/ + +/*----------------------------------------------------------------------------80 +The class dealing with the interlayer potential(ILP) and SW. +TODO: +------------------------------------------------------------------------------*/ + +#include "ilp_tmd_sw.cuh" +#include "neighbor.cuh" +#include "utilities/error.cuh" + +#define BLOCK_SIZE_FORCE 128 + +// there are most 6 intra-layer neighbors for TMD +#define NNEI 6 + +ILP_TMD_SW::ILP_TMD_SW(FILE* fid, int num_types, int num_atoms) +{ + printf("Use %d-element ILP potential with elements:\n", num_types); + if (!(num_types >= 1 && num_types <= MAX_TYPE_ILP_TMD_SW)) { + PRINT_INPUT_ERROR("Incorrect type number of ILP_TMD_SW parameters.\n"); + } + for (int n = 0; n < num_types; ++n) { + char atom_symbol[10]; + int count = fscanf(fid, "%s", atom_symbol); + PRINT_SCANF_ERROR(count, 1, "Reading error for ILP_TMD_SW potential."); + printf(" %s", atom_symbol); + } + printf("\n"); + + // read parameters + float beta, alpha, delta, epsilon, C, d, sR; + float reff, C6, S, rcut_ilp, rcut_global; + rc = 0.0; + for (int n = 0; n < num_types; ++n) { + for (int m = 0; m < num_types; ++m) { + int count = fscanf(fid, "%f%f%f%f%f%f%f%f%f%f%f%f", \ + &beta, &alpha, &delta, &epsilon, &C, &d, &sR, &reff, &C6, &S, \ + &rcut_ilp, &rcut_global); + PRINT_SCANF_ERROR(count, 12, "Reading error for ILP_TMD_SW potential."); + + ilp_para.C[n][m] = C; + ilp_para.C_6[n][m] = C6; + ilp_para.d[n][m] = d; + ilp_para.d_Seff[n][m] = d / sR / reff; + ilp_para.epsilon[n][m] = epsilon; + ilp_para.z0[n][m] = beta; + ilp_para.lambda[n][m] = alpha / beta; + ilp_para.delta2inv[n][m] = 1.0 / (delta * delta); + ilp_para.S[n][m] = S; + ilp_para.rcutsq_ilp[n][m] = rcut_ilp * rcut_ilp; + ilp_para.rcut_global[n][m] = rcut_global; + float meV = 1e-3 * S; + ilp_para.C[n][m] *= meV; + ilp_para.C_6[n][m] *= meV; + ilp_para.epsilon[n][m] *= meV; + + if (rc < rcut_global) + rc = rcut_global; + } + } + + int max_neighbor_number = min(num_atoms, CUDA_MAX_NL_TMD); + ilp_data.NN.resize(num_atoms); + ilp_data.NL.resize(num_atoms * max_neighbor_number); + ilp_data.cell_count.resize(num_atoms); + ilp_data.cell_count_sum.resize(num_atoms); + ilp_data.cell_contents.resize(num_atoms); + + // init ilp neighbor list + ilp_data.ilp_NN.resize(num_atoms); + ilp_data.ilp_NL.resize(num_atoms * MAX_ILP_NEIGHBOR_TMD); + ilp_data.reduce_NL.resize(num_atoms * max_neighbor_number); + ilp_data.big_ilp_NN.resize(num_atoms); + ilp_data.big_ilp_NL.resize(num_atoms * MAX_BIG_ILP_NEIGHBOR_TMD); + + ilp_data.f12x.resize(num_atoms * max_neighbor_number); + ilp_data.f12y.resize(num_atoms * max_neighbor_number); + ilp_data.f12z.resize(num_atoms * max_neighbor_number); + + ilp_data.f12x_ilp_neigh.resize(num_atoms * MAX_ILP_NEIGHBOR_TMD); + ilp_data.f12y_ilp_neigh.resize(num_atoms * MAX_ILP_NEIGHBOR_TMD); + ilp_data.f12z_ilp_neigh.resize(num_atoms * MAX_ILP_NEIGHBOR_TMD); + + // init constant cutoff coeff + float h_tap_coeff[8] = \ + {1.0f, 0.0f, 0.0f, 0.0f, -35.0f, 84.0f, -70.0f, 20.0f}; + cudaMemcpyToSymbol(Tap_coeff_tmd, h_tap_coeff, 8 * sizeof(float)); + CUDA_CHECK_KERNEL +} \ No newline at end of file From ff2e169a38c10793b9743ca43414866bd57ab320 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 7 Oct 2024 18:54:32 +0800 Subject: [PATCH 10/46] set ilp flag to 1 in construction func --- src/force/ilp_tmd_sw.cu | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index a84efc7a0..093907236 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -100,4 +100,7 @@ ILP_TMD_SW::ILP_TMD_SW(FILE* fid, int num_types, int num_atoms) {1.0f, 0.0f, 0.0f, 0.0f, -35.0f, 84.0f, -70.0f, 20.0f}; cudaMemcpyToSymbol(Tap_coeff_tmd, h_tap_coeff, 8 * sizeof(float)); CUDA_CHECK_KERNEL + + // set ilp_flag to 1 + ilp_flag = 1; } \ No newline at end of file From 4a2e6da3c202a63dfbd4e88786fac803858dc208 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 7 Oct 2024 22:05:45 +0800 Subject: [PATCH 11/46] add destruction func for ilp_tmd_sw --- src/force/ilp_tmd_sw.cu | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index 093907236..11f316b2b 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -103,4 +103,9 @@ ILP_TMD_SW::ILP_TMD_SW(FILE* fid, int num_types, int num_atoms) // set ilp_flag to 1 ilp_flag = 1; -} \ No newline at end of file +} + +ILP_TMD_SW::~ILP_TMD_SW(void) +{ + // nothing +} From 92723dce1e96c63e442ea84e5d7d89f6decfd6d4 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 7 Oct 2024 23:00:21 +0800 Subject: [PATCH 12/46] add tapper func for ilp_tmd_sw --- src/force/ilp_tmd_sw.cu | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index 11f316b2b..2ddefb481 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -109,3 +109,40 @@ ILP_TMD_SW::~ILP_TMD_SW(void) { // nothing } + +// calculate the long-range cutoff term +static __device__ __forceinline__ float calc_Tap(const float r_ij, const float Rcutinv) +{ + float Tap, r; + + r = r_ij * Rcutinv; + if (r >= 1.0f) { + Tap = 0.0f; + } else { + Tap = Tap_coeff_tmd[7]; + for (int i = 6; i >= 0; --i) { + Tap = Tap * r + Tap_coeff_tmd[i]; + } + } + + return Tap; +} + +// calculate the derivatives of long-range cutoff term +static __device__ __forceinline__ float calc_dTap(const float r_ij, const float Rcut, const float Rcutinv) +{ + float dTap, r; + + r = r_ij * Rcutinv; + if (r >= Rcut) { + dTap = 0.0f; + } else { + dTap = 7.0f * Tap_coeff_tmd[7]; + for (int i = 6; i > 0; --i) { + dTap = dTap * r + i * Tap_coeff_tmd[i]; + } + dTap *= Rcutinv; + } + + return dTap; +} \ No newline at end of file From 40cf0b77a934257540e618c136ea2f62c9ed587a Mon Sep 17 00:00:00 2001 From: bhk Date: Tue, 8 Oct 2024 20:26:27 +0800 Subject: [PATCH 13/46] add ilp_neighbor func for ilp_tmd_sw --- src/force/ilp_tmd_sw.cu | 128 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index 2ddefb481..1badf00e7 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -145,4 +145,132 @@ static __device__ __forceinline__ float calc_dTap(const float r_ij, const float } return dTap; +} + +// create ILP neighbor list from main neighbor list to calculate normals +static __global__ void ILP_neighbor( + const int number_of_particles, + const int N1, + const int N2, + const Box box, + const int *g_neighbor_number, + const int *g_neighbor_list, + const int *g_type, + ILP_TMD_SW_Para ilp_para, + const double* __restrict__ g_x, + const double* __restrict__ g_y, + const double* __restrict__ g_z, + int *ilp_neighbor_number, + int *ilp_neighbor_list, + const int *group_label) +{ + int n1 = blockIdx.x * blockDim.x + threadIdx.x + N1; // particle index + + if (n1 < N2) { + // TMD + int neighptr[10], check[10], neighsort[10]; + for (int ll = 0; ll < 10; ++ll) { + neighptr[ll] = -1; + neighsort[ll] = -1; + check[ll] = -1; + } + + int count = 0; + int neighbor_number = g_neighbor_number[n1]; + int type1 = g_type[n1]; + double x1 = g_x[n1]; + double y1 = g_y[n1]; + double z1 = g_z[n1]; + + for (int i1 = 0; i1 < neighbor_number; ++i1) { + int n2 = g_neighbor_list[n1 + number_of_particles * i1]; + int type2 = g_type[n2]; + + double x12 = g_x[n2] - x1; + double y12 = g_y[n2] - y1; + double z12 = g_z[n2] - z1; + apply_mic(box, x12, y12, z12); + double d12sq = x12 * x12 + y12 * y12 + z12 * z12; + double rcutsq = ilp_para.rcutsq_ilp[type1][type2]; + + + if (group_label[n1] == group_label[n2] && d12sq < rcutsq && type1 == type2 && d12sq != 0) { + // ilp_neighbor_list[count++ * number_of_particles + n1] = n2; + neighptr[count++] = n2; + } + } + + // TMD + for (int ll = 0; ll < count; ++ll) { + neighsort[ll] = neighptr[ll]; + check[ll] = neighptr[ll]; + } + + // TMD + if (count == NNEI) { + neighsort[0] = neighptr[0]; + check[0] = -1; + } else if (count < NNEI && count > 0) { + for (int jj = 0; jj < count; ++jj) { + int j = neighptr[jj]; + int jtype = g_type[j]; + int count_temp = 0; + for (int ll = 0; ll < count; ++ll) { + int l = neighptr[ll]; + int ltype = g_type[l]; + if (l == j) continue; + double deljx = g_x[l] - g_x[j]; + double deljy = g_y[l] - g_y[j]; + double deljz = g_z[l] - g_z[j]; + apply_mic(box, deljx, deljy, deljz); + double rsqlj = deljx * deljx + deljy * deljy + deljz * deljz; + if (rsqlj != 0 && rsqlj < ilp_para.rcutsq_ilp[ltype][jtype]) { + ++count_temp; + } + + } + if (count_temp == 1) { + neighsort[0] = neighptr[jj]; + check[jj] = -1; + break; + } + } + } else if (count > NNEI) { + printf("ERROR in ILP NEIGHBOR LIST\n"); + printf("\n===== ILP neighbor number[%d] is greater than 6 =====\n", count); + return; + } + + // TMD + // sort the order of neighbors of atom n1 + for (int jj = 0; jj < count; ++jj) { + int j = neighsort[jj]; + int jtype = g_type[j]; + int ll = 0; + while (ll < count) { + int l = neighptr[ll]; + if (check[ll] == -1) { + ++ll; + continue; + } + int ltype = g_type[l]; + double deljx = g_x[l] - g_x[j]; + double deljy = g_y[l] - g_y[j]; + double deljz = g_z[l] - g_z[j]; + apply_mic(box, deljx, deljy, deljz); + double rsqlj = deljx * deljx + deljy * deljy + deljz * deljz; + + if (abs(rsqlj) >= 1e-6 && rsqlj < ilp_para.rcutsq_ilp[ltype][jtype]) { + neighsort[jj + 1] = l; + check[ll] = -1; + break; + } + ++ll; + } + } + ilp_neighbor_number[n1] = count; + for (int jj = 0; jj < count; ++jj) { + ilp_neighbor_list[jj * number_of_particles + n1] = neighsort[jj]; + } + } } \ No newline at end of file From a8098a6441bb933569302f4e918709b9bce83cec Mon Sep 17 00:00:00 2001 From: bhk Date: Tue, 8 Oct 2024 20:28:22 +0800 Subject: [PATCH 14/46] add modulo func for ilp_tmd_sw --- src/force/ilp_tmd_sw.cu | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index 1badf00e7..b1d2fd14d 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -273,4 +273,10 @@ static __global__ void ILP_neighbor( ilp_neighbor_list[jj * number_of_particles + n1] = neighsort[jj]; } } +} + +// modulo func to change atom index +static __device__ __forceinline__ int modulo(int k, int range) +{ + return (k + range) % range; } \ No newline at end of file From 8fb393a377b3d6d336de45ada947d51c62e35231 Mon Sep 17 00:00:00 2001 From: bhk Date: Tue, 8 Oct 2024 20:36:40 +0800 Subject: [PATCH 15/46] add calc_norm for ilp_tmd_sw --- src/force/ilp_tmd_sw.cu | 249 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 249 insertions(+) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index b1d2fd14d..881297990 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -279,4 +279,253 @@ static __global__ void ILP_neighbor( static __device__ __forceinline__ int modulo(int k, int range) { return (k + range) % range; +} + +// calculate the normals and its derivatives +static __device__ void calc_normal( + float (&vect)[NNEI][3], + int cont, + float (&normal)[3], + float (&dnormdri)[3][3], + float (&dnormal)[3][NNEI][3]) +{ + int id, ip, m; + float dni[3]; + float dnn[3][3], dpvdri[3][3]; + float Nave[3], pvet[NNEI][3], dpvet1[NNEI][3][3], dpvet2[NNEI][3][3], dNave[3][NNEI][3]; + + float nninv; + + // initialize the arrays + for (id = 0; id < 3; id++) { + dni[id] = 0.0f; + + Nave[id] = 0.0f; + for (ip = 0; ip < 3; ip++) { + dpvdri[ip][id] = 0.0f; + for (m = 0; m < NNEI; m++) { + dnn[m][id] = 0.0f; + pvet[m][id] = 0.0f; + dpvet1[m][ip][id] = 0.0f; + dpvet2[m][ip][id] = 0.0f; + dNave[id][m][ip] = 0.0f; + } + } + } + + if (cont <= 1) { + normal[0] = 0.0f; + normal[1] = 0.0f; + normal[2] = 1.0f; + for (id = 0; id < 3; ++id) { + for (ip = 0; ip < 3; ++ip) { + dnormdri[id][ip] = 0.0f; + for (m = 0; m < NNEI; ++m) { + dnormal[id][m][ip] = 0.0f; + } + } + } + } else if (cont > 1 && cont < NNEI) { + for (int k = 0; k < cont - 1; ++k) { + for (ip = 0; ip < 3; ++ip) { + pvet[k][ip] = vect[k][modulo(ip + 1, 3)] * vect[k + 1][modulo(ip + 2, 3)] - + vect[k][modulo(ip + 2, 3)] * vect[k + 1][modulo(ip + 1, 3)]; + } + // dpvet1[k][l][ip]: the derivatve of the k (=0,...cont-1)th Nik respect to the ip component of atom l + // derivatives respect to atom l + // dNik,x/drl + dpvet1[k][0][0] = 0.0f; + dpvet1[k][0][1] = vect[modulo(k + 1, NNEI)][2]; + dpvet1[k][0][2] = -vect[modulo(k + 1, NNEI)][1]; + // dNik,y/drl + dpvet1[k][1][0] = -vect[modulo(k + 1, NNEI)][2]; + dpvet1[k][1][1] = 0.0f; + dpvet1[k][1][2] = vect[modulo(k + 1, NNEI)][0]; + // dNik,z/drl + dpvet1[k][2][0] = vect[modulo(k + 1, NNEI)][1]; + dpvet1[k][2][1] = -vect[modulo(k + 1, NNEI)][0]; + dpvet1[k][2][2] = 0.0f; + + // dpvet2[k][l][ip]: the derivatve of the k (=0,...cont-1)th Nik respect to the ip component of atom l+1 + // derivatives respect to atom l+1 + // dNik,x/drl+1 + dpvet2[k][0][0] = 0.0f; + dpvet2[k][0][1] = -vect[modulo(k, NNEI)][2]; + dpvet2[k][0][2] = vect[modulo(k, NNEI)][1]; + // dNik,y/drl+1 + dpvet2[k][1][0] = vect[modulo(k, NNEI)][2]; + dpvet2[k][1][1] = 0.0f; + dpvet2[k][1][2] = -vect[modulo(k, NNEI)][0]; + // dNik,z/drl+1 + dpvet2[k][2][0] = -vect[modulo(k, NNEI)][1]; + dpvet2[k][2][1] = vect[modulo(k, NNEI)][0]; + dpvet2[k][2][2] = 0.0f; + } + + // average the normal vectors by using the NNEI neighboring planes + for (ip = 0; ip < 3; ip++) { + Nave[ip] = 0.0f; + for (int k = 0; k < cont - 1; k++) { + Nave[ip] += pvet[k][ip]; + } + Nave[ip] /= (cont - 1); + } + nninv = rnorm3df(Nave[0], Nave[1], Nave[2]); + + // the unit normal vector + normal[0] = Nave[0] * nninv; + normal[1] = Nave[1] * nninv; + normal[2] = Nave[2] * nninv; + + // derivatives of non-normalized normal vector, dNave:3xcontx3 array + // dNave[id][m][ip]: the derivatve of the id component of Nave respect to the ip component of atom m + for (id = 0; id < 3; id++) { + for (ip = 0; ip < 3; ip++) { + for (m = 0; m < cont; m++) { + if (m == 0) { + dNave[id][m][ip] = dpvet1[m][id][ip] / (cont - 1); + } else if (m == cont - 1) { + dNave[id][m][ip] = dpvet2[m - 1][id][ip] / (cont - 1); + } else { // sum of the derivatives of the mth and (m-1)th normal vector respect to the atom m + dNave[id][m][ip] = (dpvet1[m][id][ip] + dpvet2[m - 1][id][ip]) / (cont - 1); + } + } + } + } + // derivatives of nn, dnn:contx3 vector + // dnn[m][id]: the derivative of nn respect to r[m][id], m=0,...NNEI-1; id=0,1,2 + // r[m][id]: the id's component of atom m + for (m = 0; m < cont; m++) { + for (id = 0; id < 3; id++) { + dnn[m][id] = (Nave[0] * dNave[0][m][id] + Nave[1] * dNave[1][m][id] + + Nave[2] * dNave[2][m][id]) * nninv; + } + } + // dnormal[i][id][m][ip]: the derivative of normal[i][id] respect to r[m][ip], id,ip=0,1,2. + // for atom m, which is a neighbor atom of atom i, m = 0,...,NNEI-1 + for (m = 0; m < cont; m++) { + for (id = 0; id < 3; id++) { + for (ip = 0; ip < 3; ip++) { + dnormal[id][m][ip] = dNave[id][m][ip] * nninv - Nave[id] * dnn[m][ip] * nninv * nninv; + } + } + } + // Calculte dNave/dri, defined as dpvdri + for (id = 0; id < 3; id++) { + for (ip = 0; ip < 3; ip++) { + dpvdri[id][ip] = 0.0; + for (int k = 0; k < cont; k++) { + dpvdri[id][ip] -= dNave[id][k][ip]; + } + } + } + + // derivatives of nn, dnn:3x1 vector + dni[0] = (Nave[0] * dpvdri[0][0] + Nave[1] * dpvdri[1][0] + Nave[2] * dpvdri[2][0]) * nninv; + dni[1] = (Nave[0] * dpvdri[0][1] + Nave[1] * dpvdri[1][1] + Nave[2] * dpvdri[2][1]) * nninv; + dni[2] = (Nave[0] * dpvdri[0][2] + Nave[1] * dpvdri[1][2] + Nave[2] * dpvdri[2][2]) * nninv; + // derivatives of unit vector ni respect to ri, the result is 3x3 matrix + for (id = 0; id < 3; id++) { + for (ip = 0; ip < 3; ip++) { + dnormdri[id][ip] = dpvdri[id][ip] * nninv - Nave[id] * dni[ip] * nninv * nninv; + } + } + } else if (cont == NNEI) { + // derivatives of Ni[l] respect to the NNEI neighbors + for (int k = 0; k < NNEI; ++k) { + for (ip = 0; ip < 3; ++ip) { + pvet[k][ip] = vect[modulo(k, NNEI)][modulo(ip + 1, 3)] * + vect[modulo(k + 1, NNEI)][modulo(ip + 2, 3)] - + vect[modulo(k, NNEI)][modulo(ip + 2, 3)] * + vect[modulo(k + 1, NNEI)][modulo(ip + 1, 3)]; + } + // dpvet1[k][l][ip]: the derivatve of the k (=0,...cont-1)th Nik respect to the ip component of atom l + // derivatives respect to atom l + // dNik,x/drl + dpvet1[k][0][0] = 0.0f; + dpvet1[k][0][1] = vect[modulo(k + 1, NNEI)][2]; + dpvet1[k][0][2] = -vect[modulo(k + 1, NNEI)][1]; + // dNik,y/drl + dpvet1[k][1][0] = -vect[modulo(k + 1, NNEI)][2]; + dpvet1[k][1][1] = 0.0f; + dpvet1[k][1][2] = vect[modulo(k + 1, NNEI)][0]; + // dNik,z/drl + dpvet1[k][2][0] = vect[modulo(k + 1, NNEI)][1]; + dpvet1[k][2][1] = -vect[modulo(k + 1, NNEI)][0]; + dpvet1[k][2][2] = 0.0f; + + // dpvet2[k][l][ip]: the derivatve of the k (=0,...cont-1)th Nik respect to the ip component of atom l+1 + // derivatives respect to atom l+1 + // dNik,x/drl+1 + dpvet2[k][0][0] = 0.0f; + dpvet2[k][0][1] = -vect[modulo(k, NNEI)][2]; + dpvet2[k][0][2] = vect[modulo(k, NNEI)][1]; + // dNik,y/drl+1 + dpvet2[k][1][0] = vect[modulo(k, NNEI)][2]; + dpvet2[k][1][1] = 0.0f; + dpvet2[k][1][2] = -vect[modulo(k, NNEI)][0]; + // dNik,z/drl+1 + dpvet2[k][2][0] = -vect[modulo(k, NNEI)][1]; + dpvet2[k][2][1] = vect[modulo(k, NNEI)][0]; + dpvet2[k][2][2] = 0.0f; + } + + // average the normal vectors by using the NNEI neighboring planes + for (ip = 0; ip < 3; ++ip) { + Nave[ip] = 0.0f; + for (int k = 0; k < NNEI; ++k) { + Nave[ip] += pvet[k][ip]; + } + Nave[ip] /= NNEI; + } + // the magnitude of the normal vector + // nn2 = Nave[0] * Nave[0] + Nave[1] * Nave[1] + Nave[2] * Nave[2]; + nninv = rnorm3df(Nave[0], Nave[1], Nave[2]); + // the unit normal vector + normal[0] = Nave[0] * nninv; + normal[1] = Nave[1] * nninv; + normal[2] = Nave[2] * nninv; + + // for the central atoms, dnormdri is always zero + for (id = 0; id < 3; ++id) { + for (ip = 0; ip < 3; ++ip) { + dnormdri[id][ip] = 0.0f; + } + } + + // derivatives of non-normalized normal vector, dNave:3xNNEIx3 array + // dNave[id][m][ip]: the derivatve of the id component of Nave respect to the ip component of atom m + for (id = 0; id < 3; ++id) { + for (ip = 0; ip < 3; ++ip) { + for ( + m = 0; m < NNEI; + ++m) { // sum of the derivatives of the mth and (m-1)th normal vector respect to the atom m + dNave[id][m][ip] = + (dpvet1[modulo(m, NNEI)][id][ip] + dpvet2[modulo(m - 1, NNEI)][id][ip]) / NNEI; + } + } + } + // derivatives of nn, dnn:NNEIx3 vector + // dnn[m][id]: the derivative of nn respect to r[m][id], m=0,...NNEI-1; id=0,1,2 + // r[m][id]: the id's component of atom m + for (m = 0; m < NNEI; ++m) { + for (id = 0; id < 3; ++id) { + dnn[m][id] = + (Nave[0] * dNave[0][m][id] + Nave[1] * dNave[1][m][id] + Nave[2] * dNave[2][m][id]) * + nninv; + } + } + // dnormal[i][id][m][ip]: the derivative of normal[i][id] respect to r[m][ip], id,ip=0,1,2. + // for atom m, which is a neighbor atom of atom i, m = 0,...,NNEI-1 + for (m = 0; m < NNEI; ++m) { + for (id = 0; id < 3; ++id) { + for (ip = 0; ip < 3; ++ip) { + dnormal[id][m][ip] = dNave[id][m][ip] * nninv - Nave[id] * dnn[m][ip] * nninv * nninv; + } + } + } + } else { + printf("\n===== ILP neighbor number[%d] is greater than 6 =====\n", cont); + return; + } } \ No newline at end of file From e2f684a388bf91580981b8b1e09943fd8fdd933b Mon Sep 17 00:00:00 2001 From: bhk Date: Tue, 8 Oct 2024 20:41:21 +0800 Subject: [PATCH 16/46] add calc_vdw for ilp_tmd_sw --- src/force/ilp_tmd_sw.cu | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index 881297990..86834cc59 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -528,4 +528,41 @@ static __device__ void calc_normal( printf("\n===== ILP neighbor number[%d] is greater than 6 =====\n", cont); return; } +} + +// calculate the van der Waals force and energy +static __device__ void calc_vdW( + float r, + float rinv, + float rsq, + float d, + float d_Seff, + float C_6, + float Tap, + float dTap, + float &p2_vdW, + float &f2_vdW) +{ + float r2inv, r6inv, r8inv; + float TSvdw, TSvdwinv, Vilp; + float fpair, fsum; + + r2inv = 1.0f / rsq; + r6inv = r2inv * r2inv * r2inv; + r8inv = r2inv * r6inv; + + // TSvdw = 1.0 + exp(-d_Seff * r + d); + TSvdw = 1.0f + expf(-d_Seff * r + d); + TSvdwinv = 1.0f / TSvdw; + Vilp = -C_6 * r6inv * TSvdwinv; + + // derivatives + // fpair = -6.0 * C_6 * r8inv * TSvdwinv + \ + // C_6 * d_Seff * (TSvdw - 1.0) * TSvdwinv * TSvdwinv * r8inv * r; + fpair = (-6.0f + d_Seff * (TSvdw - 1.0f) * TSvdwinv * r ) * C_6 * TSvdwinv * r8inv; + fsum = fpair * Tap - Vilp * dTap * rinv; + + p2_vdW = Tap * Vilp; + f2_vdW = fsum; + } \ No newline at end of file From cbd6ad7296edd588b55f7c75f9afc486baf0309b Mon Sep 17 00:00:00 2001 From: bhk Date: Tue, 8 Oct 2024 20:46:53 +0800 Subject: [PATCH 17/46] add gpu_find_force for ilp_tmd_sw --- src/force/ilp_tmd_sw.cu | 334 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 334 insertions(+) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index 86834cc59..a1fc9becd 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -565,4 +565,338 @@ static __device__ void calc_vdW( p2_vdW = Tap * Vilp; f2_vdW = fsum; +} + +// force evaluation kernel +static __global__ void gpu_find_force( + ILP_TMD_SW_Para ilp_para, + const int number_of_particles, + const int N1, + const int N2, + const Box box, + const int *g_neighbor_number, + const int *g_neighbor_list, + int *g_ilp_neighbor_number, + int *g_ilp_neighbor_list, + const int *group_label, + const int *g_type, + const double *__restrict__ g_x, + const double *__restrict__ g_y, + const double *__restrict__ g_z, + double *g_fx, + double *g_fy, + double *g_fz, + double *g_virial, + double *g_potential, + float *g_f12x, + float *g_f12y, + float *g_f12z, + float *g_f12x_ilp_neigh, + float *g_f12y_ilp_neigh, + float *g_f12z_ilp_neigh) +{ + int n1 = blockIdx.x * blockDim.x + threadIdx.x + N1; // particle index + float s_fx = 0.0f; // force_x + float s_fy = 0.0f; // force_y + float s_fz = 0.0f; // force_z + float s_pe = 0.0f; // potential energy + float s_sxx = 0.0f; // virial_stress_xx + float s_sxy = 0.0f; // virial_stress_xy + float s_sxz = 0.0f; // virial_stress_xz + float s_syx = 0.0f; // virial_stress_yx + float s_syy = 0.0f; // virial_stress_yy + float s_syz = 0.0f; // virial_stress_yz + float s_szx = 0.0f; // virial_stress_zx + float s_szy = 0.0f; // virial_stress_zy + float s_szz = 0.0f; // virial_stress_zz + + float r = 0.0f; + float rsq = 0.0f; + float Rcut = 0.0f; + + if (n1 < N2) { + double x12d, y12d, z12d; + float x12f, y12f, z12f; + int neighor_number = g_neighbor_number[n1]; + int type1 = g_type[n1]; + double x1 = g_x[n1]; + double y1 = g_y[n1]; + double z1 = g_z[n1]; + + float delkix_half[NNEI] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; + float delkiy_half[NNEI] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; + float delkiz_half[NNEI] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; + + // calculate the normal + int cont = 0; + float normal[3]; + float dnormdri[3][3]; + float dnormal[3][NNEI][3]; + + float vet[NNEI][3]; + int id, ip, m; + for (id = 0; id < 3; ++id) { + normal[id] = 0.0f; + for (ip = 0; ip < 3; ++ip) { + dnormdri[ip][id] = 0.0f; + for (m = 0; m < NNEI; ++m) { + dnormal[id][m][ip] = 0.0f; + vet[m][id] = 0.0f; + } + } + } + + int ilp_neighbor_number = g_ilp_neighbor_number[n1]; + for (int i1 = 0; i1 < ilp_neighbor_number; ++i1) { + int n2_ilp = g_ilp_neighbor_list[n1 + number_of_particles * i1]; + x12d = g_x[n2_ilp] - x1; + y12d = g_y[n2_ilp] - y1; + z12d = g_z[n2_ilp] - z1; + apply_mic(box, x12d, y12d, z12d); + vet[cont][0] = float(x12d); + vet[cont][1] = float(y12d); + vet[cont][2] = float(z12d); + ++cont; + + delkix_half[i1] = float(x12d) * 0.5f; + delkiy_half[i1] = float(y12d) * 0.5f; + delkiz_half[i1] = float(z12d) * 0.5f; + } + + calc_normal(n1, vet, cont, normal, dnormdri, dnormal); + + // calculate energy and force + double tt1,tt2,tt3; + for (int i1 = 0; i1 < neighor_number; ++i1) { + int index = n1 + number_of_particles * i1; + int n2 = g_neighbor_list[index]; + int type2 = g_type[n2]; + + tt1 = g_x[n2]; + tt2 = g_y[n2]; + tt3 = g_z[n2]; + x12d = tt1 - x1; + y12d = tt2 - y1; + z12d = tt3 - z1; + apply_mic(box, x12d, y12d, z12d); + + // save x12, y12, z12 in float + x12f = float(x12d); + y12f = float(y12d); + z12f = float(z12d); + + // calculate distance between atoms + rsq = x12f * x12f + y12f * y12f + z12f * z12f; + r = sqrtf(rsq); + Rcut = ilp_para.rcut_global[type1][type2]; + + if (r >= Rcut) { + continue; + } + + // calc att + float Tap, dTap, rinv; + float Rcutinv = 1.0f / Rcut; + rinv = 1.0f / r; + Tap = calc_Tap(r, Rcutinv); + dTap = calc_dTap(r, Rcut, Rcutinv); + + float p2_vdW, f2_vdW; + calc_vdW( + r, + rinv, + rsq, + ilp_para.d[type1][type2], + ilp_para.d_Seff[type1][type2], + ilp_para.C_6[type1][type2], + Tap, + dTap, + p2_vdW, + f2_vdW); + + float f12x = -f2_vdW * x12f * 0.5f; + float f12y = -f2_vdW * y12f * 0.5f; + float f12z = -f2_vdW * z12f * 0.5f; + float f21x = -f12x; + float f21y = -f12y; + float f21z = -f12z; + + s_fx += f12x - f21x; + s_fy += f12y - f21y; + s_fz += f12z - f21z; + + s_pe += p2_vdW * 0.5f; + s_sxx += x12f * f21x; + s_sxy += x12f * f21y; + s_sxz += x12f * f21z; + s_syx += y12f * f21x; + s_syy += y12f * f21y; + s_syz += y12f * f21z; + s_szx += z12f * f21x; + s_szy += z12f * f21y; + s_szz += z12f * f21z; + + + // calc rep + float C = ilp_para.C[type1][type2]; + float lambda_ = ilp_para.lambda[type1][type2]; + float delta2inv = ilp_para.delta2inv[type1][type2]; + float epsilon = ilp_para.epsilon[type1][type2]; + float z0 = ilp_para.z0[type1][type2]; + // calc_rep + float prodnorm1, rhosq1, rdsq1, exp0, exp1, frho1, Erep, Vilp; + float fpair, fpair1, fsum, delx, dely, delz, fkcx, fkcy, fkcz; + float dprodnorm1[3] = {0.0f, 0.0f, 0.0f}; + float fp1[3] = {0.0f, 0.0f, 0.0f}; + float fprod1[3] = {0.0f, 0.0f, 0.0f}; + float fk[3] = {0.0f, 0.0f, 0.0f}; + + delx = -x12f; + dely = -y12f; + delz = -z12f; + + float delx_half = delx * 0.5f; + float dely_half = dely * 0.5f; + float delz_half = delz * 0.5f; + + // calculate the transverse distance + prodnorm1 = normal[0] * delx + normal[1] * dely + normal[2] * delz; + rhosq1 = rsq - prodnorm1 * prodnorm1; + rdsq1 = rhosq1 * delta2inv; + + // store exponents + // exp0 = exp(-lambda_ * (r - z0)); + // exp1 = exp(-rdsq1); + exp0 = expf(-lambda_ * (r - z0)); + exp1 = expf(-rdsq1); + + frho1 = exp1 * C; + Erep = 0.5f * epsilon + frho1; + Vilp = exp0 * Erep; + + // derivatives + fpair = lambda_ * exp0 * rinv * Erep; + fpair1 = 2.0f * exp0 * frho1 * delta2inv; + fsum = fpair + fpair1; + + float prodnorm1_m_fpair1 = prodnorm1 * fpair1; + float Vilp_m_dTap_m_rinv = Vilp * dTap * rinv; + + // derivatives of the product of rij and ni, the resutl is a vector + dprodnorm1[0] = + dnormdri[0][0] * delx + dnormdri[1][0] * dely + dnormdri[2][0] * delz; + dprodnorm1[1] = + dnormdri[0][1] * delx + dnormdri[1][1] * dely + dnormdri[2][1] * delz; + dprodnorm1[2] = + dnormdri[0][2] * delx + dnormdri[1][2] * dely + dnormdri[2][2] * delz; + // fp1[0] = prodnorm1 * normal[0] * fpair1; + // fp1[1] = prodnorm1 * normal[1] * fpair1; + // fp1[2] = prodnorm1 * normal[2] * fpair1; + // fprod1[0] = prodnorm1 * dprodnorm1[0] * fpair1; + // fprod1[1] = prodnorm1 * dprodnorm1[1] * fpair1; + // fprod1[2] = prodnorm1 * dprodnorm1[2] * fpair1; + fp1[0] = prodnorm1_m_fpair1 * normal[0]; + fp1[1] = prodnorm1_m_fpair1 * normal[1]; + fp1[2] = prodnorm1_m_fpair1 * normal[2]; + fprod1[0] = prodnorm1_m_fpair1 * dprodnorm1[0]; + fprod1[1] = prodnorm1_m_fpair1 * dprodnorm1[1]; + fprod1[2] = prodnorm1_m_fpair1 * dprodnorm1[2]; + + // fkcx = (delx * fsum - fp1[0]) * Tap - Vilp * dTap * delx * rinv; + // fkcy = (dely * fsum - fp1[1]) * Tap - Vilp * dTap * dely * rinv; + // fkcz = (delz * fsum - fp1[2]) * Tap - Vilp * dTap * delz * rinv; + fkcx = (delx * fsum - fp1[0]) * Tap - Vilp_m_dTap_m_rinv * delx; + fkcy = (dely * fsum - fp1[1]) * Tap - Vilp_m_dTap_m_rinv * dely; + fkcz = (delz * fsum - fp1[2]) * Tap - Vilp_m_dTap_m_rinv * delz; + + s_fx += fkcx - fprod1[0] * Tap; + s_fy += fkcy - fprod1[1] * Tap; + s_fz += fkcz - fprod1[2] * Tap; + + g_f12x[index] = fkcx; + g_f12y[index] = fkcy; + g_f12z[index] = fkcz; + + float minus_prodnorm1_m_fpair1_m_Tap = -prodnorm1 * fpair1 * Tap; + for (int kk = 0; kk < ilp_neighbor_number; ++kk) { + // for (int kk = 0; kk < 0; ++kk) { + // int index_ilp = n1 + number_of_particles * kk; + // int n2_ilp = g_ilp_neighbor_list[index_ilp]; + // derivatives of the product of rij and ni respect to rk, k=0,1,2, where atom k is the neighbors of atom i + dprodnorm1[0] = dnormal[0][kk][0] * delx + dnormal[1][kk][0] * dely + + dnormal[2][kk][0] * delz; + dprodnorm1[1] = dnormal[0][kk][1] * delx + dnormal[1][kk][1] * dely + + dnormal[2][kk][1] * delz; + dprodnorm1[2] = dnormal[0][kk][2] * delx + dnormal[1][kk][2] * dely + + dnormal[2][kk][2] * delz; + // fk[0] = (-prodnorm1 * dprodnorm1[0] * fpair1) * Tap; + // fk[1] = (-prodnorm1 * dprodnorm1[1] * fpair1) * Tap; + // fk[2] = (-prodnorm1 * dprodnorm1[2] * fpair1) * Tap; + fk[0] = minus_prodnorm1_m_fpair1_m_Tap * dprodnorm1[0]; + fk[1] = minus_prodnorm1_m_fpair1_m_Tap * dprodnorm1[1]; + fk[2] = minus_prodnorm1_m_fpair1_m_Tap * dprodnorm1[2]; + + g_f12x_ilp_neigh[n1 + number_of_particles * kk] += fk[0]; + g_f12y_ilp_neigh[n1 + number_of_particles * kk] += fk[1]; + g_f12z_ilp_neigh[n1 + number_of_particles * kk] += fk[2]; + + // delki[0] = g_x[n2_ilp] - x1; + // delki[1] = g_y[n2_ilp] - y1; + // delki[2] = g_z[n2_ilp] - z1; + // apply_mic(box, delki[0], delki[1], delki[2]); + + // s_sxx += delki[0] * fk[0] * 0.5; + // s_sxy += delki[0] * fk[1] * 0.5; + // s_sxz += delki[0] * fk[2] * 0.5; + // s_syx += delki[1] * fk[0] * 0.5; + // s_syy += delki[1] * fk[1] * 0.5; + // s_syz += delki[1] * fk[2] * 0.5; + // s_szx += delki[2] * fk[0] * 0.5; + // s_szy += delki[2] * fk[1] * 0.5; + // s_szz += delki[2] * fk[2] * 0.5; + s_sxx += delkix_half[kk] * fk[0]; + s_sxy += delkix_half[kk] * fk[1]; + s_sxz += delkix_half[kk] * fk[2]; + s_syx += delkiy_half[kk] * fk[0]; + s_syy += delkiy_half[kk] * fk[1]; + s_syz += delkiy_half[kk] * fk[2]; + s_szx += delkiz_half[kk] * fk[0]; + s_szy += delkiz_half[kk] * fk[1]; + s_szz += delkiz_half[kk] * fk[2]; + } + s_pe += Tap * Vilp; + s_sxx += delx_half * fkcx; + s_sxy += delx_half * fkcy; + s_sxz += delx_half * fkcz; + s_syx += dely_half * fkcx; + s_syy += dely_half * fkcy; + s_syz += dely_half * fkcz; + s_szx += delz_half * fkcx; + s_szy += delz_half * fkcy; + s_szz += delz_half * fkcz; + } + + // save force + g_fx[n1] += s_fx; + g_fy[n1] += s_fy; + g_fz[n1] += s_fz; + + // save virial + // xx xy xz 0 3 4 + // yx yy yz 6 1 5 + // zx zy zz 7 8 2 + g_virial[n1 + 0 * number_of_particles] += s_sxx; + g_virial[n1 + 1 * number_of_particles] += s_syy; + g_virial[n1 + 2 * number_of_particles] += s_szz; + g_virial[n1 + 3 * number_of_particles] += s_sxy; + g_virial[n1 + 4 * number_of_particles] += s_sxz; + g_virial[n1 + 5 * number_of_particles] += s_syz; + g_virial[n1 + 6 * number_of_particles] += s_syx; + g_virial[n1 + 7 * number_of_particles] += s_szx; + g_virial[n1 + 8 * number_of_particles] += s_szy; + + // save potential + g_potential[n1] += s_pe; + + } } \ No newline at end of file From 6b6ec594ba1e27babbf104daada92e51a54277df Mon Sep 17 00:00:00 2001 From: bhk Date: Tue, 8 Oct 2024 20:49:37 +0800 Subject: [PATCH 18/46] add build_reduce_nl for ilp_tmd_sw --- src/force/ilp_tmd_sw.cu | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index a1fc9becd..b063a1cda 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -899,4 +899,39 @@ static __global__ void gpu_find_force( g_potential[n1] += s_pe; } +} + +// build a neighbor list for reducing force +static __global__ void build_reduce_neighbor_list( + const int number_of_particles, + const int N1, + const int N2, + const int *g_neighbor_number, + const int *g_neighbor_list, + int *g_reduce_neighbor_list) +{ + int n1 = blockIdx.x * blockDim.x + threadIdx.x + N1; + if (N1 < N2) { + int neighbor_number = g_neighbor_number[n1]; + int l, r, m, tmp_value; + for (int i1 = 0; i1 < neighbor_number; ++i1) { + int index = n1 + i1 * number_of_particles; + int n2 = g_neighbor_list[index]; + + l = 0; + r = g_neighbor_number[n2]; + while (l < r) { + m = (l + r) >> 1; + tmp_value = g_neighbor_list[n2 + number_of_particles * m]; + if (tmp_value < n1) { + l = m + 1; + } else if (tmp_value > n1) { + r = m - 1; + } else { + break; + } + } + g_reduce_neighbor_list[index] = (l + r) >> 1; + } + } } \ No newline at end of file From 96ac7e0affb34bb97653060f77ebfa877bbd693c Mon Sep 17 00:00:00 2001 From: bhk Date: Tue, 8 Oct 2024 20:59:06 +0800 Subject: [PATCH 19/46] add reduce_force_many_body for ilp_tmd_sw --- src/force/ilp_tmd_sw.cu | 146 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index b063a1cda..98cf987a0 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -934,4 +934,150 @@ static __global__ void build_reduce_neighbor_list( g_reduce_neighbor_list[index] = (l + r) >> 1; } } +} + +// reduce the rep force +static __global__ void reduce_force_many_body( + const int number_of_particles, + const int N1, + const int N2, + const Box box, + const int *g_neighbor_number, + const int *g_neighbor_list, + int *g_reduce_neighbor_list, + int *g_ilp_neighbor_number, + int *g_ilp_neighbor_list, + const double *__restrict__ g_x, + const double *__restrict__ g_y, + const double *__restrict__ g_z, + double *g_fx, + double *g_fy, + double *g_fz, + double *g_virial, + float *g_f12x, + float *g_f12y, + float *g_f12z, + float *g_f12x_ilp_neigh, + float *g_f12y_ilp_neigh, + float *g_f12z_ilp_neigh) +{ + int n1 = blockIdx.x * blockDim.x + threadIdx.x + N1; // particle index + float s_fx = 0.0f; // force_x + float s_fy = 0.0f; // force_y + float s_fz = 0.0f; // force_z + float s_sxx = 0.0f; // virial_stress_xx + float s_sxy = 0.0f; // virial_stress_xy + float s_sxz = 0.0f; // virial_stress_xz + float s_syx = 0.0f; // virial_stress_yx + float s_syy = 0.0f; // virial_stress_yy + float s_syz = 0.0f; // virial_stress_yz + float s_szx = 0.0f; // virial_stress_zx + float s_szy = 0.0f; // virial_stress_zy + float s_szz = 0.0f; // virial_stress_zz + + + if (n1 < N2) { + double x12d, y12d, z12d; + float x12f, y12f, z12f; + int neighbor_number_1 = g_neighbor_number[n1]; + double x1 = g_x[n1]; + double y1 = g_y[n1]; + double z1 = g_z[n1]; + + // calculate energy and force + for (int i1 = 0; i1 < neighbor_number_1; ++i1) { + int index = n1 + number_of_particles * i1; + int n2 = g_neighbor_list[index]; + + x12d = g_x[n2] - x1; + y12d = g_y[n2] - y1; + z12d = g_z[n2] - z1; + apply_mic(box, x12d, y12d, z12d); + x12f = float(x12d); + y12f = float(y12d); + z12f = float(z12d); + + index = n2 + number_of_particles * g_reduce_neighbor_list[index]; + float f21x = g_f12x[index]; + float f21y = g_f12y[index]; + float f21z = g_f12z[index]; + + s_fx -= f21x; + s_fy -= f21y; + s_fz -= f21z; + + // per-atom virial + s_sxx += x12f * f21x * 0.5f; + s_sxy += x12f * f21y * 0.5f; + s_sxz += x12f * f21z * 0.5f; + s_syx += y12f * f21x * 0.5f; + s_syy += y12f * f21y * 0.5f; + s_syz += y12f * f21z * 0.5f; + s_szx += z12f * f21x * 0.5f; + s_szy += z12f * f21y * 0.5f; + s_szz += z12f * f21z * 0.5f; + } + + int ilp_neighbor_number_1 = g_ilp_neighbor_number[n1]; + + for (int i1 = 0; i1 < ilp_neighbor_number_1; ++i1) { + int index = n1 + number_of_particles * i1; + int n2 = g_ilp_neighbor_list[index]; + int ilp_neighor_number_2 = g_ilp_neighbor_number[n2]; + + x12d = g_x[n2] - x1; + y12d = g_y[n2] - y1; + z12d = g_z[n2] - z1; + apply_mic(box, x12d, y12d, z12d); + x12f = float(x12d); + y12f = float(y12d); + z12f = float(z12d); + + int offset = 0; + for (int k = 0; k < ilp_neighor_number_2; ++k) { + if (n1 == g_ilp_neighbor_list[n2 + number_of_particles * k]) { + offset = k; + break; + } + } + index = n2 + number_of_particles * offset; + float f21x = g_f12x_ilp_neigh[index]; + float f21y = g_f12y_ilp_neigh[index]; + float f21z = g_f12z_ilp_neigh[index]; + + s_fx += f21x; + s_fy += f21y; + s_fz += f21z; + + // per-atom virial + s_sxx += -x12f * f21x * 0.5f; + s_sxy += -x12f * f21y * 0.5f; + s_sxz += -x12f * f21z * 0.5f; + s_syx += -y12f * f21x * 0.5f; + s_syy += -y12f * f21y * 0.5f; + s_syz += -y12f * f21z * 0.5f; + s_szx += -z12f * f21x * 0.5f; + s_szy += -z12f * f21y * 0.5f; + s_szz += -z12f * f21z * 0.5f; + } + + // save force + g_fx[n1] += s_fx; + g_fy[n1] += s_fy; + g_fz[n1] += s_fz; + + // save virial + // xx xy xz 0 3 4 + // yx yy yz 6 1 5 + // zx zy zz 7 8 2 + g_virial[n1 + 0 * number_of_particles] += s_sxx; + g_virial[n1 + 1 * number_of_particles] += s_syy; + g_virial[n1 + 2 * number_of_particles] += s_szz; + g_virial[n1 + 3 * number_of_particles] += s_sxy; + g_virial[n1 + 4 * number_of_particles] += s_sxz; + g_virial[n1 + 5 * number_of_particles] += s_syz; + g_virial[n1 + 6 * number_of_particles] += s_syx; + g_virial[n1 + 7 * number_of_particles] += s_szx; + g_virial[n1 + 8 * number_of_particles] += s_szy; + } } \ No newline at end of file From 5a624fb082785f0bb7087153aeb6043943a83d40 Mon Sep 17 00:00:00 2001 From: bhk Date: Tue, 8 Oct 2024 21:14:27 +0800 Subject: [PATCH 20/46] add find_neighbor func for ilp --- src/force/neighbor.cu | 153 +++++++++++++++++++++++++++++++++++++++++ src/force/neighbor.cuh | 74 ++++++++++++++++++++ 2 files changed, 227 insertions(+) diff --git a/src/force/neighbor.cu b/src/force/neighbor.cu index bed99d912..04aa9e35c 100644 --- a/src/force/neighbor.cu +++ b/src/force/neighbor.cu @@ -341,3 +341,156 @@ void find_neighbor( gpu_sort_neighbor_list<<>>(N, NN.data(), NL.data()); CUDA_CHECK_KERNEL } + +// For ILP, the neighbor could not contain atoms in the same layer +static __global__ void gpu_find_neighbor_ON1( + const Box box, + const int N, + const int N1, + const int N2, + const int* __restrict__ type, + const int* __restrict__ cell_counts, + const int* __restrict__ cell_count_sum, + const int* __restrict__ cell_contents, + int* NN, + int* NL, + int* big_ilp_NN, + int* big_ilp_NL, + const int* group_label, + const double* __restrict__ x, + const double* __restrict__ y, + const double* __restrict__ z, + const int nx, + const int ny, + const int nz, + const double rc_inv, + const double cutoff_square, + const double big_ilp_cutoff_square) +{ + const int n1 = blockIdx.x * blockDim.x + threadIdx.x + N1; + int count = 0; + int ilp_count = 0; + if (n1 < N2) { + const double x1 = x[n1]; + const double y1 = y[n1]; + const double z1 = z[n1]; + int cell_id; + int cell_id_x; + int cell_id_y; + int cell_id_z; + find_cell_id(box, x1, y1, z1, rc_inv, nx, ny, nz, cell_id_x, cell_id_y, cell_id_z, cell_id); + + const int z_lim = box.pbc_z ? 2 : 0; + const int y_lim = box.pbc_y ? 2 : 0; + const int x_lim = box.pbc_x ? 2 : 0; + + // get radial descriptors + for (int k = -z_lim; k <= z_lim; ++k) { + for (int j = -y_lim; j <= y_lim; ++j) { + for (int i = -x_lim; i <= x_lim; ++i) { + int neighbor_cell = cell_id + k * nx * ny + j * nx + i; + if (cell_id_x + i < 0) + neighbor_cell += nx; + if (cell_id_x + i >= nx) + neighbor_cell -= nx; + if (cell_id_y + j < 0) + neighbor_cell += ny * nx; + if (cell_id_y + j >= ny) + neighbor_cell -= ny * nx; + if (cell_id_z + k < 0) + neighbor_cell += nz * ny * nx; + if (cell_id_z + k >= nz) + neighbor_cell -= nz * ny * nx; + + const int num_atoms_neighbor_cell = cell_counts[neighbor_cell]; + const int num_atoms_previous_cells = cell_count_sum[neighbor_cell]; + + for (int m = 0; m < num_atoms_neighbor_cell; ++m) { + const int n2 = cell_contents[num_atoms_previous_cells + m]; + // neighbors in different layers + if (n2 >= N1 && n2 < N2 && n1 != n2) { + + double x12 = x[n2] - x1; + double y12 = y[n2] - y1; + double z12 = z[n2] - z1; + apply_mic(box, x12, y12, z12); + const double d2 = x12 * x12 + y12 * y12 + z12 * z12; + + bool different_layer = group_label[n1] != group_label[n2]; + if (different_layer && d2 < cutoff_square) { + NL[count++ * N + n1] = n2; + } else if (!different_layer && d2 < big_ilp_cutoff_square) { + big_ilp_NL[ilp_count++ * N + n1] = n2; + } + + } + } + } + } + } + NN[n1] = count; + big_ilp_NN[n1] = ilp_count; + } +} + +void find_neighbor( + const int N1, + const int N2, + double rc, + double big_ilp_cutoff_square, + Box& box, + const int* group_label, + const GPU_Vector& type, + const GPU_Vector& position_per_atom, + GPU_Vector& cell_count, + GPU_Vector& cell_count_sum, + GPU_Vector& cell_contents, + GPU_Vector& NN, + GPU_Vector& NL, + GPU_Vector& big_ilp_NN, + GPU_Vector& big_ilp_NL) +{ + const int N = NN.size(); + const int block_size = 256; + const int grid_size = (N2 - N1 - 1) / block_size + 1; + const double* x = position_per_atom.data(); + const double* y = position_per_atom.data() + N; + const double* z = position_per_atom.data() + N * 2; + const double rc_cell_list = 0.5 * rc; + const double rc_inv_cell_list = 2.0 / rc; + + int num_bins[3]; + box.get_num_bins(rc_cell_list, num_bins); + + find_cell_list( + rc_cell_list, num_bins, box, position_per_atom, cell_count, cell_count_sum, cell_contents); + + gpu_find_neighbor_ON1<<>>( + box, + N, + N1, + N2, + type.data(), + cell_count.data(), + cell_count_sum.data(), + cell_contents.data(), + NN.data(), + NL.data(), + big_ilp_NN.data(), + big_ilp_NL.data(), + group_label, + x, + y, + z, + num_bins[0], + num_bins[1], + num_bins[2], + rc_inv_cell_list, + rc * rc, + big_ilp_cutoff_square); + CUDA_CHECK_KERNEL + + const int MN = NL.size() / NN.size(); + gpu_sort_neighbor_list_ilp<<>>(N, NN.data(), NL.data()); + CUDA_CHECK_KERNEL +} \ No newline at end of file diff --git a/src/force/neighbor.cuh b/src/force/neighbor.cuh index 068eb5e22..6fc5f3405 100644 --- a/src/force/neighbor.cuh +++ b/src/force/neighbor.cuh @@ -40,6 +40,24 @@ void find_neighbor( GPU_Vector& NN, GPU_Vector& NL); +// For ILP +void find_neighbor( + const int N1, + const int N2, + double rc, + double big_ilp_cutoff_square, + Box& box, + const int* group_label, + const GPU_Vector& type, + const GPU_Vector& position_per_atom, + GPU_Vector& cell_count, + GPU_Vector& cell_count_sum, + GPU_Vector& cell_contents, + GPU_Vector& NN, + GPU_Vector& NL, + GPU_Vector& big_ilp_NN, + GPU_Vector& big_ilp_NL); + static __device__ void find_cell_id( const Box& box, const double x, @@ -106,3 +124,59 @@ static __global__ void gpu_sort_neighbor_list(const int N, const int* NN, int* N NL[bid + count * N] = atom_index; } } + +static __global__ void gpu_sort_neighbor_list_ilp(const int N, const int* NN, int* NL) +{ + int bid = blockIdx.x; + int tid = threadIdx.x; + int neighbor_number = NN[bid]; + int atom_index; + int atom_index_hold[10] = {0}; + extern __shared__ int atom_index_copy[]; + + if (neighbor_number <= 1024) { + if (tid < neighbor_number) { + atom_index = NL[bid + tid * N]; + atom_index_copy[tid] = atom_index; + } + } else { + int tid_plus = tid; + for (int i = 0; tid_plus < neighbor_number; ++i) { + atom_index = NL[bid + tid_plus * N]; + atom_index_copy[tid_plus] = atom_index; + atom_index_hold[i] = atom_index; + tid_plus += 1024; + } + } + int count = 0; + __syncthreads(); + + if (neighbor_number <= 1024) { + for (int j = 0; j < neighbor_number; ++j) { + if (atom_index > atom_index_copy[j]) { + count++; + } + } + + if (tid < neighbor_number) { + NL[bid + count * N] = atom_index; + } + } else { + int tid_plus = tid; + for (int i = 0; tid_plus < neighbor_number; ++i) + { + count = 0; + atom_index = atom_index_hold[i]; + for (int j = 0; j < neighbor_number; ++j) { + if (atom_index > atom_index_copy[j]) { + ++count; + } + } + + if (tid_plus < neighbor_number) { + NL[bid + count * N] = atom_index; + } + tid_plus += 1024; + } + } +} \ No newline at end of file From 4b64e8c9b4475dc0ea33cf10d0f4d4c7df2e8b87 Mon Sep 17 00:00:00 2001 From: bhk Date: Tue, 8 Oct 2024 21:27:22 +0800 Subject: [PATCH 21/46] add compute for ilp_tmd_sw --- src/force/ilp_tmd_sw.cu | 163 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 162 insertions(+), 1 deletion(-) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index 98cf987a0..fd141f80d 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -663,7 +663,7 @@ static __global__ void gpu_find_force( delkiz_half[i1] = float(z12d) * 0.5f; } - calc_normal(n1, vet, cont, normal, dnormdri, dnormal); + calc_normal(vet, cont, normal, dnormdri, dnormal); // calculate energy and force double tt1,tt2,tt3; @@ -1080,4 +1080,165 @@ static __global__ void reduce_force_many_body( g_virial[n1 + 7 * number_of_particles] += s_szx; g_virial[n1 + 8 * number_of_particles] += s_szy; } +} + +// define the pure virtual func +void ILP_TMD_SW::compute( + Box &box, + const GPU_Vector &type, + const GPU_Vector &position_per_atom, + GPU_Vector &potential_per_atom, + GPU_Vector &force_per_atom, + GPU_Vector &virial_per_atom) +{ + // nothing +} + +#define USE_FIXED_NEIGHBOR 1 +#define UPDATE_TEMP 10 +#define BIG_ILP_CUTOFF_SQUARE 50.0 +// find force and related quantities +void ILP_TMD_SW::compute( + Box &box, + const GPU_Vector &type, + const GPU_Vector &position_per_atom, + GPU_Vector &potential_per_atom, + GPU_Vector &force_per_atom, + GPU_Vector &virial_per_atom, + std::vector &group) +{ + const int number_of_atoms = type.size(); + int grid_size = (N2 - N1 - 1) / BLOCK_SIZE_FORCE + 1; + + // assume the first group column is for ILP + const int *group_label = group[0].label.data(); + +#ifdef USE_FIXED_NEIGHBOR + static int num_calls = 0; +#endif +#ifdef USE_FIXED_NEIGHBOR + if (num_calls++ == 0) { +#endif + find_neighbor( + N1, + N2, + rc, + BIG_ILP_CUTOFF_SQUARE, + box, + group_label, + type, + position_per_atom, + ilp_data.cell_count, + ilp_data.cell_count_sum, + ilp_data.cell_contents, + ilp_data.NN, + ilp_data.NL, + ilp_data.big_ilp_NN, + ilp_data.big_ilp_NL); + + build_reduce_neighbor_list<<>>( + number_of_atoms, + N1, + N2, + ilp_data.NN.data(), + ilp_data.NL.data(), + ilp_data.reduce_NL.data()); +#ifdef USE_FIXED_NEIGHBOR + } + num_calls %= UPDATE_TEMP; +#endif + + const double* x = position_per_atom.data(); + const double* y = position_per_atom.data() + number_of_atoms; + const double* z = position_per_atom.data() + number_of_atoms * 2; + const int *NN = ilp_data.NN.data(); + const int *NL = ilp_data.NL.data(); + const int* big_ilp_NN = ilp_data.big_ilp_NN.data(); + const int* big_ilp_NL = ilp_data.big_ilp_NL.data(); + int *reduce_NL = ilp_data.reduce_NL.data(); + int *ilp_NL = ilp_data.ilp_NL.data(); + int *ilp_NN = ilp_data.ilp_NN.data(); + + ilp_data.ilp_NL.fill(0); + ilp_data.ilp_NN.fill(0); + + // find ILP neighbor list + ILP_neighbor<<>>( + number_of_atoms, N1, N2, box, big_ilp_NN, big_ilp_NL, \ + type.data(), ilp_para, x, y, z, ilp_NN, \ + ilp_NL, group[1].label.data()); + CUDA_CHECK_KERNEL + + // initialize force of ilp neighbor temporary vector + ilp_data.f12x_ilp_neigh.fill(0); + ilp_data.f12y_ilp_neigh.fill(0); + ilp_data.f12z_ilp_neigh.fill(0); + ilp_data.f12x.fill(0); + ilp_data.f12y.fill(0); + ilp_data.f12z.fill(0); + + double *g_fx = force_per_atom.data(); + double *g_fy = force_per_atom.data() + number_of_atoms; + double *g_fz = force_per_atom.data() + number_of_atoms * 2; + double *g_virial = virial_per_atom.data(); + double *g_potential = potential_per_atom.data(); + float *g_f12x = ilp_data.f12x.data(); + float *g_f12y = ilp_data.f12y.data(); + float *g_f12z = ilp_data.f12z.data(); + float *g_f12x_ilp_neigh = ilp_data.f12x_ilp_neigh.data(); + float *g_f12y_ilp_neigh = ilp_data.f12y_ilp_neigh.data(); + float *g_f12z_ilp_neigh = ilp_data.f12z_ilp_neigh.data(); + + gpu_find_force<<>>( + ilp_para, + number_of_atoms, + N1, + N2, + box, + NN, + NL, + ilp_NN, + ilp_NL, + group_label, + type.data(), + x, + y, + z, + g_fx, + g_fy, + g_fz, + g_virial, + g_potential, + g_f12x, + g_f12y, + g_f12z, + g_f12x_ilp_neigh, + g_f12y_ilp_neigh, + g_f12z_ilp_neigh); + CUDA_CHECK_KERNEL + + reduce_force_many_body<<>>( + number_of_atoms, + N1, + N2, + box, + NN, + NL, + reduce_NL, + ilp_NN, + ilp_NL, + x, + y, + z, + g_fx, + g_fy, + g_fz, + g_virial, + g_f12x, + g_f12y, + g_f12z, + g_f12x_ilp_neigh, + g_f12y_ilp_neigh, + g_f12z_ilp_neigh); + CUDA_CHECK_KERNEL } \ No newline at end of file From d17b8d4356b90853a289590ed763f671761a2399 Mon Sep 17 00:00:00 2001 From: bhk Date: Tue, 8 Oct 2024 21:29:34 +0800 Subject: [PATCH 22/46] ***FIX LD BUG AND COMPILE PASS*** --- src/force/ilp_tmd_sw.cuh | 1 + src/force/potential.cuh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/force/ilp_tmd_sw.cuh b/src/force/ilp_tmd_sw.cuh index 84f2ef468..00c7eb263 100644 --- a/src/force/ilp_tmd_sw.cuh +++ b/src/force/ilp_tmd_sw.cuh @@ -57,6 +57,7 @@ struct ILP_TMD_SW_Data { class ILP_TMD_SW : public Potential { public: + using Potential::compute; ILP_TMD_SW(FILE*, int, int); virtual ~ILP_TMD_SW(void); virtual void compute( diff --git a/src/force/potential.cuh b/src/force/potential.cuh index e7fc10c0d..4be0a4792 100644 --- a/src/force/potential.cuh +++ b/src/force/potential.cuh @@ -55,7 +55,7 @@ public: GPU_Vector& potential, GPU_Vector& force, GPU_Vector& virial, - std::vector& group); + std::vector& group){}; protected: void find_properties_many_body( From b359b3b34ade771df52869679acb3280e39b0f3e Mon Sep 17 00:00:00 2001 From: bhk Date: Sat, 12 Oct 2024 18:58:45 +0800 Subject: [PATCH 23/46] add sw term to ilp_tmd_sw but not finished --- src/force/ilp_tmd_sw.cu | 97 ++++++++++++++++++++++++++++++++++++++-- src/force/ilp_tmd_sw.cuh | 32 +++++++++++-- 2 files changed, 122 insertions(+), 7 deletions(-) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index fd141f80d..bea70f805 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -110,7 +110,98 @@ ILP_TMD_SW::~ILP_TMD_SW(void) // nothing } -// calculate the long-range cutoff term +void ILP_TMD_SW::initialize_sw_1985_1(FILE* fid) +{ + printf("Use single-element Stillinger-Weber potential.\n"); + int count; + double epsilon, lambda, A, B, a, gamma, sigma, cos0; + count = + fscanf(fid, "%lf%lf%lf%lf%lf%lf%lf%lf", &epsilon, &lambda, &A, &B, &a, &gamma, &sigma, &cos0); + PRINT_SCANF_ERROR(count, 8, "Reading error for SW potential."); + + sw2_para.A[0][0] = epsilon * A; + sw2_para.B[0][0] = B; + sw2_para.a[0][0] = a; + sw2_para.sigma[0][0] = sigma; + sw2_para.gamma[0][0] = gamma; + sw2_para.rc[0][0] = sigma * a; + rc = sw2_para.rc[0][0]; + sw2_para.lambda[0][0][0] = epsilon * lambda; + sw2_para.cos0[0][0][0] = cos0; +} + +void ILP_TMD_SW::initialize_sw_1985_2(FILE* fid) +{ + printf("Use two-element Stillinger-Weber potential.\n"); + int count; + + // 2-body parameters and the force cutoff + double A[3], B[3], a[3], sigma[3], gamma[3]; + rc = 0.0; + for (int n = 0; n < 3; n++) { + count = fscanf(fid, "%lf%lf%lf%lf%lf", &A[n], &B[n], &a[n], &sigma[n], &gamma[n]); + PRINT_SCANF_ERROR(count, 5, "Reading error for SW potential."); + } + for (int n1 = 0; n1 < 2; n1++) + for (int n2 = 0; n2 < 2; n2++) { + sw2_para.A[n1][n2] = A[n1 + n2]; + sw2_para.B[n1][n2] = B[n1 + n2]; + sw2_para.a[n1][n2] = a[n1 + n2]; + sw2_para.sigma[n1][n2] = sigma[n1 + n2]; + sw2_para.gamma[n1][n2] = gamma[n1 + n2]; + sw2_para.rc[n1][n2] = sigma[n1 + n2] * a[n1 + n2]; + if (rc < sw2_para.rc[n1][n2]) + rc = sw2_para.rc[n1][n2]; + } + + // 3-body parameters + double lambda, cos0; + for (int n1 = 0; n1 < 2; n1++) + for (int n2 = 0; n2 < 2; n2++) + for (int n3 = 0; n3 < 2; n3++) { + count = fscanf(fid, "%lf%lf", &lambda, &cos0); + PRINT_SCANF_ERROR(count, 2, "Reading error for SW potential."); + sw2_para.lambda[n1][n2][n3] = lambda; + sw2_para.cos0[n1][n2][n3] = cos0; + } +} + +void ILP_TMD_SW::initialize_sw_1985_3(FILE* fid) +{ + printf("Use three-element Stillinger-Weber potential.\n"); + int count; + + // 2-body parameters and the force cutoff + double A, B, a, sigma, gamma; + rc = 0.0; + for (int n1 = 0; n1 < 3; n1++) + for (int n2 = 0; n2 < 3; n2++) { + count = fscanf(fid, "%lf%lf%lf%lf%lf", &A, &B, &a, &sigma, &gamma); + PRINT_SCANF_ERROR(count, 5, "Reading error for SW potential."); + sw2_para.A[n1][n2] = A; + sw2_para.B[n1][n2] = B; + sw2_para.a[n1][n2] = a; + sw2_para.sigma[n1][n2] = sigma; + sw2_para.gamma[n1][n2] = gamma; + sw2_para.rc[n1][n2] = sigma * a; + if (rc < sw2_para.rc[n1][n2]) + rc = sw2_para.rc[n1][n2]; + } + + // 3-body parameters + double lambda, cos0; + for (int n1 = 0; n1 < 3; n1++) { + for (int n2 = 0; n2 < 3; n2++) { + for (int n3 = 0; n3 < 3; n3++) { + count = fscanf(fid, "%lf%lf", &lambda, &cos0); + PRINT_SCANF_ERROR(count, 2, "Reading error for SW potential."); + sw2_para.lambda[n1][n2][n3] = lambda; + sw2_para.cos0[n1][n2][n3] = cos0; + } + } + } +} + static __device__ __forceinline__ float calc_Tap(const float r_ij, const float Rcutinv) { float Tap, r; @@ -156,7 +247,7 @@ static __global__ void ILP_neighbor( const int *g_neighbor_number, const int *g_neighbor_list, const int *g_type, - ILP_TMD_SW_Para ilp_para, + ILP_TMD_Para ilp_para, const double* __restrict__ g_x, const double* __restrict__ g_y, const double* __restrict__ g_z, @@ -569,7 +660,7 @@ static __device__ void calc_vdW( // force evaluation kernel static __global__ void gpu_find_force( - ILP_TMD_SW_Para ilp_para, + ILP_TMD_Para ilp_para, const int number_of_particles, const int N1, const int N2, diff --git a/src/force/ilp_tmd_sw.cuh b/src/force/ilp_tmd_sw.cuh index 00c7eb263..9b9af1fbe 100644 --- a/src/force/ilp_tmd_sw.cuh +++ b/src/force/ilp_tmd_sw.cuh @@ -24,7 +24,7 @@ #define MAX_ILP_NEIGHBOR_TMD 6 #define MAX_BIG_ILP_NEIGHBOR_TMD 128 -struct ILP_TMD_SW_Para { +struct ILP_TMD_Para { float rcutsq_ilp[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; float d[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; float d_Seff[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; // d / S_R / r_eff @@ -38,7 +38,7 @@ struct ILP_TMD_SW_Para { float rcut_global[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; // scale }; -struct ILP_TMD_SW_Data { +struct ILP_TMD_Data { GPU_Vector NN, NL; GPU_Vector reduce_NL; GPU_Vector big_ilp_NN, big_ilp_NL; @@ -54,6 +54,23 @@ struct ILP_TMD_SW_Data { GPU_Vector f12z_ilp_neigh; }; +struct SW2_Para { + // 2-body part + double A[3][3], B[3][3], a[3][3], sigma[3][3], gamma[3][3], rc[3][3]; + // 3-body part + double lambda[3][3][3], cos0[3][3][3]; +}; + +struct SW2_Data { + GPU_Vector NN, NL; + GPU_Vector cell_count; + GPU_Vector cell_count_sum; + GPU_Vector cell_contents; + // GPU_Vector f12x; + // GPU_Vector f12y; + // GPU_Vector f12z; +}; + class ILP_TMD_SW : public Potential { public: @@ -76,10 +93,17 @@ public: GPU_Vector& force, GPU_Vector& virial, std::vector &group); + + // sw term + void initialize_sw_1985_1(FILE*); // called by the constructor + void initialize_sw_1985_2(FILE*); // called by the constructor + void initialize_sw_1985_3(FILE*); // called by the constructor protected: - ILP_TMD_SW_Para ilp_para; - ILP_TMD_SW_Data ilp_data; + ILP_TMD_Para ilp_para; + ILP_TMD_Data ilp_data; + SW2_Para sw2_para; + SW2_Data sw2_data; }; static __constant__ float Tap_coeff_tmd[8]; \ No newline at end of file From ae90b281900f70ad5c6b19f0288350523ca1e5ec Mon Sep 17 00:00:00 2001 From: bhk Date: Sat, 12 Oct 2024 19:01:54 +0800 Subject: [PATCH 24/46] modify the construct func to recieve 2 potential files --- src/force/force.cu | 6 +++++- src/force/ilp_tmd_sw.cu | 6 +++--- src/force/ilp_tmd_sw.cuh | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/force/force.cu b/src/force/force.cu index 5e2382b1b..3c54a2700 100644 --- a/src/force/force.cu +++ b/src/force/force.cu @@ -133,7 +133,11 @@ void Force::parse_potential( } else if (strcmp(potential_name, "lj") == 0) { potential.reset(new LJ(fid_potential, num_types, number_of_atoms)); } else if (strcmp(potential_name, "ilp_tmd_sw") == 0) { - potential.reset(new ILP_TMD_SW(fid_potential, num_types, number_of_atoms)); + if (num_param != 3) { + PRINT_INPUT_ERROR("potential should ILP potential file and SW potential file.\n"); + } + FILE* fid_sw = my_fopen(param[2], "r"); + potential.reset(new ILP_TMD_SW(fid_potential, fid_sw, num_types, number_of_atoms)); } else { PRINT_INPUT_ERROR("illegal potential model.\n"); } diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index bea70f805..df71fe816 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -27,7 +27,7 @@ TODO: // there are most 6 intra-layer neighbors for TMD #define NNEI 6 -ILP_TMD_SW::ILP_TMD_SW(FILE* fid, int num_types, int num_atoms) +ILP_TMD_SW::ILP_TMD_SW(FILE* fid_ilp, FILE* fid_sw, int num_types, int num_atoms) { printf("Use %d-element ILP potential with elements:\n", num_types); if (!(num_types >= 1 && num_types <= MAX_TYPE_ILP_TMD_SW)) { @@ -35,7 +35,7 @@ ILP_TMD_SW::ILP_TMD_SW(FILE* fid, int num_types, int num_atoms) } for (int n = 0; n < num_types; ++n) { char atom_symbol[10]; - int count = fscanf(fid, "%s", atom_symbol); + int count = fscanf(fid_ilp, "%s", atom_symbol); PRINT_SCANF_ERROR(count, 1, "Reading error for ILP_TMD_SW potential."); printf(" %s", atom_symbol); } @@ -47,7 +47,7 @@ ILP_TMD_SW::ILP_TMD_SW(FILE* fid, int num_types, int num_atoms) rc = 0.0; for (int n = 0; n < num_types; ++n) { for (int m = 0; m < num_types; ++m) { - int count = fscanf(fid, "%f%f%f%f%f%f%f%f%f%f%f%f", \ + int count = fscanf(fid_ilp, "%f%f%f%f%f%f%f%f%f%f%f%f", \ &beta, &alpha, &delta, &epsilon, &C, &d, &sR, &reff, &C6, &S, \ &rcut_ilp, &rcut_global); PRINT_SCANF_ERROR(count, 12, "Reading error for ILP_TMD_SW potential."); diff --git a/src/force/ilp_tmd_sw.cuh b/src/force/ilp_tmd_sw.cuh index 9b9af1fbe..3d1a28284 100644 --- a/src/force/ilp_tmd_sw.cuh +++ b/src/force/ilp_tmd_sw.cuh @@ -75,7 +75,7 @@ class ILP_TMD_SW : public Potential { public: using Potential::compute; - ILP_TMD_SW(FILE*, int, int); + ILP_TMD_SW(FILE*, FILE*, int, int); virtual ~ILP_TMD_SW(void); virtual void compute( Box& box, From fec3ad907afea81c6130841527bf8027301851ef Mon Sep 17 00:00:00 2001 From: bhk Date: Sat, 12 Oct 2024 19:19:54 +0800 Subject: [PATCH 25/46] close sw file --- src/force/force.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/src/force/force.cu b/src/force/force.cu index 3c54a2700..bbc7dddc9 100644 --- a/src/force/force.cu +++ b/src/force/force.cu @@ -138,6 +138,7 @@ void Force::parse_potential( } FILE* fid_sw = my_fopen(param[2], "r"); potential.reset(new ILP_TMD_SW(fid_potential, fid_sw, num_types, number_of_atoms)); + fclose(fid_sw); } else { PRINT_INPUT_ERROR("illegal potential model.\n"); } From 568efc80f9440d42235ee1d51f66d863acba2ce1 Mon Sep 17 00:00:00 2001 From: bhk Date: Sat, 12 Oct 2024 19:22:13 +0800 Subject: [PATCH 26/46] achieve sw potential file reading --- src/force/ilp_tmd_sw.cu | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index df71fe816..5be8c2467 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -29,6 +29,7 @@ TODO: ILP_TMD_SW::ILP_TMD_SW(FILE* fid_ilp, FILE* fid_sw, int num_types, int num_atoms) { + // read ILP TMD potential parameter printf("Use %d-element ILP potential with elements:\n", num_types); if (!(num_types >= 1 && num_types <= MAX_TYPE_ILP_TMD_SW)) { PRINT_INPUT_ERROR("Incorrect type number of ILP_TMD_SW parameters.\n"); @@ -73,6 +74,18 @@ ILP_TMD_SW::ILP_TMD_SW(FILE* fid_ilp, FILE* fid_sw, int num_types, int num_atoms } } + // read SW potential parameter + if (num_types == 1) { + initialize_sw_1985_1(fid_sw); + } + if (num_types == 2) { + initialize_sw_1985_2(fid_sw); + } + if (num_types == 3) { + initialize_sw_1985_3(fid_sw); + } + + // initialize neighbor lists and some temp vectors int max_neighbor_number = min(num_atoms, CUDA_MAX_NL_TMD); ilp_data.NN.resize(num_atoms); ilp_data.NL.resize(num_atoms * max_neighbor_number); From a337118caab82d9c73998f6314f95ec9812dea41 Mon Sep 17 00:00:00 2001 From: bhk Date: Sat, 12 Oct 2024 19:32:43 +0800 Subject: [PATCH 27/46] initialize sw nl --- src/force/ilp_tmd_sw.cu | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index 5be8c2467..ad1fd06d2 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -108,6 +108,13 @@ ILP_TMD_SW::ILP_TMD_SW(FILE* fid_ilp, FILE* fid_sw, int num_types, int num_atoms ilp_data.f12y_ilp_neigh.resize(num_atoms * MAX_ILP_NEIGHBOR_TMD); ilp_data.f12z_ilp_neigh.resize(num_atoms * MAX_ILP_NEIGHBOR_TMD); + // intialize sw neighbor list + sw2_data.NN.resize(num_atoms); + sw2_data.NL.resize(num_atoms * 1024); // the largest supported by CUDA + sw2_data.cell_count.resize(num_atoms); + sw2_data.cell_count_sum.resize(num_atoms); + sw2_data.cell_contents.resize(num_atoms); + // init constant cutoff coeff float h_tap_coeff[8] = \ {1.0f, 0.0f, 0.0f, 0.0f, -35.0f, 84.0f, -70.0f, 20.0f}; From a2005cbc86ba997d8e8a3a5534513239ed6739b5 Mon Sep 17 00:00:00 2001 From: bhk Date: Sat, 12 Oct 2024 19:38:23 +0800 Subject: [PATCH 28/46] add sw NL build func --- src/force/ilp_tmd_sw.cu | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index ad1fd06d2..07bcdddac 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -1246,6 +1246,20 @@ void ILP_TMD_SW::compute( ilp_data.NL, ilp_data.big_ilp_NN, ilp_data.big_ilp_NL); + + find_neighbor( + N1, + N2, + rc, + box, + type, + position_per_atom, + sw2_data.cell_count, + sw2_data.cell_count_sum, + sw2_data.cell_contents, + sw2_data.NN, + sw2_data.NL + ); build_reduce_neighbor_list<<>>( number_of_atoms, @@ -1270,6 +1284,9 @@ void ILP_TMD_SW::compute( int *ilp_NL = ilp_data.ilp_NL.data(); int *ilp_NN = ilp_data.ilp_NN.data(); + const int* NN_sw = sw2_data.NN.data(); + const int* NL_sw = sw2_data.NL.data(); + ilp_data.ilp_NL.fill(0); ilp_data.ilp_NN.fill(0); From 74ffb325839d445c3a08b2bca27ef14cc6fd60c7 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 14 Oct 2024 15:27:13 +0800 Subject: [PATCH 29/46] add type_shift to compute func for SW --- src/force/potential.cuh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/force/potential.cuh b/src/force/potential.cuh index 4be0a4792..647921d46 100644 --- a/src/force/potential.cuh +++ b/src/force/potential.cuh @@ -47,8 +47,9 @@ public: GPU_Vector& force, GPU_Vector& virial){}; - // add group message for ILP + // add group message for ILP TMD SW virtual void compute( + const int type_shift, Box& box, const GPU_Vector& type, const GPU_Vector& position, From cc93caba068ea118f72ed1e32d540618cf22f4c2 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 14 Oct 2024 15:30:21 +0800 Subject: [PATCH 30/46] add type_shift for SW --- src/force/ilp_tmd_sw.cuh | 1 + 1 file changed, 1 insertion(+) diff --git a/src/force/ilp_tmd_sw.cuh b/src/force/ilp_tmd_sw.cuh index 3d1a28284..81248712a 100644 --- a/src/force/ilp_tmd_sw.cuh +++ b/src/force/ilp_tmd_sw.cuh @@ -86,6 +86,7 @@ public: GPU_Vector& virial); virtual void compute( + const int type_shift, Box& box, const GPU_Vector& type, const GPU_Vector& position, From 05a5dcb5e95336943a16b57d331bf007158419aa Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 14 Oct 2024 15:31:54 +0800 Subject: [PATCH 31/46] add type_shift for SW --- src/force/ilp_tmd_sw.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index 07bcdddac..34002af3a 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -1210,6 +1210,7 @@ void ILP_TMD_SW::compute( #define BIG_ILP_CUTOFF_SQUARE 50.0 // find force and related quantities void ILP_TMD_SW::compute( + const int type_shift, Box &box, const GPU_Vector &type, const GPU_Vector &position_per_atom, From b259d09e58985d08f7434e731704512dae9536f9 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 14 Oct 2024 15:38:30 +0800 Subject: [PATCH 32/46] add f12 for SW and initial it in construct func --- src/force/ilp_tmd_sw.cu | 6 ++++++ src/force/ilp_tmd_sw.cuh | 7 ++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index 34002af3a..04d5a56b4 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -115,6 +115,12 @@ ILP_TMD_SW::ILP_TMD_SW(FILE* fid_ilp, FILE* fid_sw, int num_types, int num_atoms sw2_data.cell_count_sum.resize(num_atoms); sw2_data.cell_contents.resize(num_atoms); + // memory for the partial forces dU_i/dr_ij + const int num_of_neighbors = MAX_SW_NEIGHBOR_NUM * num_atoms; + sw2_data.f12x.resize(num_of_neighbors); + sw2_data.f12y.resize(num_of_neighbors); + sw2_data.f12z.resize(num_of_neighbors); + // init constant cutoff coeff float h_tap_coeff[8] = \ {1.0f, 0.0f, 0.0f, 0.0f, -35.0f, 84.0f, -70.0f, 20.0f}; diff --git a/src/force/ilp_tmd_sw.cuh b/src/force/ilp_tmd_sw.cuh index 81248712a..d84f8ce63 100644 --- a/src/force/ilp_tmd_sw.cuh +++ b/src/force/ilp_tmd_sw.cuh @@ -23,6 +23,7 @@ #define CUDA_MAX_NL_TMD 2048 #define MAX_ILP_NEIGHBOR_TMD 6 #define MAX_BIG_ILP_NEIGHBOR_TMD 128 +#define MAX_SW_NEIGHBOR_NUM 50 struct ILP_TMD_Para { float rcutsq_ilp[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; @@ -66,9 +67,9 @@ struct SW2_Data { GPU_Vector cell_count; GPU_Vector cell_count_sum; GPU_Vector cell_contents; - // GPU_Vector f12x; - // GPU_Vector f12y; - // GPU_Vector f12z; + GPU_Vector f12x; + GPU_Vector f12y; + GPU_Vector f12z; }; class ILP_TMD_SW : public Potential From 7c694fdaeb635973124757301f250131464fd938 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 14 Oct 2024 15:42:34 +0800 Subject: [PATCH 33/46] add SW compute step 1 --- src/force/ilp_tmd_sw.cu | 130 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index 04d5a56b4..1bb85e577 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -27,6 +27,9 @@ TODO: // there are most 6 intra-layer neighbors for TMD #define NNEI 6 +#define BLOCK_SIZE_SW 64 +// 128 is also good + ILP_TMD_SW::ILP_TMD_SW(FILE* fid_ilp, FILE* fid_sw, int num_types, int num_atoms) { // read ILP TMD potential parameter @@ -1199,6 +1202,124 @@ static __global__ void reduce_force_many_body( } } +// SW term +// two-body part of the SW potential +static __device__ void find_p2_and_f2( + double sigma, double a, double B, double epsilon_times_A, double d12, double& p2, double& f2) +{ + double r12 = d12 / sigma; + double B_over_r12power4 = B / (r12 * r12 * r12 * r12); + double exp_factor = epsilon_times_A * exp(1.0 / (r12 - a)); + p2 = exp_factor * (B_over_r12power4 - 1.0); + f2 = -p2 / ((r12 - a) * (r12 - a)) - exp_factor * 4.0 * B_over_r12power4 / r12; + f2 /= (sigma * d12); +} + +// find the partial forces dU_i/dr_ij of SW potential +static __global__ void gpu_find_force_sw3_partial( + const int number_of_particles, + const int N1, + const int N2, + const Box box, + const SW2_Para sw3, + const int* g_neighbor_number, + const int* g_neighbor_list, + const int* g_type, + const int shift, + const double* __restrict__ g_x, + const double* __restrict__ g_y, + const double* __restrict__ g_z, + double* g_potential, + double* g_f12x, + double* g_f12y, + double* g_f12z) +{ + int n1 = blockIdx.x * blockDim.x + threadIdx.x + N1; // particle index + if (n1 >= N1 && n1 < N2) { + int neighbor_number = g_neighbor_number[n1]; + int type1 = g_type[n1] - shift; + double x1 = g_x[n1]; + double y1 = g_y[n1]; + double z1 = g_z[n1]; + double potential_energy = 0.0; + + for (int i1 = 0; i1 < neighbor_number; ++i1) { + int index = i1 * number_of_particles + n1; + int n2 = g_neighbor_list[index]; + int type2 = g_type[n2] - shift; + double x12 = g_x[n2] - x1; + double y12 = g_y[n2] - y1; + double z12 = g_z[n2] - z1; + apply_mic(box, x12, y12, z12); + double d12 = sqrt(x12 * x12 + y12 * y12 + z12 * z12); + double d12inv = 1.0 / d12; + if (d12 >= sw3.rc[type1][type2]) { + continue; + } + + double gamma12 = sw3.gamma[type1][type2]; + double sigma12 = sw3.sigma[type1][type2]; + double a12 = sw3.a[type1][type2]; + double tmp = gamma12 / (sigma12 * (d12 / sigma12 - a12) * (d12 / sigma12 - a12)); + double p2, f2; + find_p2_and_f2(sigma12, a12, sw3.B[type1][type2], sw3.A[type1][type2], d12, p2, f2); + + // treat the two-body part in the same way as the many-body part + double f12x = f2 * x12 * 0.5; + double f12y = f2 * y12 * 0.5; + double f12z = f2 * z12 * 0.5; + // accumulate potential energy + potential_energy += p2 * 0.5; + + // three-body part + for (int i2 = 0; i2 < neighbor_number; ++i2) { + int n3 = g_neighbor_list[n1 + number_of_particles * i2]; + if (n3 == n2) { + continue; + } + int type3 = g_type[n3] - shift; + double x13 = g_x[n3] - x1; + double y13 = g_y[n3] - y1; + double z13 = g_z[n3] - z1; + apply_mic(box, x13, y13, z13); + double d13 = sqrt(x13 * x13 + y13 * y13 + z13 * z13); + if (d13 >= sw3.rc[type1][type3]) { + continue; + } + + double cos0 = sw3.cos0[type1][type2][type3]; + double lambda = sw3.lambda[type1][type2][type3]; + double exp123 = d13 / sw3.sigma[type1][type3] - sw3.a[type1][type3]; + exp123 = sw3.gamma[type1][type3] / exp123; + exp123 = exp(gamma12 / (d12 / sigma12 - a12) + exp123); + double one_over_d12d13 = 1.0 / (d12 * d13); + double cos123 = (x12 * x13 + y12 * y13 + z12 * z13) * one_over_d12d13; + double cos123_over_d12d12 = cos123 * d12inv * d12inv; + + double tmp1 = exp123 * (cos123 - cos0) * lambda; + double tmp2 = tmp * (cos123 - cos0) * d12inv; + + // accumulate potential energy + potential_energy += (cos123 - cos0) * tmp1 * 0.5; + + double cos_d = x13 * one_over_d12d13 - x12 * cos123_over_d12d12; + f12x += tmp1 * (2.0 * cos_d - tmp2 * x12); + + cos_d = y13 * one_over_d12d13 - y12 * cos123_over_d12d12; + f12y += tmp1 * (2.0 * cos_d - tmp2 * y12); + + cos_d = z13 * one_over_d12d13 - z12 * cos123_over_d12d12; + f12z += tmp1 * (2.0 * cos_d - tmp2 * z12); + } + g_f12x[index] = f12x; + g_f12y[index] = f12y; + g_f12z[index] = f12z; + } + // save potential + g_potential[n1] += potential_energy; + } +} + // define the pure virtual func void ILP_TMD_SW::compute( Box &box, @@ -1376,4 +1497,13 @@ void ILP_TMD_SW::compute( g_f12y_ilp_neigh, g_f12z_ilp_neigh); CUDA_CHECK_KERNEL + + // step 1: calculate the partial forces + gpu_find_force_sw3_partial<<>>( + number_of_atoms, N1, N2, box, sw2_para, sw2_data.NN.data(), sw2_data.NL.data(), + type.data(), type_shift, position_per_atom.data(), position_per_atom.data() + number_of_atoms, + position_per_atom.data() + number_of_atoms * 2, potential_per_atom.data(), sw2_data.f12x.data(), + sw2_data.f12y.data(), sw2_data.f12z.data()); + CUDA_CHECK_KERNEL + } \ No newline at end of file From 31c46f7fead91d1e7e4bcb18c3518ee8e6af267f Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 14 Oct 2024 15:45:59 +0800 Subject: [PATCH 34/46] add SW compute step 2 --- src/force/ilp_tmd_sw.cu | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index 1bb85e577..392bef5b2 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -1506,4 +1506,8 @@ void ILP_TMD_SW::compute( sw2_data.f12y.data(), sw2_data.f12z.data()); CUDA_CHECK_KERNEL + // step 2: calculate force and related quantities + find_properties_many_body( + box, sw2_data.NN.data(), sw2_data.NL.data(), sw2_data.f12x.data(), + sw2_data.f12y.data(), sw2_data.f12z.data(), position_per_atom, force_per_atom, virial_per_atom); } \ No newline at end of file From 4e11d669f68b5e8beab82c3a0264f7de7fc7bc29 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 14 Oct 2024 16:28:04 +0800 Subject: [PATCH 35/46] add a new find_neighbor func for SW to make neighbors just in the smae layer --- src/force/neighbor.cu | 136 +++++++++++++++++++++++++++++++++++++++++ src/force/neighbor.cuh | 15 +++++ 2 files changed, 151 insertions(+) diff --git a/src/force/neighbor.cu b/src/force/neighbor.cu index 04aa9e35c..447c5aaaa 100644 --- a/src/force/neighbor.cu +++ b/src/force/neighbor.cu @@ -493,4 +493,140 @@ void find_neighbor( const int MN = NL.size() / NN.size(); gpu_sort_neighbor_list_ilp<<>>(N, NN.data(), NL.data()); CUDA_CHECK_KERNEL +} + +static __global__ void gpu_find_neighbor_ON1_SW( + const Box box, + const int N, + const int N1, + const int N2, + const int* __restrict__ type, + const int* __restrict__ cell_counts, + const int* __restrict__ cell_count_sum, + const int* __restrict__ cell_contents, + int* NN, + int* NL, + const int* group_label, + const double* __restrict__ x, + const double* __restrict__ y, + const double* __restrict__ z, + const int nx, + const int ny, + const int nz, + const double rc_inv, + const double cutoff_square) +{ + const int n1 = blockIdx.x * blockDim.x + threadIdx.x + N1; + int count = 0; + if (n1 < N2) { + const double x1 = x[n1]; + const double y1 = y[n1]; + const double z1 = z[n1]; + int cell_id; + int cell_id_x; + int cell_id_y; + int cell_id_z; + find_cell_id(box, x1, y1, z1, rc_inv, nx, ny, nz, cell_id_x, cell_id_y, cell_id_z, cell_id); + + const int z_lim = box.pbc_z ? 2 : 0; + const int y_lim = box.pbc_y ? 2 : 0; + const int x_lim = box.pbc_x ? 2 : 0; + + // get radial descriptors + for (int k = -z_lim; k <= z_lim; ++k) { + for (int j = -y_lim; j <= y_lim; ++j) { + for (int i = -x_lim; i <= x_lim; ++i) { + int neighbor_cell = cell_id + k * nx * ny + j * nx + i; + if (cell_id_x + i < 0) + neighbor_cell += nx; + if (cell_id_x + i >= nx) + neighbor_cell -= nx; + if (cell_id_y + j < 0) + neighbor_cell += ny * nx; + if (cell_id_y + j >= ny) + neighbor_cell -= ny * nx; + if (cell_id_z + k < 0) + neighbor_cell += nz * ny * nx; + if (cell_id_z + k >= nz) + neighbor_cell -= nz * ny * nx; + + const int num_atoms_neighbor_cell = cell_counts[neighbor_cell]; + const int num_atoms_previous_cells = cell_count_sum[neighbor_cell]; + + for (int m = 0; m < num_atoms_neighbor_cell; ++m) { + const int n2 = cell_contents[num_atoms_previous_cells + m]; + if (n2 >= N1 && n2 < N2 && n1 != n2) { + + double x12 = x[n2] - x1; + double y12 = y[n2] - y1; + double z12 = z[n2] - z1; + apply_mic(box, x12, y12, z12); + const double d2 = x12 * x12 + y12 * y12 + z12 * z12; + + if (d2 < cutoff_square && group_label[n1] == group_label[n2]) { + NL[count++ * N + n1] = n2; + } + } + } + } + } + } + NN[n1] = count; + } +} + +void find_neighbor_SW( + const int N1, + const int N2, + double rc, + Box& box, + const int* group_label, + const GPU_Vector& type, + const GPU_Vector& position_per_atom, + GPU_Vector& cell_count, + GPU_Vector& cell_count_sum, + GPU_Vector& cell_contents, + GPU_Vector& NN, + GPU_Vector& NL) +{ + const int N = NN.size(); + const int block_size = 256; + const int grid_size = (N2 - N1 - 1) / block_size + 1; + const double* x = position_per_atom.data(); + const double* y = position_per_atom.data() + N; + const double* z = position_per_atom.data() + N * 2; + const double rc_cell_list = 0.5 * rc; + const double rc_inv_cell_list = 2.0 / rc; + + int num_bins[3]; + box.get_num_bins(rc_cell_list, num_bins); + + find_cell_list( + rc_cell_list, num_bins, box, position_per_atom, cell_count, cell_count_sum, cell_contents); + + gpu_find_neighbor_ON1_SW<<>>( + box, + N, + N1, + N2, + type.data(), + cell_count.data(), + cell_count_sum.data(), + cell_contents.data(), + NN.data(), + NL.data(), + group_label, + x, + y, + z, + num_bins[0], + num_bins[1], + num_bins[2], + rc_inv_cell_list, + rc * rc); + CUDA_CHECK_KERNEL + + const int MN = NL.size() / NN.size(); + gpu_sort_neighbor_list<<>>(N, NN.data(), NL.data()); + CUDA_CHECK_KERNEL } \ No newline at end of file diff --git a/src/force/neighbor.cuh b/src/force/neighbor.cuh index 6fc5f3405..91981abff 100644 --- a/src/force/neighbor.cuh +++ b/src/force/neighbor.cuh @@ -58,6 +58,21 @@ void find_neighbor( GPU_Vector& big_ilp_NN, GPU_Vector& big_ilp_NL); +// for SW +void find_neighbor_SW( + const int N1, + const int N2, + double rc, + Box& box, + const int* group_label, + const GPU_Vector& type, + const GPU_Vector& position_per_atom, + GPU_Vector& cell_count, + GPU_Vector& cell_count_sum, + GPU_Vector& cell_contents, + GPU_Vector& NN, + GPU_Vector& NL); + static __device__ void find_cell_id( const Box& box, const double x, From 8d43e3428b5558de3d2fb7fb131e4387fccb81d2 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 14 Oct 2024 16:33:05 +0800 Subject: [PATCH 36/46] use the find_neighbor func for SW --- src/force/ilp_tmd_sw.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index 392bef5b2..f84ccbe46 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -1375,11 +1375,12 @@ void ILP_TMD_SW::compute( ilp_data.big_ilp_NN, ilp_data.big_ilp_NL); - find_neighbor( + find_neighbor_SW( N1, N2, rc, box, + group_label, type, position_per_atom, sw2_data.cell_count, From 1762fb45775269654577b411434ec6a2a18fcc59 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 14 Oct 2024 16:38:31 +0800 Subject: [PATCH 37/46] add a rc for SW --- src/force/ilp_tmd_sw.cu | 16 ++++++++-------- src/force/ilp_tmd_sw.cuh | 2 ++ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index f84ccbe46..1d1ae80bc 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -154,7 +154,7 @@ void ILP_TMD_SW::initialize_sw_1985_1(FILE* fid) sw2_para.sigma[0][0] = sigma; sw2_para.gamma[0][0] = gamma; sw2_para.rc[0][0] = sigma * a; - rc = sw2_para.rc[0][0]; + rc_sw = sw2_para.rc[0][0]; sw2_para.lambda[0][0][0] = epsilon * lambda; sw2_para.cos0[0][0][0] = cos0; } @@ -166,7 +166,7 @@ void ILP_TMD_SW::initialize_sw_1985_2(FILE* fid) // 2-body parameters and the force cutoff double A[3], B[3], a[3], sigma[3], gamma[3]; - rc = 0.0; + rc_sw = 0.0; for (int n = 0; n < 3; n++) { count = fscanf(fid, "%lf%lf%lf%lf%lf", &A[n], &B[n], &a[n], &sigma[n], &gamma[n]); PRINT_SCANF_ERROR(count, 5, "Reading error for SW potential."); @@ -179,8 +179,8 @@ void ILP_TMD_SW::initialize_sw_1985_2(FILE* fid) sw2_para.sigma[n1][n2] = sigma[n1 + n2]; sw2_para.gamma[n1][n2] = gamma[n1 + n2]; sw2_para.rc[n1][n2] = sigma[n1 + n2] * a[n1 + n2]; - if (rc < sw2_para.rc[n1][n2]) - rc = sw2_para.rc[n1][n2]; + if (rc_sw < sw2_para.rc[n1][n2]) + rc_sw = sw2_para.rc[n1][n2]; } // 3-body parameters @@ -202,7 +202,7 @@ void ILP_TMD_SW::initialize_sw_1985_3(FILE* fid) // 2-body parameters and the force cutoff double A, B, a, sigma, gamma; - rc = 0.0; + rc_sw = 0.0; for (int n1 = 0; n1 < 3; n1++) for (int n2 = 0; n2 < 3; n2++) { count = fscanf(fid, "%lf%lf%lf%lf%lf", &A, &B, &a, &sigma, &gamma); @@ -213,8 +213,8 @@ void ILP_TMD_SW::initialize_sw_1985_3(FILE* fid) sw2_para.sigma[n1][n2] = sigma; sw2_para.gamma[n1][n2] = gamma; sw2_para.rc[n1][n2] = sigma * a; - if (rc < sw2_para.rc[n1][n2]) - rc = sw2_para.rc[n1][n2]; + if (rc_sw < sw2_para.rc[n1][n2]) + rc_sw = sw2_para.rc[n1][n2]; } // 3-body parameters @@ -1378,7 +1378,7 @@ void ILP_TMD_SW::compute( find_neighbor_SW( N1, N2, - rc, + rc_sw, box, group_label, type, diff --git a/src/force/ilp_tmd_sw.cuh b/src/force/ilp_tmd_sw.cuh index d84f8ce63..d8ff1b644 100644 --- a/src/force/ilp_tmd_sw.cuh +++ b/src/force/ilp_tmd_sw.cuh @@ -106,6 +106,8 @@ protected: ILP_TMD_Data ilp_data; SW2_Para sw2_para; SW2_Data sw2_data; + // rcut for SW + double rc_sw; }; static __constant__ float Tap_coeff_tmd[8]; \ No newline at end of file From 65bc8efd3f5c5ae0be9bcd543d5b3031610894c2 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 14 Oct 2024 21:23:43 +0800 Subject: [PATCH 38/46] rm type shift --- src/force/ilp_tmd_sw.cu | 10 ++++------ src/force/ilp_tmd_sw.cuh | 1 - src/force/potential.cuh | 1 - 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index 1d1ae80bc..a13a48686 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -1225,7 +1225,6 @@ static __global__ void gpu_find_force_sw3_partial( const int* g_neighbor_number, const int* g_neighbor_list, const int* g_type, - const int shift, const double* __restrict__ g_x, const double* __restrict__ g_y, const double* __restrict__ g_z, @@ -1237,7 +1236,7 @@ static __global__ void gpu_find_force_sw3_partial( int n1 = blockIdx.x * blockDim.x + threadIdx.x + N1; // particle index if (n1 >= N1 && n1 < N2) { int neighbor_number = g_neighbor_number[n1]; - int type1 = g_type[n1] - shift; + int type1 = g_type[n1]; double x1 = g_x[n1]; double y1 = g_y[n1]; double z1 = g_z[n1]; @@ -1246,7 +1245,7 @@ static __global__ void gpu_find_force_sw3_partial( for (int i1 = 0; i1 < neighbor_number; ++i1) { int index = i1 * number_of_particles + n1; int n2 = g_neighbor_list[index]; - int type2 = g_type[n2] - shift; + int type2 = g_type[n2]; double x12 = g_x[n2] - x1; double y12 = g_y[n2] - y1; double z12 = g_z[n2] - z1; @@ -1277,7 +1276,7 @@ static __global__ void gpu_find_force_sw3_partial( if (n3 == n2) { continue; } - int type3 = g_type[n3] - shift; + int type3 = g_type[n3]; double x13 = g_x[n3] - x1; double y13 = g_y[n3] - y1; double z13 = g_z[n3] - z1; @@ -1337,7 +1336,6 @@ void ILP_TMD_SW::compute( #define BIG_ILP_CUTOFF_SQUARE 50.0 // find force and related quantities void ILP_TMD_SW::compute( - const int type_shift, Box &box, const GPU_Vector &type, const GPU_Vector &position_per_atom, @@ -1502,7 +1500,7 @@ void ILP_TMD_SW::compute( // step 1: calculate the partial forces gpu_find_force_sw3_partial<<>>( number_of_atoms, N1, N2, box, sw2_para, sw2_data.NN.data(), sw2_data.NL.data(), - type.data(), type_shift, position_per_atom.data(), position_per_atom.data() + number_of_atoms, + type.data(), position_per_atom.data(), position_per_atom.data() + number_of_atoms, position_per_atom.data() + number_of_atoms * 2, potential_per_atom.data(), sw2_data.f12x.data(), sw2_data.f12y.data(), sw2_data.f12z.data()); CUDA_CHECK_KERNEL diff --git a/src/force/ilp_tmd_sw.cuh b/src/force/ilp_tmd_sw.cuh index d8ff1b644..b1ed87180 100644 --- a/src/force/ilp_tmd_sw.cuh +++ b/src/force/ilp_tmd_sw.cuh @@ -87,7 +87,6 @@ public: GPU_Vector& virial); virtual void compute( - const int type_shift, Box& box, const GPU_Vector& type, const GPU_Vector& position, diff --git a/src/force/potential.cuh b/src/force/potential.cuh index 647921d46..cd0328307 100644 --- a/src/force/potential.cuh +++ b/src/force/potential.cuh @@ -49,7 +49,6 @@ public: // add group message for ILP TMD SW virtual void compute( - const int type_shift, Box& box, const GPU_Vector& type, const GPU_Vector& position, From de6d656dd23a2ecd974e125c3b0f22977550bd78 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 21 Oct 2024 12:31:58 +0800 Subject: [PATCH 39/46] add modification for SW --- src/force/ilp_tmd_sw.cu | 22 +++++++++++++++++++--- src/force/ilp_tmd_sw.cuh | 4 ++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index a13a48686..ce834c20b 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -1294,12 +1294,28 @@ static __global__ void gpu_find_force_sw3_partial( double one_over_d12d13 = 1.0 / (d12 * d13); double cos123 = (x12 * x13 + y12 * y13 + z12 * z13) * one_over_d12d13; double cos123_over_d12d12 = cos123 * d12inv * d12inv; + // cos123 - cos0 + double delta_cos = cos123 - cos0; + + // modification to (cos123 - cos0) + double abs_delta_cos = fabs(delta_cos); + if (abs_delta_cos >= DELTA2) { + delta_cos = 0.0; + } else if (abs_delta_cos < DELTA2 && abs_delta_cos > DELTA1) { + double factor = 0.5 + 0.5 * cos(M_PI * (abs_delta_cos - DELTA1) / (DELTA2 - DELTA1)); + delta_cos *= factor; + } - double tmp1 = exp123 * (cos123 - cos0) * lambda; - double tmp2 = tmp * (cos123 - cos0) * d12inv; + // double tmp1 = exp123 * (cos123 - cos0) * lambda; + // double tmp2 = tmp * (cos123 - cos0) * d12inv; + double tmp1 = exp123 * delta_cos * lambda; + double tmp2 = tmp * delta_cos * d12inv; // accumulate potential energy - potential_energy += (cos123 - cos0) * tmp1 * 0.5; + // potential_energy += (cos123 - cos0) * tmp1 * 0.5; + // double tmp_e = (cos123 - cos0) * tmp1 * 0.5; + double tmp_e = delta_cos * tmp1 * 0.5; + potential_energy += tmp_e; double cos_d = x13 * one_over_d12d13 - x12 * cos123_over_d12d12; f12x += tmp1 * (2.0 * cos_d - tmp2 * x12); diff --git a/src/force/ilp_tmd_sw.cuh b/src/force/ilp_tmd_sw.cuh index b1ed87180..544072ab3 100644 --- a/src/force/ilp_tmd_sw.cuh +++ b/src/force/ilp_tmd_sw.cuh @@ -25,6 +25,10 @@ #define MAX_BIG_ILP_NEIGHBOR_TMD 128 #define MAX_SW_NEIGHBOR_NUM 50 +// modification for TMD +#define DELTA1 0.25 +#define DELTA2 0.35 + struct ILP_TMD_Para { float rcutsq_ilp[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; float d[MAX_TYPE_ILP_TMD_SW][MAX_TYPE_ILP_TMD_SW]; From e8cf78cbc1a561074ad1877cd7b5cbb817253392 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 21 Oct 2024 12:34:17 +0800 Subject: [PATCH 40/46] FIX BUG: use the same block size --- src/force/ilp_tmd_sw.cu | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index ce834c20b..43b9c7d54 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -27,8 +27,6 @@ TODO: // there are most 6 intra-layer neighbors for TMD #define NNEI 6 -#define BLOCK_SIZE_SW 64 -// 128 is also good ILP_TMD_SW::ILP_TMD_SW(FILE* fid_ilp, FILE* fid_sw, int num_types, int num_atoms) { @@ -1514,7 +1512,7 @@ void ILP_TMD_SW::compute( CUDA_CHECK_KERNEL // step 1: calculate the partial forces - gpu_find_force_sw3_partial<<>>( + gpu_find_force_sw3_partial<<>>( number_of_atoms, N1, N2, box, sw2_para, sw2_data.NN.data(), sw2_data.NL.data(), type.data(), position_per_atom.data(), position_per_atom.data() + number_of_atoms, position_per_atom.data() + number_of_atoms * 2, potential_per_atom.data(), sw2_data.f12x.data(), From 33934c5ba6ef77854227c13361ae05e0619f0390 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 21 Oct 2024 13:01:35 +0800 Subject: [PATCH 41/46] add ilp_tmd_sw MoS2 potential --- potentials/ilp/ilp_tmd_sw_MoS2_oywg_2018.txt | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 potentials/ilp/ilp_tmd_sw_MoS2_oywg_2018.txt diff --git a/potentials/ilp/ilp_tmd_sw_MoS2_oywg_2018.txt b/potentials/ilp/ilp_tmd_sw_MoS2_oywg_2018.txt new file mode 100644 index 000000000..bd5064b8e --- /dev/null +++ b/potentials/ilp/ilp_tmd_sw_MoS2_oywg_2018.txt @@ -0,0 +1,5 @@ +ilp_tmd_sw 2 Mo S +5.57945037 9.37766246 2.02722246 144.15177543 97.97857017 89.43759676 2.05903055 5.12205490 491850.31619468 1.0 4.0 16.0 +3.62715212 19.97137510 7.58503123 76.10193100 3.31749559 45.72032846 0.94747037 4.41042503 150597.85771629 1.0 4.0 16.0 +3.62715212 19.97137510 7.58503123 76.10193100 3.31749559 45.72032846 0.94747037 4.41042503 150597.85771629 1.0 4.0 16.0 +3.16140219 8.09326275 1.95314020 4.58676418 118.06546646 58.80941584 0.21536658 4.29960013 148811.24340892 1.0 4.0 16.0 From 5255832ca5121556d3dc5aacd0e56fd620108d0f Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 21 Oct 2024 13:02:08 +0800 Subject: [PATCH 42/46] add sw MoS2 potnetial --- potentials/sw/MoS2_jjw_2018.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 potentials/sw/MoS2_jjw_2018.txt diff --git a/potentials/sw/MoS2_jjw_2018.txt b/potentials/sw/MoS2_jjw_2018.txt new file mode 100644 index 000000000..023e715cf --- /dev/null +++ b/potentials/sw/MoS2_jjw_2018.txt @@ -0,0 +1,11 @@ +0.000 1.000 1.000 1.000 1.000 +6.918 7.223 2.523 1.252 1.000 +0.000 1.000 1.000 1.000 1.000 +0.000 0.143 +0.000 0.143 +0.000 0.143 +67.883 0.143 +62.449 0.143 +0.000 0.143 +0.000 0.143 +0.000 0.143 \ No newline at end of file From 031e8f1da705019861a88ed4b9a59f599d252a28 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 21 Oct 2024 14:25:56 +0800 Subject: [PATCH 43/46] use definition of PI in common.cuh --- src/force/ilp_tmd_sw.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index 43b9c7d54..7315b783a 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -21,6 +21,7 @@ TODO: #include "ilp_tmd_sw.cuh" #include "neighbor.cuh" #include "utilities/error.cuh" +#include "common.cuh" #define BLOCK_SIZE_FORCE 128 @@ -1300,7 +1301,7 @@ static __global__ void gpu_find_force_sw3_partial( if (abs_delta_cos >= DELTA2) { delta_cos = 0.0; } else if (abs_delta_cos < DELTA2 && abs_delta_cos > DELTA1) { - double factor = 0.5 + 0.5 * cos(M_PI * (abs_delta_cos - DELTA1) / (DELTA2 - DELTA1)); + double factor = 0.5 + 0.5 * cos(PI * (abs_delta_cos - DELTA1) / (DELTA2 - DELTA1)); delta_cos *= factor; } From 250e87fa2930682cd9dbca21285b768cde0ca0ba Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 21 Oct 2024 14:27:25 +0800 Subject: [PATCH 44/46] true path of common.cuh --- src/force/ilp_tmd_sw.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index 7315b783a..07e772bbd 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -21,7 +21,7 @@ TODO: #include "ilp_tmd_sw.cuh" #include "neighbor.cuh" #include "utilities/error.cuh" -#include "common.cuh" +#include "utilities/common.cuh" #define BLOCK_SIZE_FORCE 128 From 871f6a4fbed027330c37bf889300307e1da6f1fd Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 21 Oct 2024 21:29:36 +0800 Subject: [PATCH 45/46] mv potential files of ilp_tmd_sw to ilp dir --- ...md_sw_MoS2_oywg_2018.txt => ilp_tmd_sw_ILP_MoS2_oywg_2018.txt} | 0 .../{sw/MoS2_jjw_2018.txt => ilp/ilp_tmd_sw_SW_MoS2_jjw_2018.txt} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename potentials/ilp/{ilp_tmd_sw_MoS2_oywg_2018.txt => ilp_tmd_sw_ILP_MoS2_oywg_2018.txt} (100%) rename potentials/{sw/MoS2_jjw_2018.txt => ilp/ilp_tmd_sw_SW_MoS2_jjw_2018.txt} (100%) diff --git a/potentials/ilp/ilp_tmd_sw_MoS2_oywg_2018.txt b/potentials/ilp/ilp_tmd_sw_ILP_MoS2_oywg_2018.txt similarity index 100% rename from potentials/ilp/ilp_tmd_sw_MoS2_oywg_2018.txt rename to potentials/ilp/ilp_tmd_sw_ILP_MoS2_oywg_2018.txt diff --git a/potentials/sw/MoS2_jjw_2018.txt b/potentials/ilp/ilp_tmd_sw_SW_MoS2_jjw_2018.txt similarity index 100% rename from potentials/sw/MoS2_jjw_2018.txt rename to potentials/ilp/ilp_tmd_sw_SW_MoS2_jjw_2018.txt From dc865c9be508e7f23c2304ce9a421512b72793c7 Mon Sep 17 00:00:00 2001 From: bhk Date: Mon, 21 Oct 2024 22:53:47 +0800 Subject: [PATCH 46/46] rename functions to clearly identify --- src/force/force.cu | 10 +++++----- src/force/ilp_tmd_sw.cu | 4 ++-- src/force/ilp_tmd_sw.cuh | 2 +- src/force/neighbor.cu | 6 +++--- src/force/neighbor.cuh | 2 +- src/force/potential.cuh | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/force/force.cu b/src/force/force.cu index bbc7dddc9..3313e9257 100644 --- a/src/force/force.cu +++ b/src/force/force.cu @@ -134,7 +134,7 @@ void Force::parse_potential( potential.reset(new LJ(fid_potential, num_types, number_of_atoms)); } else if (strcmp(potential_name, "ilp_tmd_sw") == 0) { if (num_param != 3) { - PRINT_INPUT_ERROR("potential should ILP potential file and SW potential file.\n"); + PRINT_INPUT_ERROR("potential should contain ILP potential file and SW potential file.\n"); } FILE* fid_sw = my_fopen(param[2], "r"); potential.reset(new ILP_TMD_SW(fid_potential, fid_sw, num_types, number_of_atoms)); @@ -487,7 +487,7 @@ void Force::compute( virial_per_atom); } else if (1 == potentials[0]->ilp_flag) { // compute the potential with ILP - potentials[0]->compute( + potentials[0]->compute_ilp( box, type, position_per_atom, potential_per_atom, force_per_atom, virial_per_atom, group); } else { potentials[0]->compute( @@ -508,7 +508,7 @@ void Force::compute( virial_per_atom); } else if (1 == potentials[i]->ilp_flag) { // compute the potential with ILP - potentials[i]->compute( + potentials[i]->compute_ilp( box, type, position_per_atom, potential_per_atom, force_per_atom, virial_per_atom, group); } else { potentials[i]->compute( @@ -782,7 +782,7 @@ void Force::compute( virial_per_atom); } else if (1 == potentials[0]->ilp_flag) { // compute the potential with ILP - potentials[0]->compute( + potentials[0]->compute_ilp( box, type, position_per_atom, potential_per_atom, force_per_atom, virial_per_atom, group); } else { potentials[0]->compute( @@ -803,7 +803,7 @@ void Force::compute( virial_per_atom); } else if (1 == potentials[i]->ilp_flag) { // compute the potential with ILP - potentials[i]->compute( + potentials[i]->compute_ilp( box, type, position_per_atom, potential_per_atom, force_per_atom, virial_per_atom, group); } else { potentials[i]->compute( diff --git a/src/force/ilp_tmd_sw.cu b/src/force/ilp_tmd_sw.cu index 07e772bbd..931f47348 100644 --- a/src/force/ilp_tmd_sw.cu +++ b/src/force/ilp_tmd_sw.cu @@ -1350,7 +1350,7 @@ void ILP_TMD_SW::compute( #define UPDATE_TEMP 10 #define BIG_ILP_CUTOFF_SQUARE 50.0 // find force and related quantities -void ILP_TMD_SW::compute( +void ILP_TMD_SW::compute_ilp( Box &box, const GPU_Vector &type, const GPU_Vector &position_per_atom, @@ -1371,7 +1371,7 @@ void ILP_TMD_SW::compute( #ifdef USE_FIXED_NEIGHBOR if (num_calls++ == 0) { #endif - find_neighbor( + find_neighbor_ilp( N1, N2, rc, diff --git a/src/force/ilp_tmd_sw.cuh b/src/force/ilp_tmd_sw.cuh index 544072ab3..7e43a611b 100644 --- a/src/force/ilp_tmd_sw.cuh +++ b/src/force/ilp_tmd_sw.cuh @@ -90,7 +90,7 @@ public: GPU_Vector& force, GPU_Vector& virial); - virtual void compute( + virtual void compute_ilp( Box& box, const GPU_Vector& type, const GPU_Vector& position, diff --git a/src/force/neighbor.cu b/src/force/neighbor.cu index 447c5aaaa..8b234415e 100644 --- a/src/force/neighbor.cu +++ b/src/force/neighbor.cu @@ -343,7 +343,7 @@ void find_neighbor( } // For ILP, the neighbor could not contain atoms in the same layer -static __global__ void gpu_find_neighbor_ON1( +static __global__ void gpu_find_neighbor_ON1_ilp( const Box box, const int N, const int N1, @@ -433,7 +433,7 @@ static __global__ void gpu_find_neighbor_ON1( } } -void find_neighbor( +void find_neighbor_ilp( const int N1, const int N2, double rc, @@ -465,7 +465,7 @@ void find_neighbor( find_cell_list( rc_cell_list, num_bins, box, position_per_atom, cell_count, cell_count_sum, cell_contents); - gpu_find_neighbor_ON1<<>>( + gpu_find_neighbor_ON1_ilp<<>>( box, N, N1, diff --git a/src/force/neighbor.cuh b/src/force/neighbor.cuh index 91981abff..3e7cd1681 100644 --- a/src/force/neighbor.cuh +++ b/src/force/neighbor.cuh @@ -41,7 +41,7 @@ void find_neighbor( GPU_Vector& NL); // For ILP -void find_neighbor( +void find_neighbor_ilp( const int N1, const int N2, double rc, diff --git a/src/force/potential.cuh b/src/force/potential.cuh index cd0328307..30012865f 100644 --- a/src/force/potential.cuh +++ b/src/force/potential.cuh @@ -48,7 +48,7 @@ public: GPU_Vector& virial){}; // add group message for ILP TMD SW - virtual void compute( + virtual void compute_ilp( Box& box, const GPU_Vector& type, const GPU_Vector& position,