Summary: I get erroneous output when calling pdlapv2 with the following reproducible example. After inspecting the source code pdlapv2.f, I think it is an algorithm error (or I have misunderstood the usage of this routine). Please help!
I am writing some C++ code that calls ScaLAPACK and ran into this problem.
I want to do a column reordering of a distributed matrix. Following the explanation and the source code in ScaLAPACK, I find the result confusing. Here is a minimal example with 4 processes, a 7x7 matrix, and a 6x6 block-cyclic distribution:
#include <algorithm> // for std::max
#include <cassert>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <sstream>   // for std::stringstream
#include <string>
#include <vector>
#include "mpi.h"
// build with:
// mpicxx --std=c++17 minimal.cpp -L/home/xli/software/scalapack-2.2.0 -L/home/xli/.local/lib -L/home/xli/.local/lib64 -lscalapack -llapack -lopenblas -lgfortran -o minimal.x
// run with:
// OMP_NUM_THREADS=1 LD_LIBRARY_PATH=/home/xli/.local/lib:/home/xli/.local/lib64:/opt/MPICH/4.2.0/lib:$LD_LIBRARY_PATH mpirun -np 4 ./minimal.x
#define ASSERT_DHIF(cond, msg) if (!(cond)) { std::cerr << msg << std::endl; exit(1); }
#define LOGDEBUG if (myid == 0) std::cout
int myid;
extern"C" {
voidCblacs_get(constint ictxt, constint what, int *val);
voidCblacs_pinfo(int *myrank, int *nprocs);
voidCblacs_gridinit(int *ictxt, constchar *order, constint nprow, constint npcol);
voidCblacs_gridinfo(constint ictxt, int *nprow, int *npcol, int *myrow, int *mycol);
voidCblacs_gridexit(constint ictxt);
voiddescinit_(int *desc,
constint *m, constint *n, constint *mb, constint *nb, constint *irsrc, constint *icsrc, constint *ictxt, constint *lld, constint *info);
voidpdlapiv_(char *direc, char *rowcol, char *pivroc, int *m, int *n, double *A, int *IA, int *JA, int *DESCA, int *IPIV, int *ip, int *jp, int *descip, int *iwork);
voidpdlapv2_(char *direc, char *rowcol, int *m, int *n, double *A, int *IA, int *JA, int *DESCA, int *IPIV, int *ip, int *jp, int *descip);
// compute LOCr or LOCc (local size of data for distributed array)intnumroc_(int *n, int *nb, int *iproc, int *isrcproc, int *nprocs);
}
void random_fill_matrix(double *A, int locr, int locc) {
std::srand(42+myid);
for (int i = 0; i < locr; i++)
for (int j = 0; j < locc; j++)
A[i + j * locr] = (double)rand() / RAND_MAX;
}
std::string print_mat(std::vector<double> &A, int locr, int locc) {
std::stringstream ss;
for (int i = 0; i < locr; i++) {
for (int j = 0; j < locc; j++)
ss << std::fixed << std::setprecision(5) << A[i + j * locr] << " ";
ss << "\n";
}
return ss.str();
}
std::ostream& operator<<(std::ostream& os, const std::vector<int>& v) {
os << "{";
for (auto it = v.begin(); it != v.end(); ++it) {
os << *it;
if (it != v.end() - 1)
os << ", ";
}
os << "}";
return os;
}
void validate_pdlapiv() {
int numprocs, info, ictxt;
int myrow, mycol;
int nprow = 2, npcol = 2;
int zero = 0, one = 1, two = 2;
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
ASSERT_DHIF(numprocs == 4, "This test must be run with 4 processes");
LOGDEBUG << "Start of test case" << std::endl;
// Initialize BLACS context
Cblacs_pinfo(&myid, &numprocs);
Cblacs_get(0, 0, &ictxt);
Cblacs_gridinit(&ictxt, "Row", nprow, npcol);
Cblacs_gridinfo(ictxt, &nprow, &npcol, &myrow, &mycol);
LOGDEBUG << "finish cblacs init" << std::endl;
// Define matrix dimensions and block size
int m = 7, n = 7, mb = 6, nb = 6;
double *A, *B;
// Initialize matrix descriptor for A
int locr = numroc_(&m, &mb, &myrow, &zero, &nprow); // Local number of rows of A
int locc = numroc_(&n, &nb, &mycol, &zero, &npcol); // Local number of columns of A
int lldA = std::max(1, locr); // Local leading dimension of A
int descA[9], descB[9];
descinit_(descA, &m, &n, &mb, &nb, &zero, &zero, &ictxt, &lldA, &info);
ASSERT_DHIF(info == 0, ("Error in descinit, info=" + std::to_string(info)).c_str());
descinit_(descB, &m, &n, &mb, &nb, &zero, &zero, &ictxt, &lldA, &info);
ASSERT_DHIF(info == 0, ("Error in descinit, info=" + std::to_string(info)).c_str());
int descip[9];
int nnb = n + nb * 2;
descinit_(descip, &two, &nnb, &one, &nb, &zero, &zero, &ictxt, &lldA, &info);
ASSERT_DHIF(info == 0, ("Error in descinit, info=" + std::to_string(info)).c_str());
LOGDEBUG << "locr and locc: " << locr << "" << locc << std::endl;
// get descip's locr and locc
int locrip = numroc_(&two, &one, &myrow, &zero, &nprow);
int loccip = numroc_(&nnb, &nb, &mycol, &zero, &npcol);
LOGDEBUG << "locrip and loccip: " << locrip << "" << loccip << std::endl;
// Allocate memory for local array A
std::vector<double> vA(locr * locc, 0);
std::vector<double> vB(locr * locc, 0);
A = vA.data();
B = vB.data();
// Initialize matrix A with random values and keep a copy in B, because pdlapv2_ will overwrite A
random_fill_matrix(A, locr, locc);
std::copy(A, A + locr * locc, B);
LOGDEBUG << "Everything in A: \n" << print_mat(vA, locr, locc) << std::endl;
LOGDEBUG << "Everything in B: \n" << print_mat(vB, locr, locc) << std::endl;
std::vector<int> ipiv;
if (myid % 2 == 0) {
ipiv = std::vector<int> {7, 2, 3, 4, 5, 6};
} else {
ipiv = std::vector<int> {1};
}
LOGDEBUG << "IPIV: " << ipiv << std::endl;
char direc = 'F', rowcol = 'C';
// append several 0 to ipiv
ipiv.insert(ipiv.end(), 6, 0);
LOGDEBUG << "IPIV: " << ipiv << std::endl;
assert(static_cast<int>(ipiv.size()) == loccip);
// print out all the parameters
LOGDEBUG << "Everything that will be used to call pdlapiv: " << std::endl;
// todo: pdlapv2 still not working
pdlapv2_(&direc, &rowcol, &m, &n, A, &one, &one, descA, ipiv.data(), &one, &one, descip);
LOGDEBUG << "Everything in A: \n" << print_mat(vA, locr, locc) << std::endl;
LOGDEBUG << "Everything in B: \n" << print_mat(vB, locr, locc) << std::endl;
Cblacs_gridexit(ictxt);
}
int main(int argc, char **argv) {
MPI_Init(&argc, &argv);
validate_pdlapiv();
MPI_Finalize();
return 0;
}
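For reference, with a 2x2 process grid and 6x6 blocks, the 7x7 matrix splits so that process (0,0) holds a 6x6 local block, (0,1) holds 6x1, (1,0) holds 1x6, and (1,1) holds 1x1. A small stand-alone sketch that prints these local sizes (my simplified re-implementation of the numroc_ logic for source process 0, not the library routine):

#include <cstdio>

// Simplified numroc_ computation for a block-cyclic distribution with
// source process 0: how many rows/columns of a dimension of size n, in
// blocks of size nb, land on process iproc out of nprocs.
int local_count(int n, int nb, int iproc, int nprocs) {
    int nblocks = n / nb;                      // complete blocks
    int count = (nblocks / nprocs) * nb;       // blocks from full rounds
    int extra = nblocks % nprocs;
    if (iproc < extra)       count += nb;      // one extra complete block
    else if (iproc == extra) count += n % nb;  // the trailing partial block
    return count;
}

int main() {
    for (int pr = 0; pr < 2; ++pr)
        for (int pc = 0; pc < 2; ++pc)
            std::printf("process (%d,%d): %d x %d local\n", pr, pc,
                        local_count(7, 6, pr, 2), local_count(7, 6, pc, 2));
    // prints 6 x 6, 6 x 1, 1 x 6, 1 x 1
    return 0;
}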
The program runs but produces erroneous output. I expect it to exchange the first and the last column of A.
(Only the first process prints output, via the LOGDEBUG macro.)
I expect the second printout of A to have a different first column, but it does not.
With this question in mind, I checked pdlapv2.f and found that the code at lines 252-287 handles the Forward and Column case (as specified by the parameters).
This code iterates over the column blocks jb from start to end, broadcasting each block to every process in the row; every process then performs the same swaps to move columns to their target positions.
So in my case I set the pivot to 7, 2, 3, 4, 5, 6 | 1, such that the first process column holds the first 6 pivot entries and the second holds the last one.
Working through the loop: while iterating the first block 7, 2, 3, 4, 5, 6, the processes find 7 out of place (line 274 in pdlapv2.f) and swap columns 7 <--> 1. Then, while iterating the second block 1, they find 1 out of place and swap again, 1 <--> 7, which undoes the first swap.
This makes the output erroneous (see the serial sketch below), and I want to know whether this is intended (meaning I misunderstood the routine) or a bug.
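To make the cancellation concrete, here is a minimal serial sketch of the swap sequence as I read the Forward/Column branch, with integer column labels standing in for the actual column data (illustration only, not ScaLAPACK code):

#include <array>
#include <cstdio>
#include <utility>

// Serial model of the swap sequence: walk IPIV left to right and swap
// column i with column IPIV(i) whenever they differ.
int main() {
    std::array<int, 7> cols = {1, 2, 3, 4, 5, 6, 7}; // column labels of A
    std::array<int, 7> ipiv = {7, 2, 3, 4, 5, 6, 1}; // global pivot vector
    for (int i = 0; i < 7; ++i) {
        int j = ipiv[i] - 1;             // 1-based pivot -> 0-based index
        if (j != i)
            std::swap(cols[i], cols[j]); // i=0 swaps 1<->7, i=6 swaps back
    }
    for (int c : cols)
        std::printf("%d ", c);           // prints 1 2 3 4 5 6 7: net no-op
    return 0;
}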
Thanks for any help!
In LAPACK there are two sorts of index arrays: IPIV (e.g. DGETRF) and JPVT (e.g. DGEQPF).
IPIV is a sequence of swaps, while JPVT is a column permutation.
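In other words, read as an IPIV swap sequence, {7, 2, 3, 4, 5, 6, 1} is a no-op (the two swaps cancel, as traced above), while read as a JPVT permutation it does exchange columns 1 and 7. A minimal sketch of the permutation reading (illustration only, not a ScaLAPACK call):

#include <array>
#include <cstdio>

// JPVT-style reading: column i of the result is column jpvt[i] of the
// input, i.e. the vector is a permutation, not a swap sequence.
int main() {
    std::array<int, 7> jpvt = {7, 2, 3, 4, 5, 6, 1};
    std::array<int, 7> cols = {1, 2, 3, 4, 5, 6, 7}; // column labels
    std::array<int, 7> out{};
    for (int i = 0; i < 7; ++i)
        out[i] = cols[jpvt[i] - 1];  // 1-based -> 0-based
    for (int c : out)
        std::printf("%d ", c);       // prints 7 2 3 4 5 6 1
    return 0;
}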
I also found the correct routine, defined in the driver p*qrdriver.f. But why is this routine defined there rather than in its own file in the SRC folder? There is no reference to it in the user's guide either.
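One workaround, assuming pdlapv2 really does apply IPIV as a forward swap sequence: convert the desired permutation into an equivalent swap sequence first, then hand that to pdlapv2. A sketch of such a conversion (perm_to_swaps is a hypothetical helper, not part of ScaLAPACK):

#include <utility>
#include <vector>

// Convert a 1-based column permutation perm (JPVT-style: result column i
// should be input column perm[i]) into a 1-based swap sequence ipiv
// (IPIV-style: at step i, swap columns i and ipiv[i]) that yields the
// same reordering when applied forward, left to right.
std::vector<int> perm_to_swaps(const std::vector<int> &perm) {
    int n = static_cast<int>(perm.size());
    std::vector<int> work(n), pos(n), ipiv(n);
    for (int i = 0; i < n; ++i) { work[i] = i; pos[i] = i; } // identity layout
    for (int i = 0; i < n; ++i) {
        int want = perm[i] - 1;      // 0-based column we want in slot i
        int j = pos[want];           // slot where that column currently sits
        ipiv[i] = j + 1;             // record the swap (1-based)
        std::swap(work[i], work[j]); // perform it on the tracking array
        pos[work[j]] = j;            // update positions of both columns
        pos[work[i]] = i;
    }
    return ipiv;
}

For the permutation above this yields {7, 2, 3, 4, 5, 6, 7}: the first step swaps columns 1 and 7 and the last step is a self-swap, so the net effect is the intended exchange.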