Hi,
I have some trouble using pdhseqr from ScaLAPACK (it is supposed to compute the eigenvalues and the Schur factorization of a matrix that is already in Hessenberg form). The problem occurs with both pshseqr and pdhseqr, and whether I compile with MSVC 12 or Intel 14. Here is a code sample that is as minimal as I could make it. It is C++, but so far I haven't had any problem with other functions from PBLAS or ScaLAPACK.
#include "mpi.h" #include <algorithm> // std::max #include <iostream> #include "mkl.h" #ifdef _WIN32 /* Win32 or Win64 environment */ #define numroc_ NUMROC #define descinit_ DESCINIT #endif extern "C" { /* Cblacs declarations */ void Cblacs_get(int, int, int*); void Cblacs_gridinit(int*, const char*, int, int); void Cblacs_pcoord(int, int, int*, int*); void Cblacs_gridexit(int); void Cblacs_exit(int); int numroc_(int*, int*, int*, int*, int*); void descinit_(int *, int *, int *, int *, int *, int *, int *, int *, int *, int *); void pdhseqr_(char *job, char* compz, int* n, int* ilo, int* ihi, double* h, int* desch, double* wr, double* wi, double* z, int* descz, double* work, int* lwork, int* iwork, int* liwork, int* info); void psgemm_(char*transa, char*transb, int*m, int*n, int*k, float*alpha, float*a, int*ia, int*ja, int*desca, float*b, int*ib, int*jb, int*descb, float*beta, float*c, int*ic, int*jc, int*descc);
void pdgemm_(char*transa, char*transb, int*m, int*n, int*k, double*alpha, double*a, int*ia, int*ja, int*desca, double*b, int*ib, int*jb, int*descb, double*beta, double*c, int*ic, int*jc, int*descc); void pshseqr_(char *job, char* compz, int* n, int* ilo, int* ihi, float* h, int* desch, float* wr, float* wi, float* z, int* descz, float* work, int* lwork, int* iwork, int* liwork, int* info); } inline int indxl2g(int lidx, int sblock, int nprocs, int iproc) { return nprocs*sblock*(lidx / sblock) + lidx%sblock + ((nprocs + iproc - 0) % nprocs)*sblock; } int main(int argc, char* argv[]) { // Initialize parallel stuff int ctxt, myrank, myrow, mycol, numproc; MPI::Init(); myrank = MPI::COMM_WORLD.Get_rank(); numproc = MPI::COMM_WORLD.Get_size(); char major = 'R'; // the process grid will be row-major int iZERO = 0; int iONE = 1; int iMONE = -1; double dZERO = 0.0; double dONE = 1.0; double dTWO = 2.0; float fZERO = 0.0; float fONE = 1.0; float fTWO = 2.0; Cblacs_get(0, 0, &ctxt); // get the system context Cblacs_gridinit(&ctxt, &major, numproc, 1); Cblacs_pcoord(ctxt, myrank, &myrow, &mycol); // Create a shared matrix // Size of the data on the local process int M = 10; int N = 10; int block = 10; int m = numroc_(&M, &block, &myrow, &iZERO, &numproc); int n = numroc_(&N, &block, &mycol, &iZERO, &iONE); std::cout << "m, n: "<< m << ", "<< n << std::endl; float* H = new float[m*n]; float* Q = new float[m*n]; // Finally fill in the BLACS array descriptor int descH[9]; int descQ[9]; int info; m = std::max(1, m); // necessary because descinit() will throw if llda is 0 descinit_(descH, &m, &n, &block, &block, &iZERO, &iZERO, &ctxt, &m, &info); descinit_(descQ, &m, &n, &block, &block, &iZERO, &iZERO, &ctxt, &m, &info); //Make H and Q upper-Hessenberg for (int j = 0; j < m; ++j) { for (int k = 0; k < n; ++k) { if (indxl2g(j, block, numproc, myrow) > indxl2g(k, block, 1, mycol) + 0) { H[j + m*k] = 0; Q[j + m*k] = 0; } else{ H[j + m*k] = 1; Q[j + m*k] = 1; } } } char op = 'N'; // Compute H <- Q*Q + H, just to check that is works psgemm_(&op, &op, &M, &N, &N, &fONE, Q, &iONE, &iONE, descQ, Q, &iONE, &iONE, descQ, &fONE, H, &iONE, &iONE, descH); // Check the output for (int j = 0; j < m; ++j) { for (int k = 0; k < n; ++k) { std::cout << H[j + m*k] << "\t"; } std::cout << std::endl; } char job = 'S'; char compz = 'I'; float * wr = new float[N]; float * wi = new float[N]; float lwork = -42.0; int liwork = -42; pshseqr_(&job, &compz, &N, &iONE, &N, H, descH, wr, wi, Q, descQ, &lwork, &iMONE, &liwork, &iMONE, &info); std::cout << "lwork, liwork: "<< lwork << ", "<< liwork << std::endl; std::cout << "info: "<< info << std::endl; int work_size = 10000; float work[10000]; int iwork[10000]; pshseqr_(&job, &compz, &N, &iONE, &N, H, descH, wr, wi, Q, descQ, work, &work_size, iwork, &work_size, &info); std::cout << "info: "<< info << std::endl; Cblacs_gridexit(ctxt); Cblacs_exit(0); return 0; }
And here is the output:
D:\lib\pxhseqr_minimal\x64\Debug>mpiexec -n 1 pxhseqr_minimal.exe
m, n: 10, 10
2   3   4   5   6   7   8   9   10  11
0   2   3   4   5   6   7   8   9   10
0   0   2   3   4   5   6   7   8   9
0   0   0   2   3   4   5   6   7   8
0   0   0   0   2   3   4   5   6   7
0   0   0   0   0   2   3   4   5   6
0   0   0   0   0   0   2   3   4   5
0   0   0   0   0   0   0   2   3   4
0   0   0   0   0   0   0   0   2   3
0   0   0   0   0   0   0   0   0   2
lwork, liwork: 894262, -42
info: 0
{   -1,   -1}:  On entry to
job aborted:
rank: node: exit code[: error message]
0: Oz: -1073741819: process 0 exited without calling finalize
If I run it under Visual Studio's debugger, I get this when pshseqr is called the second time:
Unhandled exception at 0x00007FF6CA2D8F1D in pxhseqr_minimal.exe: 0xC0000005: Access violation reading location 0x0000000000000007.
I don't know how MKL is implemented, but since the error message is garbled (the process coordinates are printed as -1,-1), I suspect it might be based on an older source where pxerbla was called with only 2 arguments (see line 357 of http://www.netlib.org/scalapack/explore-html/d3/d3a/pdhseqr_8f_source.html ).
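To make that hypothesis a bit more concrete, this is roughly how I understand the pxerbla interface (the declaration below is my own sketch, not taken from the MKL headers, and it ignores any hidden string-length argument the Fortran compiler may append):

// Sketch only: ScaLAPACK's PXERBLA( ICTXT, SRNAME, INFO ) takes the BLACS context
// as its first argument and looks up the caller's grid coordinates from it before
// printing "{ myrow, mycol }:  On entry to SRNAME parameter number INFO had an
// illegal value". If ICTXT is missing or invalid, the coordinates come back as -1,
// which would explain the "{   -1,   -1}" fragment in the output above.
extern "C" void pxerbla_(int* ictxt, char* srname, int* info);

So if the routine really does call pxerbla with only (SRNAME, INFO), the context argument would be garbage and the message would come out exactly like this.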
Or am I doing something wrong?
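For instance, I am not sure what to make of the workspace query: it reports lwork = 894262 (which seems enormous for a 10x10 matrix) and does not touch liwork at all, while the second call only gets fixed 10000-element arrays. In case that is the real problem, here is a sketch of what I would try if the queried size can be trusted (the 7*N fallback for liwork is purely my guess, I could not find the formula in the documentation):

// Sketch only, assuming the queried lwork can be trusted; the liwork fallback is a guess.
int lwork_i   = static_cast<int>(lwork);        // the query wrote 894262 into work(1)
int liwork_i  = (liwork > 0) ? liwork : 7 * N;  // the query left liwork at -42
float* work2  = new float[lwork_i];
int*   iwork2 = new int[liwork_i];
pshseqr_(&job, &compz, &N, &iONE, &N, H, descH, wr, wi, Q, descQ,
         work2, &lwork_i, iwork2, &liwork_i, &info);
std::cout << "info: " << info << std::endl;
delete[] work2;
delete[] iwork2;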
Any help would be very welcome; I have been stuck computing eigenvectors in serial for a while, so I would really like to get this routine working.
Best,
Romain