Quantcast
Channel: Intel® oneAPI Math Kernel Library & Intel® Math Kernel Library
Viewing all articles
Browse latest Browse all 2652

Problem using pdhseqr

$
0
0

Hi,

I have some trouble using pdhseqr from ScaLAPACK (it is supposed to compute the eigen value and Schur factorization of a matrix already in Hessenberg form). It happens for both pshseqr and pdhseqr. The same happens whether I compile with MSVC 12 or Intel 14. Here is a (as much as possible) minimal code sample. It is C++ but so far I haven't had any problem with other functions from PBLAS or ScaLAPACK. 

#include "mpi.h"
#include <algorithm>    // std::max
#include <iostream>
#include "mkl.h"

#ifdef _WIN32 /* Win32 or Win64 environment */
#define numroc_ NUMROC
#define descinit_ DESCINIT
#endif

extern "C" {
	/* Cblacs declarations */
	void Cblacs_get(int, int, int*);
	void Cblacs_gridinit(int*, const char*, int, int);
	void Cblacs_pcoord(int, int, int*, int*);
	void Cblacs_gridexit(int);
	void Cblacs_exit(int);

	int numroc_(int*, int*, int*, int*, int*);
	void descinit_(int *, int *, int *, int *, int *, int *, int *,
		int *, int *, int *);

	void pdhseqr_(char *job, char* compz, int* n, int* ilo, int* ihi, double* h, int* desch, double* wr, double* wi, double* z, int* descz, double* work, int* lwork, int* iwork, int* liwork, int* info);
void psgemm_(char*transa, char*transb, int*m, int*n, int*k, float*alpha, float*a, int*ia, int*ja, int*desca, float*b, int*ib, int*jb, int*descb, float*beta, float*c, int*ic, int*jc, int*descc);
	void pdgemm_(char*transa, char*transb, int*m, int*n, int*k, double*alpha, double*a, int*ia, int*ja, int*desca, double*b, int*ib, int*jb, int*descb, double*beta, double*c, int*ic, int*jc, int*descc);
	void pshseqr_(char *job, char* compz, int* n, int* ilo, int* ihi, float* h, int* desch, float* wr, float* wi, float* z, int* descz, float* work, int* lwork, int* iwork, int* liwork, int* info);
}

inline int indxl2g(int lidx, int sblock, int nprocs, int iproc)
{
	return nprocs*sblock*(lidx / sblock) + lidx%sblock + ((nprocs + iproc - 0) % nprocs)*sblock;
}


int main(int argc, char* argv[])
{
	// Initialize parallel stuff
	int ctxt, myrank, myrow, mycol, numproc;

	MPI::Init();
	myrank = MPI::COMM_WORLD.Get_rank();
	numproc = MPI::COMM_WORLD.Get_size();

	char major = 'R';	// the process grid will be row-major
	int iZERO = 0; int iONE = 1; int iMONE = -1;
	double dZERO = 0.0;  double dONE = 1.0; double dTWO = 2.0;
	float fZERO = 0.0;  float fONE = 1.0; float fTWO = 2.0;

	Cblacs_get(0, 0, &ctxt);	// get the system context
	Cblacs_gridinit(&ctxt, &major, numproc, 1);
	Cblacs_pcoord(ctxt, myrank, &myrow, &mycol);

	// Create a shared matrix
	// Size of the data on the local process
	int M = 10;	int N = 10;
	int block = 10;
	int m = numroc_(&M, &block, &myrow, &iZERO, &numproc);
	int n = numroc_(&N, &block, &mycol, &iZERO, &iONE);

	std::cout << "m, n: "<< m << ", "<< n << std::endl;
	float* H = new float[m*n];
	float* Q = new float[m*n];

	// Finally fill in the BLACS array descriptor
	int descH[9]; int descQ[9]; int info;
	m = std::max(1, m);	// necessary because descinit() will throw if llda is 0

	descinit_(descH, &m, &n, &block, &block, &iZERO, &iZERO, &ctxt, &m, &info);
	descinit_(descQ, &m, &n, &block, &block, &iZERO, &iZERO, &ctxt, &m, &info);


	//Make H and Q upper-Hessenberg
	for (int j = 0; j < m; ++j)
	{
		for (int k = 0; k < n; ++k)
		{
			if (indxl2g(j, block, numproc, myrow) > indxl2g(k, block, 1, mycol) + 0)
			{
				H[j + m*k] = 0;
				Q[j + m*k] = 0;
			}
			else{ H[j + m*k] = 1; Q[j + m*k] = 1; }
		}
	}

	char op = 'N';
	// Compute H <- Q*Q + H, just to check that is works
	psgemm_(&op, &op, &M, &N, &N, &fONE, Q, &iONE, &iONE, descQ, Q, &iONE, &iONE, descQ, &fONE, H, &iONE, &iONE, descH);

	// Check the output
	for (int j = 0; j < m; ++j)
	{
		for (int k = 0; k < n; ++k)
		{
			std::cout << H[j + m*k] << "\t";
		}
		std::cout << std::endl;
	}


	char job = 'S'; char compz = 'I';
	float * wr = new float[N];	float * wi = new float[N];
	float lwork = -42.0; int liwork = -42;

	pshseqr_(&job, &compz, &N, &iONE, &N, H, descH, wr, wi, Q, descQ, &lwork, &iMONE, &liwork, &iMONE, &info);

	std::cout << "lwork, liwork: "<< lwork << ", "<< liwork << std::endl;
	std::cout << "info: "<< info << std::endl;

	int work_size = 10000;
	float work[10000]; int iwork[10000];

	pshseqr_(&job, &compz, &N, &iONE, &N, H, descH, wr, wi, Q, descQ, work, &work_size, iwork, &work_size, &info);
	std::cout << "info: "<< info << std::endl;

	Cblacs_gridexit(ctxt);
	Cblacs_exit(0);
	return 0;
}

And here is the output:

D:\lib\pxhseqr_minimal\x64\Debug>mpiexec -n 1 pxhseqr_minimal.exe
m, n: 10, 10
2       3       4       5       6       7       8       9       10      11
0       2       3       4       5       6       7       8       9       10
0       0       2       3       4       5       6       7       8       9
0       0       0       2       3       4       5       6       7       8
0       0       0       0       2       3       4       5       6       7
0       0       0       0       0       2       3       4       5       6
0       0       0       0       0       0       2       3       4       5
0       0       0       0       0       0       0       2       3       4
0       0       0       0       0       0       0       0       2       3
0       0       0       0       0       0       0       0       0       2
lwork, liwork: 894262, -42
info: 0
{   -1,   -1}:  On entry to

job aborted:
rank: node: exit code[: error message]
0: Oz: -1073741819: process 0 exited without calling finalize

If I try to run with Visual Studio's debugger I get this when pdhseqr is called the second time:

Unhandled exception at 0x00007FF6CA2D8F1D in pxhseqr_minimal.exe: 0xC0000005: Access violation reading location 0x0000000000000007.

I don't know how MKL works, but since the error message is messed up (process coordinates are -1,-1) I think it might be based on an old source where pxerbla was called with only 2 arguments (see line 357 http://www.netlib.org/scalapack/explore-html/d3/d3a/pdhseqr_8f_source.html ).

Or am I doing something wrong? 

Any help would be very welcome, I have been stuck with having to compute eigen vectors in serial for a while so I would really like to get this routine to work.

Best,

Romain


Viewing all articles
Browse latest Browse all 2652

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>