I am trying to multiply two matrices using mkl_ddiamm method:
C = A * B
where A is a 3x3 diagonal matrix and B is a general 3x3 matrix. No matter what I try, the result I get is not A*B but B*A. This is my sample code. It essentially performs an SVD decomposition of a matrix A and checks whether the computed matrices U, S and VT satisfy all the requirements according to theory, i.e.
1. U * UT = I, where I is the identity matrix
2. V * VT = I
3. U * S * VT = A
The result of the intermediate operation S * VT is not correct. In fact, the function mkl_ddiamm computes VT * S.
// requirement: m >= n int m = 3; int n = 3; double *a = (double *)mkl_malloc(m * n * sizeof(double), 16); double *s = (double *)mkl_malloc(n * sizeof(double), 16); double *u = (double *)mkl_malloc(m * n * sizeof(double), 16); double *vt = (double *)mkl_malloc(n * n * sizeof(double), 16); double *superb = (double *)mkl_malloc((n-1) * sizeof(double), 16); // identity matrix m x m double *unit_m = (double *)mkl_malloc(m * m * sizeof(double), 16); for (int i = 0; i < m; i++) for (int j = 0; j < m; j++) unit_m[i*m+j] = i == j ? 1.0 : 0; // identity matrix n x n double *unit_n = (double *)mkl_malloc(n * n * sizeof(double), 16); for (int i = 0; i < n; i++) for (int j = 0; j < n; j++) unit_n[i*n+j] = i == j ? 1.0 : 0; a[0] = 1; a[1] = 1; a[2] = 1; a[3] = 2.5; a[4] = 3; a[5] = 4; a[6] = 3; a[7] = 2; a[8] = 1; lapack_int res = LAPACKE_dgesvd(LAPACK_ROW_MAJOR, 'S', 'S', m, n, a, n, s, u, n, vt, n, superb); // Checking correctness of SVD calculation ... // u * ut = I double *temp = (double *)mkl_malloc(m * m * sizeof(double), 16); memset(temp, 0, m * m * sizeof(double)); cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, m, m, n, 1.0, u, n, u, n, 0, temp, m); // v * vt = I memset(temp, 0, n * n * sizeof(double)); cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, n, n, n, 1.0, vt, n, vt, n, 0, temp, n); // u * s * vt = a memset(temp, 0, n * n * sizeof(double)); int lval = 3; int idiag = 0; int ndiag = 1; double alpha = 1.0; double beta = 0; mkl_ddiamm("N", &n, &n, &n, &alpha, "DLNF", s, &lval, &idiag, &ndiag, vt, &n, &beta, temp, &n); double *temp2 = (double *)mkl_malloc(m * n * sizeof(double), 16); memset(temp2, 0, m * n * sizeof(double)); cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, n, 1.0, u, n, temp, n, 0, temp2, n);