I am calculating a correlation matrix and am getting a weird issue. I have a 15x6 matrix R getting read into a subroutine correlation.c which calculates a 6x6 correlation matrix cor based off of the covariance matrix of R. My code looks like this
#include <stdio.h> #include <stdlib.h> #include <string.h> #include <math.h> #include <mkl.h> #include "params.h" void show_matrix(REAL *A, int n_dim, int m_dim); REAL *correlation(REAL *A, int n_dim, int m_dim) { int diag; REAL *cvm = (REAL*)_mm_malloc(sizeof(REAL) * n_dim * n_dim, 64); REAL *cor = (REAL*)_mm_malloc(sizeof(REAL) * n_dim * n_dim, 64); REAL *colavg = (REAL*)_mm_malloc(sizeof(REAL) * n_dim, 64); if (cvm == NULL || colavg == NULL) { printf("\nERROR: Cannot allocate memory for matrices in CORRELATION. Aborting...\n\n"); exit(EXIT_FAILURE); } else { memset(cvm,0.0,sizeof(cvm)); memset(cor,0.0,sizeof(cor)); memset(colavg,0.0,sizeof(colavg)); } /* Calculate column averages */ for (int i = 0; i < n_dim; i++) { for (int j = 0; j < m_dim; j++) { colavg[i] += A[j * n_dim + i]; } colavg[i] /= m_dim; } /* Calculate covariance matrix */ for (int i = 0; i < m_dim; i++) { int indx = i * n_dim; for (int j = 0; j < n_dim; j++) { A[indx + j] -= colavg[j]; } } printf("A in\n"); show_matrix(A, m_dim, n_dim); cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, n_dim, n_dim, m_dim, 1.0, A, n_dim, A, n_dim, 1.0, cvm, n_dim); printf("cvm out\n"); show_matrix(cvm, n_dim, n_dim); for (int i = 0; i < n_dim; i++) { int indx = i * n_dim; for (int j = 0; j < n_dim; j++) { cvm[indx + j] /= (m_dim - 1); } } printf("cvm final\n"); show_matrix(cvm, n_dim, n_dim); /* Calculate correlation matrix */ diag = n_dim + 1; for (int i = 0; i < n_dim; i++) { int indx = i * n_dim; int idiag = i * diag; for (int j = 0; j <= i; j++) { cor[indx + j] = cvm[indx + j] / sqrt(cvm[idiag] * cvm[j * diag]); } } printf("cor\n"); show_matrix(cor, n_dim, n_dim); return cor;
The "A in" debug print gives my expected input of
A in 1.53400 1.53400 -1.53400 -1.53400 0.48900 -0.31900 -0.88700 -0.48900 0.88700 -0.88700 -0.15700 0.67400 -0.48900 0.67400 -0.48900 1.15000 1.53400 -0.48900 0.88700 0.00000 -0.67400 0.31900 -0.00000 -1.53400 1.15000 -0.31900 0.48900 0.67400 0.15700 1.15000 0.15700 -1.53400 -0.88700 -0.67400 -0.31900 0.15700 -1.15000 -0.67400 -0.15700 0.15700 -1.53400 -0.15700 0.00000 -0.88700 0.15700 -0.31900 -0.67400 0.88700 0.31900 -0.15700 0.67400 0.88700 0.67400 1.53400 -0.31900 0.15700 -0.31900 -1.15000 1.15000 -0.88700 -1.53400 0.88700 1.15000 1.53400 -0.48900 -1.15000 -0.15700 -1.15000 1.53400 -0.15700 -1.15000 -0.67400 0.48900 0.48900 -1.15000 0.48900 -0.88700 -0.00000 0.67400 0.31900 0.31900 0.00000 0.88700 0.31900 -0.67400 1.15000 0.00000 -0.48900 0.31900 0.48900
However the "cvm out" debug print gives something a little bit goofy
cvm out 10.56446 1.02348 -4.93052 -2.46705 2.76175 1.84661 1.02348 10.56446 -3.30580 0.74990 -nan -2.39809 -4.93052 -3.30580 10.56446 3.56732 -2.08079 2.00943 -2.46705 0.74990 3.56732 10.56446 -0.43575 -0.31520 2.76175 5.11138 -2.08079 -0.43575 10.56446 0.55179 1.84661 -2.39809 2.00943 -0.31520 0.55179 10.56446
Why is that NaN there?! The symmetric value across the diagonal is correct! Very weird!
I am currently working around it by working only on the lower half, which works due to the matrix being SPD, but I'm very curious about this rogue NaN.
Any ideas?