#include <cstdlib>
#include <ctime>
#include <iostream>
#include <cblas.h>
#include <opencv2/core/core_c.h>  // cvGetTickCount / cvGetTickFrequency
using namespace std;

int main() {
    int a = 169 * 64;    // number of elements in A
    int b = 64 * 1024;   // number of elements in B
    const int c = 5;     // every c-th element of A is overwritten with a denormal
    float* A = new float[169 * 64];
    float* B = new float[64 * 1024];
    float* C = new float[169 * 1024];

    srand(time(NULL));
    for (int i = 0; i < a; i++) {
        A[i] = rand() % 1000 / 100.0f;
        if (i % c == 0) {
            A[i] = -4.204e-045;  // subnormal float: magnitude is below FLT_MIN (~1.18e-38)
        }
    }
    for (int j = 0; j < b; j++) {
        B[j] = rand() % 10000 / 1000.0f;
    }

    while (true) {
        double t0 = cvGetTickCount();
        cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                    169, 1024, 64,
                    1.0f, A, 64, B, 1024,
                    0.0f, C, 1024);
        double t1 = cvGetTickCount() - t0;
        cout << "consume time:" << t1 / cvGetTickFrequency() / 1000.0 << endl;
    }
}
Executing the code above with different values of the constant c gives noticeably different running times. My guess is that the multiplication gets slower when the matrix contains denormalized values. Why?
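If subnormal values are the suspected cause, one way to test the hypothesis is to put the FPU into flush-to-zero / denormals-are-zero mode before the timing loop and see whether the per-c differences disappear. Below is a minimal sketch, assuming an x86 CPU with SSE3 and a compiler that ships the <xmmintrin.h>/<pmmintrin.h> intrinsics; the helper name enable_ftz_daz is mine, not part of the original program:

#include <xmmintrin.h>   // _MM_SET_FLUSH_ZERO_MODE
#include <pmmintrin.h>   // _MM_SET_DENORMALS_ZERO_MODE

// Hypothetical helper: switch the calling thread's SSE unit to
// flush-to-zero / denormals-are-zero mode.
void enable_ftz_daz() {
    // FTZ: results that would be subnormal are flushed to zero.
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    // DAZ: subnormal inputs (like the values planted in A) are treated as zero.
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
}

One caveat: these intrinsics set the MXCSR register of the calling thread only, so calling enable_ftz_daz() in main() before the while loop may not affect the worker threads of a multithreaded BLAS. Running the BLAS single-threaded for the test avoids that. If the timing gap between different c values vanishes with FTZ/DAZ enabled, that would point to subnormal arithmetic as the cause.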