Quantcast
Channel: Intel® oneAPI Math Kernel Library & Intel® Math Kernel Library
Viewing all articles
Browse latest Browse all 2652

Getting very low efficiency for mkl_dcsrmv function

$
0
0

Hi,

I am trying to benchmark the intel mkl library function mkl_dcsrmv. I tried running the function for single, three,five and seven diagonals nnz entries and i repeated it multiple times with different values and different matrix size.  The maximum efficiency I could achieve was 432MFLOPs which is 3%. The code snippet used to measure the time is attached below. I am running it on a 2Ghz, core2duo processor. The result sheet, in which the obtained time value and the MFLOP calculation is also attached. Can some one tell me if there is some bug in my code or is the performance of mkl not optimized for core2duo? I have checked my system configuration and it has SSE2 instruction support, which is the requirement mentioned in the manual.

Thanks for your help!

/********************************************************************************
*   Content : Simple MKL Sparse Matrix-Vector Multiply in C
*
********************************************************************************/

#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#define BILLION  1000000000L;
#include "mkl.h"





int main(int argc, char* argv[])
{

	int i, j;
	int m;
	int k;
	int nnz;
	double alpha;
	double beta;
	//double val[] = {1,2,4,5,6,5,8};
	double* val;
	double* x;
	double* y;
	int* indx;
	int* pntrb;
	int* pntre;
	FILE *values;
	FILE* vector;
	FILE* index;
	FILE* pointerb;
	FILE* pointere;

	struct timespec start,end;

	/*printf("Enter the no of rows of the matrix, m=");
	scanf("%d",&m);

	//no of columns
	printf("Enter the no of columns of the matrix, k=");
	scanf("%d",&k);

	printf("Enter the no of non-zero elements of the matrix, nnz=");
	scanf("%d",&nnz);*/

	if(argc != 4){
		printf("Pass value of no of rows, no of columns and no of nonzero elements as arguments when you run the executable\n");
		printf("Example: ./csr 1000 1000 3000 \n");
		exit(-1);
	}

	m = atoi(argv[1]);
	k = atoi(argv[2]);
	nnz = atoi(argv[3]);

	//the condition if nnz = 0 is remaining

	//for (i = 0, i < nnz, i++)

	alpha = 1.0;
	beta = 0.0;
	//m = 1000000;
	//k = 1000000;
	//nnz = 1000000;
	val = (double*) malloc( sizeof(double)*(nnz));
	y = (double*) malloc( sizeof(double)*(m));

	indx = (int*) malloc( sizeof(int)*(nnz));
	pntrb = (int*) malloc( sizeof(int)*(m));
	pntre = (int*) malloc( sizeof(int)*(m));
	x = (double*) malloc( sizeof(double)*(k));






	i = 0;
	//Getting the column index
	index = fopen("index.txt","rb");
	while(!feof(index)){
    fscanf(index,"%d",&indx[i]);
    i++;
	}
	fclose(index);



	i = 0;
	//Getting the beginning row pointer
	pointerb = fopen("ptrb.txt","rb");
	while(!feof(pointerb)){
    fscanf(pointerb,"%d",&pntrb[i]);
    i++;
	}
	fclose(pointerb);



	i = 0;
	//Getting the end row pointer
	pointere = fopen("ptre.txt","rb");
	while(!feof(pointere)){
    fscanf(pointere,"%d",&pntre[i]);
    i++;
	}
	fclose(pointere);


	i = 0;
	//Getting the vector values
	vector = fopen("x.txt","rb");
	while(!feof(vector)){
    fscanf(vector,"%lf",&x[i]);
    i++;
	}
	fclose(vector);


	i = 0;
	//Getting the values of the matrix
	values = fopen("values.txt","rb");
	while(!feof(values)){
	//printf("point2, %d \n",i);
    fscanf(values,"%lf",&val[i]);
    i++;
	}
	fclose(values);






	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
	mkl_dcsrmv("N", &m, &k, &alpha, "G", val, indx, pntrb, pntre, x, &beta, y);
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);
	double runtime = ( end.tv_sec - start.tv_sec )+ (double)( end.tv_nsec - start.tv_nsec )/ (double)BILLION;
	printf("Elapsed time = %g seconds\n",
		runtime);



	return 0;
}

 


Viewing all articles
Browse latest Browse all 2652

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>