I wrote a piece of code to test the speed of the zheevd function (below). I then used the Intel MKL Link Line Advisor to build it in each threading mode. These are the times I measured:
sequential: 20.15s
TBB: 30.64s
OpenMP: 30.5s
Why is the sequential code faster than either of the parallel versions? Is it possible to speed up zheevd through parallelization?
!> Benchmark driver for the LAPACK/MKL Hermitian eigenvalue solver ZHEEVD.
!!
!! Builds a random N x N Hermitian matrix in double-precision complex,
!! computes its eigenvalues only (jobz = 'N') and reports the LAPACK status,
!! the workspace sizes used, and the wall-clock time spent inside ZHEEVD.
program STB
  !use mkl_service
  use, intrinsic :: iso_fortran_env, only: dp => real64, int64, error_unit
  implicit none

  call test_herm()

contains

  !> Fill a random Hermitian matrix and time a single ZHEEVD call on it.
  subroutine test_herm()
    integer, parameter :: n = 4000, lda = n
    integer :: info, lwork, liwork, lrwork, i, j
    integer(int64) :: tick_start, tick_end, tick_rate
    real(dp) :: re, im

    ! One-element buffers that receive the optimal sizes from the workspace query.
    complex(dp) :: work_query(1)
    real(dp)    :: rwork_query(1)
    integer     :: iwork_query(1)

    ! ZHEEVD works on double-precision complex data (COMPLEX*16, i.e. kind 8).
    ! Declaring these as complex(16) selects kind 16 (32 bytes per element),
    ! which does not match what the routine reads and writes.
    ! The arrays are allocatable because an n x n matrix of this size is far
    ! too large to place safely in static or stack storage.
    complex(dp), allocatable :: a(:, :), work(:)
    real(dp),    allocatable :: w(:), rwork(:)
    integer,     allocatable :: iwork(:)

    external :: zheevd, mkl_set_num_threads

    ! Only has an effect when the program is linked against a threaded MKL layer.
    call mkl_set_num_threads(4)

    allocate (a(lda, n), w(n))

    call random_seed()

    ! Strictly lower triangle plus its conjugate mirror; the inner loop runs
    ! over the first index so the a(i, j) writes are contiguous in memory.
    do j = 1, n
      do i = j + 1, n
        call random_number(re)
        call random_number(im)
        a(i, j) = cmplx(re, im, kind=dp)
        a(j, i) = conjg(a(i, j))
      end do
    end do

    ! A Hermitian matrix has a purely real diagonal.
    do i = 1, n
      call random_number(re)
      a(i, i) = cmplx(re, 0.0_dp, kind=dp)
    end do

    ! Workspace query: with all three sizes set to -1 the routine only reports
    ! the optimal sizes in the first element of each work array.
    call zheevd('N', 'L', n, a, lda, w, work_query, -1, rwork_query, &
                -1, iwork_query, -1, info)
    if (info /= 0) then
      write (error_unit, *) "ZHEEVD workspace query failed, info = ", info
      error stop 1
    end if

    ! Never go below the documented minima for jobz = 'N'.
    lwork  = max(n + 1, int(real(work_query(1), kind=dp)))
    lrwork = max(n, int(rwork_query(1)))
    liwork = max(1, iwork_query(1))
    allocate (work(lwork), rwork(lrwork), iwork(liwork))

    ! Time the solver alone so the serial matrix generation above is excluded.
    call system_clock(tick_start, tick_rate)
    call zheevd('N', 'L', n, a, lda, w, work, lwork, rwork, &
                lrwork, iwork, liwork, info)
    call system_clock(tick_end)

    write (*, *) "Info: ", info
    write (*, *) "Lwork: ", lwork
    write (*, *) "Liwork: ", liwork
    write (*, *) "LRWORK: ", lrwork
    if (tick_rate > 0_int64) then
      write (*, *) "ZHEEVD wall time (s): ", &
        real(tick_end - tick_start, kind=dp) / real(tick_rate, kind=dp)
    end if
    !write (*,*) w

    if (info /= 0) then
      write (error_unit, *) "ZHEEVD failed, info = ", info
      error stop 1
    end if
  end subroutine test_herm

end program STB
Thread Topic:
Question