FrontISTR  5.2.0
Large-scale structural analysis program with finit element method
hecmw_ClusterMKL_wrapper.F90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
6 #ifdef WITH_MKL
7 include 'mkl_cluster_sparse_solver.f90'
8 #endif
9 
11  use hecmw_util
12  use m_hecmw_comm_f
13  use m_sparse_matrix
14 
15 #ifdef WITH_MKL
16  use mkl_cluster_sparse_solver
17 #endif
18 
19  implicit none
20 
21  private ! default
22  public hecmw_clustermkl_wrapper ! only entry point of Parallel Direct Solver is public
23 
24  logical, save :: INITIALIZED = .false.
25 #ifdef WITH_MKL
26  type(MKL_CLUSTER_SPARSE_SOLVER_HANDLE) :: pt(64)
27 #endif
28  integer maxfct, mnum, mtype, nrhs, msglvl
29  integer idum(1), i
30  data nrhs /1/, maxfct /1/, mnum /1/
31 
32  integer, save :: iparm(64)
33  integer(kind=kint), save :: nn
34  integer(kind=kint), pointer, save :: irow(:), jcol(:)
35  real(kind=kreal), pointer, save :: aval(:), rhs(:), solx(:)
36 
37  integer(kind=kint), parameter :: debug=0
38 
39 contains
40 
41  subroutine hecmw_clustermkl_wrapper(spMAT, phase_start, solution, istat)
42  implicit none
43  type (sparse_matrix), intent(inout) :: spmat
44  integer(kind=kint), intent(in) :: phase_start
45  integer(kind=kint), intent(out) :: istat
46  real(kind=kreal), pointer, intent(inout) :: solution(:)
47 
48  integer(kind=kint) :: myrank, phase
49  real(kind=kreal) :: t1,t2,t3,t4,t5
50 
51 #ifdef WITH_MKL
52 
53  myrank=hecmw_comm_get_rank()
54 
55  if (.not. initialized) then
56  do i=1,64
57  pt(i)%dummy = 0
58  enddo
59  iparm(:) = 0
60  iparm(1) = 1 ! no solver default
61  iparm(2) = 3 ! fill-in reordering from METIS
62  iparm(3) = 1
63  iparm(8) = 2
64  if (spmat%symtype == sparse_matrix_symtype_asym) then
65  iparm(10) = 13 ! perturbe the pivot elements with 1E-13
66  else
67  iparm(10) = 8 ! perturbe the pivot elements with 1E-8
68  endif
69  iparm(11) = 1 ! Enable scaling
70  iparm(13) = 1 ! Enable matching
71  iparm(18) = 0
72  iparm(19) = 0
73  msglvl = 0 ! print statistical information
74  initialized = .true.
75  else
76  if( phase_start == 1 ) then
77  phase = -1
78  call cluster_sparse_solver ( &
79  pt, maxfct, mnum, mtype, phase, nn, aval, irow, jcol, &
80  idum, nrhs, iparm, msglvl, rhs, solx, hecmw_comm_get_comm(), istat )
81  if (istat < 0) then
82  write(*,*) 'ERROR: MKL returned with error in phase -1', istat
83  return
84  endif
85  if( spmat%type == sparse_matrix_type_coo ) deallocate(irow,jcol,aval,rhs,solx)
86  end if
87  endif
88 
89  ! Additional setup
90  t1=hecmw_wtime()
91  if ( phase_start == 1 ) then
92  if (spmat%symtype == sparse_matrix_symtype_spd) then
93  mtype = 2
94  else if (spmat%symtype == sparse_matrix_symtype_sym) then
95  mtype = -2
96  else
97  mtype = 11
98  endif
99  endif
100 
101  if( spmat%type == sparse_matrix_type_csr ) then
102  iparm(40) = 2 ! Input: distributed matrix/rhs/solution format
103  iparm(41) = spmat%DISPLS(myrank+1)+1
104  iparm(42) = spmat%DISPLS(myrank+1)+spmat%N_COUNTS(myrank+1)
105  call sort_column_ascending_order(spmat,myrank)
106  nn = spmat%N
107  irow => spmat%IRN
108  jcol => spmat%JCN
109  aval => spmat%A
110  rhs => spmat%rhs
111  solx => solution
112  else if( spmat%type == sparse_matrix_type_coo ) then
113  iparm(40) = 0 ! Input: centralized input format( mkl don't have distributed nonsymmetric matrix input...)
114  !input matrix is gathered to rank0 if matrix is given in COO format
115  call export_spmat_to_centralizedcrs(spmat,myrank,nn,irow,jcol,aval,rhs,solx)
116  end if
117 
118  t2=hecmw_wtime()
119  if (myrank==0 .and. spmat%timelog > 0) &
120  write(*,'(A,f10.3)') ' [Cluster Pardiso]: Additional Setup completed. time(sec)=',t2-t1
121 
122  if ( phase_start == 1 ) then
123  ! ANALYSIS
124  t2=hecmw_wtime()
125 
126  phase = 11
127  call cluster_sparse_solver ( &
128  pt, maxfct, mnum, mtype, phase, nn, aval, irow, jcol, &
129  idum, nrhs, iparm, msglvl, rhs, solx, hecmw_comm_get_comm(), istat )
130  if (istat < 0) then
131  if (myrank==0 .and. spmat%timelog > 0) call print_iparm_paramters()
132  write(*,*) 'ERROR: MKL returned with error in phase 1', istat
133  return
134  endif
135 
136  t3=hecmw_wtime()
137  if (myrank==0 .and. spmat%timelog > 0) &
138  write(*,'(A,f10.3)') ' [Cluster Pardiso]: Analysis completed. time(sec)=',t3-t2
139  endif
140 
141  ! FACTORIZATION
142  if ( phase_start .le. 2 ) then
143  t3=hecmw_wtime()
144 
145  phase = 22
146  call cluster_sparse_solver ( &
147  pt, maxfct, mnum, mtype, phase, nn, aval, irow, jcol, &
148  idum, nrhs, iparm, msglvl, rhs, solx, hecmw_comm_get_comm(), istat )
149  if (istat < 0) then
150  if (myrank==0 .and. spmat%timelog > 0) call print_iparm_paramters()
151  write(*,*) 'ERROR: MKL returned with error in phase 2', istat
152  return
153  endif
154  t4=hecmw_wtime()
155  if (myrank==0 .and. spmat%timelog > 0) &
156  write(*,'(A,f10.3)') ' [Cluster Pardiso]: Factorization completed. time(sec)=',t4-t3
157  endif
158 
159  t4=hecmw_wtime()
160 
161  ! SOLUTION
162  phase = 33
163  call cluster_sparse_solver ( &
164  pt, maxfct, mnum, mtype, phase, nn, aval, irow, jcol, &
165  idum, nrhs, iparm, msglvl, rhs, solx, hecmw_comm_get_comm(), istat )
166  if (istat < 0) then
167  if (myrank==0 .and. spmat%timelog > 0) call print_iparm_paramters()
168  write(*,*) 'ERROR: MKL returned with error in phase 3', istat
169  return
170  endif
171 
172  if( spmat%type == sparse_matrix_type_coo ) then !scatter global solution
173  call sparse_matrix_scatter_rhs(spmat, solx)
174  do i=1,spmat%N_loc
175  solution(i) = spmat%rhs(i)
176  end do
177  endif
178 
179  ! solution is written to hecMAT%X. you have to edit lib/solve_LINEQ.f90
180  ! to use this routine properly.
181 
182  t5=hecmw_wtime()
183  if (myrank==0 .and. spmat%timelog > 0) then
184  write(*,'(A,f10.3)') ' [Cluster Pardiso]: Solution completed. time(sec)=',t5-t4
185  end if
186  if( debug>0 .and. myrank==0 ) call print_iparm_paramters()
187 
188 #else
189  stop "MKL Pardiso not available"
190 #endif
191  end subroutine hecmw_clustermkl_wrapper
192 
193 #ifdef WITH_MKL
194  subroutine print_iparm_paramters()
195  write(*,'(A60,I8)') 'Number of iterative refinement steps performed: ',iparm(7)
196  write(*,'(A60,I8)') 'Number of perturbed pivots: ',iparm(14)
197  write(*,'(A60,I8)') 'Peak memory on symbolic factorization: ',iparm(15)
198  write(*,'(A60,I8)') 'Permanent memory on symbolic factorization: ',iparm(16)
199  write(*,'(A60,I8)') 'Size of factors/Peak memory on num. fact. and sol: ',iparm(17)
200  write(*,'(A60,I8)') 'The number of non-zero elements in the factors: ',iparm(18)
201  if( mtype < 11 ) then
202  write(*,'(A60,I8)') 'Number of positive eigenvalues: ',iparm(22)
203  write(*,'(A60,I8)') 'Number of negative eigenvalues: ',iparm(23)
204  end if
205  end subroutine
206 
207  subroutine export_spmat_to_centralizedcrs(spMAT,myrank,n,ia,ja,a,b,x)
208  type (sparse_matrix), intent(in) :: spmat
209  integer(kind=kint), intent(in) :: myrank
210  integer(kind=kint), intent(out) :: n
211  integer(kind=kint), pointer, intent(inout) :: ia(:), ja(:)
212  real(kind=kreal), pointer, intent(inout) :: a(:), b(:), x(:)
213 
214  integer(kind=kint) :: i, nprocs, ierr, nnz
215  integer(kind=kint), allocatable :: dispmat(:), ncounts(:)
216  real(kind=kreal) :: t1,t2,t3,t4
217 
218  t1=hecmw_wtime()
219 
220  nprocs = hecmw_comm_get_size()
221  allocate(dispmat(nprocs),ncounts(nprocs))
222  if (nprocs > 1) then
223  call hecmw_allgather_int_1(spmat%NZ, ncounts, hecmw_comm_get_comm())
224  endif
225  n = spmat%N
226  nnz = 0
227  dispmat(1)=0
228  do i=1,nprocs-1
229  dispmat(i+1)=dispmat(i)+ncounts(i)
230  nnz = nnz + ncounts(i)
231  enddo
232  nnz = nnz + ncounts(nprocs)
233 
234  if( myrank == 0 ) then
235  ierr = 0
236  if( .not. associated(ia) ) allocate(ia(nnz+n), stat=ierr)
237  if (ierr /= 0) then
238  write(*,*) " Allocation error in Setup Cluster MKL, ia",ierr,nnz
240  endif
241  if( .not. associated(ja) ) allocate(ja(nnz+n), stat=ierr)
242  if (ierr /= 0) then
243  write(*,*) " Allocation error in Setup Cluster MKL, ja",ierr,nnz
245  endif
246  if( .not. associated(a) ) allocate(a(nnz+n), stat=ierr)
247  if (ierr /= 0) then
248  write(*,*) " Allocation error in Setup Cluster MKL, a",ierr,nnz
250  endif
251  if( .not. associated(b) ) allocate(b(n), stat=ierr)
252  if (ierr /= 0) then
253  write(*,*) " Allocation error in Setup Cluster MKL, b",ierr,n
255  endif
256  if( .not. associated(x) ) allocate(x(n), stat=ierr)
257  if (ierr /= 0) then
258  write(*,*) " Allocation error in Setup Cluster MKL, x",ierr,n
260  endif
261  else !dummy
262  allocate(ia(1),ja(1),a(1),b(1),x(1))
263  end if
264 
265  t2=hecmw_wtime()
266  if (myrank==0 .and. spmat%timelog > 0 .and. debug > 0 ) &
267  write(*,'(A,f10.3)') ' [Cluster Pardiso]: - Allocate Matrix time(sec)=',t2-t1
268 
269  !gather matrix components to rank 0
270 
271  call hecmw_gatherv_int(spmat%IRN, spmat%NZ, ia, ncounts, dispmat, 0, hecmw_comm_get_comm())
272  call hecmw_gatherv_int(spmat%JCN, spmat%NZ, ja, ncounts, dispmat, 0, hecmw_comm_get_comm())
273  call hecmw_gatherv_real(spmat%A, spmat%NZ, a, ncounts, dispmat, 0, hecmw_comm_get_comm())
274  if( myrank == 0 ) then !cluster pardiso must be given diagonal components even if they are zero.
275  do i=1,n
276  ia(nnz+i) = i
277  ja(nnz+i) = i
278  a(nnz+i) = 0.d0
279  end do
280  nnz = nnz + n
281  endif
282  call sparse_matrix_gather_rhs(spmat, b)
283 
284  t3=hecmw_wtime()
285  if (myrank==0 .and. spmat%timelog > 0 .and. debug > 0 ) &
286  write(*,'(A,f10.3)') ' [Cluster Pardiso]: - Gather Matrix time(sec)=',t3-t2
287 
288  !convert COO to CRS
289  if( myrank==0 ) then
290  call coo2csr(n, nnz, ia, ja, a)
291  if( debug>0 ) call check_csr(n, nnz, ia, ja, a)
292  endif
293 
294  t4=hecmw_wtime()
295  if (myrank==0 .and. spmat%timelog > 0 .and. debug > 0 ) &
296  write(*,'(A,f10.3)') ' [Cluster Pardiso]: - Convert Matrix Format time(sec)=',t4-t3
297 
298  deallocate(dispmat,ncounts)
299  end subroutine
300 
301  subroutine coo2csr(n, nnz, ia, ja, a)
302  integer(kind=kint), intent(in) :: n
303  integer(kind=kint), intent(inout) :: nnz
304  integer(kind=kint), pointer, intent(inout) :: ia(:)
305  integer(kind=kint), pointer, intent(inout) :: ja(:)
306  real(kind=kreal), pointer, intent(inout) :: a(:)
307 
308  integer(kind=kint) :: i, k, idx, is, ie, nnz_new, prev
309  integer(kind=kint), allocatable :: counter(:), counter_curr(:), ia0(:), ja0(:)
310  real(kind=kreal), allocatable :: a0(:)
311 
312  allocate(counter(0:n),counter_curr(n),ia0(n+1),ja0(nnz),a0(nnz))
313 
314  counter(:) = 0
315  do i=1,nnz
316  idx = ia(i)
317  counter(idx) = counter(idx) + 1
318  end do
319  ia0(:)=0
320  do i=1,n
321  ia0(i+1) = ia0(i)+counter(i)
322  end do
323 
324  counter_curr(:) = 0
325  do i=1,nnz
326  idx = ia(i)
327  counter_curr(idx) = counter_curr(idx) + 1
328  idx = ia0(idx)+counter_curr(idx)
329  ja0(idx) = ja(i)
330  a0(idx) = a(i)
331  end do
332 
333  !$omp parallel private(i, iS, iE)
334  !$omp do
335  do i=1,n
336  is = ia0(i)+1
337  ie = ia0(i+1)
338  call sort_column_ascending_order0(ie-is+1,ja0(is:ie),a0(is:ie))
339  enddo
340  !$omp end do
341  !$omp end parallel
342 
343  ia(1)=1
344  do i=1,n
345  ia(i+1) = ia(i)+counter(i)
346  end do
347 
348  prev = 0
349  nnz_new = 0
350  ia(1) = 1
351  do i=1,n
352  do k=ia0(i)+1,ia0(i+1)
353  if( ja0(k) == prev ) then
354  a(nnz_new) = a(nnz_new) + a0(k)
355  else
356  nnz_new = nnz_new + 1
357  ja(nnz_new) = ja0(k)
358  a(nnz_new) = a0(k)
359  end if
360  prev = ja0(k)
361  end do
362  ia(i+1) = nnz_new + 1
363  prev = 0
364  end do
365 
366  nnz = nnz_new
367 
368  deallocate(counter,counter_curr,ia0,ja0,a0)
369 
370  end subroutine
371 
372  subroutine check_csr(n, nnz, ia, ja, a)
373  integer(kind=kint), intent(in) :: n
374  integer(kind=kint), intent(in) :: nnz
375  integer(kind=kint), pointer, intent(in) :: ia(:)
376  integer(kind=kint), pointer, intent(in) :: ja(:)
377  real(kind=kreal), pointer, intent(in) :: a(:)
378 
379  integer :: i,k
380 
381  if( ia(n+1)-1 /= nnz ) then
382  write(*,*) "Error in check_csr(1): ia(n+1)-1 /= nnz"
384  end if
385 
386  do i=1,n
387  do k=ia(i),ia(i+1)-1
388  if( ja(k) <= 0 ) then
389  write(*,*) "Error in check_csr(2): ja(k) <= 0"
391  end if
392  if( ja(k) > n ) then
393  write(*,*) "Error in check_csr(3): ja(k) > n"
395  end if
396  if( k > ia(i) ) then
397  if( ja(k) <= ja(k-1) ) then
398  write(*,*) "Error in check_csr(4): ja(k) <= ja(k-1)"
400  end if
401  end if
402  end do
403  end do
404 
405  end subroutine
406 
407  subroutine sort_column_ascending_order(spMAT,myrank)
408  type (sparse_matrix), intent(inout) :: spmat
409  integer(kind=kint), intent(in) :: myrank
410 
411  integer(kind=kint) :: i, is, ie
412 
413  do i=2,spmat%N_loc+1
414  if(spmat%IRN(i)-spmat%IRN(i-1)>10000) &
415  & write(*,*) 'warning: n may be too large for set_mkl_set_prof0 for row ',i-1
416  enddo
417 
418  !!$omp parallel private(i, iS, iE)
419  !!$omp do
420  do i=1,spmat%N_loc
421  is = spmat%IRN(i)
422  ie = spmat%IRN(i+1)-1
423  call sort_column_ascending_order0(ie-is+1,spmat%JCN(is:ie),spmat%A(is:ie))
424  enddo
425  !!$omp end do
426  !!$omp end parallel
427  end subroutine sort_column_ascending_order
428 
429  subroutine sort_column_ascending_order0(n,ja,a)
430  integer(kind=kint), intent(in) :: n
431  integer(kind=kint), intent(inout) :: ja(:)
432  real(kind=kreal), intent(inout) :: a(:)
433 
434  integer(kind=kint) :: i, work(2,n)
435  real(kind=kreal) :: oa(n)
436 
437  do i=1,n
438  work(1,i) = ja(i)
439  work(2,i) = i
440  oa(i) = a(i)
441  enddo
442 
443  call myqsort(n,work)
444 
445  do i=1,n
446  ja(i) = work(1,i)
447  a(i) = oa(work(2,i))
448  enddo
449 
450  end subroutine sort_column_ascending_order0
451 
452  recursive subroutine myqsort(n,work)
453  integer(kind=kint), intent(in) :: n
454  integer(kind=kint), intent(inout) :: work(:,:)
455 
456  integer(kind=kint) :: i, key, work1(2,n), n_low, n_high, next, minidx, maxidx
457  logical :: sorted
458 
459  if(n<2) return
460  !return if work is already sorted
461  sorted = .true.
462  minidx = work(1,1)
463  maxidx = work(1,1)
464  do i=1,n-1
465  next = work(1,i+1)
466  if( work(1,i) > next ) sorted = .false.
467  if( next < minidx ) minidx = next
468  if( next > maxidx ) maxidx = next
469  enddo
470  if(sorted) return
471 
472  if( n<5 .or. maxidx-minidx < 2) then
473  call myinssort(n,work)
474  return
475  endif
476 
477  key = (minidx+maxidx)/2
478 
479  n_low=0
480  do i=1,n
481  if(work(1,i)<key) then
482  n_low=n_low+1
483  work1(1:2,n_low) = work(1:2,i)
484  endif
485  enddo
486 
487  n_high=0
488  do i=1,n
489  if(work(1,i)>=key) then
490  n_high=n_high+1
491  work1(1:2,n_low+n_high) = work(1:2,i)
492  endif
493  enddo
494 
495  if(n_low>0) call myqsort(n_low,work1(1:2,1:n_low))
496  if(n_high>0) call myqsort(n_high,work1(1:2,n_low+1:n_low+n_high))
497 
498  do i=1,n
499  work(1:2,i) = work1(1:2,i)
500  enddo
501 
502  end subroutine myqsort
503 
504  subroutine myinssort(n,work)
505  integer(kind=kint), intent(in) :: n
506  integer(kind=kint), intent(inout) :: work(:,:)
507 
508  integer(kind=kint) :: i,j,tmp(2)
509 
510  do i=2,n
511  do j=i,2,-1
512  if(work(1,j)<work(1,j-1)) then
513  tmp(1:2) = work(1:2,j)
514  work(1:2,j) = work(1:2,j-1)
515  work(1:2,j-1) = tmp(1:2)
516  endif
517  enddo
518  enddo
519 
520  end subroutine myinssort
521 
522 #endif
523 
524  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=kint) function hecmw_comm_get_size()
integer(kind=kint) function hecmw_comm_get_comm()
integer(kind=4), parameter kreal
integer(kind=kint) function hecmw_comm_get_rank()
subroutine hecmw_abort(comm)
real(kind=kreal) function hecmw_wtime()
This module provides linear equation solver interface for Cluster Pardiso.
subroutine, public hecmw_clustermkl_wrapper(spMAT, phase_start, solution, istat)
subroutine hecmw_gatherv_real(sbuf, sc, rbuf, rcs, disp, root, comm)
subroutine hecmw_allgather_int_1(sval, rbuf, comm)
subroutine hecmw_gatherv_int(sbuf, sc, rbuf, rcs, disp, root, comm)
This module provides DOF based sparse matrix data structure (CSR and COO)
integer(kind=kint), parameter, public sparse_matrix_type_coo
integer(kind=kint), parameter, public sparse_matrix_type_csr
integer(kind=kint), parameter, public sparse_matrix_symtype_sym
subroutine, public sparse_matrix_gather_rhs(spMAT, rhs_all)
subroutine, public sparse_matrix_scatter_rhs(spMAT, rhs_all)
integer(kind=kint), parameter, public sparse_matrix_symtype_asym
integer(kind=kint), parameter, public sparse_matrix_symtype_spd