FrontISTR  5.2.0
Large-scale structural analysis program with finit element method
hecmw_precond_SSOR_22.f90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 
6 !C
7 !C***
8 !C*** module hecmw_precond_SSOR_22
9 !C***
10 !C
12  use hecmw_util
18  !$ use omp_lib
19 
20  private
21 
25 
26  integer(kind=kint) :: N
27  real(kind=kreal), pointer :: d(:) => null()
28  real(kind=kreal), pointer :: al(:) => null()
29  real(kind=kreal), pointer :: au(:) => null()
30  integer(kind=kint), pointer :: indexL(:) => null()
31  integer(kind=kint), pointer :: indexU(:) => null()
32  integer(kind=kint), pointer :: itemL(:) => null()
33  integer(kind=kint), pointer :: itemU(:) => null()
34  real(kind=kreal), pointer :: alu(:) => null()
35 
36  integer(kind=kint) :: NContact = 0
37  real(kind=kreal), pointer :: cal(:) => null()
38  real(kind=kreal), pointer :: cau(:) => null()
39  integer(kind=kint), pointer :: indexCL(:) => null()
40  integer(kind=kint), pointer :: indexCU(:) => null()
41  integer(kind=kint), pointer :: itemCL(:) => null()
42  integer(kind=kint), pointer :: itemCU(:) => null()
43 
44  integer(kind=kint) :: NColor
45  integer(kind=kint), pointer :: COLORindex(:) => null()
46  integer(kind=kint), pointer :: perm(:) => null()
47  integer(kind=kint), pointer :: iperm(:) => null()
48 
49  logical, save :: isFirst = .true.
50 
51  logical, save :: INITIALIZED = .false.
52 
53 contains
54 
55  subroutine hecmw_precond_ssor_22_setup(hecMAT)
56  implicit none
57  type(hecmwst_matrix), intent(inout) :: hecmat
58  integer(kind=kint ) :: npl, npu, npcl, npcu
59  real (kind=kreal), allocatable :: cd(:)
60  integer(kind=kint ) :: ncolor_in
61  real (kind=kreal) :: sigma_diag
62  real (kind=kreal) :: alutmp(2,2), pw(2)
63  integer(kind=kint ) :: ii, i, j, k
64  integer(kind=kint ) :: nthreads = 1
65  integer(kind=kint ), allocatable :: perm_tmp(:)
66  !real (kind=kreal) :: t0
67 
68  !t0 = hecmw_Wtime()
69  !write(*,*) 'DEBUG: SSOR setup start', hecmw_Wtime()-t0
70 
71  if (initialized) then
72  if (hecmat%Iarray(98) == 1) then ! need symbolic and numerical setup
73  call hecmw_precond_ssor_22_clear(hecmat)
74  else if (hecmat%Iarray(97) == 1) then ! need numerical setup only
75  call hecmw_precond_ssor_22_clear(hecmat) ! TEMPORARY
76  else
77  return
78  endif
79  endif
80 
81  !$ nthreads = omp_get_max_threads()
82 
83  n = hecmat%N
84  ! N = hecMAT%NP
85  ncolor_in = hecmw_mat_get_ncolor_in(hecmat)
86  sigma_diag = hecmw_mat_get_sigma_diag(hecmat)
87  ncontact = hecmat%cmat%n_val
88 
89  if (ncontact.gt.0) then
90  call hecmw_cmat_lu( hecmat )
91  endif
92 
93  if (nthreads == 1) then
94  ncolor = 1
95  allocate(colorindex(0:1), perm(n), iperm(n))
96  colorindex(0) = 0
97  colorindex(1) = n
98  do i=1,n
99  perm(i) = i
100  iperm(i) = i
101  end do
102  else
103  allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
104  call hecmw_matrix_ordering_rcm(n, hecmat%indexL, hecmat%itemL, &
105  hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
106  !write(*,*) 'DEBUG: RCM ordering done', hecmw_Wtime()-t0
107  call hecmw_matrix_ordering_mc(n, hecmat%indexL, hecmat%itemL, &
108  hecmat%indexU, hecmat%itemU, perm_tmp, &
109  ncolor_in, ncolor, colorindex, perm, iperm)
110  !write(*,*) 'DEBUG: MC ordering done', hecmw_Wtime()-t0
111  deallocate(perm_tmp)
112 
113  !call write_debug_info
114  endif
115 
116  npl = hecmat%indexL(n)
117  npu = hecmat%indexU(n)
118  allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
119  call hecmw_matrix_reorder_profile(n, perm, iperm, &
120  hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
121  indexl, indexu, iteml, itemu)
122  !write(*,*) 'DEBUG: reordering profile done', hecmw_Wtime()-t0
123 
124  !call check_ordering
125 
126  allocate(d(4*n), al(4*npl), au(4*npu))
127  call hecmw_matrix_reorder_values(n, 2, perm, iperm, &
128  hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
129  hecmat%AL, hecmat%AU, hecmat%D, &
130  indexl, indexu, iteml, itemu, al, au, d)
131  !write(*,*) 'DEBUG: reordering values done', hecmw_Wtime()-t0
132 
133  call hecmw_matrix_reorder_renum_item(n, perm, indexl, iteml)
134  call hecmw_matrix_reorder_renum_item(n, perm, indexu, itemu)
135 
136  if (ncontact.gt.0) then
137  npcl = hecmat%indexCL(n)
138  npcu = hecmat%indexCU(n)
139  allocate(indexcl(0:n), indexcu(0:n), itemcl(npcl), itemcu(npcu))
140  call hecmw_matrix_reorder_profile(n, perm, iperm, &
141  hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
142  indexcl, indexcu, itemcl, itemcu)
143 
144  allocate(cd(4*n), cal(4*npcl), cau(4*npcu))
145  call hecmw_matrix_reorder_values(n, 2, perm, iperm, &
146  hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
147  hecmat%CAL, hecmat%CAU, hecmat%D, &
148  indexcl, indexcu, itemcl, itemcu, cal, cau, cd)
149  deallocate(cd)
150 
151  call hecmw_matrix_reorder_renum_item(n, perm, indexcl, itemcl)
152  call hecmw_matrix_reorder_renum_item(n, perm, indexcu, itemcu)
153  end if
154 
155  allocate(alu(4*n))
156  alu = 0.d0
157 
158  do ii= 1, 4*n
159  alu(ii) = d(ii)
160  enddo
161 
162  if (ncontact.gt.0) then
163  do k= 1, hecmat%cmat%n_val
164  if (hecmat%cmat%pair(k)%i.ne.hecmat%cmat%pair(k)%j) cycle
165  ii = iperm( hecmat%cmat%pair(k)%i )
166  alu(4*ii-3) = alu(4*ii-3) + hecmat%cmat%pair(k)%val(1, 1)
167  alu(4*ii-2) = alu(4*ii-2) + hecmat%cmat%pair(k)%val(1, 2)
168  alu(4*ii-1) = alu(4*ii-1) + hecmat%cmat%pair(k)%val(2, 1)
169  alu(4*ii-0) = alu(4*ii-0) + hecmat%cmat%pair(k)%val(2, 2)
170  enddo
171  endif
172 
173  !$omp parallel default(none),private(ii,ALUtmp,k,i,j,PW),shared(N,ALU,SIGMA_DIAG)
174  !$omp do
175  do ii= 1, n
176  alutmp(1,1)= alu(4*ii-3) * sigma_diag
177  alutmp(1,2)= alu(4*ii-2)
178  alutmp(2,1)= alu(4*ii-1)
179  alutmp(2,2)= alu(4*ii-0) * sigma_diag
180  do k= 1, 2
181  alutmp(k,k)= 1.d0/alutmp(k,k)
182  do i= k+1, 2
183  alutmp(i,k)= alutmp(i,k) * alutmp(k,k)
184  do j= k+1, 2
185  pw(j)= alutmp(i,j) - alutmp(i,k)*alutmp(k,j)
186  enddo
187  do j= k+1, 2
188  alutmp(i,j)= pw(j)
189  enddo
190  enddo
191  enddo
192  alu(4*ii-3)= alutmp(1,1)
193  alu(4*ii-2)= alutmp(1,2)
194  alu(4*ii-1)= alutmp(2,1)
195  alu(4*ii-0)= alutmp(2,2)
196  enddo
197  !$omp end do
198  !$omp end parallel
199 
200  isfirst = .true.
201 
202  initialized = .true.
203  hecmat%Iarray(98) = 0 ! symbolic setup done
204  hecmat%Iarray(97) = 0 ! numerical setup done
205 
206  !write(*,*) 'DEBUG: SSOR setup done', hecmw_Wtime()-t0
207 
208  end subroutine hecmw_precond_ssor_22_setup
209 
211  use hecmw_tuning_fx
212  implicit none
213  real(kind=kreal), intent(inout) :: zp(:)
214  integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k
215  real(kind=kreal) :: sw(2), x(2)
216 
217  ! added for turning >>>
218  integer(kind=kint), parameter :: numofblockperthread = 100
219  integer(kind=kint), save :: numofthread = 1, numofblock
220  integer(kind=kint), save, allocatable :: ictoblockindex(:)
221  integer(kind=kint), save, allocatable :: blockindextocolorindex(:)
222  integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
223  integer(kind=kint) :: blockindex, elementcount, numofelement, ii
224  real(kind=kreal) :: numofelementperblock
225  integer(kind=kint) :: my_rank
226 
227  if (isfirst) then
228  !$ numOfThread = omp_get_max_threads()
229  numofblock = numofthread * numofblockperthread
230  if (allocated(ictoblockindex)) deallocate(ictoblockindex)
231  if (allocated(blockindextocolorindex)) deallocate(blockindextocolorindex)
232  allocate (ictoblockindex(0:ncolor), &
233  blockindextocolorindex(0:numofblock + ncolor))
234  numofelement = n + indexl(n) + indexu(n)
235  numofelementperblock = dble(numofelement) / numofblock
236  blockindex = 0
237  ictoblockindex = -1
238  ictoblockindex(0) = 0
239  blockindextocolorindex = -1
240  blockindextocolorindex(0) = 0
241  my_rank = hecmw_comm_get_rank()
242  ! write(9000+my_rank,*) &
243  ! '# numOfElementPerBlock =', numOfElementPerBlock
244  ! write(9000+my_rank,*) &
245  ! '# ic, blockIndex, colorIndex, elementCount'
246  do ic = 1, ncolor
247  elementcount = 0
248  ii = 1
249  do i = colorindex(ic-1)+1, colorindex(ic)
250  elementcount = elementcount + 1
251  elementcount = elementcount + (indexl(i) - indexl(i-1))
252  elementcount = elementcount + (indexu(i) - indexu(i-1))
253  if (elementcount > ii * numofelementperblock &
254  .or. i == colorindex(ic)) then
255  ii = ii + 1
256  blockindex = blockindex + 1
257  blockindextocolorindex(blockindex) = i
258  ! write(9000+my_rank,*) ic, blockIndex, &
259  ! blockIndexToColorIndex(blockIndex), elementCount
260  endif
261  enddo
262  ictoblockindex(ic) = blockindex
263  enddo
264  numofblock = blockindex
265 
267  sectorcachesize0, sectorcachesize1 )
268 
269  isfirst = .false.
270  endif
271  ! <<< added for turning
272 
273  !call start_collection("loopInPrecond33")
274 
275  !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
276  !OCL CACHE_SUBSECTOR_ASSIGN(ZP)
277 
278  !$omp parallel default(none) &
279  !$omp&shared(NColor,indexL,itemL,indexU,itemU,AL,AU,D,ALU,perm,&
280  !$omp& NContact,indexCL,itemCL,indexCU,itemCU,CAL,CAU,&
281  !$omp& ZP,icToBlockIndex,blockIndexToColorIndex) &
282  !$omp&private(SW,X,ic,i,iold,isL,ieL,isU,ieU,j,k,blockIndex)
283 
284  !C-- FORWARD
285  do ic=1,ncolor
286  !$omp do schedule (static, 1)
287  do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
288  do i = blockindextocolorindex(blockindex-1)+1, &
289  blockindextocolorindex(blockindex)
290  iold = perm(i)
291  sw(1)= zp(2*iold-1)
292  sw(2)= zp(2*iold-0)
293  isl= indexl(i-1)+1
294  iel= indexl(i)
295  do j= isl, iel
296  k= iteml(j)
297  x(1)= zp(2*k-1)
298  x(2)= zp(2*k-0)
299  sw(1)= sw(1) - al(4*j-3)*x(1) - al(4*j-2)*x(2)
300  sw(2)= sw(2) - al(4*j-1)*x(1) - al(4*j-0)*x(2)
301  enddo ! j
302 
303  if (ncontact.ne.0) then
304  isl= indexcl(i-1)+1
305  iel= indexcl(i)
306  do j= isl, iel
307  k= itemcl(j)
308  x(1)= zp(2*k-1)
309  x(2)= zp(2*k-0)
310  sw(1)= sw(1) - cal(4*j-3)*x(1) - cal(4*j-2)*x(2)
311  sw(2)= sw(2) - cal(4*j-1)*x(1) - cal(4*j-0)*x(2)
312  enddo ! j
313  endif
314 
315  x = sw
316  x(2)= x(2) - alu(4*i-1)*x(1)
317  x(2)= alu(4*i )* x(2)
318  x(1)= alu(4*i-3)*( x(1) - alu(4*i-2)*x(2))
319  zp(2*iold-1)= x(1)
320  zp(2*iold-0)= x(2)
321  enddo ! i
322  enddo ! blockIndex
323  !$omp end do
324  enddo ! ic
325 
326  !C-- BACKWARD
327  do ic=ncolor, 1, -1
328  !$omp do schedule (static, 1)
329  do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
330  do i = blockindextocolorindex(blockindex), &
331  blockindextocolorindex(blockindex-1)+1, -1
332  ! do blockIndex = icToBlockIndex(ic-1)+1, icToBlockIndex(ic)
333  ! do i = blockIndexToColorIndex(blockIndex-1)+1, &
334  ! blockIndexToColorIndex(blockIndex)
335  ! do i = endPos(threadNum, ic), startPos(threadNum, ic), -1
336  sw= 0.d0
337  isu= indexu(i-1) + 1
338  ieu= indexu(i)
339  do j= ieu, isu, -1
340  k= itemu(j)
341  x(1)= zp(2*k-1)
342  x(2)= zp(2*k-0)
343  sw(1)= sw(1) + au(4*j-3)*x(1) + au(4*j-2)*x(2)
344  sw(2)= sw(2) + au(4*j-1)*x(1) + au(4*j-0)*x(2)
345  enddo ! j
346 
347  if (ncontact.gt.0) then
348  isu= indexcu(i-1) + 1
349  ieu= indexcu(i)
350  do j= ieu, isu, -1
351  k= itemcu(j)
352  x(1)= zp(2*k-1)
353  x(2)= zp(2*k-0)
354  sw(1)= sw(1) + cau(4*j-3)*x(1) + cau(4*j-2)*x(2)
355  sw(2)= sw(2) + cau(4*j-1)*x(1) + cau(4*j-0)*x(2)
356  enddo ! j
357  endif
358 
359  x = sw
360  x(2)= x(2) - alu(4*i-1)*x(1)
361 
362 
363  x(2)= alu(4*i )* x(2)
364  x(1)= alu(4*i-3)*( x(1) - alu(4*i-2)*x(2) )
365 
366  iold = perm(i)
367  zp(2*iold-1)= zp(2*iold-1) - x(1)
368  zp(2*iold )= zp(2*iold ) - x(2)
369  enddo ! i
370  enddo ! blockIndex
371  !$omp end do
372  enddo ! ic
373  !$omp end parallel
374 
375  !OCL END_CACHE_SUBSECTOR
376  !OCL END_CACHE_SECTOR_SIZE
377 
378  !call stop_collection("loopInPrecond33")
379 
380  end subroutine hecmw_precond_ssor_22_apply
381 
382  subroutine hecmw_precond_ssor_22_clear(hecMAT)
383  implicit none
384  type(hecmwst_matrix), intent(inout) :: hecmat
385  integer(kind=kint ) :: nthreads = 1
386  !$ nthreads = omp_get_max_threads()
387  if (associated(colorindex)) deallocate(colorindex)
388  if (associated(perm)) deallocate(perm)
389  if (associated(iperm)) deallocate(iperm)
390  if (associated(alu)) deallocate(alu)
391  if (nthreads >= 1) then
392  if (associated(d)) deallocate(d)
393  if (associated(al)) deallocate(al)
394  if (associated(au)) deallocate(au)
395  if (associated(indexl)) deallocate(indexl)
396  if (associated(indexu)) deallocate(indexu)
397  if (associated(iteml)) deallocate(iteml)
398  if (associated(itemu)) deallocate(itemu)
399  if (ncontact.ne.0) then
400  if (associated(cal)) deallocate(cal)
401  if (associated(cau)) deallocate(cau)
402  if (associated(indexcl)) deallocate(indexcl)
403  if (associated(indexcu)) deallocate(indexcu)
404  if (associated(itemcl)) deallocate(itemcl)
405  if (associated(itemcu)) deallocate(itemcu)
406  end if
407  end if
408  nullify(colorindex)
409  nullify(perm)
410  nullify(iperm)
411  nullify(alu)
412  nullify(d)
413  nullify(al)
414  nullify(au)
415  nullify(indexl)
416  nullify(indexu)
417  nullify(iteml)
418  nullify(itemu)
419  if (ncontact.ne.0) then
420  nullify(cal)
421  nullify(cau)
422  nullify(indexcl)
423  nullify(indexcu)
424  nullify(itemcl)
425  nullify(itemcu)
426  call hecmw_cmat_lu_free( hecmat )
427  endif
428  ncontact = 0
429  initialized = .false.
430  end subroutine hecmw_precond_ssor_22_clear
431 
432  subroutine write_debug_info
433  implicit none
434  integer(kind=kint) :: my_rank, ic, in
435  my_rank = hecmw_comm_get_rank()
436  !--------------------> debug: shizawa
437  if (my_rank.eq.0) then
438  write(*,*) 'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
439  endif
440  write(19000+my_rank,'(a)') '#NCOLORTot'
441  write(19000+my_rank,*) ncolor
442  write(19000+my_rank,'(a)') '#ic COLORindex(ic-1)+1 COLORindex(ic)'
443  do ic=1,ncolor
444  write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
445  enddo ! ic
446  write(29000+my_rank,'(a)') '#n_node'
447  write(29000+my_rank,*) n
448  write(29000+my_rank,'(a)') '#in OLDtoNEW(in) NEWtoOLD(in)'
449  do in=1,n
450  write(29000+my_rank,*) in, iperm(in), perm(in)
451  if (perm(iperm(in)) .ne. in) then
452  write(29000+my_rank,*) '** WARNING **: NEWtoOLD and OLDtoNEW: ',in
453  endif
454  enddo
455  end subroutine write_debug_info
456 
457  subroutine check_ordering
458  implicit none
459  integer(kind=kint) :: ic, i, j, k
460  integer(kind=kint), allocatable :: iicolor(:)
461  ! check color dependence of neighbouring nodes
462  if (ncolor.gt.1) then
463  allocate(iicolor(n))
464  do ic=1,ncolor
465  do i= colorindex(ic-1)+1, colorindex(ic)
466  iicolor(i) = ic
467  enddo ! i
468  enddo ! ic
469  ! FORWARD: L-part
470  do ic=1,ncolor
471  do i= colorindex(ic-1)+1, colorindex(ic)
472  do j= indexl(i-1)+1, indexl(i)
473  k= iteml(j)
474  if (iicolor(i).eq.iicolor(k)) then
475  write(*,*) .eq.'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
476  endif
477  enddo ! j
478  enddo ! i
479  enddo ! ic
480  ! BACKWARD: U-part
481  do ic=ncolor, 1, -1
482  do i= colorindex(ic), colorindex(ic-1)+1, -1
483  do j= indexu(i-1)+1, indexu(i)
484  k= itemu(j)
485  if (iicolor(i).eq.iicolor(k)) then
486  write(*,*) .eq.'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
487  endif
488  enddo ! j
489  enddo ! i
490  enddo ! ic
491  deallocate(iicolor)
492  endif ! if (NColor.gt.1)
493  !--------------------< debug: shizawa
494  end subroutine check_ordering
495 
496 end module hecmw_precond_ssor_22
subroutine, public hecmw_cmat_lu(hecMAT)
subroutine, public hecmw_cmat_lu_free(hecMAT)
real(kind=kreal) function, public hecmw_mat_get_sigma_diag(hecMAT)
integer(kind=kint) function, public hecmw_mat_get_ncolor_in(hecMAT)
subroutine, public hecmw_matrix_reorder_values(N, NDOF, perm, iperm, indexL, indexU, itemL, itemU, AL, AU, D, indexLp, indexUp, itemLp, itemUp, ALp, AUp, Dp)
subroutine, public hecmw_matrix_reorder_renum_item(N, perm, indexXp, itemXp)
subroutine, public hecmw_matrix_reorder_profile(N, perm, iperm, indexL, indexU, itemL, itemU, indexLp, indexUp, itemLp, itemUp)
subroutine, public hecmw_precond_ssor_22_apply(ZP)
subroutine, public hecmw_precond_ssor_22_setup(hecMAT)
subroutine, public hecmw_precond_ssor_22_clear(hecMAT)
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=4), parameter kreal
integer(kind=kint) function hecmw_comm_get_rank()
subroutine, public hecmw_matrix_ordering_rcm(N, indexL, itemL, indexU, itemU, perm, iperm)
subroutine, public hecmw_matrix_ordering_mc(N, indexL, itemL, indexU, itemU, perm_cur, ncolor_in, ncolor_out, COLORindex, perm, iperm)