FrontISTR  5.2.0
Large-scale structural analysis program with finit element method
hecmw_precond_SSOR_44.f90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 
6 !C
7 !C***
8 !C*** module hecmw_precond_SSOR_44
9 !C***
10 !C
12  use hecmw_util
18  !$ use omp_lib
19 
20  private
21 
25 
26  integer(kind=kint) :: N
27  real(kind=kreal), pointer :: d(:) => null()
28  real(kind=kreal), pointer :: al(:) => null()
29  real(kind=kreal), pointer :: au(:) => null()
30  integer(kind=kint), pointer :: indexL(:) => null()
31  integer(kind=kint), pointer :: indexU(:) => null()
32  integer(kind=kint), pointer :: itemL(:) => null()
33  integer(kind=kint), pointer :: itemU(:) => null()
34  real(kind=kreal), pointer :: alu(:) => null()
35 
36  integer(kind=kint) :: NContact = 0
37  real(kind=kreal), pointer :: cal(:) => null()
38  real(kind=kreal), pointer :: cau(:) => null()
39  integer(kind=kint), pointer :: indexCL(:) => null()
40  integer(kind=kint), pointer :: indexCU(:) => null()
41  integer(kind=kint), pointer :: itemCL(:) => null()
42  integer(kind=kint), pointer :: itemCU(:) => null()
43 
44  integer(kind=kint) :: NColor
45  integer(kind=kint), pointer :: COLORindex(:) => null()
46  integer(kind=kint), pointer :: perm(:) => null()
47  integer(kind=kint), pointer :: iperm(:) => null()
48 
49  logical, save :: isFirst = .true.
50 
51  logical, save :: INITIALIZED = .false.
52 
53 contains
54 
55  subroutine hecmw_precond_ssor_44_setup(hecMAT)
56  implicit none
57  type(hecmwst_matrix), intent(inout) :: hecmat
58  integer(kind=kint ) :: npl, npu, npcl, npcu
59  real (kind=kreal), allocatable :: cd(:)
60  integer(kind=kint ) :: ncolor_in
61  real (kind=kreal) :: sigma_diag
62  real (kind=kreal) :: alutmp(4,4), pw(4)
63  integer(kind=kint ) :: ii, i, j, k
64  integer(kind=kint ) :: nthreads = 1
65  integer(kind=kint ), allocatable :: perm_tmp(:)
66  !real (kind=kreal) :: t0
67 
68  !t0 = hecmw_Wtime()
69  !write(*,*) 'DEBUG: SSOR setup start', hecmw_Wtime()-t0
70 
71  if (initialized) then
72  if (hecmat%Iarray(98) == 1) then ! need symbolic and numerical setup
73  call hecmw_precond_ssor_44_clear(hecmat)
74  else if (hecmat%Iarray(97) == 1) then ! need numerical setup only
75  call hecmw_precond_ssor_44_clear(hecmat) ! TEMPORARY
76  else
77  return
78  endif
79  endif
80 
81  !$ nthreads = omp_get_max_threads()
82 
83  n = hecmat%N
84  ! N = hecMAT%NP
85  ncolor_in = hecmw_mat_get_ncolor_in(hecmat)
86  sigma_diag = hecmw_mat_get_sigma_diag(hecmat)
87  ncontact = hecmat%cmat%n_val
88 
89  if (ncontact.gt.0) then
90  call hecmw_cmat_lu( hecmat )
91  endif
92 
93  if (nthreads == 1) then
94  ncolor = 1
95  allocate(colorindex(0:1), perm(n), iperm(n))
96  colorindex(0) = 0
97  colorindex(1) = n
98  do i=1,n
99  perm(i) = i
100  iperm(i) = i
101  end do
102  else
103  allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
104  call hecmw_matrix_ordering_rcm(n, hecmat%indexL, hecmat%itemL, &
105  hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
106  !write(*,*) 'DEBUG: RCM ordering done', hecmw_Wtime()-t0
107  call hecmw_matrix_ordering_mc(n, hecmat%indexL, hecmat%itemL, &
108  hecmat%indexU, hecmat%itemU, perm_tmp, &
109  ncolor_in, ncolor, colorindex, perm, iperm)
110  !write(*,*) 'DEBUG: MC ordering done', hecmw_Wtime()-t0
111  deallocate(perm_tmp)
112 
113  !call write_debug_info
114  endif
115 
116  npl = hecmat%indexL(n)
117  npu = hecmat%indexU(n)
118  allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
119  call hecmw_matrix_reorder_profile(n, perm, iperm, &
120  hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
121  indexl, indexu, iteml, itemu)
122  !write(*,*) 'DEBUG: reordering profile done', hecmw_Wtime()-t0
123 
124  !call check_ordering
125 
126  allocate(d(16*n), al(16*npl), au(16*npu))
127  call hecmw_matrix_reorder_values(n, 4, perm, iperm, &
128  hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
129  hecmat%AL, hecmat%AU, hecmat%D, &
130  indexl, indexu, iteml, itemu, al, au, d)
131  !write(*,*) 'DEBUG: reordering values done', hecmw_Wtime()-t0
132 
133  call hecmw_matrix_reorder_renum_item(n, perm, indexl, iteml)
134  call hecmw_matrix_reorder_renum_item(n, perm, indexu, itemu)
135 
136  if (ncontact.gt.0) then
137  npcl = hecmat%indexCL(n)
138  npcu = hecmat%indexCU(n)
139  allocate(indexcl(0:n), indexcu(0:n), itemcl(npcl), itemcu(npcu))
140  call hecmw_matrix_reorder_profile(n, perm, iperm, &
141  hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
142  indexcl, indexcu, itemcl, itemcu)
143 
144  allocate(cd(16*n), cal(16*npcl), cau(16*npcu))
145  call hecmw_matrix_reorder_values(n, 4, perm, iperm, &
146  hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
147  hecmat%CAL, hecmat%CAU, hecmat%D, &
148  indexcl, indexcu, itemcl, itemcu, cal, cau, cd)
149  deallocate(cd)
150 
151  call hecmw_matrix_reorder_renum_item(n, perm, indexcl, itemcl)
152  call hecmw_matrix_reorder_renum_item(n, perm, indexcu, itemcu)
153  end if
154 
155  allocate(alu(16*n))
156  alu = 0.d0
157 
158  do ii= 1, 16*n
159  alu(ii) = d(ii)
160  enddo
161 
162  ! if (NContact.gt.0) then
163  ! do k= 1, hecMAT%cmat%n_val
164  ! if (hecMAT%cmat%pair(k)%i.ne.hecMAT%cmat%pair(k)%j) cycle
165  ! ii = iperm( hecMAT%cmat%pair(k)%i )
166  ! ALU(16*ii-15) = ALU(16*ii-15) + hecMAT%cmat%pair(k)%val(1, 1)
167  ! ALU(16*ii-14) = ALU(16*ii-14) + hecMAT%cmat%pair(k)%val(1, 2)
168  ! ALU(16*ii-13) = ALU(16*ii-13) + hecMAT%cmat%pair(k)%val(1, 3)
169  ! ALU(16*ii-12) = ALU(16*ii-12) + hecMAT%cmat%pair(k)%val(1, 4)
170  ! ALU(16*ii-11) = ALU(16*ii-11) + hecMAT%cmat%pair(k)%val(2, 1)
171  ! ALU(16*ii-10) = ALU(16*ii-10) + hecMAT%cmat%pair(k)%val(2, 2)
172  ! ALU(16*ii- 9) = ALU(16*ii- 9) + hecMAT%cmat%pair(k)%val(2, 3)
173  ! ALU(16*ii- 8) = ALU(16*ii- 8) + hecMAT%cmat%pair(k)%val(2, 4)
174  ! ALU(16*ii- 7) = ALU(16*ii- 7) + hecMAT%cmat%pair(k)%val(3, 1)
175  ! ALU(16*ii- 6) = ALU(16*ii- 6) + hecMAT%cmat%pair(k)%val(3, 2)
176  ! ALU(16*ii- 5) = ALU(16*ii- 5) + hecMAT%cmat%pair(k)%val(3, 3)
177  ! ALU(16*ii- 4) = ALU(16*ii- 4) + hecMAT%cmat%pair(k)%val(3, 4)
178  ! ALU(16*ii- 3) = ALU(16*ii- 3) + hecMAT%cmat%pair(k)%val(4, 1)
179  ! ALU(16*ii- 2) = ALU(16*ii- 2) + hecMAT%cmat%pair(k)%val(4, 2)
180  ! ALU(16*ii- 1) = ALU(16*ii- 1) + hecMAT%cmat%pair(k)%val(4, 3)
181  ! ALU(16*ii ) = ALU(16*ii ) + hecMAT%cmat%pair(k)%val(4, 4)
182  ! enddo
183  ! endif
184 
185  do ii= 1, n
186  alutmp(1,1)= alu(16*ii-15) * sigma_diag
187  alutmp(1,2)= alu(16*ii-14)
188  alutmp(1,3)= alu(16*ii-13)
189  alutmp(1,4)= alu(16*ii-12)
190  alutmp(2,1)= alu(16*ii-11)
191  alutmp(2,2)= alu(16*ii-10) * sigma_diag
192  alutmp(2,3)= alu(16*ii- 9)
193  alutmp(2,4)= alu(16*ii- 8)
194  alutmp(3,1)= alu(16*ii- 7)
195  alutmp(3,2)= alu(16*ii- 6)
196  alutmp(3,3)= alu(16*ii- 5) * sigma_diag
197  alutmp(3,4)= alu(16*ii- 4)
198  alutmp(4,1)= alu(16*ii- 3)
199  alutmp(4,2)= alu(16*ii- 2)
200  alutmp(4,3)= alu(16*ii- 1)
201  alutmp(4,4)= alu(16*ii ) * sigma_diag
202 
203  do k= 1, 4
204  alutmp(k,k)= 1.d0/alutmp(k,k)
205  do i= k+1, 4
206  alutmp(i,k)= alutmp(i,k) * alutmp(k,k)
207  do j= k+1, 4
208  pw(j)= alutmp(i,j) - alutmp(i,k)*alutmp(k,j)
209  enddo
210  do j= k+1, 4
211  alutmp(i,j)= pw(j)
212  enddo
213  enddo
214  enddo
215  alu(16*ii-15)= alutmp(1,1)
216  alu(16*ii-14)= alutmp(1,2)
217  alu(16*ii-13)= alutmp(1,3)
218  alu(16*ii-12)= alutmp(1,4)
219  alu(16*ii-11)= alutmp(2,1)
220  alu(16*ii-10)= alutmp(2,2)
221  alu(16*ii- 9)= alutmp(2,3)
222  alu(16*ii- 8)= alutmp(2,4)
223  alu(16*ii- 7)= alutmp(3,1)
224  alu(16*ii- 6)= alutmp(3,2)
225  alu(16*ii- 5)= alutmp(3,3)
226  alu(16*ii- 4)= alutmp(3,4)
227  alu(16*ii- 3)= alutmp(4,1)
228  alu(16*ii- 2)= alutmp(4,2)
229  alu(16*ii- 1)= alutmp(4,3)
230  alu(16*ii )= alutmp(4,4)
231 
232  enddo
233 
234  isfirst = .true.
235 
236  initialized = .true.
237  hecmat%Iarray(98) = 0 ! symbolic setup done
238  hecmat%Iarray(97) = 0 ! numerical setup done
239 
240  !write(*,*) 'DEBUG: SSOR setup done', hecmw_Wtime()-t0
241 
242  end subroutine hecmw_precond_ssor_44_setup
243 
245  use hecmw_tuning_fx
246  implicit none
247  real(kind=kreal), intent(inout) :: zp(:)
248  integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k
249  real(kind=kreal) :: sw1, sw2, sw3, sw4, x1, x2, x3, x4
250 
251  ! added for turning >>>
252  integer(kind=kint), parameter :: numofblockperthread = 100
253  integer(kind=kint), save :: numofthread = 1, numofblock
254  integer(kind=kint), save, allocatable :: ictoblockindex(:)
255  integer(kind=kint), save, allocatable :: blockindextocolorindex(:)
256  integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
257  integer(kind=kint) :: blockindex, elementcount, numofelement, ii
258  real(kind=kreal) :: numofelementperblock
259  integer(kind=kint) :: my_rank
260 
261  if (isfirst) then
262  !$ numOfThread = omp_get_max_threads()
263  numofblock = numofthread * numofblockperthread
264  if (allocated(ictoblockindex)) deallocate(ictoblockindex)
265  if (allocated(blockindextocolorindex)) deallocate(blockindextocolorindex)
266  allocate (ictoblockindex(0:ncolor), &
267  blockindextocolorindex(0:numofblock + ncolor))
268  numofelement = n + indexl(n) + indexu(n)
269  numofelementperblock = dble(numofelement) / numofblock
270  blockindex = 0
271  ictoblockindex = -1
272  ictoblockindex(0) = 0
273  blockindextocolorindex = -1
274  blockindextocolorindex(0) = 0
275  my_rank = hecmw_comm_get_rank()
276  ! write(9000+my_rank,*) &
277  ! '# numOfElementPerBlock =', numOfElementPerBlock
278  ! write(9000+my_rank,*) &
279  ! '# ic, blockIndex, colorIndex, elementCount'
280  do ic = 1, ncolor
281  elementcount = 0
282  ii = 1
283  do i = colorindex(ic-1)+1, colorindex(ic)
284  elementcount = elementcount + 1
285  elementcount = elementcount + (indexl(i) - indexl(i-1))
286  elementcount = elementcount + (indexu(i) - indexu(i-1))
287  if (elementcount > ii * numofelementperblock &
288  .or. i == colorindex(ic)) then
289  ii = ii + 1
290  blockindex = blockindex + 1
291  blockindextocolorindex(blockindex) = i
292  ! write(9000+my_rank,*) ic, blockIndex, &
293  ! blockIndexToColorIndex(blockIndex), elementCount
294  endif
295  enddo
296  ictoblockindex(ic) = blockindex
297  enddo
298  numofblock = blockindex
299 
301  sectorcachesize0, sectorcachesize1 )
302 
303  isfirst = .false.
304  endif
305  ! <<< added for turning
306 
307  !call start_collection("loopInPrecond44")
308 
309  !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
310  !OCL CACHE_SUBSECTOR_ASSIGN(ZP)
311 
312  !$omp parallel default(none) &
313  !$omp&shared(NColor,indexL,itemL,indexU,itemU,AL,AU,D,ALU,perm,&
314  !$omp& NContact,indexCL,itemCL,indexCU,itemCU,CAL,CAU,&
315  !$omp& ZP,icToBlockIndex,blockIndexToColorIndex) &
316  !$omp&private(SW1,SW2,SW3,SW4,X1,X2,X3,X4,ic,i,iold,isL,ieL,isU,ieU,j,k,blockIndex)
317 
318  !C-- FORWARD
319  do ic=1,ncolor
320  !$omp do schedule (static, 1)
321  do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
322  do i = blockindextocolorindex(blockindex-1)+1, &
323  blockindextocolorindex(blockindex)
324  ! do i = startPos(threadNum, ic), endPos(threadNum, ic)
325  iold = perm(i)
326  sw1= zp(4*iold-3)
327  sw2= zp(4*iold-2)
328  sw3= zp(4*iold-1)
329  sw4= zp(4*iold )
330  isl= indexl(i-1)+1
331  iel= indexl(i)
332  do j= isl, iel
333  !k= perm(itemL(j))
334  k= iteml(j)
335  x1= zp(4*k-3)
336  x2= zp(4*k-2)
337  x3= zp(4*k-1)
338  x4= zp(4*k )
339  sw1= sw1 - al(16*j-15)*x1 - al(16*j-14)*x2 - al(16*j-13)*x3 - al(16*j-12)*x4
340  sw2= sw2 - al(16*j-11)*x1 - al(16*j-10)*x2 - al(16*j- 9)*x3 - al(16*j- 8)*x4
341  sw3= sw3 - al(16*j- 7)*x1 - al(16*j- 6)*x2 - al(16*j- 5)*x3 - al(16*j- 4)*x4
342  sw4= sw4 - al(16*j- 3)*x1 - al(16*j- 2)*x2 - al(16*j- 1)*x3 - al(16*j- 0)*x4
343  enddo ! j
344 
345  if (ncontact.ne.0) then
346  isl= indexcl(i-1)+1
347  iel= indexcl(i)
348  do j= isl, iel
349  !k= perm(itemCL(j))
350  k= itemcl(j)
351  x1= zp(4*k-3)
352  x2= zp(4*k-2)
353  x3= zp(4*k-1)
354  x4= zp(4*k )
355  sw1= sw1 - cal(16*j-15)*x1 - cal(16*j-14)*x2 - cal(16*j-13)*x3 - cal(16*j-12)*x4
356  sw2= sw2 - cal(16*j-11)*x1 - cal(16*j-10)*x2 - cal(16*j- 9)*x3 - cal(16*j- 8)*x4
357  sw3= sw3 - cal(16*j- 7)*x1 - cal(16*j- 6)*x2 - cal(16*j- 5)*x3 - cal(16*j- 4)*x4
358  sw4= sw4 - cal(16*j- 3)*x1 - cal(16*j- 2)*x2 - cal(16*j- 1)*x3 - cal(16*j- 0)*x4
359  enddo ! j
360  endif
361 
362  x1= sw1
363  x2= sw2
364  x3= sw3
365  x4= sw4
366  x2= x2 - alu(16*i-11)*x1
367  x3= x3 - alu(16*i- 7)*x1 - alu(16*i- 6)*x2
368  x4= x4 - alu(16*i- 3)*x1 - alu(16*i- 2)*x2 - alu(16*i- 1)*x3
369 
370  x4= alu(16*i )* x4
371  x3= alu(16*i- 5)*( x3 - alu(16*i- 4)*x4 )
372  x2= alu(16*i-10)*( x2 - alu(16*i- 8)*x4 - alu(16*i- 9)*x3 )
373  x1= alu(16*i-15)*( x1 - alu(16*i-12)*x4 - alu(16*i-13)*x3 - alu(16*i-14)*x2 )
374 
375  zp(4*iold-3)= x1
376  zp(4*iold-2)= x2
377  zp(4*iold-1)= x3
378  zp(4*iold )= x4
379  enddo ! i
380  enddo ! blockIndex
381  !$omp end do
382  enddo ! ic
383 
384  !C-- BACKWARD
385  do ic=ncolor, 1, -1
386  !$omp do schedule (static, 1)
387  do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
388  do i = blockindextocolorindex(blockindex), &
389  blockindextocolorindex(blockindex-1)+1, -1
390  ! do blockIndex = icToBlockIndex(ic-1)+1, icToBlockIndex(ic)
391  ! do i = blockIndexToColorIndex(blockIndex-1)+1, &
392  ! blockIndexToColorIndex(blockIndex)
393  ! do i = endPos(threadNum, ic), startPos(threadNum, ic), -1
394  sw1= 0.d0
395  sw2= 0.d0
396  sw3= 0.d0
397  sw4= 0.d0
398  isu= indexu(i-1) + 1
399  ieu= indexu(i)
400  do j= ieu, isu, -1
401  !k= perm(itemU(j))
402  k= itemu(j)
403  x1= zp(4*k-3)
404  x2= zp(4*k-2)
405  x3= zp(4*k-1)
406  x4= zp(4*k )
407  sw1= sw1 + au(16*j-15)*x1 + au(16*j-14)*x2 + au(16*j-13)*x3 + au(16*j-12)*x4
408  sw2= sw2 + au(16*j-11)*x1 + au(16*j-10)*x2 + au(16*j- 9)*x3 + au(16*j- 8)*x4
409  sw3= sw3 + au(16*j- 7)*x1 + au(16*j- 6)*x2 + au(16*j- 5)*x3 + au(16*j- 4)*x4
410  sw4= sw4 + au(16*j- 3)*x1 + au(16*j- 2)*x2 + au(16*j- 1)*x3 + au(16*j- 0)*x4
411 
412  enddo ! j
413 
414  if (ncontact.gt.0) then
415  isu= indexcu(i-1) + 1
416  ieu= indexcu(i)
417  do j= ieu, isu, -1
418  !k= perm(itemCU(j))
419  k= itemcu(j)
420  x1= zp(4*k-3)
421  x2= zp(4*k-2)
422  x3= zp(4*k-1)
423  x4= zp(4*k )
424  sw1= sw1 + cau(16*j-15)*x1 + cau(16*j-14)*x2 + cau(16*j-13)*x3 + cau(16*j-12)*x4
425  sw2= sw2 + cau(16*j-11)*x1 + cau(16*j-10)*x2 + cau(16*j- 9)*x3 + cau(16*j- 8)*x4
426  sw3= sw3 + cau(16*j- 7)*x1 + cau(16*j- 6)*x2 + cau(16*j- 5)*x3 + cau(16*j- 4)*x4
427  sw4= sw4 + cau(16*j- 3)*x1 + cau(16*j- 2)*x2 + cau(16*j- 1)*x3 + cau(16*j- 0)*x4
428 
429  enddo ! j
430  endif
431 
432  x1= sw1
433  x2= sw2
434  x3= sw3
435  x4= sw4
436  x2= x2 - alu(16*i-11)*x1
437  x3= x3 - alu(16*i- 7)*x1 - alu(16*i- 6)*x2
438  x4= x4 - alu(16*i- 3)*x1 - alu(16*i- 2)*x2 - alu(16*i- 1)*x3
439  x4= alu(16*i )* x4
440  x3= alu(16*i- 5)*( x3 - alu(16*i- 4)*x4 )
441  x2= alu(16*i-10)*( x2 - alu(16*i- 8)*x4 - alu(16*i- 9)*x3 )
442  x1= alu(16*i-15)*( x1 - alu(16*i-12)*x4 - alu(16*i-13)*x3 - alu(16*i-14)*x2 )
443  iold = perm(i)
444  zp(4*iold-3)= zp(4*iold-3) - x1
445  zp(4*iold-2)= zp(4*iold-2) - x2
446  zp(4*iold-1)= zp(4*iold-1) - x3
447  zp(4*iold )= zp(4*iold ) - x4
448  enddo ! i
449  enddo ! blockIndex
450  !$omp end do
451  enddo ! ic
452  !$omp end parallel
453 
454  !OCL END_CACHE_SUBSECTOR
455  !OCL END_CACHE_SECTOR_SIZE
456 
457  !call stop_collection("loopInPrecond44")
458 
459  end subroutine hecmw_precond_ssor_44_apply
460 
461  subroutine hecmw_precond_ssor_44_clear(hecMAT)
462  implicit none
463  type(hecmwst_matrix), intent(inout) :: hecmat
464  integer(kind=kint ) :: nthreads = 1
465  !$ nthreads = omp_get_max_threads()
466  if (associated(colorindex)) deallocate(colorindex)
467  if (associated(perm)) deallocate(perm)
468  if (associated(iperm)) deallocate(iperm)
469  if (associated(alu)) deallocate(alu)
470  if (nthreads >= 1) then
471  if (associated(d)) deallocate(d)
472  if (associated(al)) deallocate(al)
473  if (associated(au)) deallocate(au)
474  if (associated(indexl)) deallocate(indexl)
475  if (associated(indexu)) deallocate(indexu)
476  if (associated(iteml)) deallocate(iteml)
477  if (associated(itemu)) deallocate(itemu)
478  if (ncontact.ne.0) then
479  if (associated(cal)) deallocate(cal)
480  if (associated(cau)) deallocate(cau)
481  if (associated(indexcl)) deallocate(indexcl)
482  if (associated(indexcu)) deallocate(indexcu)
483  if (associated(itemcl)) deallocate(itemcl)
484  if (associated(itemcu)) deallocate(itemcu)
485  end if
486  end if
487  nullify(colorindex)
488  nullify(perm)
489  nullify(iperm)
490  nullify(alu)
491  nullify(d)
492  nullify(al)
493  nullify(au)
494  nullify(indexl)
495  nullify(indexu)
496  nullify(iteml)
497  nullify(itemu)
498  if (ncontact.ne.0) then
499  nullify(cal)
500  nullify(cau)
501  nullify(indexcl)
502  nullify(indexcu)
503  nullify(itemcl)
504  nullify(itemcu)
505  call hecmw_cmat_lu_free( hecmat )
506  endif
507  ncontact = 0
508  end subroutine hecmw_precond_ssor_44_clear
509 
510  subroutine write_debug_info
511  implicit none
512  integer(kind=kint) :: my_rank, ic, in
513  my_rank = hecmw_comm_get_rank()
514  !--------------------> debug: shizawa
515  if (my_rank.eq.0) then
516  write(*,*) 'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
517  endif
518  write(19000+my_rank,'(a)') '#NCOLORTot'
519  write(19000+my_rank,*) ncolor
520  write(19000+my_rank,'(a)') '#ic COLORindex(ic-1)+1 COLORindex(ic)'
521  do ic=1,ncolor
522  write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
523  enddo ! ic
524  write(29000+my_rank,'(a)') '#n_node'
525  write(29000+my_rank,*) n
526  write(29000+my_rank,'(a)') '#in OLDtoNEW(in) NEWtoOLD(in)'
527  do in=1,n
528  write(29000+my_rank,*) in, iperm(in), perm(in)
529  if (perm(iperm(in)) .ne. in) then
530  write(29000+my_rank,*) '** WARNING **: NEWtoOLD and OLDtoNEW: ',in
531  endif
532  enddo
533  end subroutine write_debug_info
534 
535  subroutine check_ordering
536  implicit none
537  integer(kind=kint) :: ic, i, j, k
538  integer(kind=kint), allocatable :: iicolor(:)
539  ! check color dependence of neighbouring nodes
540  if (ncolor.gt.1) then
541  allocate(iicolor(n))
542  do ic=1,ncolor
543  do i= colorindex(ic-1)+1, colorindex(ic)
544  iicolor(i) = ic
545  enddo ! i
546  enddo ! ic
547  ! FORWARD: L-part
548  do ic=1,ncolor
549  do i= colorindex(ic-1)+1, colorindex(ic)
550  do j= indexl(i-1)+1, indexl(i)
551  k= iteml(j)
552  if (iicolor(i).eq.iicolor(k)) then
553  write(*,*) .eq.'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
554  endif
555  enddo ! j
556  enddo ! i
557  enddo ! ic
558  ! BACKWARD: U-part
559  do ic=ncolor, 1, -1
560  do i= colorindex(ic), colorindex(ic-1)+1, -1
561  do j= indexu(i-1)+1, indexu(i)
562  k= itemu(j)
563  if (iicolor(i).eq.iicolor(k)) then
564  write(*,*) .eq.'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
565  endif
566  enddo ! j
567  enddo ! i
568  enddo ! ic
569  deallocate(iicolor)
570  endif ! if (NColor.gt.1)
571  !--------------------< debug: shizawa
572  end subroutine check_ordering
573 
574 end module hecmw_precond_ssor_44
subroutine, public hecmw_cmat_lu(hecMAT)
subroutine, public hecmw_cmat_lu_free(hecMAT)
real(kind=kreal) function, public hecmw_mat_get_sigma_diag(hecMAT)
integer(kind=kint) function, public hecmw_mat_get_ncolor_in(hecMAT)
subroutine, public hecmw_matrix_reorder_values(N, NDOF, perm, iperm, indexL, indexU, itemL, itemU, AL, AU, D, indexLp, indexUp, itemLp, itemUp, ALp, AUp, Dp)
subroutine, public hecmw_matrix_reorder_renum_item(N, perm, indexXp, itemXp)
subroutine, public hecmw_matrix_reorder_profile(N, perm, iperm, indexL, indexU, itemL, itemU, indexLp, indexUp, itemLp, itemUp)
subroutine, public hecmw_precond_ssor_44_clear(hecMAT)
subroutine, public hecmw_precond_ssor_44_apply(ZP)
subroutine, public hecmw_precond_ssor_44_setup(hecMAT)
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=4), parameter kreal
integer(kind=kint) function hecmw_comm_get_rank()
subroutine, public hecmw_matrix_ordering_rcm(N, indexL, itemL, indexU, itemU, perm, iperm)
subroutine, public hecmw_matrix_ordering_mc(N, indexL, itemL, indexU, itemU, perm_cur, ncolor_in, ncolor_out, COLORindex, perm, iperm)