FrontISTR  5.2.0
Large-scale structural analysis program with finit element method
hecmw_precond_SSOR_66.f90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 
6 !C
7 !C***
8 !C*** module hecmw_precond_SSOR_66
9 !C***
10 !C
12  use hecmw_util
18  !$ use omp_lib
19 
20  private
21 
25 
26  integer(kind=kint) :: N
27  real(kind=kreal), pointer :: d(:) => null()
28  real(kind=kreal), pointer :: al(:) => null()
29  real(kind=kreal), pointer :: au(:) => null()
30  integer(kind=kint), pointer :: indexL(:) => null()
31  integer(kind=kint), pointer :: indexU(:) => null()
32  integer(kind=kint), pointer :: itemL(:) => null()
33  integer(kind=kint), pointer :: itemU(:) => null()
34  real(kind=kreal), pointer :: alu(:) => null()
35 
36  integer(kind=kint) :: NContact = 0
37  real(kind=kreal), pointer :: cal(:) => null()
38  real(kind=kreal), pointer :: cau(:) => null()
39  integer(kind=kint), pointer :: indexCL(:) => null()
40  integer(kind=kint), pointer :: indexCU(:) => null()
41  integer(kind=kint), pointer :: itemCL(:) => null()
42  integer(kind=kint), pointer :: itemCU(:) => null()
43 
44  integer(kind=kint) :: NColor
45  integer(kind=kint), pointer :: COLORindex(:) => null()
46  integer(kind=kint), pointer :: perm(:) => null()
47  integer(kind=kint), pointer :: iperm(:) => null()
48 
49  logical, save :: isFirst = .true.
50 
51 contains
52 
53  subroutine hecmw_precond_ssor_66_setup(hecMAT)
54  implicit none
55  type(hecmwst_matrix), intent(in) :: hecmat
56  integer(kind=kint ) :: npl, npu, npcl, npcu
57  real (kind=kreal), allocatable :: cd(:)
58  integer(kind=kint ) :: ncolor_in
59  real (kind=kreal) :: sigma_diag
60  real (kind=kreal) :: alutmp(6,6), pw(6)
61  integer(kind=kint ) :: ii, i, j, k
62  integer(kind=kint ) :: nthreads = 1
63  integer(kind=kint ), allocatable :: perm_tmp(:)
64  !real (kind=kreal) :: t0
65 
66  !t0 = hecmw_Wtime()
67  !write(*,*) 'DEBUG: SSOR setup start', hecmw_Wtime()-t0
68 
69  !$ nthreads = omp_get_max_threads()
70 
71  n = hecmat%N
72  ! N = hecMAT%NP
73  ncolor_in = hecmw_mat_get_ncolor_in(hecmat)
74  sigma_diag = hecmw_mat_get_sigma_diag(hecmat)
75  ncontact = hecmat%cmat%n_val
76 
77  if (ncontact.gt.0) then
78  call hecmw_cmat_lu( hecmat )
79  endif
80 
81  if (nthreads == 1) then
82  ncolor = 1
83  allocate(colorindex(0:1), perm(n), iperm(n))
84  colorindex(0) = 0
85  colorindex(1) = n
86  do i=1,n
87  perm(i) = i
88  iperm(i) = i
89  end do
90 
91  d => hecmat%D
92  al => hecmat%AL
93  au => hecmat%AU
94  indexl => hecmat%indexL
95  indexu => hecmat%indexU
96  iteml => hecmat%itemL
97  itemu => hecmat%itemU
98  if (ncontact.gt.0) then
99  cal => hecmat%CAL
100  cau => hecmat%CAU
101  indexcl => hecmat%indexCL
102  indexcu => hecmat%indexCU
103  itemcl => hecmat%itemCL
104  itemcu => hecmat%itemCU
105  end if
106  else
107  allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
108  call hecmw_matrix_ordering_rcm(n, hecmat%indexL, hecmat%itemL, &
109  hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
110  !write(*,*) 'DEBUG: RCM ordering done', hecmw_Wtime()-t0
111  call hecmw_matrix_ordering_mc(n, hecmat%indexL, hecmat%itemL, &
112  hecmat%indexU, hecmat%itemU, perm_tmp, &
113  ncolor_in, ncolor, colorindex, perm, iperm)
114  !write(*,*) 'DEBUG: MC ordering done', hecmw_Wtime()-t0
115  deallocate(perm_tmp)
116 
117  !call write_debug_info
118 
119  npl = hecmat%indexL(n)
120  npu = hecmat%indexU(n)
121  allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
122  call hecmw_matrix_reorder_profile(n, perm, iperm, &
123  hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
124  indexl, indexu, iteml, itemu)
125  !write(*,*) 'DEBUG: reordering profile done', hecmw_Wtime()-t0
126 
127  call check_ordering
128 
129  allocate(d(36*n), al(36*npl), au(36*npu))
130  call hecmw_matrix_reorder_values(n, 6, perm, iperm, &
131  hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
132  hecmat%AL, hecmat%AU, hecmat%D, &
133  indexl, indexu, iteml, itemu, al, au, d)
134  !write(*,*) 'DEBUG: reordering values done', hecmw_Wtime()-t0
135 
136  call hecmw_matrix_reorder_renum_item(n, perm, indexl, iteml)
137  call hecmw_matrix_reorder_renum_item(n, perm, indexu, itemu)
138 
139  if (ncontact.gt.0) then
140  npcl = hecmat%indexCL(n)
141  npcu = hecmat%indexCU(n)
142  allocate(indexcl(0:n), indexcu(0:n), itemcl(npcl), itemcu(npcu))
143  call hecmw_matrix_reorder_profile(n, perm, iperm, &
144  hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
145  indexcl, indexcu, itemcl, itemcu)
146 
147  allocate(cd(36*n), cal(36*npcl), cau(36*npcu))
148  call hecmw_matrix_reorder_values(n, 6, perm, iperm, &
149  hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
150  hecmat%CAL, hecmat%CAU, hecmat%D, &
151  indexcl, indexcu, itemcl, itemcu, cal, cau, cd)
152  deallocate(cd)
153 
154  call hecmw_matrix_reorder_renum_item(n, perm, indexcl, itemcl)
155  call hecmw_matrix_reorder_renum_item(n, perm, indexcu, itemcu)
156  end if
157 
158  end if
159 
160  allocate(alu(36*n))
161  alu = 0.d0
162 
163  do ii= 1, 36*n
164  alu(ii) = d(ii)
165  enddo
166 
167  ! if (NContact.gt.0) then
168  ! do k= 1, hecMAT%cmat%n_val
169  ! if (hecMAT%cmat%pair(k)%i.ne.hecMAT%cmat%pair(k)%j) cycle
170  ! ii = iperm( hecMAT%cmat%pair(k)%i )
171  ! ALU(9*ii-8) = ALU(9*ii-8) + hecMAT%cmat%pair(k)%val(1, 1)
172  ! ALU(9*ii-7) = ALU(9*ii-7) + hecMAT%cmat%pair(k)%val(1, 2)
173  ! ALU(9*ii-6) = ALU(9*ii-6) + hecMAT%cmat%pair(k)%val(1, 3)
174  ! ALU(9*ii-5) = ALU(9*ii-5) + hecMAT%cmat%pair(k)%val(2, 1)
175  ! ALU(9*ii-4) = ALU(9*ii-4) + hecMAT%cmat%pair(k)%val(2, 2)
176  ! ALU(9*ii-3) = ALU(9*ii-3) + hecMAT%cmat%pair(k)%val(2, 3)
177  ! ALU(9*ii-2) = ALU(9*ii-2) + hecMAT%cmat%pair(k)%val(3, 1)
178  ! ALU(9*ii-1) = ALU(9*ii-1) + hecMAT%cmat%pair(k)%val(3, 2)
179  ! ALU(9*ii ) = ALU(9*ii ) + hecMAT%cmat%pair(k)%val(3, 3)
180  ! enddo
181  ! endif
182 
183  do ii= 1, n
184  alutmp(1,1)= alu(36*ii-35) * sigma_diag
185  alutmp(1,2)= alu(36*ii-34)
186  alutmp(1,3)= alu(36*ii-33)
187  alutmp(1,4)= alu(36*ii-32)
188  alutmp(1,5)= alu(36*ii-31)
189  alutmp(1,6)= alu(36*ii-30)
190 
191  alutmp(2,1)= alu(36*ii-29)
192  alutmp(2,2)= alu(36*ii-28) * sigma_diag
193  alutmp(2,3)= alu(36*ii-27)
194  alutmp(2,4)= alu(36*ii-26)
195  alutmp(2,5)= alu(36*ii-25)
196  alutmp(2,6)= alu(36*ii-24)
197 
198  alutmp(3,1)= alu(36*ii-23)
199  alutmp(3,2)= alu(36*ii-22)
200  alutmp(3,3)= alu(36*ii-21) * sigma_diag
201  alutmp(3,4)= alu(36*ii-20)
202  alutmp(3,5)= alu(36*ii-19)
203  alutmp(3,6)= alu(36*ii-18)
204 
205  alutmp(4,1)= alu(36*ii-17)
206  alutmp(4,2)= alu(36*ii-16)
207  alutmp(4,3)= alu(36*ii-15)
208  alutmp(4,4)= alu(36*ii-14) * sigma_diag
209  alutmp(4,5)= alu(36*ii-13)
210  alutmp(4,6)= alu(36*ii-12)
211 
212  alutmp(5,1)= alu(36*ii-11)
213  alutmp(5,2)= alu(36*ii-10)
214  alutmp(5,3)= alu(36*ii-9 )
215  alutmp(5,4)= alu(36*ii-8 )
216  alutmp(5,5)= alu(36*ii-7 ) * sigma_diag
217  alutmp(5,6)= alu(36*ii-6 )
218 
219  alutmp(6,1)= alu(36*ii-5 )
220  alutmp(6,2)= alu(36*ii-4 )
221  alutmp(6,3)= alu(36*ii-3 )
222  alutmp(6,4)= alu(36*ii-2 )
223  alutmp(6,5)= alu(36*ii-1 )
224  alutmp(6,6)= alu(36*ii ) * sigma_diag
225 
226  do k= 1, 6
227  alutmp(k,k)= 1.d0/alutmp(k,k)
228  do i= k+1, 6
229  alutmp(i,k)= alutmp(i,k) * alutmp(k,k)
230  do j= k+1, 6
231  pw(j)= alutmp(i,j) - alutmp(i,k)*alutmp(k,j)
232  enddo
233  do j= k+1, 6
234  alutmp(i,j)= pw(j)
235  enddo
236  enddo
237  enddo
238 
239  alu(36*ii-35)= alutmp(1,1)
240  alu(36*ii-34)= alutmp(1,2)
241  alu(36*ii-33)= alutmp(1,3)
242  alu(36*ii-32)= alutmp(1,4)
243  alu(36*ii-31)= alutmp(1,5)
244  alu(36*ii-30)= alutmp(1,6)
245  alu(36*ii-29)= alutmp(2,1)
246  alu(36*ii-28)= alutmp(2,2)
247  alu(36*ii-27)= alutmp(2,3)
248  alu(36*ii-26)= alutmp(2,4)
249  alu(36*ii-25)= alutmp(2,5)
250  alu(36*ii-24)= alutmp(2,6)
251  alu(36*ii-23)= alutmp(3,1)
252  alu(36*ii-22)= alutmp(3,2)
253  alu(36*ii-21)= alutmp(3,3)
254  alu(36*ii-20)= alutmp(3,4)
255  alu(36*ii-19)= alutmp(3,5)
256  alu(36*ii-18)= alutmp(3,6)
257  alu(36*ii-17)= alutmp(4,1)
258  alu(36*ii-16)= alutmp(4,2)
259  alu(36*ii-15)= alutmp(4,3)
260  alu(36*ii-14)= alutmp(4,4)
261  alu(36*ii-13)= alutmp(4,5)
262  alu(36*ii-12)= alutmp(4,6)
263  alu(36*ii-11)= alutmp(5,1)
264  alu(36*ii-10)= alutmp(5,2)
265  alu(36*ii-9 )= alutmp(5,3)
266  alu(36*ii-8 )= alutmp(5,4)
267  alu(36*ii-7 )= alutmp(5,5)
268  alu(36*ii-6 )= alutmp(5,6)
269  alu(36*ii-5 )= alutmp(6,1)
270  alu(36*ii-4 )= alutmp(6,2)
271  alu(36*ii-3 )= alutmp(6,3)
272  alu(36*ii-2 )= alutmp(6,4)
273  alu(36*ii-1 )= alutmp(6,5)
274  alu(36*ii )= alutmp(6,6)
275  enddo
276 
277  isfirst = .true.
278 
279  !write(*,*) 'DEBUG: SSOR setup done', hecmw_Wtime()-t0
280 
281  end subroutine hecmw_precond_ssor_66_setup
282 
284  use hecmw_tuning_fx
285  implicit none
286  real(kind=kreal), intent(inout) :: zp(:)
287  integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k
288  real(kind=kreal) :: x1, x2, x3, x4, x5, x6
289  real(kind=kreal) :: sw1, sw2, sw3, sw4, sw5, sw6
290 
291  ! added for turning >>>
292  integer(kind=kint), parameter :: numofblockperthread = 100
293  integer(kind=kint), save :: numofthread = 1, numofblock
294  integer(kind=kint), save, allocatable :: ictoblockindex(:)
295  integer(kind=kint), save, allocatable :: blockindextocolorindex(:)
296  integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
297  integer(kind=kint) :: blockindex, elementcount, numofelement, ii
298  real(kind=kreal) :: numofelementperblock
299  integer(kind=kint) :: my_rank
300 
301  if (isfirst) then
302  !$ numOfThread = omp_get_max_threads()
303  numofblock = numofthread * numofblockperthread
304  if (allocated(ictoblockindex)) deallocate(ictoblockindex)
305  if (allocated(blockindextocolorindex)) deallocate(blockindextocolorindex)
306  allocate (ictoblockindex(0:ncolor), &
307  blockindextocolorindex(0:numofblock + ncolor))
308  numofelement = n + indexl(n) + indexu(n)
309  numofelementperblock = dble(numofelement) / numofblock
310  blockindex = 0
311  ictoblockindex = -1
312  ictoblockindex(0) = 0
313  blockindextocolorindex = -1
314  blockindextocolorindex(0) = 0
315  my_rank = hecmw_comm_get_rank()
316  ! write(9000+my_rank,*) &
317  ! '# numOfElementPerBlock =', numOfElementPerBlock
318  ! write(9000+my_rank,*) &
319  ! '# ic, blockIndex, colorIndex, elementCount'
320  do ic = 1, ncolor
321  elementcount = 0
322  ii = 1
323  do i = colorindex(ic-1)+1, colorindex(ic)
324  elementcount = elementcount + 1
325  elementcount = elementcount + (indexl(i) - indexl(i-1))
326  elementcount = elementcount + (indexu(i) - indexu(i-1))
327  if (elementcount > ii * numofelementperblock &
328  .or. i == colorindex(ic)) then
329  ii = ii + 1
330  blockindex = blockindex + 1
331  blockindextocolorindex(blockindex) = i
332  ! write(9000+my_rank,*) ic, blockIndex, &
333  ! blockIndexToColorIndex(blockIndex), elementCount
334  endif
335  enddo
336  ictoblockindex(ic) = blockindex
337  enddo
338  numofblock = blockindex
339 
341  sectorcachesize0, sectorcachesize1 )
342 
343  isfirst = .false.
344  endif
345  ! <<< added for turning
346 
347  !call start_collection("loopInPrecond66")
348 
349  !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
350  !OCL CACHE_SUBSECTOR_ASSIGN(ZP)
351 
352  !$omp parallel default(none) &
353  !$omp&shared(NColor,indexL,itemL,indexU,itemU,AL,AU,D,ALU,perm,&
354  !$omp& NContact,indexCL,itemCL,indexCU,itemCU,CAL,CAU,&
355  !$omp& ZP,icToBlockIndex,blockIndexToColorIndex) &
356  !$omp&private(SW1,SW2,SW3,SW4,SW5,SW6,X1,X2,X3,X4,X5,X6,ic,i,iold,isL,ieL,isU,ieU,j,k,blockIndex)
357 
358  !C-- FORWARD
359  do ic=1,ncolor
360  !$omp do schedule (static, 1)
361  do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
362  do i = blockindextocolorindex(blockindex-1)+1, &
363  blockindextocolorindex(blockindex)
364  ! do i = startPos(threadNum, ic), endPos(threadNum, ic)
365  iold = perm(i)
366  sw1= zp(6*iold-5)
367  sw2= zp(6*iold-4)
368  sw3= zp(6*iold-3)
369  sw4= zp(6*iold-2)
370  sw5= zp(6*iold-1)
371  sw6= zp(6*iold )
372  isl= indexl(i-1)+1
373  iel= indexl(i)
374  do j= isl, iel
375  !k= perm(itemL(j))
376  k= iteml(j)
377  x1= zp(6*k-5)
378  x2= zp(6*k-4)
379  x3= zp(6*k-3)
380  x4= zp(6*k-2)
381  x5= zp(6*k-1)
382  x6= zp(6*k )
383  sw1= sw1 -al(36*j-35)*x1 -al(36*j-34)*x2 -al(36*j-33)*x3 -al(36*j-32)*x4 -al(36*j-31)*x5 -al(36*j-30)*x6
384  sw2= sw2 -al(36*j-29)*x1 -al(36*j-28)*x2 -al(36*j-27)*x3 -al(36*j-26)*x4 -al(36*j-25)*x5 -al(36*j-24)*x6
385  sw3= sw3 -al(36*j-23)*x1 -al(36*j-22)*x2 -al(36*j-21)*x3 -al(36*j-20)*x4 -al(36*j-19)*x5 -al(36*j-18)*x6
386  sw4= sw4 -al(36*j-17)*x1 -al(36*j-16)*x2 -al(36*j-15)*x3 -al(36*j-14)*x4 -al(36*j-13)*x5 -al(36*j-12)*x6
387  sw5= sw5 -al(36*j-11)*x1 -al(36*j-10)*x2 -al(36*j-9 )*x3 -al(36*j-8 )*x4 -al(36*j-7 )*x5 -al(36*j-6 )*x6
388  sw6= sw6 -al(36*j-5 )*x1 -al(36*j-4 )*x2 -al(36*j-3 )*x3 -al(36*j-2 )*x4 -al(36*j-1 )*x5 -al(36*j )*x6
389  enddo ! j
390 
391  if (ncontact.ne.0) then
392  isl= indexcl(i-1)+1
393  iel= indexcl(i)
394  do j= isl, iel
395  !k= perm(itemCL(j))
396  k= itemcl(j)
397  x1= zp(6*k-5)
398  x2= zp(6*k-4)
399  x3= zp(6*k-3)
400  x4= zp(6*k-2)
401  x5= zp(6*k-1)
402  x6= zp(6*k )
403  sw1= sw1 -cal(36*j-35)*x1 -cal(36*j-34)*x2 -cal(36*j-33)*x3 -cal(36*j-32)*x4 -cal(36*j-31)*x5 -cal(36*j-30)*x6
404  sw2= sw2 -cal(36*j-29)*x1 -cal(36*j-28)*x2 -cal(36*j-27)*x3 -cal(36*j-26)*x4 -cal(36*j-25)*x5 -cal(36*j-24)*x6
405  sw3= sw3 -cal(36*j-23)*x1 -cal(36*j-22)*x2 -cal(36*j-21)*x3 -cal(36*j-20)*x4 -cal(36*j-19)*x5 -cal(36*j-18)*x6
406  sw4= sw4 -cal(36*j-17)*x1 -cal(36*j-16)*x2 -cal(36*j-15)*x3 -cal(36*j-14)*x4 -cal(36*j-13)*x5 -cal(36*j-12)*x6
407  sw5= sw5 -cal(36*j-11)*x1 -cal(36*j-10)*x2 -cal(36*j-9 )*x3 -cal(36*j-8 )*x4 -cal(36*j-7 )*x5 -cal(36*j-6 )*x6
408  sw6= sw6 -cal(36*j-5 )*x1 -cal(36*j-4 )*x2 -cal(36*j-3 )*x3 -cal(36*j-2 )*x4 -cal(36*j-1 )*x5 -cal(36*j )*x6
409  enddo ! j
410  endif
411 
412  x1= sw1
413  x2= sw2
414  x3= sw3
415  x4= sw4
416  x5= sw5
417  x6= sw6
418  x2= x2 -alu(36*i-29)*x1
419  x3= x3 -alu(36*i-23)*x1 -alu(36*i-22)*x2
420  x4= x4 -alu(36*i-17)*x1 -alu(36*i-16)*x2 -alu(36*i-5)*x3
421  x5= x5 -alu(36*i-11)*x1 -alu(36*i-10)*x2 -alu(36*i-9)*x3 -alu(36*i-8)*x4
422  x6= x6 -alu(36*i-5 )*x1 -alu(36*i-4 )*x2 -alu(36*i-3)*x3 -alu(36*i-2)*x4 -alu(36*i-1)*x5
423  x6= alu(36*i )* x6
424  x5= alu(36*i-7 )*( x5 -alu(36*i-6)*x6 )
425  x4= alu(36*i-14)*( x4 -alu(36*i-12)*x6 -alu(36*i-13)*x5)
426  x3= alu(36*i-21)*( x3 -alu(36*i-18)*x6 -alu(36*i-19)*x5 -alu(36*i-20)*x4)
427  x2= alu(36*i-28)*( x2 -alu(36*i-24)*x6 -alu(36*i-25)*x5 -alu(36*i-26)*x4 -alu(36*i-27)*x3)
428  x1= alu(36*i-35)*( x1 -alu(36*i-30)*x6 -alu(36*i-31)*x5 -alu(36*i-32)*x4 -alu(36*i-33)*x3 -alu(36*i-34)*x2)
429  zp(6*iold-5)= x1
430  zp(6*iold-4)= x2
431  zp(6*iold-3)= x3
432  zp(6*iold-2)= x4
433  zp(6*iold-1)= x5
434  zp(6*iold )= x6
435  enddo ! i
436  enddo ! blockIndex
437  !$omp end do
438  enddo ! ic
439 
440  !C-- BACKWARD
441  do ic=ncolor, 1, -1
442  !$omp do schedule (static, 1)
443  do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
444  do i = blockindextocolorindex(blockindex), &
445  blockindextocolorindex(blockindex-1)+1, -1
446  ! do blockIndex = icToBlockIndex(ic-1)+1, icToBlockIndex(ic)
447  ! do i = blockIndexToColorIndex(blockIndex-1)+1, &
448  ! blockIndexToColorIndex(blockIndex)
449  ! do i = endPos(threadNum, ic), startPos(threadNum, ic), -1
450  sw1= 0.d0
451  sw2= 0.d0
452  sw3= 0.d0
453  sw4= 0.d0
454  sw5= 0.d0
455  sw6= 0.d0
456  isu= indexu(i-1) + 1
457  ieu= indexu(i)
458  do j= ieu, isu, -1
459  !k= perm(itemU(j))
460  k= itemu(j)
461  x1= zp(6*k-5)
462  x2= zp(6*k-4)
463  x3= zp(6*k-3)
464  x4= zp(6*k-2)
465  x5= zp(6*k-1)
466  x6= zp(6*k )
467  sw1= sw1 +au(36*j-35)*x1 +au(36*j-34)*x2 +au(36*j-33)*x3 +au(36*j-32)*x4 +au(36*j-31)*x5 +au(36*j-30)*x6
468  sw2= sw2 +au(36*j-29)*x1 +au(36*j-28)*x2 +au(36*j-27)*x3 +au(36*j-26)*x4 +au(36*j-25)*x5 +au(36*j-24)*x6
469  sw3= sw3 +au(36*j-23)*x1 +au(36*j-22)*x2 +au(36*j-21)*x3 +au(36*j-20)*x4 +au(36*j-19)*x5 +au(36*j-18)*x6
470  sw4= sw4 +au(36*j-17)*x1 +au(36*j-16)*x2 +au(36*j-15)*x3 +au(36*j-14)*x4 +au(36*j-13)*x5 +au(36*j-12)*x6
471  sw5= sw5 +au(36*j-11)*x1 +au(36*j-10)*x2 +au(36*j-9 )*x3 +au(36*j-8 )*x4 +au(36*j-7 )*x5 +au(36*j-6 )*x6
472  sw6= sw6 +au(36*j-5 )*x1 +au(36*j-4 )*x2 +au(36*j-3 )*x3 +au(36*j-2 )*x4 +au(36*j-1 )*x5 +au(36*j )*x6
473  enddo ! j
474 
475  if (ncontact.gt.0) then
476  isu= indexcu(i-1) + 1
477  ieu= indexcu(i)
478  do j= ieu, isu, -1
479  !k= perm(itemCU(j))
480  k= itemcu(j)
481  x1= zp(6*k-5)
482  x2= zp(6*k-4)
483  x3= zp(6*k-3)
484  x4= zp(6*k-2)
485  x5= zp(6*k-1)
486  x6= zp(6*k )
487  sw1= sw1 +cau(36*j-35)*x1 +cau(36*j-34)*x2 +cau(36*j-33)*x3 +cau(36*j-32)*x4 +cau(36*j-31)*x5 +cau(36*j-30)*x6
488  sw2= sw2 +cau(36*j-29)*x1 +cau(36*j-28)*x2 +cau(36*j-27)*x3 +cau(36*j-26)*x4 +cau(36*j-25)*x5 +cau(36*j-24)*x6
489  sw3= sw3 +cau(36*j-23)*x1 +cau(36*j-22)*x2 +cau(36*j-21)*x3 +cau(36*j-20)*x4 +cau(36*j-19)*x5 +cau(36*j-18)*x6
490  sw4= sw4 +cau(36*j-17)*x1 +cau(36*j-16)*x2 +cau(36*j-15)*x3 +cau(36*j-14)*x4 +cau(36*j-13)*x5 +cau(36*j-12)*x6
491  sw5= sw5 +cau(36*j-11)*x1 +cau(36*j-10)*x2 +cau(36*j-9 )*x3 +cau(36*j-8 )*x4 +cau(36*j-7 )*x5 +cau(36*j-6 )*x6
492  sw6= sw6 +cau(36*j-5 )*x1 +cau(36*j-4 )*x2 +cau(36*j-3 )*x3 +cau(36*j-2 )*x4 +cau(36*j-1 )*x5 +cau(36*j )*x6
493  enddo ! j
494  endif
495 
496  x1= sw1
497  x2= sw2
498  x3= sw3
499  x4= sw4
500  x5= sw5
501  x6= sw6
502  x2= x2 -alu(36*i-29)*x1
503  x3= x3 -alu(36*i-23)*x1 -alu(36*i-22)*x2
504  x4= x4 -alu(36*i-17)*x1 -alu(36*i-16)*x2 -alu(36*i-5)*x3
505  x5= x5 -alu(36*i-11)*x1 -alu(36*i-10)*x2 -alu(36*i-9)*x3 -alu(36*i-8)*x4
506  x6= x6 -alu(36*i-5 )*x1 -alu(36*i-4 )*x2 -alu(36*i-3)*x3 -alu(36*i-2)*x4 -alu(36*i-1)*x5
507  x6= alu(36*i )* x6
508  x5= alu(36*i-7 )*( x5 -alu(36*i-6)*x6 )
509  x4= alu(36*i-14)*( x4 -alu(36*i-12)*x6 -alu(36*i-13)*x5)
510  x3= alu(36*i-21)*( x3 -alu(36*i-18)*x6 -alu(36*i-19)*x5 -alu(36*i-20)*x4)
511  x2= alu(36*i-28)*( x2 -alu(36*i-24)*x6 -alu(36*i-25)*x5 -alu(36*i-26)*x4 -alu(36*i-27)*x3)
512  x1= alu(36*i-35)*( x1 -alu(36*i-30)*x6 -alu(36*i-31)*x5 -alu(36*i-32)*x4 -alu(36*i-33)*x3 -alu(36*i-34)*x2)
513  iold = perm(i)
514  zp(6*iold-5)= zp(6*iold-5) -x1
515  zp(6*iold-4)= zp(6*iold-4) -x2
516  zp(6*iold-3)= zp(6*iold-3) -x3
517  zp(6*iold-2)= zp(6*iold-2) -x4
518  zp(6*iold-1)= zp(6*iold-1) -x5
519  zp(6*iold )= zp(6*iold ) -x6
520  enddo ! i
521  enddo ! blockIndex
522  !$omp end do
523  enddo ! ic
524  !$omp end parallel
525 
526  !OCL END_CACHE_SUBSECTOR
527  !OCL END_CACHE_SECTOR_SIZE
528 
529  !call stop_collection("loopInPrecond66")
530 
531  end subroutine hecmw_precond_ssor_66_apply
532 
533  subroutine hecmw_precond_ssor_66_clear(hecMAT)
534  implicit none
535  type(hecmwst_matrix), intent(inout) :: hecmat
536  integer(kind=kint ) :: nthreads = 1
537  !$ nthreads = omp_get_max_threads()
538  if (associated(colorindex)) deallocate(colorindex)
539  if (associated(perm)) deallocate(perm)
540  if (associated(iperm)) deallocate(iperm)
541  if (associated(alu)) deallocate(alu)
542  if (nthreads >= 1) then
543  if (associated(d)) deallocate(d)
544  if (associated(al)) deallocate(al)
545  if (associated(au)) deallocate(au)
546  if (associated(indexl)) deallocate(indexl)
547  if (associated(indexu)) deallocate(indexu)
548  if (associated(iteml)) deallocate(iteml)
549  if (associated(itemu)) deallocate(itemu)
550  if (ncontact.ne.0) then
551  if (associated(cal)) deallocate(cal)
552  if (associated(cau)) deallocate(cau)
553  if (associated(indexcl)) deallocate(indexcl)
554  if (associated(indexcu)) deallocate(indexcu)
555  if (associated(itemcl)) deallocate(itemcl)
556  if (associated(itemcu)) deallocate(itemcu)
557  end if
558  end if
559  nullify(colorindex)
560  nullify(perm)
561  nullify(iperm)
562  nullify(alu)
563  nullify(d)
564  nullify(al)
565  nullify(au)
566  nullify(indexl)
567  nullify(indexu)
568  nullify(iteml)
569  nullify(itemu)
570  if (ncontact.ne.0) then
571  nullify(cal)
572  nullify(cau)
573  nullify(indexcl)
574  nullify(indexcu)
575  nullify(itemcl)
576  nullify(itemcu)
577  call hecmw_cmat_lu_free( hecmat )
578  endif
579  ncontact = 0
580  end subroutine hecmw_precond_ssor_66_clear
581 
582  subroutine write_debug_info
583  implicit none
584  integer(kind=kint) :: my_rank, ic, in
585  my_rank = hecmw_comm_get_rank()
586  !--------------------> debug: shizawa
587  if (my_rank.eq.0) then
588  write(*,*) 'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
589  endif
590  write(19000+my_rank,'(a)') '#NCOLORTot'
591  write(19000+my_rank,*) ncolor
592  write(19000+my_rank,'(a)') '#ic COLORindex(ic-1)+1 COLORindex(ic)'
593  do ic=1,ncolor
594  write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
595  enddo ! ic
596  write(29000+my_rank,'(a)') '#n_node'
597  write(29000+my_rank,*) n
598  write(29000+my_rank,'(a)') '#in OLDtoNEW(in) NEWtoOLD(in)'
599  do in=1,n
600  write(29000+my_rank,*) in, iperm(in), perm(in)
601  if (perm(iperm(in)) .ne. in) then
602  write(29000+my_rank,*) '** WARNING **: NEWtoOLD and OLDtoNEW: ',in
603  endif
604  enddo
605  end subroutine write_debug_info
606 
607  subroutine check_ordering
608  implicit none
609  integer(kind=kint) :: ic, i, j, k
610  integer(kind=kint), allocatable :: iicolor(:)
611  ! check color dependence of neighbouring nodes
612  if (ncolor.gt.1) then
613  allocate(iicolor(n))
614  do ic=1,ncolor
615  do i= colorindex(ic-1)+1, colorindex(ic)
616  iicolor(i) = ic
617  enddo ! i
618  enddo ! ic
619  ! FORWARD: L-part
620  do ic=1,ncolor
621  do i= colorindex(ic-1)+1, colorindex(ic)
622  do j= indexl(i-1)+1, indexl(i)
623  k= iteml(j)
624  if (iicolor(i).eq.iicolor(k)) then
625  write(*,*) .eq.'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
626  endif
627  enddo ! j
628  enddo ! i
629  enddo ! ic
630  ! BACKWARD: U-part
631  do ic=ncolor, 1, -1
632  do i= colorindex(ic), colorindex(ic-1)+1, -1
633  do j= indexu(i-1)+1, indexu(i)
634  k= itemu(j)
635  if (iicolor(i).eq.iicolor(k)) then
636  write(*,*) .eq.'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
637  endif
638  enddo ! j
639  enddo ! i
640  enddo ! ic
641  deallocate(iicolor)
642  endif ! if (NColor.gt.1)
643  !--------------------< debug: shizawa
644  end subroutine check_ordering
645 
646 end module hecmw_precond_ssor_66
subroutine, public hecmw_cmat_lu(hecMAT)
subroutine, public hecmw_cmat_lu_free(hecMAT)
real(kind=kreal) function, public hecmw_mat_get_sigma_diag(hecMAT)
integer(kind=kint) function, public hecmw_mat_get_ncolor_in(hecMAT)
subroutine, public hecmw_matrix_reorder_values(N, NDOF, perm, iperm, indexL, indexU, itemL, itemU, AL, AU, D, indexLp, indexUp, itemLp, itemUp, ALp, AUp, Dp)
subroutine, public hecmw_matrix_reorder_renum_item(N, perm, indexXp, itemXp)
subroutine, public hecmw_matrix_reorder_profile(N, perm, iperm, indexL, indexU, itemL, itemU, indexLp, indexUp, itemLp, itemUp)
subroutine, public hecmw_precond_ssor_66_setup(hecMAT)
subroutine, public hecmw_precond_ssor_66_clear(hecMAT)
subroutine, public hecmw_precond_ssor_66_apply(ZP)
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=4), parameter kreal
integer(kind=kint) function hecmw_comm_get_rank()
subroutine, public hecmw_matrix_ordering_rcm(N, indexL, itemL, indexU, itemU, perm, iperm)
subroutine, public hecmw_matrix_ordering_mc(N, indexL, itemL, indexU, itemU, perm_cur, ncolor_in, ncolor_out, COLORindex, perm, iperm)