model/src/cg2d_ex0.F

015c19607c Jean*0001 #include "CPP_OPTIONS.h"
                0002 #ifdef TARGET_NEC_SX
                0003 C     set a sensible default for the outer loop unrolling parameter that can
                0004 C     be overriden in the Makefile with the DEFINES macro or in CPP_OPTIONS.h
                0005 #ifndef CG2D_OUTERLOOPITERS
                0006 # define CG2D_OUTERLOOPITERS 10
                0007 #endif
                0008 #endif /* TARGET_NEC_SX */
                0009
                0010 CBOP
                0011 C     !ROUTINE: CG2D_EX0
                0012 C     !INTERFACE:
                0013       SUBROUTINE CG2D_EX0(
                0014      U                cg2d_b, cg2d_x,
                0015      O                firstResidual, minResidualSq, lastResidual,
                0016      U                numIters, nIterMin,
                0017      I                myThid )
                0018 C     !DESCRIPTION: \bv
aecc8b0f47 Mart*0019 C     *================================================================*
015c19607c Jean*0020 C     | SUBROUTINE CG2D_EX0
aecc8b0f47 Mart*0021 C     | o Two-dimensional grid problem conjugate-gradient inverter
                0022 C     |   (with preconditioner).
015c19607c Jean*0023 C     | This is the disconnected-tile version (each tile treated
                0024 C     |   independently as a small domain, with locally periodic
                0025 C     |   BC at the edges.
aecc8b0f47 Mart*0026 C     *================================================================*
015c19607c Jean*0027 C     | Con. grad is an iterative procedure for solving Ax = b.
                0028 C     | It requires the A be symmetric.
                0029 C     | This implementation assumes A is a five-diagonal matrix
                0030 C     | of the form that arises in the discrete representation of
                0031 C     | the del^2 operator in a two-dimensional space.
aecc8b0f47 Mart*0032 C     *================================================================*
015c19607c Jean*0033 C     \ev
                0034
                0035 C     !USES:
                0036       IMPLICIT NONE
                0037 C     === Global data ===
                0038 #include "SIZE.h"
                0039 #include "EEPARAMS.h"
                0040 #include "PARAMS.h"
                0041 #include "CG2D.h"
                0042
                0043 C     !INPUT/OUTPUT PARAMETERS:
                0044 C     cg2d_b    :: The source term or "right hand side" (output: normalised RHS)
                0045 C     cg2d_x    :: The solution (input: first guess)
                0046 C     firstResidual :: the initial residual before any iterations
                0047 C     minResidualSq :: the lowest residual reached (squared)
                0048 C     lastResidual  :: the actual residual reached
                0049 C     numIters  :: Inp: the maximum number of iterations allowed
                0050 C                  Out: the actual number of iterations used
                0051 C     nIterMin  :: Inp: decide to store (if >=0) or not (if <0) lowest res. sol.
                0052 C                  Out: iteration number corresponding to lowest residual
                0053 C     myThid    :: Thread on which I am working.
                0054       _RL  cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
                0055       _RL  cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
                0056       _RL  firstResidual
                0057       _RL  minResidualSq
                0058       _RL  lastResidual
                0059       INTEGER numIters
                0060       INTEGER nIterMin
                0061       INTEGER myThid
                0062
                0063 C     !LOCAL VARIABLES:
                0064 C     bi, bj     :: tile index in X and Y.
                0065 C     i, j, it2d :: Loop counters ( it2d counts CG iterations )
                0066 C     actualIts  :: actual CG iteration number
                0067 C     err_sq     :: Measure of the square of the residual of Ax - b.
                0068 C     eta_qrN    :: Used in computing search directions; suffix N and NM1
                0069 C     eta_qrNM1     denote current and previous iterations respectively.
                0070 C     cgBeta     :: coeff used to update conjugate direction vector "s".
                0071 C     alpha      :: coeff used to update solution & residual
                0072 C     sumRHS     :: Sum of right-hand-side. Sometimes this is a useful
                0073 C                   debugging/trouble shooting diagnostic. For neumann problems
                0074 C                   sumRHS needs to be ~0 or it converge at a non-zero residual.
                0075 C     cg2d_min   :: used to store solution corresponding to lowest residual.
                0076 C     msgBuf     :: Informational/error message buffer
aecc8b0f47 Mart*0077 C--   local working array (used to be in CG2D.h common block:
                0078 C     cg2d_q     :: Intermediate matrix-vector product term
                0079 C     cg2d_r     ::   *same*
                0080 C     cg2d_s     ::   *same*
015c19607c Jean*0081       INTEGER bi, bj
                0082       INTEGER i, j, it2d
                0083       INTEGER actualIts(nSx,nSy)
                0084       INTEGER minResIter(nSx,nSy)
                0085       _RL    err_sq,  errTile(nSx,nSy)
                0086       _RL    eta_qrNtile(nSx,nSy)
                0087       _RL    eta_qrNM1(nSx,nSy)
                0088       _RL    cgBeta
                0089       _RL    alpha,   alphaTile(nSx,nSy)
                0090       _RL    sumRHS,  sumRHStile(nSx,nSy)
                0091       _RL    rhsMax, rhsMaxLoc
                0092       _RL    rhsNorm(nSx,nSy)
                0093       _RL    minResTile(nSx,nSy)
                0094       _RL    cg2d_min(1:sNx,1:sNy,nSx,nSy)
d68b36a485 Mart*0095       _RL    cg2d_q  (1:sNx,1:sNy,nSx,nSy)
aecc8b0f47 Mart*0096       _RL    cg2d_r(1-1:sNx+1,1-1:sNy+1,nSx,nSy)
                0097       _RL    cg2d_s(1-1:sNx+1,1-1:sNy+1,nSx,nSy)
015c19607c Jean*0098       CHARACTER*(MAX_LEN_MBUF) msgBuf
                0099       LOGICAL printResidual
                0100 CEOP
                0101
                0102 C--   Initialise auxiliary constant, some output variable
                0103
                0104 C--   Initialise inverter and Normalise RHS
                0105       rhsMax = 0. _d 0
                0106       DO bj=myByLo(myThid),myByHi(myThid)
                0107        DO bi=myBxLo(myThid),myBxHi(myThid)
                0108
                0109         actualIts(bi,bj)  = 0
                0110         minResIter(bi,bj) = 0
                0111         minResTile(bi,bj) = -1. _d 0
                0112         eta_qrNM1(bi,bj) =  1. _d 0
                0113         rhsMaxLoc = 0. _d 0
                0114         DO j=1,sNy
                0115          DO i=1,sNx
                0116           cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*cg2dNorm
                0117           rhsMaxLoc = MAX(ABS(cg2d_b(i,j,bi,bj)),rhsMaxLoc)
                0118          ENDDO
                0119         ENDDO
                0120         rhsNorm(bi,bj) = 1. _d 0
                0121         IF ( rhsMaxLoc .NE. 0. ) rhsNorm(bi,bj) = 1. _d 0 / rhsMaxLoc
                0122         IF (cg2dNormaliseRHS) THEN
                0123          DO j=1,sNy
                0124           DO i=1,sNx
                0125            cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*rhsNorm(bi,bj)
                0126            cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)*rhsNorm(bi,bj)
                0127           ENDDO
                0128          ENDDO
                0129         ENDIF
                0130         rhsMax = MAX( rhsMaxLoc, rhsMax )
                0131
                0132        ENDDO
                0133       ENDDO
                0134       _GLOBAL_MAX_RL( rhsMax, myThid )
                0135
                0136 C--   Update overlaps
                0137       CALL EXCH_XY_RL( cg2d_x, myThid )
                0138
                0139 C--   Initial residual calculation
                0140       err_sq = 0.
                0141       sumRHS = 0.
                0142       DO bj=myByLo(myThid),myByHi(myThid)
                0143        DO bi=myBxLo(myThid),myBxHi(myThid)
ff51fd05c6 Jean*0144 C-    Initialise local working arrays:
                0145         DO j=0,sNy+1
                0146          DO i=0,sNx+1
                0147           cg2d_r(i,j,bi,bj) = 0. _d 0
                0148           cg2d_s(i,j,bi,bj) = 0. _d 0
                0149          ENDDO
                0150         ENDDO
015c19607c Jean*0151         IF ( nIterMin.GE.0 ) THEN
ff51fd05c6 Jean*0152 C-    Initialise saved solution
015c19607c Jean*0153          DO j=1,sNy
                0154           DO i=1,sNx
ff51fd05c6 Jean*0155            cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
015c19607c Jean*0156           ENDDO
                0157          ENDDO
                0158         ENDIF
                0159         sumRHStile(bi,bj) = 0. _d 0
                0160         errTile(bi,bj)    = 0. _d 0
                0161 #ifdef TARGET_NEC_SX
                0162 !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
                0163 #endif /* TARGET_NEC_SX */
                0164         DO j=1,sNy
                0165          DO i=1,sNx
                0166           cg2d_r(i,j,bi,bj) = cg2d_b(i,j,bi,bj) -
                0167      &    (aW2d(i  ,j  ,bi,bj)*cg2d_x(i-1,j  ,bi,bj)
                0168      &    +aW2d(i+1,j  ,bi,bj)*cg2d_x(i+1,j  ,bi,bj)
                0169      &    +aS2d(i  ,j  ,bi,bj)*cg2d_x(i  ,j-1,bi,bj)
                0170      &    +aS2d(i  ,j+1,bi,bj)*cg2d_x(i  ,j+1,bi,bj)
                0171      &    +aC2d(i  ,j  ,bi,bj)*cg2d_x(i  ,j  ,bi,bj)
                0172      &    )
                0173           errTile(bi,bj)    = errTile(bi,bj)
                0174      &                      + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
                0175           sumRHStile(bi,bj) = sumRHStile(bi,bj) + cg2d_b(i,j,bi,bj)
                0176          ENDDO
                0177         ENDDO
                0178         IF ( nIterMin.GE.0 ) THEN
                0179           minResTile(bi,bj) = errTile(bi,bj)
                0180         ENDIF
                0181         err_sq = MAX( errTile(bi,bj), err_sq )
                0182         sumRHS = MAX( ABS(sumRHStile(bi,bj)), sumRHS )
                0183
                0184        ENDDO
                0185       ENDDO
                0186       CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
                0187       _GLOBAL_MAX_RL( err_sq, myThid )
                0188       _GLOBAL_MAX_RL( sumRHS, myThid )
                0189       firstResidual = SQRT(err_sq)
                0190
                0191       printResidual = .FALSE.
                0192       IF ( debugLevel .GE. debLevZero ) THEN
                0193         _BEGIN_MASTER( myThid )
                0194         printResidual = printResidualFreq.GE.1
e6e223b277 Jean*0195         WRITE(standardMessageUnit,'(A,1P2E22.14)')
015c19607c Jean*0196      &  ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMax
                0197         _END_MASTER( myThid )
                0198       ENDIF
                0199
                0200 c     IF ( err_sq .LT. cg2dTolerance_sq ) GOTO 11
                0201
                0202 C     >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
                0203       DO it2d=1, numIters
                0204       IF ( err_sq.GE.cg2dTolerance_sq ) THEN
                0205        err_sq = 0. _d 0
                0206
                0207        DO bj=myByLo(myThid),myByHi(myThid)
                0208        DO bi=myBxLo(myThid),myBxHi(myThid)
                0209         IF ( errTile(bi,bj).GE.cg2dTolerance_sq ) THEN
                0210
                0211 C--    Solve preconditioning equation and update
                0212 C--    conjugate direction vector "s".
                0213          eta_qrNtile(bi,bj) = 0. _d 0
                0214 #ifdef TARGET_NEC_SX
                0215 !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
                0216 #endif /* TARGET_NEC_SX */
                0217          DO j=1,sNy
                0218           DO i=1,sNx
                0219            cg2d_q(i,j,bi,bj) =
                0220      &      pC(i  ,j  ,bi,bj)*cg2d_r(i  ,j  ,bi,bj)
                0221      &     +pW(i  ,j  ,bi,bj)*cg2d_r(i-1,j  ,bi,bj)
                0222      &     +pW(i+1,j  ,bi,bj)*cg2d_r(i+1,j  ,bi,bj)
                0223      &     +pS(i  ,j  ,bi,bj)*cg2d_r(i  ,j-1,bi,bj)
                0224      &     +pS(i  ,j+1,bi,bj)*cg2d_r(i  ,j+1,bi,bj)
                0225            eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
                0226      &     +cg2d_q(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
                0227           ENDDO
                0228          ENDDO
                0229
                0230          cgBeta   = eta_qrNtile(bi,bj)/eta_qrNM1(bi,bj)
                0231          eta_qrNM1(bi,bj) = eta_qrNtile(bi,bj)
                0232
                0233          DO j=1,sNy
                0234           DO i=1,sNx
                0235            cg2d_s(i,j,bi,bj) = cg2d_q(i,j,bi,bj)
                0236      &                       + cgBeta*cg2d_s(i,j,bi,bj)
                0237           ENDDO
                0238          ENDDO
                0239
                0240 C--    Do exchanges that require messages i.e. between processes.
                0241          CALL FILL_HALO_LOCAL_RL(
                0242      U                  cg2d_s(0,0,bi,bj),
                0243      I                  1, 1, 1, 1, 1,
                0244      I                  EXCH_IGNORE_CORNERS, bi, bj, myThid )
                0245
                0246 C==    Evaluate laplace operator on conjugate gradient vector
                0247 C==    q = A.s
                0248          alphaTile(bi,bj) = 0. _d 0
                0249 #ifdef TARGET_NEC_SX
                0250 !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
                0251 #endif /* TARGET_NEC_SX */
                0252          DO j=1,sNy
                0253           DO i=1,sNx
                0254            cg2d_q(i,j,bi,bj) =
                0255      &     aW2d(i  ,j  ,bi,bj)*cg2d_s(i-1,j  ,bi,bj)
                0256      &    +aW2d(i+1,j  ,bi,bj)*cg2d_s(i+1,j  ,bi,bj)
                0257      &    +aS2d(i  ,j  ,bi,bj)*cg2d_s(i  ,j-1,bi,bj)
                0258      &    +aS2d(i  ,j+1,bi,bj)*cg2d_s(i  ,j+1,bi,bj)
                0259      &    +aC2d(i  ,j  ,bi,bj)*cg2d_s(i  ,j  ,bi,bj)
                0260           alphaTile(bi,bj) = alphaTile(bi,bj)
                0261      &                     + cg2d_s(i,j,bi,bj)*cg2d_q(i,j,bi,bj)
                0262           ENDDO
                0263          ENDDO
                0264          alpha = eta_qrNtile(bi,bj)/alphaTile(bi,bj)
                0265
                0266 C==    Update simultaneously solution and residual vectors (and Iter number)
                0267 C      Now compute "interior" points.
                0268          errTile(bi,bj) = 0. _d 0
                0269          DO j=1,sNy
                0270           DO i=1,sNx
                0271            cg2d_x(i,j,bi,bj)=cg2d_x(i,j,bi,bj)+alpha*cg2d_s(i,j,bi,bj)
                0272            cg2d_r(i,j,bi,bj)=cg2d_r(i,j,bi,bj)-alpha*cg2d_q(i,j,bi,bj)
                0273            errTile(bi,bj) = errTile(bi,bj)
                0274      &                    + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
                0275           ENDDO
                0276          ENDDO
                0277          actualIts(bi,bj) = it2d
                0278
                0279          IF ( printResidual ) THEN
                0280           IF ( MOD( it2d-1, printResidualFreq ).EQ.0 ) THEN
                0281            WRITE(msgBuf,'(A,2I4,A,I6,A,1PE21.14)') ' cg2d(bi,bj=', bi,
                0282      &      bj, '): iter=', it2d, ' ; resid.= ', SQRT(errTile(bi,bj))
                0283            CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
                0284      &                         SQUEEZE_RIGHT, myThid )
                0285           ENDIF
                0286          ENDIF
                0287          IF ( errTile(bi,bj) .GE. cg2dTolerance_sq .AND.
                0288      &        errTile(bi,bj) .LT. minResTile(bi,bj) ) THEN
                0289 C-     Store lowest residual solution
                0290            minResTile(bi,bj) = errTile(bi,bj)
                0291            minResIter(bi,bj) = it2d
                0292            DO j=1,sNy
                0293             DO i=1,sNx
                0294              cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
                0295             ENDDO
                0296            ENDDO
                0297          ENDIF
                0298
                0299          CALL FILL_HALO_LOCAL_RL(
                0300      U                  cg2d_r(0,0,bi,bj),
                0301      I                  1, 1, 1, 1, 1,
                0302      I                  EXCH_IGNORE_CORNERS, bi, bj, myThid )
                0303
                0304         ENDIF
                0305         err_sq = MAX( errTile(bi,bj), err_sq )
                0306 C-    end bi,bj loops
                0307        ENDDO
                0308        ENDDO
                0309 C-    end cg-2d iteration loop
                0310       ENDIF
                0311       ENDDO
                0312
                0313 c  11 CONTINUE
                0314
                0315       IF ( nIterMin.GE.0 ) THEN
                0316 C-    use the lowest residual solution (instead of current one = last residual)
                0317        DO bj=myByLo(myThid),myByHi(myThid)
                0318         DO bi=myBxLo(myThid),myBxHi(myThid)
                0319 c         minResidualSq = MAX( minResTile(bi,bj), minResidualSq )
                0320 c         nIterMin = MAX( minResIter(bi,bj), nIterMin )
                0321          IF ( errTile(bi,bj) .GT. minResTile(bi,bj) ) THEN
                0322           DO j=1,sNy
                0323            DO i=1,sNx
                0324              cg2d_x(i,j,bi,bj) = cg2d_min(i,j,bi,bj)
                0325            ENDDO
                0326           ENDDO
                0327          ENDIF
                0328         ENDDO
                0329        ENDDO
                0330       ENDIF
                0331
                0332       IF (cg2dNormaliseRHS) THEN
                0333 C--   Un-normalise the answer
                0334         DO bj=myByLo(myThid),myByHi(myThid)
                0335          DO bi=myBxLo(myThid),myBxHi(myThid)
                0336           DO j=1,sNy
                0337            DO i=1,sNx
                0338             cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)/rhsNorm(bi,bj)
                0339            ENDDO
                0340           ENDDO
                0341          ENDDO
                0342         ENDDO
                0343       ENDIF
                0344
                0345 C--   Return parameters to caller
                0346 C     return largest Iter # and Max residual     in numIters and lastResidual
                0347 C     return lowest  Iter # and Min residual(^2) in nIterMin and minResidualSq
                0348       _GLOBAL_MAX_RL( err_sq, myThid )
                0349       nIterMin = numIters
                0350       numIters = 0
                0351       minResidualSq = err_sq
                0352       DO bj=myByLo(myThid),myByHi(myThid)
                0353        DO bi=myBxLo(myThid),myBxHi(myThid)
                0354          nIterMin = MIN( actualIts(bi,bj), nIterMin )
                0355          numIters = MAX( actualIts(bi,bj), numIters )
                0356          minResidualSq = MIN( errTile(bi,bj), minResidualSq )
                0357        ENDDO
                0358       ENDDO
                0359       lastResidual = SQRT(err_sq)
                0360       alpha = -nIterMin
                0361       _GLOBAL_MAX_RL( alpha, myThid )
                0362       nIterMin = NINT( -alpha )
                0363       alpha = numIters
                0364       _GLOBAL_MAX_RL( alpha, myThid )
                0365       numIters = NINT( alpha )
                0366       alpha = -minResidualSq
                0367       _GLOBAL_MAX_RL( alpha, myThid )
                0368       minResidualSq = -alpha
                0369
                0370       RETURN
                0371       END