Back to home page

MITgcm

 
 

    


File indexing completed on 2024-09-20 05:10:17 UTC

view on githubraw file Latest commit e6e223b2 on 2024-09-19 22:01:15 UTC
88830be691 Alis*0001 #include "CPP_OPTIONS.h"
cb36328bb9 Mart*0002 #ifdef TARGET_NEC_SX
                0003 C     set a sensible default for the outer loop unrolling parameter that can
                0004 C     be overriden in the Makefile with the DEFINES macro or in CPP_OPTIONS.h
                0005 #ifndef CG3D_OUTERLOOPITERS
                0006 # define CG3D_OUTERLOOPITERS 10
                0007 #endif
                0008 #endif /* TARGET_NEC_SX */
88830be691 Alis*0009 
9366854e02 Chri*0010 CBOP
                0011 C     !ROUTINE: CG3D
                0012 C     !INTERFACE:
0cdaaf4c8e Jean*0013       SUBROUTINE CG3D(
c11d209635 Jean*0014      U                cg3d_b, cg3d_x,
                0015      O                firstResidual, lastResidual,
ffc25bb3ad Alis*0016      U                numIters,
d540b62759 Jean*0017      I                myIter, myThid )
9366854e02 Chri*0018 C     !DESCRIPTION: \bv
                0019 C     *==========================================================*
a619b19bc9 Jean*0020 C     | SUBROUTINE CG3D
                0021 C     | o Three-dimensional grid problem conjugate-gradient
                0022 C     |   inverter (with preconditioner).
9366854e02 Chri*0023 C     *==========================================================*
a619b19bc9 Jean*0024 C     | Con. grad is an iterative procedure for solving Ax = b.
                0025 C     | It requires the A be symmetric.
                0026 C     | This implementation assumes A is a seven-diagonal
                0027 C     | matrix of the form that arises in the discrete
                0028 C     | representation of the del^2 operator in a
                0029 C     | three-dimensional space.
                0030 C     | Notes:
                0031 C     | ======
                0032 C     | This implementation can support shared-memory
                0033 C     | multi-threaded execution. In order to do this COMMON
                0034 C     | blocks are used for many of the arrays - even ones that
                0035 C     | are only used for intermedaite results. This design is
                0036 C     | OK if you want to all the threads to collaborate on
                0037 C     | solving the same problem. On the other hand if you want
                0038 C     | the threads to solve several different problems
                0039 C     | concurrently this implementation will not work.
9366854e02 Chri*0040 C     *==========================================================*
                0041 C     \ev
88830be691 Alis*0042 
9366854e02 Chri*0043 C     !USES:
                0044       IMPLICIT NONE
88830be691 Alis*0045 C     === Global data ===
                0046 #include "SIZE.h"
                0047 #include "EEPARAMS.h"
e6e223b277 Jean*0048 #ifdef ALLOW_NONHYDROSTATIC
                0049 # include "PARAMS.h"
                0050 # include "GRID.h"
                0051 # include "SURFACE.h"
                0052 # include "CG3D.h"
                0053 #endif /* ALLOW_NONHYDROSTATIC */
88830be691 Alis*0054 
9366854e02 Chri*0055 C     !INPUT/OUTPUT PARAMETERS:
c11d209635 Jean*0056 C     cg3d_b    :: The source term or "right hand side" (output: normalised RHS)
                0057 C     cg3d_x    :: The solution (input: first guess)
d540b62759 Jean*0058 C     firstResidual :: the initial residual before any iterations
c11d209635 Jean*0059 C     minResidualSq :: the lowest residual reached (squared)
                0060 CC    lastResidual  :: the actual residual reached
                0061 C     numIters  :: Inp: the maximum number of iterations allowed
                0062 C                  Out: the actual number of iterations used
                0063 CC    nIterMin  :: Inp: decide to store (if >=0) or not (if <0) lowest res. sol.
                0064 CC                 Out: iteration number corresponding to lowest residual
d540b62759 Jean*0065 C     myIter    :: Current iteration number in simulation
                0066 C     myThid    :: my Thread Id number
                0067       _RL     cg3d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy)
                0068       _RL     cg3d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy)
                0069       _RL     firstResidual
                0070       _RL     lastResidual
ffc25bb3ad Alis*0071       INTEGER numIters
d540b62759 Jean*0072       INTEGER myIter
88830be691 Alis*0073       INTEGER myThid
                0074 
76c208745c Alis*0075 #ifdef ALLOW_NONHYDROSTATIC
9366854e02 Chri*0076 C     !LOCAL VARIABLES:
c11d209635 Jean*0077 C     bi, bj     :: tile index in X and Y.
d540b62759 Jean*0078 C     i, j, k    :: Loop counters
                0079 C     it3d       :: Loop counter for CG iterations
c11d209635 Jean*0080 C     actualIts  :: actual CG iteration number
                0081 C     err_sq     :: Measure of the square of the residual of Ax - b.
                0082 C     eta_qrN    :: Used in computing search directions; suffix N and NM1
                0083 C     eta_qrNM1     denote current and previous iterations respectively.
                0084 C     cgBeta     :: coeff used to update conjugate direction vector "s".
                0085 C     alpha      :: coeff used to update solution & residual
                0086 C     sumRHS     :: Sum of right-hand-side. Sometimes this is a useful
                0087 C                   debugging/trouble shooting diagnostic. For neumann problems
                0088 C                   sumRHS needs to be ~0 or it converge at a non-zero residual.
                0089 C     cg2d_min   :: used to store solution corresponding to lowest residual.
d540b62759 Jean*0090 C     msgBuf     :: Informational/error message buffer
a619b19bc9 Jean*0091       INTEGER bi, bj
d540b62759 Jean*0092       INTEGER i, j, k, it3d
c11d209635 Jean*0093       INTEGER actualIts
d540b62759 Jean*0094       INTEGER km1, kp1
                0095       _RL     maskM1, maskP1
c11d209635 Jean*0096       _RL     err_sq,  errTile(nSx,nSy)
                0097       _RL     eta_qrN, eta_qrNtile(nSx,nSy)
d540b62759 Jean*0098       _RL     eta_qrNM1
                0099       _RL     cgBeta
c11d209635 Jean*0100       _RL     alpha ,  alphaTile(nSx,nSy)
                0101       _RL     sumRHS,  sumRHStile(nSx,nSy)
d540b62759 Jean*0102       _RL     rhsMax
                0103       _RL     rhsNorm
                0104       CHARACTER*(MAX_LEN_MBUF) msgBuf
764aea3440 Jean*0105       LOGICAL printResidual
d540b62759 Jean*0106       _RL     surfFac
                0107 #ifdef NONLIN_FRSURF
                0108       INTEGER ks
                0109       _RL     surfTerm(sNx,sNy)
                0110 #endif /* NONLIN_FRSURF */
9366854e02 Chri*0111 CEOP
88830be691 Alis*0112 
c11d209635 Jean*0113 C--   Initialise auxiliary constant, some output variable
d540b62759 Jean*0114       IF ( select_rStar .NE. 0 ) THEN
                0115         surfFac = freeSurfFac
                0116       ELSE
                0117         surfFac = 0.
                0118       ENDIF
                0119 #ifdef NONLIN_FRSURF
                0120       DO j=1,sNy
                0121        DO i=1,sNx
                0122          surfTerm(i,j) = 0.
                0123        ENDDO
                0124       ENDDO
                0125 #endif /* NONLIN_FRSURF */
6d54cf9ca1 Ed H*0126 
88830be691 Alis*0127 C--   Initialise inverter
d540b62759 Jean*0128       eta_qrNM1 = 1. _d 0
88830be691 Alis*0129 
                0130 C--   Normalise RHS
                0131       rhsMax = 0. _d 0
                0132       DO bj=myByLo(myThid),myByHi(myThid)
                0133        DO bi=myBxLo(myThid),myBxHi(myThid)
d540b62759 Jean*0134         DO k=1,Nr
                0135          DO j=1,sNy
                0136           DO i=1,sNx
                0137            cg3d_b(i,j,k,bi,bj) = cg3d_b(i,j,k,bi,bj)*cg3dNorm
                0138      &                          * maskC(i,j,k,bi,bj)
                0139            rhsMax = MAX(ABS(cg3d_b(i,j,k,bi,bj)),rhsMax)
88830be691 Alis*0140           ENDDO
                0141          ENDDO
                0142         ENDDO
                0143        ENDDO
                0144       ENDDO
e6e223b277 Jean*0145 
                0146       IF ( cg3dNormaliseRHS ) THEN
                0147 C-  Normalise RHS :
                0148        _GLOBAL_MAX_RL( rhsMax, myThid )
                0149        rhsNorm = 1. _d 0
                0150        IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
                0151        DO bj=myByLo(myThid),myByHi(myThid)
                0152         DO bi=myBxLo(myThid),myBxHi(myThid)
                0153          DO k=1,Nr
                0154           DO j=1,sNy
                0155            DO i=1,sNx
                0156             cg3d_b(i,j,k,bi,bj) = cg3d_b(i,j,k,bi,bj)*rhsNorm
                0157             cg3d_x(i,j,k,bi,bj) = cg3d_x(i,j,k,bi,bj)*rhsNorm
                0158            ENDDO
88830be691 Alis*0159           ENDDO
                0160          ENDDO
                0161         ENDDO
                0162        ENDDO
e6e223b277 Jean*0163 C- end Normalise RHS
                0164       ENDIF
88830be691 Alis*0165 
                0166 C--   Update overlaps
12c8b75709 Jean*0167       _EXCH_XYZ_RL( cg3d_x, myThid )
88830be691 Alis*0168 
d2f5c0b29f Jean*0169 C--   Initial residual calculation (with free-Surface term)
88830be691 Alis*0170       DO bj=myByLo(myThid),myByHi(myThid)
                0171        DO bi=myBxLo(myThid),myBxHi(myThid)
a619b19bc9 Jean*0172         errTile(bi,bj)    = 0. _d 0
                0173         sumRHStile(bi,bj) = 0. _d 0
d540b62759 Jean*0174 #ifdef NONLIN_FRSURF
                0175         IF ( select_rStar .NE. 0 ) THEN
                0176           DO j=1,sNy
                0177            DO i=1,sNx
                0178              surfTerm(i,j) = 0.
                0179            ENDDO
                0180           ENDDO
                0181           DO k=1,Nr
                0182            DO j=1,sNy
                0183             DO i=1,sNx
                0184              surfTerm(i,j) = surfTerm(i,j)
                0185      &         +cg3d_x(i,j,k,bi,bj)*drF(k)*h0FacC(i,j,k,bi,bj)
                0186             ENDDO
                0187            ENDDO
                0188           ENDDO
                0189           DO j=1,sNy
                0190            DO i=1,sNx
764aea3440 Jean*0191              ks = kSurfC(i,j,bi,bj)
d540b62759 Jean*0192              surfTerm(i,j) = surfTerm(i,j)*cg3dNorm
                0193      &              *recip_Rcol(i,j,bi,bj)*recip_Rcol(i,j,bi,bj)
                0194      &              *rA(i,j,bi,bj)*deepFac2F(ks)
e6e223b277 Jean*0195      &              *recip_Bo(i,j,bi,bj)/deltaTMom/deltaTFreeSurf
d540b62759 Jean*0196            ENDDO
                0197           ENDDO
                0198         ENDIF
                0199 #endif /* NONLIN_FRSURF */
                0200         DO k=1,Nr
                0201          km1 = MAX(k-1, 1 )
                0202          kp1 = MIN(k+1, Nr)
e4f5a8bbfa Jean*0203          maskM1 = 1. _d 0
                0204          maskP1 = 1. _d 0
d540b62759 Jean*0205          IF ( k .EQ. 1 ) maskM1 = 0. _d 0
                0206          IF ( k .EQ. Nr) maskP1 = 0. _d 0
cb36328bb9 Mart*0207 #ifdef TARGET_NEC_SX
                0208 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0209 #endif /* TARGET_NEC_SX */
d540b62759 Jean*0210          DO j=1,sNy
                0211           DO i=1,sNx
                0212            cg3d_r(i,j,k,bi,bj) = cg3d_b(i,j,k,bi,bj)
                0213      &     -( 0.
                0214      &       +aW3d( i, j, k, bi,bj)*cg3d_x(i-1,j, k, bi,bj)
                0215      &       +aW3d(i+1,j, k, bi,bj)*cg3d_x(i+1,j, k, bi,bj)
                0216      &       +aS3d( i, j, k, bi,bj)*cg3d_x( i,j-1,k, bi,bj)
                0217      &       +aS3d( i,j+1,k, bi,bj)*cg3d_x( i,j+1,k, bi,bj)
                0218      &       +aV3d( i, j, k, bi,bj)*cg3d_x( i, j,km1,bi,bj)*maskM1
                0219      &       +aV3d( i, j,kp1,bi,bj)*cg3d_x( i, j,kp1,bi,bj)*maskP1
                0220      &       +aC3d( i, j, k, bi,bj)*cg3d_x( i, j, k, bi,bj)
                0221 #ifdef NONLIN_FRSURF
                0222      &       -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj)
                0223 #endif /* NONLIN_FRSURF */
                0224      &      )
a619b19bc9 Jean*0225            errTile(bi,bj) = errTile(bi,bj)
d540b62759 Jean*0226      &                    +cg3d_r(i,j,k,bi,bj)*cg3d_r(i,j,k,bi,bj)
                0227            sumRHStile(bi,bj) = sumRHStile(bi,bj)+cg3d_b(i,j,k,bi,bj)
88830be691 Alis*0228           ENDDO
                0229          ENDDO
c11d209635 Jean*0230          DO j=0,sNy+1
                0231           DO i=0,sNx+1
d540b62759 Jean*0232            cg3d_s(i,j,k,bi,bj) = 0.
4905195236 Jean*0233           ENDDO
                0234          ENDDO
88830be691 Alis*0235         ENDDO
                0236        ENDDO
                0237       ENDDO
0cdaaf4c8e Jean*0238        CALL EXCH_S3D_RL( cg3d_r, Nr, myThid )
a619b19bc9 Jean*0239        CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid )
c11d209635 Jean*0240        CALL GLOBAL_SUM_TILE_RL( errTile,    err_sq, myThid )
764aea3440 Jean*0241       IF ( debugLevel.GE.debLevC .AND. diagFreq.GT.0. ) THEN
debbfce040 Jean*0242         CALL WRITE_FLD_S3D_RL(
d540b62759 Jean*0243      I        'cg3d_r_I', 'I10', 1, Nr, cg3d_r, myIter, myThid )
                0244       ENDIF
0cdaaf4c8e Jean*0245 
c11d209635 Jean*0246       actualIts = 0
                0247       firstResidual = SQRT(err_sq)
                0248 
764aea3440 Jean*0249       printResidual = .FALSE.
eb7553bdcd Jean*0250       IF ( debugLevel .GE. debLevZero ) THEN
                0251         _BEGIN_MASTER( myThid )
764aea3440 Jean*0252         printResidual = printResidualFreq.GE.1
e6e223b277 Jean*0253         WRITE(standardMessageUnit,'(A,1P2E22.14)')
ffc25bb3ad Alis*0254      &     ' cg3d: Sum(rhs),rhsMax = ',sumRHS,rhsMax
eb7553bdcd Jean*0255         _END_MASTER( myThid )
                0256       ENDIF
88830be691 Alis*0257 
c11d209635 Jean*0258       IF ( err_sq .LT. cg3dTolerance_sq ) GOTO 11
88830be691 Alis*0259 
                0260 C     >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
e4f5a8bbfa Jean*0261       DO 10 it3d=1, numIters
88830be691 Alis*0262 
                0263 C--    Solve preconditioning equation and update
                0264 C--    conjugate direction vector "s".
cb36328bb9 Mart*0265 C      Note. On the next two loops over all tiles the inner loop ranges
a619b19bc9 Jean*0266 C            in sNx and sNy are expanded by 1 to avoid a communication
                0267 C            step. However this entails a bit of gynamastics because we only
980a7d9757 Jean*0268 C            want eta_qrN for the interior points.
88830be691 Alis*0269        DO bj=myByLo(myThid),myByHi(myThid)
                0270         DO bi=myBxLo(myThid),myBxHi(myThid)
a619b19bc9 Jean*0271          eta_qrNtile(bi,bj) = 0. _d 0
d540b62759 Jean*0272          DO k=1,1
cb36328bb9 Mart*0273 #ifdef TARGET_NEC_SX
                0274 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0275 #endif /* TARGET_NEC_SX */
c11d209635 Jean*0276           DO j=0,sNy+1
                0277            DO i=0,sNx+1
d540b62759 Jean*0278             cg3d_q(i,j,k,bi,bj) = zMC(i,j,k,bi,bj)
                0279      &                        *cg3d_r(i,j,k,bi,bj)
88830be691 Alis*0280            ENDDO
                0281           ENDDO
                0282          ENDDO
d540b62759 Jean*0283          DO k=2,Nr
cb36328bb9 Mart*0284 #ifdef TARGET_NEC_SX
                0285 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0286 #endif /* TARGET_NEC_SX */
c11d209635 Jean*0287           DO j=0,sNy+1
                0288            DO i=0,sNx+1
d540b62759 Jean*0289             cg3d_q(i,j,k,bi,bj) = zMC(i,j,k,bi,bj)
                0290      &                      *( cg3d_r(i,j,k,bi,bj)
                0291      &                         -zML(i,j,k,bi,bj)*cg3d_q(i,j,k-1,bi,bj)
                0292      &                       )
88830be691 Alis*0293            ENDDO
                0294           ENDDO
                0295          ENDDO
d540b62759 Jean*0296          DO k=Nr,Nr
cb36328bb9 Mart*0297 #ifdef TARGET_NEC_SX
                0298 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0299 #endif /* TARGET_NEC_SX */
d540b62759 Jean*0300           DO j=1,sNy
                0301            DO i=1,sNx
a619b19bc9 Jean*0302             eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
d540b62759 Jean*0303      &                        +cg3d_q(i,j,k,bi,bj)*cg3d_r(i,j,k,bi,bj)
88830be691 Alis*0304            ENDDO
                0305           ENDDO
                0306          ENDDO
d540b62759 Jean*0307          DO k=Nr-1,1,-1
cb36328bb9 Mart*0308 #ifdef TARGET_NEC_SX
                0309 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0310 #endif /* TARGET_NEC_SX */
c11d209635 Jean*0311           DO j=0,sNy+1
                0312            DO i=0,sNx+1
d540b62759 Jean*0313             cg3d_q(i,j,k,bi,bj) = cg3d_q(i,j,k,bi,bj)
                0314      &                         -zMU(i,j,k,bi,bj)*cg3d_q(i,j,k+1,bi,bj)
88830be691 Alis*0315            ENDDO
                0316           ENDDO
cb36328bb9 Mart*0317 #ifdef TARGET_NEC_SX
                0318 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0319 #endif /* TARGET_NEC_SX */
d540b62759 Jean*0320           DO j=1,sNy
                0321            DO i=1,sNx
a619b19bc9 Jean*0322             eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
d540b62759 Jean*0323      &                        +cg3d_q(i,j,k,bi,bj)*cg3d_r(i,j,k,bi,bj)
88830be691 Alis*0324            ENDDO
                0325           ENDDO
                0326          ENDDO
                0327         ENDDO
                0328        ENDDO
                0329 
a619b19bc9 Jean*0330        CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )
980a7d9757 Jean*0331        cgBeta   = eta_qrN/eta_qrNM1
88830be691 Alis*0332 CcnhDebugStarts
c11d209635 Jean*0333 c      WRITE(*,*) ' CG3D: Iteration ', it3d-1,
                0334 c    &            ' eta_qrN=', eta_qrN, ' beta=', cgBeta
88830be691 Alis*0335 CcnhDebugEnds
980a7d9757 Jean*0336        eta_qrNM1 = eta_qrN
88830be691 Alis*0337 
                0338        DO bj=myByLo(myThid),myByHi(myThid)
                0339         DO bi=myBxLo(myThid),myBxHi(myThid)
d540b62759 Jean*0340          DO k=1,Nr
c11d209635 Jean*0341           DO j=0,sNy+1
                0342            DO i=0,sNx+1
d540b62759 Jean*0343             cg3d_s(i,j,k,bi,bj) = cg3d_q(i,j,k,bi,bj)
                0344      &                   + cgBeta*cg3d_s(i,j,k,bi,bj)
88830be691 Alis*0345            ENDDO
                0346           ENDDO
                0347          ENDDO
                0348         ENDDO
                0349        ENDDO
                0350 
                0351 C==    Evaluate laplace operator on conjugate gradient vector
                0352 C==    q = A.s
                0353        DO bj=myByLo(myThid),myByHi(myThid)
                0354         DO bi=myBxLo(myThid),myBxHi(myThid)
a619b19bc9 Jean*0355          alphaTile(bi,bj) = 0. _d 0
d540b62759 Jean*0356 #ifdef NONLIN_FRSURF
                0357          IF ( select_rStar .NE. 0 ) THEN
                0358           DO j=1,sNy
                0359            DO i=1,sNx
                0360              surfTerm(i,j) = 0.
                0361            ENDDO
                0362           ENDDO
                0363           DO k=1,Nr
                0364            DO j=1,sNy
                0365             DO i=1,sNx
                0366              surfTerm(i,j) = surfTerm(i,j)
                0367      &         +cg3d_s(i,j,k,bi,bj)*drF(k)*h0FacC(i,j,k,bi,bj)
88830be691 Alis*0368             ENDDO
                0369            ENDDO
                0370           ENDDO
d540b62759 Jean*0371           DO j=1,sNy
                0372            DO i=1,sNx
764aea3440 Jean*0373              ks = kSurfC(i,j,bi,bj)
d540b62759 Jean*0374              surfTerm(i,j) = surfTerm(i,j)*cg3dNorm
                0375      &              *recip_Rcol(i,j,bi,bj)*recip_Rcol(i,j,bi,bj)
                0376      &              *rA(i,j,bi,bj)*deepFac2F(ks)
e6e223b277 Jean*0377      &              *recip_Bo(i,j,bi,bj)/deltaTMom/deltaTFreeSurf
d540b62759 Jean*0378            ENDDO
                0379           ENDDO
                0380          ENDIF
                0381 #endif /* NONLIN_FRSURF */
                0382          IF ( Nr .GT. 1 ) THEN
                0383           k=1
cb36328bb9 Mart*0384 #ifdef TARGET_NEC_SX
                0385 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0386 #endif /* TARGET_NEC_SX */
d540b62759 Jean*0387           DO j=1,sNy
                0388            DO i=1,sNx
                0389             cg3d_q(i,j,k,bi,bj) =
                0390      &        aW3d( i, j, k, bi,bj)*cg3d_s(i-1,j, k, bi,bj)
                0391      &       +aW3d(i+1,j, k, bi,bj)*cg3d_s(i+1,j, k, bi,bj)
                0392      &       +aS3d( i, j, k, bi,bj)*cg3d_s( i,j-1,k, bi,bj)
                0393      &       +aS3d( i,j+1,k, bi,bj)*cg3d_s( i,j+1,k, bi,bj)
                0394      &       +aV3d( i, j,k+1,bi,bj)*cg3d_s( i, j,k+1,bi,bj)
                0395      &       +aC3d( i, j, k, bi,bj)*cg3d_s( i, j, k, bi,bj)
                0396 #ifdef NONLIN_FRSURF
                0397      &       -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj)
                0398 #endif /* NONLIN_FRSURF */
                0399             alphaTile(bi,bj) = alphaTile(bi,bj)
                0400      &             +cg3d_s(i,j,k,bi,bj)*cg3d_q(i,j,k,bi,bj)
                0401            ENDDO
                0402           ENDDO
88830be691 Alis*0403          ELSE
d540b62759 Jean*0404           k=1
cb36328bb9 Mart*0405 #ifdef TARGET_NEC_SX
                0406 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0407 #endif /* TARGET_NEC_SX */
d540b62759 Jean*0408           DO j=1,sNy
                0409            DO i=1,sNx
                0410             cg3d_q(i,j,k,bi,bj) =
                0411      &        aW3d( i, j, k, bi,bj)*cg3d_s(i-1,j, k, bi,bj)
                0412      &       +aW3d(i+1,j, k, bi,bj)*cg3d_s(i+1,j, k, bi,bj)
                0413      &       +aS3d( i, j, k, bi,bj)*cg3d_s( i,j-1,k, bi,bj)
                0414      &       +aS3d( i,j+1,k, bi,bj)*cg3d_s( i,j+1,k, bi,bj)
                0415      &       +aC3d( i, j, k, bi,bj)*cg3d_s( i, j, k, bi,bj)
                0416 #ifdef NONLIN_FRSURF
                0417      &       -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj)
                0418 #endif /* NONLIN_FRSURF */
                0419             alphaTile(bi,bj) = alphaTile(bi,bj)
                0420      &             +cg3d_s(i,j,k,bi,bj)*cg3d_q(i,j,k,bi,bj)
88830be691 Alis*0421            ENDDO
                0422           ENDDO
                0423          ENDIF
d540b62759 Jean*0424          DO k=2,Nr-1
cb36328bb9 Mart*0425 #ifdef TARGET_NEC_SX
                0426 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0427 #endif /* TARGET_NEC_SX */
d540b62759 Jean*0428           DO j=1,sNy
                0429            DO i=1,sNx
                0430             cg3d_q(i,j,k,bi,bj) =
                0431      &        aW3d( i, j, k, bi,bj)*cg3d_s(i-1,j, k, bi,bj)
                0432      &       +aW3d(i+1,j, k, bi,bj)*cg3d_s(i+1,j, k, bi,bj)
                0433      &       +aS3d( i, j, k, bi,bj)*cg3d_s( i,j-1,k, bi,bj)
                0434      &       +aS3d( i,j+1,k, bi,bj)*cg3d_s( i,j+1,k, bi,bj)
                0435      &       +aV3d( i, j, k, bi,bj)*cg3d_s( i, j,k-1,bi,bj)
                0436      &       +aV3d( i, j,k+1,bi,bj)*cg3d_s( i, j,k+1,bi,bj)
                0437      &       +aC3d( i, j, k, bi,bj)*cg3d_s( i, j, k, bi,bj)
                0438 #ifdef NONLIN_FRSURF
                0439      &       -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj)
                0440 #endif /* NONLIN_FRSURF */
a619b19bc9 Jean*0441             alphaTile(bi,bj) = alphaTile(bi,bj)
d540b62759 Jean*0442      &             +cg3d_s(i,j,k,bi,bj)*cg3d_q(i,j,k,bi,bj)
88830be691 Alis*0443            ENDDO
                0444           ENDDO
                0445          ENDDO
                0446          IF ( Nr .GT. 1 ) THEN
d540b62759 Jean*0447           k=Nr
cb36328bb9 Mart*0448 #ifdef TARGET_NEC_SX
                0449 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0450 #endif /* TARGET_NEC_SX */
d540b62759 Jean*0451           DO j=1,sNy
                0452            DO i=1,sNx
                0453             cg3d_q(i,j,k,bi,bj) =
                0454      &        aW3d( i, j, k, bi,bj)*cg3d_s(i-1,j, k, bi,bj)
                0455      &       +aW3d(i+1,j, k, bi,bj)*cg3d_s(i+1,j, k, bi,bj)
                0456      &       +aS3d( i, j, k, bi,bj)*cg3d_s( i,j-1,k, bi,bj)
                0457      &       +aS3d( i,j+1,k, bi,bj)*cg3d_s( i,j+1,k, bi,bj)
                0458      &       +aV3d( i, j, k, bi,bj)*cg3d_s( i, j,k-1,bi,bj)
                0459      &       +aC3d( i, j, k, bi,bj)*cg3d_s( i, j, k, bi,bj)
                0460 #ifdef NONLIN_FRSURF
                0461      &       -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj)
                0462 #endif /* NONLIN_FRSURF */
                0463             alphaTile(bi,bj) = alphaTile(bi,bj)
                0464      &             +cg3d_s(i,j,k,bi,bj)*cg3d_q(i,j,k,bi,bj)
88830be691 Alis*0465            ENDDO
                0466           ENDDO
                0467          ENDIF
                0468         ENDDO
                0469        ENDDO
a619b19bc9 Jean*0470        CALL GLOBAL_SUM_TILE_RL( alphaTile,  alpha,  myThid )
88830be691 Alis*0471 CcnhDebugStarts
c11d209635 Jean*0472 c      WRITE(*,*) ' CG3D: Iteration ', it3d-1,
                0473 c    &            ' SUM(s*q)=', alpha, ' alpha=', eta_qrN/alpha
88830be691 Alis*0474 CcnhDebugEnds
980a7d9757 Jean*0475        alpha = eta_qrN/alpha
a619b19bc9 Jean*0476 
c11d209635 Jean*0477 C==    Update simultaneously solution and residual vectors (and Iter number)
88830be691 Alis*0478 C      Now compute "interior" points.
                0479        DO bj=myByLo(myThid),myByHi(myThid)
                0480         DO bi=myBxLo(myThid),myBxHi(myThid)
d540b62759 Jean*0481          errTile(bi,bj) = 0. _d 0
                0482          DO k=1,Nr
cb36328bb9 Mart*0483 #ifdef TARGET_NEC_SX
                0484 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0485 #endif /* TARGET_NEC_SX */
d540b62759 Jean*0486           DO j=1,sNy
                0487            DO i=1,sNx
                0488             cg3d_x(i,j,k,bi,bj)=cg3d_x(i,j,k,bi,bj)
                0489      &            +alpha*cg3d_s(i,j,k,bi,bj)
                0490             cg3d_r(i,j,k,bi,bj)=cg3d_r(i,j,k,bi,bj)
                0491      &            -alpha*cg3d_q(i,j,k,bi,bj)
a619b19bc9 Jean*0492             errTile(bi,bj) = errTile(bi,bj)
d540b62759 Jean*0493      &            +cg3d_r(i,j,k,bi,bj)*cg3d_r(i,j,k,bi,bj)
88830be691 Alis*0494            ENDDO
                0495           ENDDO
                0496          ENDDO
                0497         ENDDO
                0498        ENDDO
c11d209635 Jean*0499        actualIts = it3d
88830be691 Alis*0500 
c11d209635 Jean*0501        CALL GLOBAL_SUM_TILE_RL( errTile,    err_sq, myThid )
764aea3440 Jean*0502        IF ( printResidual ) THEN
                0503         IF ( MOD( it3d-1, printResidualFreq ).EQ.0 ) THEN
d540b62759 Jean*0504          WRITE(msgBuf,'(A,I6,A,1PE21.14)')
c11d209635 Jean*0505      &    ' cg3d: iter=', it3d, ' ; resid.= ', SQRT(err_sq)
d540b62759 Jean*0506          CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
                0507      &                       SQUEEZE_RIGHT, myThid )
764aea3440 Jean*0508         ENDIF
d540b62759 Jean*0509        ENDIF
c11d209635 Jean*0510        IF ( err_sq .LT. cg3dTolerance_sq ) GOTO 11
0cdaaf4c8e Jean*0511        CALL EXCH_S3D_RL( cg3d_r, Nr, myThid )
88830be691 Alis*0512 
                0513    10 CONTINUE
                0514    11 CONTINUE
                0515 
764aea3440 Jean*0516       IF ( debugLevel.GE.debLevC .AND. diagFreq.GT.0. ) THEN
debbfce040 Jean*0517         CALL WRITE_FLD_S3D_RL(
d540b62759 Jean*0518      I        'cg3d_r_F', 'I10', 1, Nr, cg3d_r, myIter, myThid )
                0519       ENDIF
                0520 
e6e223b277 Jean*0521       IF ( cg3dNormaliseRHS ) THEN
88830be691 Alis*0522 C--   Un-normalise the answer
e6e223b277 Jean*0523 c      rhsMax = 1. _d 0 / rhsNorm
                0524        DO bj=myByLo(myThid),myByHi(myThid)
                0525         DO bi=myBxLo(myThid),myBxHi(myThid)
                0526          DO k=1,Nr
                0527           DO j=1,sNy
                0528            DO i=1,sNx
                0529             cg3d_x(i,j,k,bi,bj) = cg3d_x(i,j,k,bi,bj)/rhsNorm
                0530 c           cg3d_x(i,j,k,bi,bj) = cg3d_x(i,j,k,bi,bj)*rhsMax
                0531            ENDDO
88830be691 Alis*0532           ENDDO
                0533          ENDDO
                0534         ENDDO
                0535        ENDDO
e6e223b277 Jean*0536       ENDIF
88830be691 Alis*0537 
c11d209635 Jean*0538 C--   Return parameters to caller
                0539       lastResidual = SQRT(err_sq)
d540b62759 Jean*0540       numIters = actualIts
88830be691 Alis*0541 
                0542 #endif /* ALLOW_NONHYDROSTATIC */
                0543 
                0544       RETURN
                0545       END