Back to home page

MITgcm

 
 

    


File indexing completed on 2018-03-02 18:36:32 UTC

view on githubraw file Latest commit add29e06 on 2018-01-31 20:35:05 UTC
88830be691 Alis*0001 #include "CPP_OPTIONS.h"
cb36328bb9 Mart*0002 #ifdef TARGET_NEC_SX
                0003 C     set a sensible default for the outer loop unrolling parameter that can
                0004 C     be overriden in the Makefile with the DEFINES macro or in CPP_OPTIONS.h
                0005 #ifndef CG3D_OUTERLOOPITERS
                0006 # define CG3D_OUTERLOOPITERS 10
                0007 #endif
                0008 #endif /* TARGET_NEC_SX */
88830be691 Alis*0009 
9366854e02 Chri*0010 CBOP
                0011 C     !ROUTINE: CG3D
                0012 C     !INTERFACE:
0cdaaf4c8e Jean*0013       SUBROUTINE CG3D(
c11d209635 Jean*0014      U                cg3d_b, cg3d_x,
                0015      O                firstResidual, lastResidual,
ffc25bb3ad Alis*0016      U                numIters,
d540b62759 Jean*0017      I                myIter, myThid )
9366854e02 Chri*0018 C     !DESCRIPTION: \bv
                0019 C     *==========================================================*
a619b19bc9 Jean*0020 C     | SUBROUTINE CG3D
                0021 C     | o Three-dimensional grid problem conjugate-gradient
                0022 C     |   inverter (with preconditioner).
9366854e02 Chri*0023 C     *==========================================================*
a619b19bc9 Jean*0024 C     | Con. grad is an iterative procedure for solving Ax = b.
                0025 C     | It requires the A be symmetric.
                0026 C     | This implementation assumes A is a seven-diagonal
                0027 C     | matrix of the form that arises in the discrete
                0028 C     | representation of the del^2 operator in a
                0029 C     | three-dimensional space.
                0030 C     | Notes:
                0031 C     | ======
                0032 C     | This implementation can support shared-memory
                0033 C     | multi-threaded execution. In order to do this COMMON
                0034 C     | blocks are used for many of the arrays - even ones that
                0035 C     | are only used for intermedaite results. This design is
                0036 C     | OK if you want to all the threads to collaborate on
                0037 C     | solving the same problem. On the other hand if you want
                0038 C     | the threads to solve several different problems
                0039 C     | concurrently this implementation will not work.
9366854e02 Chri*0040 C     *==========================================================*
                0041 C     \ev
88830be691 Alis*0042 
9366854e02 Chri*0043 C     !USES:
                0044       IMPLICIT NONE
88830be691 Alis*0045 C     === Global data ===
                0046 #include "SIZE.h"
                0047 #include "EEPARAMS.h"
                0048 #include "PARAMS.h"
                0049 #include "GRID.h"
d540b62759 Jean*0050 #include "SURFACE.h"
88830be691 Alis*0051 #include "CG3D.h"
                0052 
9366854e02 Chri*0053 C     !INPUT/OUTPUT PARAMETERS:
88830be691 Alis*0054 C     === Routine arguments ===
c11d209635 Jean*0055 C     cg3d_b    :: The source term or "right hand side" (output: normalised RHS)
                0056 C     cg3d_x    :: The solution (input: first guess)
d540b62759 Jean*0057 C     firstResidual :: the initial residual before any iterations
c11d209635 Jean*0058 C     minResidualSq :: the lowest residual reached (squared)
                0059 CC    lastResidual  :: the actual residual reached
                0060 C     numIters  :: Inp: the maximum number of iterations allowed
                0061 C                  Out: the actual number of iterations used
                0062 CC    nIterMin  :: Inp: decide to store (if >=0) or not (if <0) lowest res. sol.
                0063 CC                 Out: iteration number corresponding to lowest residual
d540b62759 Jean*0064 C     myIter    :: Current iteration number in simulation
                0065 C     myThid    :: my Thread Id number
                0066       _RL     cg3d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy)
                0067       _RL     cg3d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy)
                0068       _RL     firstResidual
                0069       _RL     lastResidual
ffc25bb3ad Alis*0070       INTEGER numIters
d540b62759 Jean*0071       INTEGER myIter
88830be691 Alis*0072       INTEGER myThid
                0073 
76c208745c Alis*0074 #ifdef ALLOW_NONHYDROSTATIC
                0075 
9366854e02 Chri*0076 C     !LOCAL VARIABLES:
88830be691 Alis*0077 C     === Local variables ====
c11d209635 Jean*0078 C     bi, bj     :: tile index in X and Y.
d540b62759 Jean*0079 C     i, j, k    :: Loop counters
                0080 C     it3d       :: Loop counter for CG iterations
c11d209635 Jean*0081 C     actualIts  :: actual CG iteration number
                0082 C     err_sq     :: Measure of the square of the residual of Ax - b.
                0083 C     eta_qrN    :: Used in computing search directions; suffix N and NM1
                0084 C     eta_qrNM1     denote current and previous iterations respectively.
                0085 C     cgBeta     :: coeff used to update conjugate direction vector "s".
                0086 C     alpha      :: coeff used to update solution & residual
                0087 C     sumRHS     :: Sum of right-hand-side. Sometimes this is a useful
                0088 C                   debugging/trouble shooting diagnostic. For neumann problems
                0089 C                   sumRHS needs to be ~0 or it converge at a non-zero residual.
                0090 C     cg2d_min   :: used to store solution corresponding to lowest residual.
d540b62759 Jean*0091 C     msgBuf     :: Informational/error message buffer
a619b19bc9 Jean*0092       INTEGER bi, bj
d540b62759 Jean*0093       INTEGER i, j, k, it3d
c11d209635 Jean*0094       INTEGER actualIts
d540b62759 Jean*0095       INTEGER km1, kp1
                0096       _RL     maskM1, maskP1
c11d209635 Jean*0097       _RL     cg3dTolerance_sq
                0098       _RL     err_sq,  errTile(nSx,nSy)
                0099       _RL     eta_qrN, eta_qrNtile(nSx,nSy)
d540b62759 Jean*0100       _RL     eta_qrNM1
                0101       _RL     cgBeta
c11d209635 Jean*0102       _RL     alpha ,  alphaTile(nSx,nSy)
                0103       _RL     sumRHS,  sumRHStile(nSx,nSy)
d540b62759 Jean*0104       _RL     rhsMax
                0105       _RL     rhsNorm
                0106       CHARACTER*(MAX_LEN_MBUF) msgBuf
764aea3440 Jean*0107       LOGICAL printResidual
d540b62759 Jean*0108       _RL     surfFac
                0109 #ifdef NONLIN_FRSURF
                0110       INTEGER ks
                0111       _RL     surfTerm(sNx,sNy)
                0112 #endif /* NONLIN_FRSURF */
9366854e02 Chri*0113 CEOP
88830be691 Alis*0114 
c11d209635 Jean*0115 C--   Initialise auxiliary constant, some output variable
                0116       cg3dTolerance_sq = cg3dTargetResidual*cg3dTargetResidual
d540b62759 Jean*0117       IF ( select_rStar .NE. 0 ) THEN
                0118         surfFac = freeSurfFac
                0119       ELSE
                0120         surfFac = 0.
                0121       ENDIF
                0122 #ifdef NONLIN_FRSURF
                0123       DO j=1,sNy
                0124        DO i=1,sNx
                0125          surfTerm(i,j) = 0.
                0126        ENDDO
                0127       ENDDO
                0128 #endif /* NONLIN_FRSURF */
6d54cf9ca1 Ed H*0129 
88830be691 Alis*0130 C--   Initialise inverter
d540b62759 Jean*0131       eta_qrNM1 = 1. _d 0
88830be691 Alis*0132 
                0133 C--   Normalise RHS
                0134       rhsMax = 0. _d 0
                0135       DO bj=myByLo(myThid),myByHi(myThid)
                0136        DO bi=myBxLo(myThid),myBxHi(myThid)
d540b62759 Jean*0137         DO k=1,Nr
                0138          DO j=1,sNy
                0139           DO i=1,sNx
                0140            cg3d_b(i,j,k,bi,bj) = cg3d_b(i,j,k,bi,bj)*cg3dNorm
                0141      &                          * maskC(i,j,k,bi,bj)
                0142            rhsMax = MAX(ABS(cg3d_b(i,j,k,bi,bj)),rhsMax)
88830be691 Alis*0143           ENDDO
                0144          ENDDO
                0145         ENDDO
                0146        ENDDO
                0147       ENDDO
12c8b75709 Jean*0148       _GLOBAL_MAX_RL( rhsMax, myThid )
88830be691 Alis*0149       rhsNorm = 1. _d 0
                0150       IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
                0151       DO bj=myByLo(myThid),myByHi(myThid)
                0152        DO bi=myBxLo(myThid),myBxHi(myThid)
d540b62759 Jean*0153         DO k=1,Nr
                0154          DO j=1,sNy
                0155           DO i=1,sNx
                0156            cg3d_b(i,j,k,bi,bj) = cg3d_b(i,j,k,bi,bj)*rhsNorm
                0157            cg3d_x(i,j,k,bi,bj) = cg3d_x(i,j,k,bi,bj)*rhsNorm
88830be691 Alis*0158           ENDDO
                0159          ENDDO
                0160         ENDDO
                0161        ENDDO
                0162       ENDDO
                0163 
                0164 C--   Update overlaps
12c8b75709 Jean*0165       _EXCH_XYZ_RL( cg3d_x, myThid )
88830be691 Alis*0166 
d2f5c0b29f Jean*0167 C--   Initial residual calculation (with free-Surface term)
88830be691 Alis*0168       DO bj=myByLo(myThid),myByHi(myThid)
                0169        DO bi=myBxLo(myThid),myBxHi(myThid)
a619b19bc9 Jean*0170         errTile(bi,bj)    = 0. _d 0
                0171         sumRHStile(bi,bj) = 0. _d 0
d540b62759 Jean*0172 #ifdef NONLIN_FRSURF
                0173         IF ( select_rStar .NE. 0 ) THEN
                0174           DO j=1,sNy
                0175            DO i=1,sNx
                0176              surfTerm(i,j) = 0.
                0177            ENDDO
                0178           ENDDO
                0179           DO k=1,Nr
                0180            DO j=1,sNy
                0181             DO i=1,sNx
                0182              surfTerm(i,j) = surfTerm(i,j)
                0183      &         +cg3d_x(i,j,k,bi,bj)*drF(k)*h0FacC(i,j,k,bi,bj)
                0184             ENDDO
                0185            ENDDO
                0186           ENDDO
                0187           DO j=1,sNy
                0188            DO i=1,sNx
764aea3440 Jean*0189              ks = kSurfC(i,j,bi,bj)
d540b62759 Jean*0190              surfTerm(i,j) = surfTerm(i,j)*cg3dNorm
                0191      &              *recip_Rcol(i,j,bi,bj)*recip_Rcol(i,j,bi,bj)
                0192      &              *rA(i,j,bi,bj)*deepFac2F(ks)
                0193      &              *recip_Bo(i,j,bi,bj)/deltaTMom/deltaTfreesurf
                0194            ENDDO
                0195           ENDDO
                0196         ENDIF
                0197 #endif /* NONLIN_FRSURF */
                0198         DO k=1,Nr
                0199          km1 = MAX(k-1, 1 )
                0200          kp1 = MIN(k+1, Nr)
e4f5a8bbfa Jean*0201          maskM1 = 1. _d 0
                0202          maskP1 = 1. _d 0
d540b62759 Jean*0203          IF ( k .EQ. 1 ) maskM1 = 0. _d 0
                0204          IF ( k .EQ. Nr) maskP1 = 0. _d 0
cb36328bb9 Mart*0205 #ifdef TARGET_NEC_SX
                0206 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0207 #endif /* TARGET_NEC_SX */
d540b62759 Jean*0208          DO j=1,sNy
                0209           DO i=1,sNx
                0210            cg3d_r(i,j,k,bi,bj) = cg3d_b(i,j,k,bi,bj)
                0211      &     -( 0.
                0212      &       +aW3d( i, j, k, bi,bj)*cg3d_x(i-1,j, k, bi,bj)
                0213      &       +aW3d(i+1,j, k, bi,bj)*cg3d_x(i+1,j, k, bi,bj)
                0214      &       +aS3d( i, j, k, bi,bj)*cg3d_x( i,j-1,k, bi,bj)
                0215      &       +aS3d( i,j+1,k, bi,bj)*cg3d_x( i,j+1,k, bi,bj)
                0216      &       +aV3d( i, j, k, bi,bj)*cg3d_x( i, j,km1,bi,bj)*maskM1
                0217      &       +aV3d( i, j,kp1,bi,bj)*cg3d_x( i, j,kp1,bi,bj)*maskP1
                0218      &       +aC3d( i, j, k, bi,bj)*cg3d_x( i, j, k, bi,bj)
                0219 #ifdef NONLIN_FRSURF
                0220      &       -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj)
                0221 #endif /* NONLIN_FRSURF */
                0222      &      )
a619b19bc9 Jean*0223            errTile(bi,bj) = errTile(bi,bj)
d540b62759 Jean*0224      &                    +cg3d_r(i,j,k,bi,bj)*cg3d_r(i,j,k,bi,bj)
                0225            sumRHStile(bi,bj) = sumRHStile(bi,bj)+cg3d_b(i,j,k,bi,bj)
88830be691 Alis*0226           ENDDO
                0227          ENDDO
c11d209635 Jean*0228          DO j=0,sNy+1
                0229           DO i=0,sNx+1
d540b62759 Jean*0230            cg3d_s(i,j,k,bi,bj) = 0.
4905195236 Jean*0231           ENDDO
                0232          ENDDO
88830be691 Alis*0233         ENDDO
                0234        ENDDO
                0235       ENDDO
0cdaaf4c8e Jean*0236        CALL EXCH_S3D_RL( cg3d_r, Nr, myThid )
a619b19bc9 Jean*0237        CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid )
c11d209635 Jean*0238        CALL GLOBAL_SUM_TILE_RL( errTile,    err_sq, myThid )
764aea3440 Jean*0239       IF ( debugLevel.GE.debLevC .AND. diagFreq.GT.0. ) THEN
debbfce040 Jean*0240         CALL WRITE_FLD_S3D_RL(
d540b62759 Jean*0241      I        'cg3d_r_I', 'I10', 1, Nr, cg3d_r, myIter, myThid )
                0242       ENDIF
0cdaaf4c8e Jean*0243 
c11d209635 Jean*0244       actualIts = 0
                0245       firstResidual = SQRT(err_sq)
                0246 
764aea3440 Jean*0247       printResidual = .FALSE.
eb7553bdcd Jean*0248       IF ( debugLevel .GE. debLevZero ) THEN
                0249         _BEGIN_MASTER( myThid )
764aea3440 Jean*0250         printResidual = printResidualFreq.GE.1
d540b62759 Jean*0251         WRITE(standardmessageunit,'(A,1P2E22.14)')
ffc25bb3ad Alis*0252      &     ' cg3d: Sum(rhs),rhsMax = ',sumRHS,rhsMax
eb7553bdcd Jean*0253         _END_MASTER( myThid )
                0254       ENDIF
88830be691 Alis*0255 
c11d209635 Jean*0256       IF ( err_sq .LT. cg3dTolerance_sq ) GOTO 11
88830be691 Alis*0257 
                0258 C     >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
e4f5a8bbfa Jean*0259       DO 10 it3d=1, numIters
88830be691 Alis*0260 
                0261 C--    Solve preconditioning equation and update
                0262 C--    conjugate direction vector "s".
cb36328bb9 Mart*0263 C      Note. On the next two loops over all tiles the inner loop ranges
a619b19bc9 Jean*0264 C            in sNx and sNy are expanded by 1 to avoid a communication
                0265 C            step. However this entails a bit of gynamastics because we only
980a7d9757 Jean*0266 C            want eta_qrN for the interior points.
88830be691 Alis*0267        DO bj=myByLo(myThid),myByHi(myThid)
                0268         DO bi=myBxLo(myThid),myBxHi(myThid)
a619b19bc9 Jean*0269          eta_qrNtile(bi,bj) = 0. _d 0
d540b62759 Jean*0270          DO k=1,1
cb36328bb9 Mart*0271 #ifdef TARGET_NEC_SX
                0272 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0273 #endif /* TARGET_NEC_SX */
c11d209635 Jean*0274           DO j=0,sNy+1
                0275            DO i=0,sNx+1
d540b62759 Jean*0276             cg3d_q(i,j,k,bi,bj) = zMC(i,j,k,bi,bj)
                0277      &                        *cg3d_r(i,j,k,bi,bj)
88830be691 Alis*0278            ENDDO
                0279           ENDDO
                0280          ENDDO
d540b62759 Jean*0281          DO k=2,Nr
cb36328bb9 Mart*0282 #ifdef TARGET_NEC_SX
                0283 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0284 #endif /* TARGET_NEC_SX */
c11d209635 Jean*0285           DO j=0,sNy+1
                0286            DO i=0,sNx+1
d540b62759 Jean*0287             cg3d_q(i,j,k,bi,bj) = zMC(i,j,k,bi,bj)
                0288      &                      *( cg3d_r(i,j,k,bi,bj)
                0289      &                         -zML(i,j,k,bi,bj)*cg3d_q(i,j,k-1,bi,bj)
                0290      &                       )
88830be691 Alis*0291            ENDDO
                0292           ENDDO
                0293          ENDDO
d540b62759 Jean*0294          DO k=Nr,Nr
cb36328bb9 Mart*0295 #ifdef TARGET_NEC_SX
                0296 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0297 #endif /* TARGET_NEC_SX */
d540b62759 Jean*0298           DO j=1,sNy
                0299            DO i=1,sNx
a619b19bc9 Jean*0300             eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
d540b62759 Jean*0301      &                        +cg3d_q(i,j,k,bi,bj)*cg3d_r(i,j,k,bi,bj)
88830be691 Alis*0302            ENDDO
                0303           ENDDO
                0304          ENDDO
d540b62759 Jean*0305          DO k=Nr-1,1,-1
cb36328bb9 Mart*0306 #ifdef TARGET_NEC_SX
                0307 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0308 #endif /* TARGET_NEC_SX */
c11d209635 Jean*0309           DO j=0,sNy+1
                0310            DO i=0,sNx+1
d540b62759 Jean*0311             cg3d_q(i,j,k,bi,bj) = cg3d_q(i,j,k,bi,bj)
                0312      &                         -zMU(i,j,k,bi,bj)*cg3d_q(i,j,k+1,bi,bj)
88830be691 Alis*0313            ENDDO
                0314           ENDDO
cb36328bb9 Mart*0315 #ifdef TARGET_NEC_SX
                0316 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0317 #endif /* TARGET_NEC_SX */
d540b62759 Jean*0318           DO j=1,sNy
                0319            DO i=1,sNx
a619b19bc9 Jean*0320             eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
d540b62759 Jean*0321      &                        +cg3d_q(i,j,k,bi,bj)*cg3d_r(i,j,k,bi,bj)
88830be691 Alis*0322            ENDDO
                0323           ENDDO
                0324          ENDDO
                0325         ENDDO
                0326        ENDDO
                0327 
a619b19bc9 Jean*0328        CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )
980a7d9757 Jean*0329        cgBeta   = eta_qrN/eta_qrNM1
88830be691 Alis*0330 CcnhDebugStarts
c11d209635 Jean*0331 c      WRITE(*,*) ' CG3D: Iteration ', it3d-1,
                0332 c    &            ' eta_qrN=', eta_qrN, ' beta=', cgBeta
88830be691 Alis*0333 CcnhDebugEnds
980a7d9757 Jean*0334        eta_qrNM1 = eta_qrN
88830be691 Alis*0335 
                0336        DO bj=myByLo(myThid),myByHi(myThid)
                0337         DO bi=myBxLo(myThid),myBxHi(myThid)
d540b62759 Jean*0338          DO k=1,Nr
c11d209635 Jean*0339           DO j=0,sNy+1
                0340            DO i=0,sNx+1
d540b62759 Jean*0341             cg3d_s(i,j,k,bi,bj) = cg3d_q(i,j,k,bi,bj)
                0342      &                   + cgBeta*cg3d_s(i,j,k,bi,bj)
88830be691 Alis*0343            ENDDO
                0344           ENDDO
                0345          ENDDO
                0346         ENDDO
                0347        ENDDO
                0348 
                0349 C==    Evaluate laplace operator on conjugate gradient vector
                0350 C==    q = A.s
                0351        DO bj=myByLo(myThid),myByHi(myThid)
                0352         DO bi=myBxLo(myThid),myBxHi(myThid)
a619b19bc9 Jean*0353          alphaTile(bi,bj) = 0. _d 0
d540b62759 Jean*0354 #ifdef NONLIN_FRSURF
                0355          IF ( select_rStar .NE. 0 ) THEN
                0356           DO j=1,sNy
                0357            DO i=1,sNx
                0358              surfTerm(i,j) = 0.
                0359            ENDDO
                0360           ENDDO
                0361           DO k=1,Nr
                0362            DO j=1,sNy
                0363             DO i=1,sNx
                0364              surfTerm(i,j) = surfTerm(i,j)
                0365      &         +cg3d_s(i,j,k,bi,bj)*drF(k)*h0FacC(i,j,k,bi,bj)
88830be691 Alis*0366             ENDDO
                0367            ENDDO
                0368           ENDDO
d540b62759 Jean*0369           DO j=1,sNy
                0370            DO i=1,sNx
764aea3440 Jean*0371              ks = kSurfC(i,j,bi,bj)
d540b62759 Jean*0372              surfTerm(i,j) = surfTerm(i,j)*cg3dNorm
                0373      &              *recip_Rcol(i,j,bi,bj)*recip_Rcol(i,j,bi,bj)
                0374      &              *rA(i,j,bi,bj)*deepFac2F(ks)
                0375      &              *recip_Bo(i,j,bi,bj)/deltaTMom/deltaTfreesurf
                0376            ENDDO
                0377           ENDDO
                0378          ENDIF
                0379 #endif /* NONLIN_FRSURF */
                0380          IF ( Nr .GT. 1 ) THEN
                0381           k=1
cb36328bb9 Mart*0382 #ifdef TARGET_NEC_SX
                0383 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0384 #endif /* TARGET_NEC_SX */
d540b62759 Jean*0385           DO j=1,sNy
                0386            DO i=1,sNx
                0387             cg3d_q(i,j,k,bi,bj) =
                0388      &        aW3d( i, j, k, bi,bj)*cg3d_s(i-1,j, k, bi,bj)
                0389      &       +aW3d(i+1,j, k, bi,bj)*cg3d_s(i+1,j, k, bi,bj)
                0390      &       +aS3d( i, j, k, bi,bj)*cg3d_s( i,j-1,k, bi,bj)
                0391      &       +aS3d( i,j+1,k, bi,bj)*cg3d_s( i,j+1,k, bi,bj)
                0392      &       +aV3d( i, j,k+1,bi,bj)*cg3d_s( i, j,k+1,bi,bj)
                0393      &       +aC3d( i, j, k, bi,bj)*cg3d_s( i, j, k, bi,bj)
                0394 #ifdef NONLIN_FRSURF
                0395      &       -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj)
                0396 #endif /* NONLIN_FRSURF */
                0397             alphaTile(bi,bj) = alphaTile(bi,bj)
                0398      &             +cg3d_s(i,j,k,bi,bj)*cg3d_q(i,j,k,bi,bj)
                0399            ENDDO
                0400           ENDDO
88830be691 Alis*0401          ELSE
d540b62759 Jean*0402           k=1
cb36328bb9 Mart*0403 #ifdef TARGET_NEC_SX
                0404 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0405 #endif /* TARGET_NEC_SX */
d540b62759 Jean*0406           DO j=1,sNy
                0407            DO i=1,sNx
                0408             cg3d_q(i,j,k,bi,bj) =
                0409      &        aW3d( i, j, k, bi,bj)*cg3d_s(i-1,j, k, bi,bj)
                0410      &       +aW3d(i+1,j, k, bi,bj)*cg3d_s(i+1,j, k, bi,bj)
                0411      &       +aS3d( i, j, k, bi,bj)*cg3d_s( i,j-1,k, bi,bj)
                0412      &       +aS3d( i,j+1,k, bi,bj)*cg3d_s( i,j+1,k, bi,bj)
                0413      &       +aC3d( i, j, k, bi,bj)*cg3d_s( i, j, k, bi,bj)
                0414 #ifdef NONLIN_FRSURF
                0415      &       -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj)
                0416 #endif /* NONLIN_FRSURF */
                0417             alphaTile(bi,bj) = alphaTile(bi,bj)
                0418      &             +cg3d_s(i,j,k,bi,bj)*cg3d_q(i,j,k,bi,bj)
88830be691 Alis*0419            ENDDO
                0420           ENDDO
                0421          ENDIF
d540b62759 Jean*0422          DO k=2,Nr-1
cb36328bb9 Mart*0423 #ifdef TARGET_NEC_SX
                0424 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0425 #endif /* TARGET_NEC_SX */
d540b62759 Jean*0426           DO j=1,sNy
                0427            DO i=1,sNx
                0428             cg3d_q(i,j,k,bi,bj) =
                0429      &        aW3d( i, j, k, bi,bj)*cg3d_s(i-1,j, k, bi,bj)
                0430      &       +aW3d(i+1,j, k, bi,bj)*cg3d_s(i+1,j, k, bi,bj)
                0431      &       +aS3d( i, j, k, bi,bj)*cg3d_s( i,j-1,k, bi,bj)
                0432      &       +aS3d( i,j+1,k, bi,bj)*cg3d_s( i,j+1,k, bi,bj)
                0433      &       +aV3d( i, j, k, bi,bj)*cg3d_s( i, j,k-1,bi,bj)
                0434      &       +aV3d( i, j,k+1,bi,bj)*cg3d_s( i, j,k+1,bi,bj)
                0435      &       +aC3d( i, j, k, bi,bj)*cg3d_s( i, j, k, bi,bj)
                0436 #ifdef NONLIN_FRSURF
                0437      &       -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj)
                0438 #endif /* NONLIN_FRSURF */
a619b19bc9 Jean*0439             alphaTile(bi,bj) = alphaTile(bi,bj)
d540b62759 Jean*0440      &             +cg3d_s(i,j,k,bi,bj)*cg3d_q(i,j,k,bi,bj)
88830be691 Alis*0441            ENDDO
                0442           ENDDO
                0443          ENDDO
                0444          IF ( Nr .GT. 1 ) THEN
d540b62759 Jean*0445           k=Nr
cb36328bb9 Mart*0446 #ifdef TARGET_NEC_SX
                0447 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0448 #endif /* TARGET_NEC_SX */
d540b62759 Jean*0449           DO j=1,sNy
                0450            DO i=1,sNx
                0451             cg3d_q(i,j,k,bi,bj) =
                0452      &        aW3d( i, j, k, bi,bj)*cg3d_s(i-1,j, k, bi,bj)
                0453      &       +aW3d(i+1,j, k, bi,bj)*cg3d_s(i+1,j, k, bi,bj)
                0454      &       +aS3d( i, j, k, bi,bj)*cg3d_s( i,j-1,k, bi,bj)
                0455      &       +aS3d( i,j+1,k, bi,bj)*cg3d_s( i,j+1,k, bi,bj)
                0456      &       +aV3d( i, j, k, bi,bj)*cg3d_s( i, j,k-1,bi,bj)
                0457      &       +aC3d( i, j, k, bi,bj)*cg3d_s( i, j, k, bi,bj)
                0458 #ifdef NONLIN_FRSURF
                0459      &       -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj)
                0460 #endif /* NONLIN_FRSURF */
                0461             alphaTile(bi,bj) = alphaTile(bi,bj)
                0462      &             +cg3d_s(i,j,k,bi,bj)*cg3d_q(i,j,k,bi,bj)
88830be691 Alis*0463            ENDDO
                0464           ENDDO
                0465          ENDIF
                0466         ENDDO
                0467        ENDDO
a619b19bc9 Jean*0468        CALL GLOBAL_SUM_TILE_RL( alphaTile,  alpha,  myThid )
88830be691 Alis*0469 CcnhDebugStarts
c11d209635 Jean*0470 c      WRITE(*,*) ' CG3D: Iteration ', it3d-1,
                0471 c    &            ' SUM(s*q)=', alpha, ' alpha=', eta_qrN/alpha
88830be691 Alis*0472 CcnhDebugEnds
980a7d9757 Jean*0473        alpha = eta_qrN/alpha
a619b19bc9 Jean*0474 
c11d209635 Jean*0475 C==    Update simultaneously solution and residual vectors (and Iter number)
88830be691 Alis*0476 C      Now compute "interior" points.
                0477        DO bj=myByLo(myThid),myByHi(myThid)
                0478         DO bi=myBxLo(myThid),myBxHi(myThid)
d540b62759 Jean*0479          errTile(bi,bj) = 0. _d 0
                0480          DO k=1,Nr
cb36328bb9 Mart*0481 #ifdef TARGET_NEC_SX
                0482 !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
                0483 #endif /* TARGET_NEC_SX */
d540b62759 Jean*0484           DO j=1,sNy
                0485            DO i=1,sNx
                0486             cg3d_x(i,j,k,bi,bj)=cg3d_x(i,j,k,bi,bj)
                0487      &            +alpha*cg3d_s(i,j,k,bi,bj)
                0488             cg3d_r(i,j,k,bi,bj)=cg3d_r(i,j,k,bi,bj)
                0489      &            -alpha*cg3d_q(i,j,k,bi,bj)
a619b19bc9 Jean*0490             errTile(bi,bj) = errTile(bi,bj)
d540b62759 Jean*0491      &            +cg3d_r(i,j,k,bi,bj)*cg3d_r(i,j,k,bi,bj)
88830be691 Alis*0492            ENDDO
                0493           ENDDO
                0494          ENDDO
                0495         ENDDO
                0496        ENDDO
c11d209635 Jean*0497        actualIts = it3d
88830be691 Alis*0498 
c11d209635 Jean*0499        CALL GLOBAL_SUM_TILE_RL( errTile,    err_sq, myThid )
764aea3440 Jean*0500        IF ( printResidual ) THEN
                0501         IF ( MOD( it3d-1, printResidualFreq ).EQ.0 ) THEN
d540b62759 Jean*0502          WRITE(msgBuf,'(A,I6,A,1PE21.14)')
c11d209635 Jean*0503      &    ' cg3d: iter=', it3d, ' ; resid.= ', SQRT(err_sq)
d540b62759 Jean*0504          CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
                0505      &                       SQUEEZE_RIGHT, myThid )
764aea3440 Jean*0506         ENDIF
d540b62759 Jean*0507        ENDIF
c11d209635 Jean*0508        IF ( err_sq .LT. cg3dTolerance_sq ) GOTO 11
0cdaaf4c8e Jean*0509        CALL EXCH_S3D_RL( cg3d_r, Nr, myThid )
88830be691 Alis*0510 
                0511    10 CONTINUE
                0512    11 CONTINUE
                0513 
764aea3440 Jean*0514       IF ( debugLevel.GE.debLevC .AND. diagFreq.GT.0. ) THEN
debbfce040 Jean*0515         CALL WRITE_FLD_S3D_RL(
d540b62759 Jean*0516      I        'cg3d_r_F', 'I10', 1, Nr, cg3d_r, myIter, myThid )
                0517       ENDIF
                0518 
88830be691 Alis*0519 C--   Un-normalise the answer
                0520       DO bj=myByLo(myThid),myByHi(myThid)
                0521        DO bi=myBxLo(myThid),myBxHi(myThid)
d540b62759 Jean*0522         DO k=1,Nr
                0523          DO j=1,sNy
                0524           DO i=1,sNx
                0525            cg3d_x(i,j,k,bi,bj) = cg3d_x(i,j,k,bi,bj)/rhsNorm
88830be691 Alis*0526           ENDDO
                0527          ENDDO
                0528         ENDDO
                0529        ENDDO
                0530       ENDDO
                0531 
c11d209635 Jean*0532 C--   Return parameters to caller
                0533       lastResidual = SQRT(err_sq)
d540b62759 Jean*0534       numIters = actualIts
88830be691 Alis*0535 
                0536 #endif /* ALLOW_NONHYDROSTATIC */
                0537 
                0538       RETURN
                0539       END