Back to home page

MITgcm

 
 

    


File indexing completed on 2018-03-02 18:39:43 UTC

view on githubraw file Latest commit add29e06 on 2018-01-31 20:35:05 UTC
ca64f811ee Jean*0001 #include "CPP_EEOPTIONS.h"
                0002 #include "W2_OPTIONS.h"
                0003 
                0004 C--   File w2_cumulsum_z_tile.F: Routines that perform cumulated sum
                0005 C                                on a tiled array, corner grid-cell location
                0006 C      Contents
                0007 C      o W2_CUMULSUM_Z_TILE_RL
                0008 C      o W2_CUMULSUM_Z_TILE_RS <- not yet coded
                0009 
                0010 C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
                0011 CBOP
                0012 C     !ROUTINE: W2_CUMULSUM_Z_TILE_RL
                0013 
                0014 C     !INTERFACE:
                0015       SUBROUTINE W2_CUMULSUM_Z_TILE_RL(
                0016      O                       psiZ, psiLoc,
                0017      I                       dPsiX, dPsiY, myThid )
                0018 
                0019 C     !DESCRIPTION:
                0020 C     *==========================================================*
                0021 C     | SUBROUTINE W2\_CUMULSUM\_Z\_TILE\_RL
                0022 C     | o Handle cumulated sum for _RL tile data.
                0023 C     *==========================================================*
                0024 C     | Cumulate sum on tiled array, corner grid-cell location:
                0025 C     |  Starts from 1rst tile and, going through all tiles & all
                0026 C     |  the processes, add increment in both directions
                0027 C     *==========================================================*
                0028 
                0029 C     !USES:
                0030       IMPLICIT NONE
                0031 
                0032 C     == Global data ==
                0033 #include "SIZE.h"
                0034 #include "EEPARAMS.h"
                0035 #include "EESUPPORT.h"
                0036 #include "W2_EXCH2_SIZE.h"
936dcd2159 Jean*0037 #include "W2_EXCH2_PARAMS.h"
ca64f811ee Jean*0038 #include "W2_EXCH2_TOPOLOGY.h"
                0039 #include "CUMULSUM.h"
                0040 
                0041 C     !INPUT/OUTPUT PARAMETERS:
                0042 C     == Routine arguments ==
                0043 C     psiZ    :: results of cumulated sum, corresponds to tile South-East corner
                0044 C     psiLoc  :: cumulated sum at special locations
                0045 C     dPsiX   :: tile increment in X direction
                0046 C     dPsiY   :: tile increment in Y direction
                0047 C     myThid  :: my Thread Id. number
                0048       _RL     psiZ  (nSx,nSy)
                0049       _RL     psiLoc(2)
                0050       _RL     dPsiX (nSx,nSy)
                0051       _RL     dPsiY (nSx,nSy)
                0052       INTEGER myThid
                0053 
                0054 C     !LOCAL VARIABLES:
                0055 C     == Local variables ==
                0056 C     bi,bj   :: tile indices
                0057 C- type declaration of: loc[1,2]Buf and shareBufCS[1,2]_R8 :
                0058 C         all 4 needs to have the same length as MPI_DOUBLE_PRECISION
                0059       INTEGER bi,bj
                0060       INTEGER tN, tS
                0061       Real*8  globalBuf(3,W2_maxNbTiles)
936dcd2159 Jean*0062 #ifndef W2_CUMSUM_USE_MATRIX
                0063       Real*8 facetXYSum(2,W2_maxNbFacets)
                0064       Real*8 facet_CSum(  W2_maxNbFacets)
                0065       INTEGER fNx, fNy, nbTx, nbTy
                0066       INTEGER i, j
                0067 #endif
ca64f811ee Jean*0068 #ifdef ALLOW_USE_MPI
2790c2105f Jean*0069       INTEGER np, pId
ca64f811ee Jean*0070       INTEGER lbuf1, lbuf2, idest, itag, ready_to_receive
                0071       INTEGER istatus(MPI_STATUS_SIZE), ierr
                0072       Real*8  loc1Buf  (nSx,nSy)
                0073       Real*8  loc2Buf(2,nSx,nSy)
                0074 #endif /* ALLOW_USE_MPI */
                0075 CEOP
                0076 
                0077 C--   Initialise to zero:
                0078       psiLoc(1) = 0.
                0079       psiLoc(2) = 0.
                0080       DO tN = 1,exch2_nTiles
                0081         globalBuf(1,tN) = 0.
                0082         globalBuf(2,tN) = 0.
                0083         globalBuf(3,tN) = 0.
                0084       ENDDO
                0085 
                0086 C--   write input into shared-buffer array
                0087       DO bj = myByLo(myThid), myByHi(myThid)
                0088        DO bi = myBxLo(myThid), myBxHi(myThid)
                0089          shareBufCS2_R8(1,bi,bj) = dPsiX(bi,bj)
                0090          shareBufCS2_R8(2,bi,bj) = dPsiY(bi,bj)
                0091        ENDDO
                0092       ENDDO
                0093 
                0094 C--   Master thread cannot start until everyone is ready:
                0095       CALL BAR2( myThid )
                0096       _BEGIN_MASTER( myThid )
                0097 
                0098 #ifdef ALLOW_USE_MPI
                0099       IF ( usingMPI ) THEN
                0100 
                0101        lbuf1 = nSx*nSy
                0102        lbuf2 = 2*lbuf1
                0103        idest = 0
                0104        itag  = 0
                0105        ready_to_receive = 0
                0106 
                0107        IF ( mpiMyId.NE.0 ) THEN
                0108 
                0109 C--   All proceses except 0 wait to be polled then send local array
                0110 #ifndef DISABLE_MPI_READY_TO_RECEIVE
                0111            CALL MPI_RECV (ready_to_receive, 1, MPI_INTEGER,
                0112      &                    idest, itag, MPI_COMM_MODEL, istatus, ierr)
                0113 #endif
                0114            CALL MPI_SEND (shareBufCS2_R8, lbuf2, MPI_DOUBLE_PRECISION,
                0115      &                    idest, itag, MPI_COMM_MODEL, ierr)
                0116 
                0117 C--   All proceses except 0 receive result from process 0
                0118            CALL MPI_RECV (shareBufCS1_R8, lbuf1, MPI_DOUBLE_PRECISION,
                0119      &                    idest, itag, MPI_COMM_MODEL, istatus, ierr)
                0120 
                0121        ELSE
                0122 
                0123 C--   Process 0 polls and receives data from each process in turn
2790c2105f Jean*0124          DO np = 2, nPx*nPy
                0125            pId = np - 1
ca64f811ee Jean*0126 #ifndef DISABLE_MPI_READY_TO_RECEIVE
                0127            CALL MPI_SEND (ready_to_receive, 1, MPI_INTEGER,
2790c2105f Jean*0128      &                    pId, itag, MPI_COMM_MODEL, ierr)
ca64f811ee Jean*0129 #endif
                0130            CALL MPI_RECV (loc2Buf, lbuf2, MPI_DOUBLE_PRECISION,
2790c2105f Jean*0131      &                    pId, itag, MPI_COMM_MODEL, istatus, ierr)
ca64f811ee Jean*0132 
                0133 C--   Process 0 gathers the local arrays into a global array.
                0134            DO bj=1,nSy
                0135             DO bi=1,nSx
2790c2105f Jean*0136               tN = W2_procTileList(bi,bj,np)
ca64f811ee Jean*0137               globalBuf(1,tN) = loc2Buf(1,bi,bj)
                0138               globalBuf(2,tN) = loc2Buf(2,bi,bj)
                0139             ENDDO
                0140            ENDDO
2790c2105f Jean*0141 C-       end loop on np
ca64f811ee Jean*0142          ENDDO
                0143 
                0144 C--   end if process not 0 / else = 0
                0145        ENDIF
                0146 
                0147       ENDIF
                0148 #endif /* ALLOW_USE_MPI */
                0149 
                0150       IF ( myProcId.EQ.0 ) THEN
                0151 
                0152 C--   Process 0 fills-in its local data
                0153          DO bj=1,nSy
                0154           DO bi=1,nSx
                0155             tN = W2_myTileList(bi,bj)
                0156             globalBuf(1,tN) = shareBufCS2_R8(1,bi,bj)
                0157             globalBuf(2,tN) = shareBufCS2_R8(2,bi,bj)
                0158           ENDDO
                0159          ENDDO
                0160 
                0161 C--   Cumulate Sum over all tiles:
575f97366a Jean*0162 #ifdef W2_CUMSUM_USE_MATRIX
936dcd2159 Jean*0163 C-    Using tile x tile matrix:
ca64f811ee Jean*0164          DO tN = 1,exch2_nTiles
                0165            globalBuf(3,tN) = 0.
                0166            DO tS = 1,exch2_nTiles
                0167              globalBuf(3,tN) = globalBuf(3,tN)
                0168      &                       + W2_cumSum_tiles(1,tS,tN)*globalBuf(1,tS)
                0169      &                       + W2_cumSum_tiles(2,tS,tN)*globalBuf(2,tS)
                0170            ENDDO
                0171          ENDDO
575f97366a Jean*0172 #else /* W2_CUMSUM_USE_MATRIX */
936dcd2159 Jean*0173 C-    Cumulate per facet and then use facet x facet matrix:
                0174 
                0175          DO j=1,W2_maxNbFacets
                0176            facetXYSum(1,j) = 0
                0177            facetXYSum(2,j) = 0
                0178            facet_CSum(j) = 0
                0179          ENDDO
                0180 
                0181 C-    First within each face:
                0182          DO j=1,nFacets
                0183           fNx = facet_dims(2*j-1)
                0184           fNy = facet_dims( 2*j )
                0185           IF ( fNx*fNy .GE. 1 ) THEN
                0186            nbTx = fNx/sNx
                0187            nbTy = fNy/sNy
                0188 
                0189            DO bi=1,nbTx-1
                0190              tN = facet_owns(1,j) + bi-1
                0191              globalBuf(3,tN+1) = globalBuf(3,tN) + globalBuf(1,tN)
                0192            ENDDO
                0193            DO bj=1,nbTy-1
                0194             tS = facet_owns(1,j) - 1 + (bj-1)*nbTx
                0195             DO bi=1,nbTx
                0196              tN = tS + bi
                0197              globalBuf(3,tN+nbTx) = globalBuf(3,tN) + globalBuf(2,tN)
                0198             ENDDO
                0199            ENDDO
                0200 
                0201 C-    facet increment in X & Y dir
                0202            DO bi=1,nbTx
                0203             tN = facet_owns(1,j) + bi-1
                0204             facetXYSum(1,j) = facetXYSum(1,j) + globalBuf(1,tN)
                0205            ENDDO
                0206            DO bj=1,nbTy
                0207             tN = facet_owns(1,j) + (bj-1)*nbTx
                0208             facetXYSum(2,j) = facetXYSum(2,j) + globalBuf(2,tN)
                0209            ENDDO
                0210 
                0211           ENDIF
                0212          ENDDO
                0213 
                0214 C-    Calculate cumulated sum at facet origin using facet matrix:
                0215          DO j=1,nFacets
                0216           DO i=1,nFacets
                0217            facet_CSum(j) = facet_CSum(j)
                0218      &                   + W2_cumSum_facet(1,i,j)*facetXYSum(1,i)
                0219      &                   + W2_cumSum_facet(2,i,j)*facetXYSum(2,i)
                0220           ENDDO
                0221          ENDDO
                0222 
                0223 C-    Finally, add cumulated sum at facet origin:
                0224          DO tN = 1,exch2_nTiles
                0225           j = exch2_myFace(tN)
                0226           IF ( j.NE.0 ) THEN
                0227             globalBuf(3,tN) = globalBuf(3,tN) + facet_CSum(j)
                0228           ENDIF
                0229          ENDDO
575f97366a Jean*0230 #endif /* W2_CUMSUM_USE_MATRIX */
                0231 
ca64f811ee Jean*0232 C-    Value at Special location (e.g., Missing-Corner values)
                0233          IF ( W2_tMC1.GE.1 )
                0234      &     psiLoc(1) = globalBuf(3,W2_tMC1) + globalBuf(2,W2_tMC1)
                0235          IF ( W2_tMC2.GE.1 )
                0236      &     psiLoc(2) = globalBuf(3,W2_tMC2) + globalBuf(1,W2_tMC2)
                0237 
                0238 C--   Process 0 fills-in its local data
                0239          DO bj=1,nSy
                0240           DO bi=1,nSx
                0241             tN = W2_myTileList(bi,bj)
                0242             shareBufCS1_R8(bi,bj) = globalBuf(3,tN)
                0243           ENDDO
                0244          ENDDO
                0245 
                0246 #ifdef ALLOW_USE_MPI
                0247         IF ( usingMPI ) THEN
                0248 C--   Process 0 sends result to all other processes
2790c2105f Jean*0249          DO np = 2, nPx*nPy
                0250            pId = np - 1
ca64f811ee Jean*0251 C-    fill local array with relevant portion of global array
                0252            DO bj=1,nSy
                0253             DO bi=1,nSx
2790c2105f Jean*0254               tN = W2_procTileList(bi,bj,np)
ca64f811ee Jean*0255               loc1Buf(bi,bj) = globalBuf(3,tN)
                0256             ENDDO
                0257            ENDDO
                0258            CALL MPI_SEND (loc1Buf, lbuf1, MPI_DOUBLE_PRECISION,
2790c2105f Jean*0259      &                    pId, itag, MPI_COMM_MODEL, ierr)
ca64f811ee Jean*0260          ENDDO
                0261 
                0262         ENDIF
                0263 #endif /* ALLOW_USE_MPI */
                0264 
                0265 C--   end if process 0
                0266       ENDIF
                0267 
                0268       _END_MASTER( myThid )
                0269 C--   Everyone wait for Master thread to be ready
                0270       CALL BAR2( myThid )
                0271 
                0272 C--   set result for every threads
                0273       DO bj = myByLo(myThid), myByHi(myThid)
                0274        DO bi = myBxLo(myThid), myBxHi(myThid)
                0275          psiZ(bi,bj) = shareBufCS1_R8(bi,bj)
                0276        ENDDO
                0277       ENDDO
                0278 
                0279       RETURN
                0280       END