Back to home page

MITgcm

 
 

    


File indexing completed on 2024-08-29 05:10:34 UTC

view on githubraw file Latest commit 5237154b on 2024-08-28 14:56:27 UTC
5237154b93 Jean*0001 #include "CPP_EEOPTIONS.h"
                0002 
                0003 C--   File global_sum_vector.F: Routines that perform global sum
                0004 C                               on a tile array, vector field
                0005 C--    Contents
                0006 C--    o GLB_SUM_VEC           - Global-Sum vector from Master thread
                0007 C--    o GLOBAL_SUM_VECTOR_RL  - Global-Sum vector (input: nSx,nSy,nVec)
                0008 C--    o GLOBAL_SUM_VECTOR_RS  - Global-Sum vector (input: nSx,nSy,nVec)
                0009 C--    o GLOBAL_SUM_VEC_ALT_RL - Global-Sum vector (input: nDim,nSx,nSy)
                0010 C--    o GLOBAL_SUM_VEC_ALT_RS - Global-Sum vector (input: nDim,nSx,nSy)
                0011 C--    o GLOBAL_SUM_VECTOR_INT - Global-Sum vector (input: nSx,nSy,nVec)
                0012 
                0013 C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
                0014 CBOP
                0015 C     !ROUTINE: GLB_SUM_VEC
                0016 
                0017 C     !INTERFACE:
                0018       SUBROUTINE GLB_SUM_VEC(
                0019      I                        nVec, tiledVec,
                0020      O                        sumVec,
                0021      I                        myThid )
                0022 
                0023 C     !DESCRIPTION:
                0024 C     *==========================================================*
                0025 C     | SUBROUTINE GLB\_SUM\_VEC
                0026 C     | o Handle global sum from master thread
                0027 C     *==========================================================*
                0028 C     | Apply sum on a shared array of tiled vector
                0029 C     |  and operate over all tiles & all the processes.
                0030 C     *==========================================================*
                0031 
                0032 C     !USES:
                0033       IMPLICIT NONE
                0034 
                0035 C     == Global data ==
                0036 #include "SIZE.h"
                0037 #include "EEPARAMS.h"
                0038 #include "EESUPPORT.h"
                0039 
                0040 C     !INPUT/OUTPUT PARAMETERS:
                0041 C     nVec     :: length of input/output vector
                0042 C     tiledVec :: Input tiled vector (dims: nSx,nSy,nVec)
                0043 C     sumVec   :: Result of sum over all tiles & procs
                0044 C     myThid   :: My thread id.
                0045       INTEGER nVec
                0046       Real*8  tiledVec(nSx,nSy,nVec)
                0047       Real*8  sumVec(nVec)
                0048       INTEGER myThid
                0049 
                0050 C     !LOCAL VARIABLES:
                0051 C     n, bi,bj :: Loop counters
                0052 C     mpiRC    :: MPI return code
                0053 C     msgBuf   :: Informational/error message buffer
                0054 C- type declaration of: sumMyPr, sumVec, localSum and globalSum
                0055 C          all 4 needs to have the same length as MPI_DOUBLE_PRECISION
                0056       INTEGER n, bi, bj
                0057 #ifdef ALLOW_USE_MPI
                0058       INTEGER mpiRC
                0059 # ifdef GLOBAL_SUM_ORDER_TILES
                0060       INTEGER biG, bjG, fullLength
                0061       Real*8  localSum (nSx*nPx,nSy*nPy,GSVec_size)
                0062       Real*8  globalSum(nSx*nPx,nSy*nPy,GSVec_size)
                0063 # endif /* GLOBAL_SUM_ORDER_TILES */
                0064 #endif /* ALLOW_USE_MPI */
                0065       Real*8  sumMyPr(GSVec_size)
                0066       CHARACTER*(MAX_LEN_MBUF) msgBuf
                0067 CEOP
                0068 
                0069       IF ( nVec.GT.GSVec_size ) THEN
                0070         WRITE(msgBuf,'(A,2(A,I7))') 'GLB_SUM_VEC: ',
                0071      &       'nVec=', nVec, ' > GSVec_size=', GSVec_size
                0072         CALL PRINT_ERROR( msgBuf, myThid )
                0073         WRITE(msgBuf,'(2A)') 'GLB_SUM_VEC: ',
                0074      &       'increase "GSVec_size" in EEPARAMS.h and recompile'
                0075         CALL PRINT_ERROR( msgBuf, myThid )
                0076         STOP 'ABNORMAL END: S/R GLB_SUM_VEC'
                0077       ENDIF
                0078 
                0079       _BEGIN_MASTER( myThid )
                0080 
                0081 #if ( defined GLOBAL_SUM_ORDER_TILES && defined ALLOW_USE_MPI )
                0082       IF ( usingMPI ) THEN
                0083 
                0084 C--   Initialise local buffer
                0085        DO n = 1,nVec
                0086         DO bjG=1,nSy*nPy
                0087          DO biG=1,nSx*nPx
                0088           localSum(biG,bjG,n) = 0.
                0089          ENDDO
                0090         ENDDO
                0091        ENDDO
                0092 
                0093 C--   Put my own data in local buffer
                0094        DO bj=1,nSy
                0095         bjG = (myYGlobalLo-1)/sNy+bj
                0096         DO bi=1,nSx
                0097          biG = (myXGlobalLo-1)/sNx+bi
                0098          DO n = 1,nVec
                0099           localSum(biG,bjG,n) = tiledVec(bi,bj,n)
                0100          ENDDO
                0101         ENDDO
                0102        ENDDO
                0103 
                0104 C--   Collect data from all procs
                0105        fullLength = nSx*nPx*nSy*nPy*nVec
                0106        CALL MPI_Allreduce( localSum, globalSum, fullLength,
                0107      &          MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_MODEL, mpiRC )
                0108 
                0109 C--   Sum over all tiles:
                0110        DO n = 1,nVec
                0111         sumVec(n) = 0.
                0112        ENDDO
                0113        DO bjG = 1,nSy*nPy
                0114         DO biG = 1,nSx*nPx
                0115          DO n = 1,nVec
                0116           sumVec(n) = sumVec(n) + globalSum(biG,bjG,n)
                0117          ENDDO
                0118         ENDDO
                0119        ENDDO
                0120 
                0121       ELSE
                0122 #else /* not (GLOBAL_SUM_ORDER_TILES & ALLOW_USE_MPI) */
                0123       IF ( .TRUE. ) THEN
                0124 #endif /* not (GLOBAL_SUM_ORDER_TILES & ALLOW_USE_MPI) */
                0125 
                0126 C--   Sum over all tiles (of the same process) first
                0127        DO n = 1,nVec
                0128         sumMyPr(n) = 0.
                0129        ENDDO
                0130        DO bj = 1,nSy
                0131         DO bi = 1,nSx
                0132          DO n = 1,nVec
                0133           sumMyPr(n) = sumMyPr(n) + tiledVec(bi,bj,n)
                0134          ENDDO
                0135         ENDDO
                0136        ENDDO
                0137 
                0138 #ifdef ALLOW_USE_MPI
                0139        IF ( usingMPI ) THEN
                0140 C--   sum over all procs and put result into shared buffer:
                0141         CALL MPI_Allreduce( sumMyPr, sumVec, nVec,
                0142      &           MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_MODEL, mpiRC )
                0143        ELSE
                0144 #else /* ALLOW_USE_MPI */
                0145        IF ( .TRUE. ) THEN
                0146 #endif /* ALLOW_USE_MPI */
                0147 C--   in case MPI is not used, put result directly in shared buffer:
                0148         DO n = 1,nVec
                0149          sumVec(n) = sumMyPr(n)
                0150         ENDDO
                0151        ENDIF
                0152 
                0153       ENDIF
                0154 
                0155       _END_MASTER( myThid )
                0156 
                0157       RETURN
                0158       END
                0159 
                0160 C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
                0161 CBOP
                0162 C     !ROUTINE: GLOBAL_SUM_VECTOR_RL
                0163 
                0164 C     !INTERFACE:
                0165       SUBROUTINE GLOBAL_SUM_VECTOR_RL(
                0166      I                       nVec, tiledVec,
                0167      O                       sumVec,
                0168      I                       myThid )
                0169 
                0170 C     !DESCRIPTION:
                0171 C     *==========================================================*
                0172 C     | SUBROUTINE GLOBAL\_SUM\_VECTOR\_RL
                0173 C     | o Handle sum for _RL vector.
                0174 C     *==========================================================*
                0175 C     | Apply sum on an array of tiled vector (input: nSx,nSy,nVec)
                0176 C     |  and operate over all tiles & all the processes.
                0177 C     | Oversize vector is cut and processed in "nCuts" pieces
                0178 C     |  that fit in buffer
                0179 C     *==========================================================*
                0180 
                0181 C     !USES:
                0182       IMPLICIT NONE
                0183 
                0184 C     == Global data ==
                0185 #include "SIZE.h"
                0186 #include "EEPARAMS.h"
                0187 #include "GLOBAL_SUM.h"
                0188 
                0189 C     !INPUT/OUTPUT PARAMETERS:
                0190 C     nVec     :: length of input/output vector
                0191 C     tiledVec :: Input tiled vector (dims: nSx,nSy,nVec)
                0192 C     sumVec   :: Result of sum over all tiles, threads & procs
                0193 C     myThid   :: My thread id.
                0194       INTEGER nVec
                0195       _RL     tiledVec(nSx,nSy,nVec)
                0196       _RL     sumVec(nVec)
                0197       INTEGER myThid
                0198 
                0199 C     !LOCAL VARIABLES:
                0200 C     bi, bj   :: tile indices
                0201 C     n, j, j0 :: Loop counters
                0202 C     nCuts    :: range of outside loop
                0203 C     nLoc     :: range of inner loop
                0204       INTEGER bi, bj
                0205       INTEGER n, nCuts
                0206       INTEGER j, j0, nLoc
                0207 CEOP
                0208 
                0209 C--   starts outside loop over number of "cuts", each fitting into buffer size
                0210       nCuts = 1 + INT( (nVec-1)/GSVec_size )
                0211       DO n = 1,nCuts
                0212        j0 = (n-1)*GSVec_size
                0213        nLoc = MIN( n*GSVec_size, nVec ) - j0
                0214 
                0215 C--   write local sum into shared-buffer array
                0216        DO j = 1,nLoc
                0217         DO bj = myByLo(myThid), myByHi(myThid)
                0218          DO bi = myBxLo(myThid), myBxHi(myThid)
                0219           shareBufGSVec(bi,bj,j) = tiledVec(bi,bj,j+j0)
                0220          ENDDO
                0221         ENDDO
                0222        ENDDO
                0223 
                0224 C--   Master thread cannot start until everyone is ready:
                0225        CALL BAR2( myThid )
                0226        _BEGIN_MASTER( myThid )
                0227 
                0228 C--   From master-thread, do the global-sum on shared-buffer
                0229        CALL GLB_SUM_VEC(
                0230      I                   nLoc, shareBufGSVec,
                0231      O                   shareGSVector,
                0232      I                   myThid )
                0233 
                0234        _END_MASTER( myThid )
                0235 C--   Everyone wait for Master thread to be ready
                0236        CALL BAR2( myThid )
                0237 
                0238 C--   set result for every threads
                0239        DO j=1,nLoc
                0240         sumVec(j+j0) = shareGSVector(j)
                0241        ENDDO
                0242 
                0243 C--   end outside do-loop over nCuts
                0244       ENDDO
                0245 
                0246       RETURN
                0247       END
                0248 
                0249 C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
                0250 CBOP
                0251 C     !ROUTINE: GLOBAL_SUM_VECTOR_RS
                0252 
                0253 C     !INTERFACE:
                0254       SUBROUTINE GLOBAL_SUM_VECTOR_RS(
                0255      I                       nVec, tiledVec,
                0256      O                       sumVec,
                0257      I                       myThid )
                0258 
                0259 C     !DESCRIPTION:
                0260 C     *==========================================================*
                0261 C     | SUBROUTINE GLOBAL\_SUM\_VECTOR\_RS
                0262 C     | o Handle sum for _RS vector.
                0263 C     *==========================================================*
                0264 C     | Apply sum on an array of tiled vector (input: nSx,nSy,nVec)
                0265 C     |  and operate over all tiles & all the processes.
                0266 C     | Oversize vector is cut and processed in "nCuts" pieces
                0267 C     |  that fit in buffer
                0268 C     *==========================================================*
                0269 
                0270 C     !USES:
                0271       IMPLICIT NONE
                0272 
                0273 C     == Global data ==
                0274 #include "SIZE.h"
                0275 #include "EEPARAMS.h"
                0276 #include "GLOBAL_SUM.h"
                0277 
                0278 C     !INPUT/OUTPUT PARAMETERS:
                0279 C     nVec     :: length of input/output vector
                0280 C     tiledVec :: Input tiled vector (dims: nSx,nSy,nVec)
                0281 C     sumVec   :: Result of sum over all tiles, threads & procs
                0282 C     myThid   :: My thread id.
                0283       INTEGER nVec
                0284       _RS     tiledVec(nSx,nSy,nVec)
                0285       _RS     sumVec(nVec)
                0286       INTEGER myThid
                0287 
                0288 C     !LOCAL VARIABLES:
                0289 C     bi, bj   :: tile indices
                0290 C     n, j, j0 :: Loop counters
                0291 C     nCuts    :: range of outside loop
                0292 C     nLoc     :: range of inner loop
                0293       INTEGER bi, bj
                0294       INTEGER n, nCuts
                0295       INTEGER j, j0, nLoc
                0296 CEOP
                0297 
                0298 C--   starts outside loop over number of "cuts", each fitting into buffer size
                0299       nCuts = 1 + INT( (nVec-1)/GSVec_size )
                0300       DO n = 1,nCuts
                0301        j0 = (n-1)*GSVec_size
                0302        nLoc = MIN( n*GSVec_size, nVec ) - j0
                0303 
                0304 C--   write local sum into shared-buffer array (+ convert _RS to r8)
                0305        DO j = 1,nLoc
                0306         DO bj = myByLo(myThid), myByHi(myThid)
                0307          DO bi = myBxLo(myThid), myBxHi(myThid)
                0308           shareBufGSVec(bi,bj,j) = tiledVec(bi,bj,j+j0)
                0309          ENDDO
                0310         ENDDO
                0311        ENDDO
                0312 
                0313 C--   Master thread cannot start until everyone is ready:
                0314        CALL BAR2( myThid )
                0315        _BEGIN_MASTER( myThid )
                0316 
                0317 C--   From master-thread, do the global-sum on shared-buffer
                0318        CALL GLB_SUM_VEC(
                0319      I                   nLoc, shareBufGSVec,
                0320      O                   shareGSVector,
                0321      I                   myThid )
                0322 
                0323        _END_MASTER( myThid )
                0324 C--   Everyone wait for Master thread to be ready
                0325        CALL BAR2( myThid )
                0326 
                0327 C--   set result for every threads (+ convert back r8 to _RS)
                0328        DO j=1,nLoc
                0329         sumVec(j+j0) = shareGSVector(j)
                0330        ENDDO
                0331 
                0332 C--   end outside do-loop over nCuts
                0333       ENDDO
                0334 
                0335       RETURN
                0336       END
                0337 
                0338 C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
                0339 CBOP
                0340 C     !ROUTINE: GLOBAL_SUM_VEC_ALT_RL
                0341 
                0342 C     !INTERFACE:
                0343       SUBROUTINE GLOBAL_SUM_VEC_ALT_RL(
                0344      I                       nDim, nVec, vecTile,
                0345      O                       vecSum,
                0346      I                       myThid )
                0347 
                0348 C     !DESCRIPTION:
                0349 C     *==========================================================*
                0350 C     | SUBROUTINE GLOBAL\_SUM\_VEC\_ALT\_RL
                0351 C     | o Handle sum for _RL vector.
                0352 C     *==========================================================*
                0353 C     | Apply sum on an array of tiled vector (input: nDim,nSx,nSy)
                0354 C     |  and operate over all tiles & all the processes.
                0355 C     | Oversize vector is cut and processed in "nCuts" pieces
                0356 C     |  that fit in buffer
                0357 C     *==========================================================*
                0358 
                0359 C     !USES:
                0360       IMPLICIT NONE
                0361 
                0362 C     == Global data ==
                0363 #include "SIZE.h"
                0364 #include "EEPARAMS.h"
                0365 #include "GLOBAL_SUM.h"
                0366 
                0367 C     !INPUT/OUTPUT PARAMETERS:
                0368 C     nDim     :: dimension of input vector
                0369 C     nVec     :: length of input/output vector to process
                0370 C     vecTile  :: Input tiled vector (dims: nDim,nSx,nSy)
                0371 C     vecSum   :: Result of sum over all tiles, threads & procs
                0372 C     myThid   :: My thread id.
                0373       INTEGER nDim, nVec
                0374       _RL     vecTile(nDim,nSx,nSy)
                0375       _RL     vecSum(nVec)
                0376       INTEGER myThid
                0377 
                0378 C     !LOCAL VARIABLES:
                0379 C     bi, bj   :: tile indices
                0380 C     n, j, j0 :: Loop counters
                0381 C     nCuts    :: range of outside loop
                0382 C     nLoc     :: range of inner loop
                0383 C     msgBuf   :: Informational/error message buffer
                0384       INTEGER bi, bj
                0385       INTEGER n, nCuts
                0386       INTEGER j, j0, nLoc
                0387       CHARACTER*(MAX_LEN_MBUF) msgBuf
                0388 CEOP
                0389 
                0390       IF ( nVec.GT.nDim ) THEN
                0391        WRITE(msgBuf,'(A,2(A,I7))') 'GLOBAL_SUM_VEC_ALT_RL: ',
                0392      &      'nVec=', nVec, ' > nDim =', nDim
                0393        CALL PRINT_ERROR( msgBuf, myThid )
                0394        STOP 'ABNORMAL END: S/R GLOBAL_SUM_VEC_ALT_RL'
                0395       ENDIF
                0396 
                0397 C--   starts outside loop over number of "cuts", each fitting into buffer size
                0398       nCuts = 1 + INT( (nVec-1)/GSVec_size )
                0399       DO n = 1,nCuts
                0400        j0 = (n-1)*GSVec_size
                0401        nLoc = MIN( n*GSVec_size, nVec ) - j0
                0402 
                0403 C--   write local sum into shared-buffer array
                0404        DO j = 1,nLoc
                0405         DO bj = myByLo(myThid), myByHi(myThid)
                0406          DO bi = myBxLo(myThid), myBxHi(myThid)
                0407           shareBufGSVec(bi,bj,j) = vecTile(j+j0,bi,bj)
                0408          ENDDO
                0409         ENDDO
                0410        ENDDO
                0411 
                0412 C--   Master thread cannot start until everyone is ready:
                0413        CALL BAR2( myThid )
                0414        _BEGIN_MASTER( myThid )
                0415 
                0416 C--   From master-thread, do the global-sum on shared-buffer
                0417        CALL GLB_SUM_VEC(
                0418      I                   nLoc, shareBufGSVec,
                0419      O                   shareGSVector,
                0420      I                   myThid )
                0421 
                0422        _END_MASTER( myThid )
                0423 C--   Everyone wait for Master thread to be ready
                0424        CALL BAR2( myThid )
                0425 
                0426 C--   set result for every threads
                0427        DO j=1,nLoc
                0428         vecSum(j+j0) = shareGSVector(j)
                0429        ENDDO
                0430 
                0431 C--   end outside do-loop over nCuts
                0432       ENDDO
                0433 
                0434       RETURN
                0435       END
                0436 
                0437 C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
                0438 CBOP
                0439 C     !ROUTINE: GLOBAL_SUM_VEC_ALT_RS
                0440 
                0441 C     !INTERFACE:
                0442       SUBROUTINE GLOBAL_SUM_VEC_ALT_RS(
                0443      I                       nDim, nVec, vecTile,
                0444      O                       vecSum,
                0445      I                       myThid )
                0446 
                0447 C     !DESCRIPTION:
                0448 C     *==========================================================*
                0449 C     | SUBROUTINE GLOBAL\_SUM\_VEC\_ALT\_RS
                0450 C     | o Handle sum for _RS vector.
                0451 C     *==========================================================*
                0452 C     | Apply sum on an array of tiled vector (input: nDim,nSx,nSy)
                0453 C     |  and operate over all tiles & all the processes.
                0454 C     | Oversize vector is cut and processed in "nCuts" pieces
                0455 C     |  that fit in buffer
                0456 C     *==========================================================*
                0457 
                0458 C     !USES:
                0459       IMPLICIT NONE
                0460 
                0461 C     == Global data ==
                0462 #include "SIZE.h"
                0463 #include "EEPARAMS.h"
                0464 #include "GLOBAL_SUM.h"
                0465 
                0466 C     !INPUT/OUTPUT PARAMETERS:
                0467 C     nDim     :: dimension of input vector
                0468 C     nVec     :: length of input/output vector to process
                0469 C     vecTile  :: Input tiled vector (dims: nDim,nSx,nSy)
                0470 C     vecSum   :: Result of sum over all tiles, threads & procs
                0471 C     myThid   :: My thread id.
                0472       INTEGER nDim, nVec
                0473       _RS     vecTile(nDim,nSx,nSy)
                0474       _RS     vecSum(nVec)
                0475       INTEGER myThid
                0476 
                0477 C     !LOCAL VARIABLES:
                0478 C     bi, bj   :: tile indices
                0479 C     n, j, j0 :: Loop counters
                0480 C     nCuts    :: range of outside loop
                0481 C     nLoc     :: range of inner loop
                0482 C     msgBuf   :: Informational/error message buffer
                0483       INTEGER bi, bj
                0484       INTEGER n, nCuts
                0485       INTEGER j, j0, nLoc
                0486       CHARACTER*(MAX_LEN_MBUF) msgBuf
                0487 CEOP
                0488 
                0489       IF ( nVec.GT.nDim ) THEN
                0490        WRITE(msgBuf,'(A,2(A,I7))') 'GLOBAL_SUM_VEC_ALT_RS: ',
                0491      &      'nVec=', nVec, ' > nDim =', nDim
                0492        CALL PRINT_ERROR( msgBuf, myThid )
                0493        STOP 'ABNORMAL END: S/R GLOBAL_SUM_VEC_ALT_RS'
                0494       ENDIF
                0495 
                0496 C--   starts outside loop over number of "cuts", each fitting into buffer size
                0497       nCuts = 1 + INT( (nVec-1)/GSVec_size )
                0498       DO n = 1,nCuts
                0499        j0 = (n-1)*GSVec_size
                0500        nLoc = MIN( n*GSVec_size, nVec ) - j0
                0501 
                0502 C--   write local sum into shared-buffer array (+ convert _RS to r8)
                0503        DO j = 1,nLoc
                0504         DO bj = myByLo(myThid), myByHi(myThid)
                0505          DO bi = myBxLo(myThid), myBxHi(myThid)
                0506           shareBufGSVec(bi,bj,j) = vecTile(j+j0,bi,bj)
                0507          ENDDO
                0508         ENDDO
                0509        ENDDO
                0510 
                0511 C--   Master thread cannot start until everyone is ready:
                0512        CALL BAR2( myThid )
                0513        _BEGIN_MASTER( myThid )
                0514 
                0515 C--   From master-thread, do the global-sum on shared-buffer
                0516        CALL GLB_SUM_VEC(
                0517      I                   nLoc, shareBufGSVec,
                0518      O                   shareGSVector,
                0519      I                   myThid )
                0520 
                0521        _END_MASTER( myThid )
                0522 C--   Everyone wait for Master thread to be ready
                0523        CALL BAR2( myThid )
                0524 
                0525 C--   set result for every threads (+ convert back r8 to _RS)
                0526        DO j=1,nLoc
                0527         vecSum(j+j0) = shareGSVector(j)
                0528        ENDDO
                0529 
                0530 C--   end outside do-loop over nCuts
                0531       ENDDO
                0532 
                0533       RETURN
                0534       END
                0535 
                0536 C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
                0537 CBOP
                0538 C     !ROUTINE: GLOBAL_SUM_VECTOR_INT
                0539 
                0540 C     !INTERFACE:
                0541       SUBROUTINE GLOBAL_SUM_VECTOR_INT(
                0542      I                       nVec, tiledVec,
                0543      O                       sumVec,
                0544      I                       myThid )
                0545 
                0546 C     !DESCRIPTION:
                0547 C     *==========================================================*
                0548 C     | SUBROUTINE GLOBAL\_SUM\_VECTOR\_INT
                0549 C     | o Handle sum for Integer vector.
                0550 C     *==========================================================*
                0551 C     | Apply sum on an array of tiled vector (input: nSx,nSy,nVec)
                0552 C     |  and operate over all tiles & all the processes.
                0553 C     | Oversize vector is cut and processed in "nCuts" pieces
                0554 C     |  that fit in buffer
                0555 C     *==========================================================*
                0556 
                0557 C     !USES:
                0558       IMPLICIT NONE
                0559 
                0560 C     == Global data ==
                0561 #include "SIZE.h"
                0562 #include "EEPARAMS.h"
                0563 #include "EESUPPORT.h"
                0564 #include "GLOBAL_SUM.h"
                0565 
                0566 C     !INPUT/OUTPUT PARAMETERS:
                0567 C     nVec     :: length of input/output vector
                0568 C     tiledVec :: Input tiled vector (dims: nSx,nSy,nVec)
                0569 C     sumVec   :: Result of sum over all tiles, threads & procs
                0570 C     myThid   :: My thread id.
                0571       INTEGER nVec
                0572       INTEGER tiledVec(nSx,nSy,nVec)
                0573       INTEGER sumVec(nVec)
                0574       INTEGER myThid
                0575 
                0576 C     !LOCAL VARIABLES:
                0577 C     bi, bj   :: tile indices
                0578 C     n, j, j0 :: Loop counters
                0579 C     nCuts    :: range of outside loop
                0580 C     nLoc     :: range of inner loop
                0581 C     mpiRC    :: MPI return code
                0582       INTEGER bi, bj
                0583       INTEGER n, nCuts
                0584       INTEGER j, j0, nLoc
                0585       INTEGER sumMyPr(GSVec_size)
                0586 C- type declaration of: sumMyPr & shareGSVectInt need to have
                0587 C                       the same length as MPI_INTEGER
                0588 #ifdef ALLOW_USE_MPI
                0589       INTEGER mpiRC
                0590 #endif /* ALLOW_USE_MPI */
                0591 CEOP
                0592 
                0593 C--   starts outside loop over number of "cuts", each fitting into buffer size
                0594       nCuts = 1 + INT( (nVec-1)/GSVec_size )
                0595       DO n = 1,nCuts
                0596        j0 = (n-1)*GSVec_size
                0597        nLoc = MIN( n*GSVec_size, nVec ) - j0
                0598 
                0599 C--   write local sum into shared-buffer array
                0600        DO j = 1,nLoc
                0601         DO bj = myByLo(myThid), myByHi(myThid)
                0602          DO bi = myBxLo(myThid), myBxHi(myThid)
                0603           shareBufGSVecI(bi,bj,j) = tiledVec(bi,bj,j+j0)
                0604          ENDDO
                0605         ENDDO
                0606        ENDDO
                0607 
                0608 C--   Master thread cannot start until everyone is ready:
                0609        CALL BAR2( myThid )
                0610        _BEGIN_MASTER( myThid )
                0611 
                0612 C--   Sum over all tiles (of the same process) first
                0613         DO j = 1,nLoc
                0614          sumMyPr(n) = 0
                0615         ENDDO
                0616         DO bj = 1,nSy
                0617          DO bi = 1,nSx
                0618           DO j = 1,nLoc
                0619            sumMyPr(j) = sumMyPr(j) + shareBufGSVecI(bi,bj,j)
                0620           ENDDO
                0621          ENDDO
                0622         ENDDO
                0623 
                0624 #ifdef ALLOW_USE_MPI
                0625         IF ( usingMPI ) THEN
                0626 C--   sum over all procs and put result into shared buffer:
                0627          CALL MPI_Allreduce( sumMyPr, shareGSVectInt, nLoc,
                0628      &            MPI_INTEGER, MPI_SUM, MPI_COMM_MODEL, mpiRC )
                0629         ELSE
                0630 #else /* ALLOW_USE_MPI */
                0631         IF ( .TRUE. ) THEN
                0632 #endif /* ALLOW_USE_MPI */
                0633 C--   in case MPI is not used, put result directly in shared buffer:
                0634          DO j = 1,nLoc
                0635           shareGSVectInt(j) = sumMyPr(j)
                0636          ENDDO
                0637         ENDIF
                0638 
                0639        _END_MASTER( myThid )
                0640 
                0641 C--   Everyone wait for Master thread to be ready
                0642        CALL BAR2( myThid )
                0643 
                0644 C--   set result for every threads
                0645        DO j = 1,nLoc
                0646         sumVec(j+j0) = shareGSVectInt(j)
                0647        ENDDO
                0648 
                0649 C--   end outside do-loop over nCuts
                0650       ENDDO
                0651 
                0652       RETURN
                0653       END