Back to home page

MITgcm

 
 

    


File indexing completed on 2018-03-02 18:36:06 UTC

view on githubraw file Latest commit add29e06 on 2018-01-31 20:35:05 UTC
924557e60a Chri*0001 #include "CPP_EEOPTIONS.h"
                0002 
4c563c2ee9 Chri*0003 CBOP
                0004 C     !ROUTINE: CHECK_THREADS
                0005 
                0006 C     !INTERFACE:
924557e60a Chri*0007       SUBROUTINE CHECK_THREADS( myThid )
                0008 
4c563c2ee9 Chri*0009 C     !DESCRIPTION:
                0010 C     *==========================================================
b2715cd9c6 Jean*0011 C     | SUBROUTINE CHECK\_THREADS
                0012 C     | o Check that all the threads we need did indeed start.
                0013 C     *==========================================================
                0014 C     | This routine is called during the initialisation phase
                0015 C     | to check whether all the threads have started.
                0016 C     | It is invoked by every thread and if any thread finds an
                0017 C     | error it should set its error flag.
                0018 C     | Notes:
                0019 C     |  Different mechanisms may be required on different
                0020 C     | platforms to actually perform the check. For example as
                0021 C     | coded here each thread checks for a semaphore set by the
                0022 C     | other threads to see if they are running.
                0023 C     | It is also possible for a system to schedule threads
                0024 C     | sequentially, unless some system call is made to yield
                0025 C     | the process. This routine would detect this situation too
                0026 C     | and allow a programmer to modify this routine and the
                0027 C     | barrier code to allow threads to be scheduled more
                0028 C     | appropriately.
                0029 C     *==========================================================
4c563c2ee9 Chri*0030 
                0031 C     !USES:
b2715cd9c6 Jean*0032       IMPLICIT NONE
4c563c2ee9 Chri*0033 C     == Global variables ==
924557e60a Chri*0034 #include "SIZE.h"
                0035 #include "EEPARAMS.h"
                0036 #include "EESUPPORT.h"
                0037 
4c563c2ee9 Chri*0038 C     !INPUT PARAMETERS:
                0039 C     == Routine arguments ==
                0040 C     myThid :: My thread number
                0041       INTEGER myThid
924557e60a Chri*0042 
b2715cd9c6 Jean*0043 C     !FUNCTIONS:
                0044 #ifdef USE_OMP_THREADING
                0045       INTEGER  OMP_GET_NUM_THREADS
                0046       EXTERNAL OMP_GET_NUM_THREADS
                0047 #endif
                0048 
4c563c2ee9 Chri*0049 C     !LOCAL VARIABLES:
                0050 C     == Local variables ==
b2715cd9c6 Jean*0051 C     I         :: Loop counter
4c563c2ee9 Chri*0052 C     numberThreadRunning :: Count of number of threads this thread
                0053 C                            thinks are running.
                0054 C     nChecks   :: Number of times checked for all threads. After so
                0055 C                  many checks give up and report an error.
b2715cd9c6 Jean*0056 C     msgBuf    :: Informational/error message buffer
924557e60a Chri*0057       INTEGER nChecks
b2715cd9c6 Jean*0058       CHARACTER*(MAX_LEN_MBUF) msgBuf
c0add436ef Jean*0059 #ifdef USE_OMP_THREADING
                0060 #ifdef ALLOW_USE_MPI
                0061       INTEGER myErr, mpiRC
                0062 #endif
                0063 #else /* USE_OMP_THREADING */
                0064       INTEGER I, numberThreadsRunning
                0065 #endif /* USE_OMP_THREADING */
4c563c2ee9 Chri*0066 CEOP
                0067 
b2715cd9c6 Jean*0068 C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
                0069 
                0070 #ifdef USE_OMP_THREADING
                0071 C--   Check early-on that number of threads match
c0add436ef Jean*0072 
b2715cd9c6 Jean*0073       IF ( OMP_GET_NUM_THREADS() .NE. nThreads ) THEN
c0add436ef Jean*0074 C-    This process has problems in multi-threads setting (detected by
                0075 C     all pseudo-threads); note: cannot use any Barrier in this context
                0076 
b2715cd9c6 Jean*0077        WRITE(msgBuf,'(2A,I6)') 'CHECK_THREADS:',
                0078      &              ' from "eedata", nThreads=', nThreads
                0079        CALL PRINT_ERROR( msgBuf, myThid )
                0080        WRITE(msgBuf,'(2A,I6)') ' not equal to ',
                0081      &              'Env.Var. OMP_NUM_THREADS=', OMP_GET_NUM_THREADS()
                0082        CALL PRINT_ERROR( msgBuf, myThid )
c0add436ef Jean*0083        thError(myThid) = .TRUE.
                0084        eeBootError     = .TRUE.
                0085        IF ( myThid.EQ.1 ) THEN
                0086 C-    one pseudo-thread (thId=1) export the error to other MPI processes
                0087         nChecks = 1
                0088 #ifdef ALLOW_USE_MPI
                0089         IF ( usingMPI ) THEN
                0090          myErr = nChecks
                0091          CALL MPI_Allreduce( myErr,nChecks,1,MPI_INTEGER,
                0092      &                       MPI_SUM,MPI_COMM_MODEL,mpiRC )
                0093         ENDIF
                0094 #endif /* ALLOW_USE_MPI */
                0095        ENDIF
                0096 
                0097       ELSE
                0098 C-    this process has a working multi-threads setting
                0099 
                0100        threadIsRunning(myThid) = .TRUE.
                0101        IF ( myThid.EQ.1 ) THEN
                0102 C-    master collects error from other MPI processes
                0103         nChecks = 0
                0104 #ifdef ALLOW_USE_MPI
                0105         IF ( usingMPI ) THEN
                0106          myErr = nChecks
                0107          CALL MPI_Allreduce( myErr,nChecks,1,MPI_INTEGER,
                0108      &                       MPI_SUM,MPI_COMM_MODEL,mpiRC )
                0109         ENDIF
                0110 #endif /* ALLOW_USE_MPI */
                0111         IF ( nChecks.NE.0 ) THEN
                0112           WRITE(msgBuf,'(A,I5,A)') 'CHECK_THREADS:', nChecks,
                0113      &                     ' error(s) from other Processes'
                0114           CALL PRINT_ERROR( msgBuf, myThid )
                0115           eeBootError = .TRUE.
                0116         ENDIF
                0117        ENDIF
                0118 C-    ensure all threads leave with updated eeBootError (shared) value
                0119 C$OMP BARRIER
                0120 
b2715cd9c6 Jean*0121       ENDIF
c0add436ef Jean*0122 
                0123 #else /* ndef USE_OMP_THREADING */
b2715cd9c6 Jean*0124 
                0125       threadIsRunning(myThid) = .TRUE.
924557e60a Chri*0126       nChecks                 = 0
b2715cd9c6 Jean*0127 
924557e60a Chri*0128    10 CONTINUE
                0129       numberThreadsRunning = 0
                0130       nChecks = nChecks + 1
                0131       DO I = 1, nThreads
                0132        IF ( threadIsRunning(I) )
                0133      &  numberThreadsRunning = numberThreadsRunning+1
                0134       ENDDO
                0135       IF ( nChecks .GT. 10 ) THEN
                0136        thError(myThid) = .TRUE.
                0137        eeBootError     = .TRUE.
b2715cd9c6 Jean*0138        WRITE(msgBuf,'(A,I5,A,I5,A)')
c0add436ef Jean*0139      &  'CHECK_THREADS: Only ',numberThreadsRunning,
                0140      &  ' thread(s), ',nThreads,' are needed for this config!'
b2715cd9c6 Jean*0141         CALL PRINT_ERROR( msgBuf, myThid )
924557e60a Chri*0142 C--     Not enough threads are running so halt the program.
46dc4f419b Chri*0143 C       I did not want this here but it is the only place I have found that
924557e60a Chri*0144 C       KAP deadlocks if it there are fewer threads than iterations in a BLOCKED(1)
b2715cd9c6 Jean*0145 C       loop. The deadlock appears to be in the routine mppjoin which never
924557e60a Chri*0146 C       returns. I tried putting the STOP in main or breaking out of the loop in main
                0147 C       but this causes KAP to insert a call to mppjoin - which then deadlocks!
                0148         IF ( myThid .EQ. 1 ) THEN
                0149          STOP 'ABNORMAL END: S/R CHECK_THREADS'
                0150         ENDIF
                0151        GOTO 11
                0152       ENDIF
                0153       IF ( numberThreadsRunning .NE. nThreads ) THEN
5b6c0e8b66 Patr*0154 #ifdef HAVE_SYSTEM
924557e60a Chri*0155        CALL SYSTEM('sleep 1')
79f5b9efed Alis*0156 #endif
924557e60a Chri*0157        GOTO 10
                0158       ENDIF
                0159    11 CONTINUE
aea29c8517 Alis*0160 
c0add436ef Jean*0161 #endif /* ndef USE_OMP_THREADING */
                0162 
8b9104b167 Jean*0163 C--   check barrier synchronization: 1rst (initial) call.
c0add436ef Jean*0164       IF ( .NOT. eeBootError ) THEN
                0165         CALL BAR_CHECK( 1, myThid )
                0166       ENDIF
8b9104b167 Jean*0167 
924557e60a Chri*0168       RETURN
                0169       END