
!=======================================================================
!      Boundary conditions and PE exchange
!=======================================================================


 subroutine my_mpi_init(comm_,ierr)
!--------------------------------------------------------------
!     intitialize mpi system for model
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      integer, intent(out) :: ierr
      integer :: comm_,nlen
      include "mpif.h"
      character (len=MPI_MAX_PROCESSOR_NAME) :: pname
      ierr=0
      if (comm_ == MPI_COMM_NULL) then
        print *, 'You passed MPI_COMM_NULL !!!'
        ierr=1
        return
       end if
       my_comm=comm_
       call MPI_Comm_rank(my_comm, my_pe, ierr)
       if (ierr/=0) return
       call MPI_Comm_size(my_comm, n_pes, ierr)
       if (ierr/=0) return
       call MPI_Get_processor_name(pname, nlen, ierr)
       if (ierr/=0) return
       print*,'this is process ', my_pe,' of ', n_pes,' on ', pname(1:nlen)
       call my_mpi_test(my_comm,ierr)
 end subroutine my_mpi_init


subroutine halt_stop(string)
!--------------------------------------------------------------
!     controlled stop, should not be called from python
!--------------------------------------------------------------
      implicit none
      character*(*) :: string
      integer :: ierr,code,my_pe
      include "mpif.h"
      call mpi_comm_rank(MPI_COMM_WORLD,my_pe,ierr)
      print*,' global pe #',my_pe,' : ',string
      print*,' global pe #',my_pe,' aborting '
      code=99
      call MPI_ABORT(mpi_comm_world, code, IERR)
end subroutine halt_stop



subroutine fortran_barrier
!--------------------------------------------------------------
!     A barrier for the local sub domain
!     for use in fortran part only
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      integer :: ierr
      call mpi_barrier(my_comm, ierr)
end subroutine fortran_barrier



 subroutine my_mpi_test(my_comm,ierr)
!--------------------------------------------------------------
!     test some basic mpi routines
!--------------------------------------------------------------
      implicit none
      integer, intent(out) :: ierr
      integer :: my_comm
      integer :: my_pe=-1,all_pes,xint,xint2
      real*8    :: xreal,xreal2
      include "mpif.h"
!   get some mpi infos first
      ierr=0
      call mpi_comm_rank(my_comm       ,my_pe,ierr)
      if (ierr/=0) return
      call mpi_comm_size(my_comm       ,all_pes,ierr)
      if (ierr/=0) return
!   try first global barrier
      print*,'tyring first global barrier for PE #',my_pe
      call mpi_barrier(my_comm       , ierr)
      if (ierr/=0) return
!   try broadcasting
      print*,'tyring global broadcast for PE #',my_pe
      xreal = 1.0
      call mpi_bcast(xreal,1,mpi_real8,0,my_comm       ,ierr)
      if (ierr/=0) return
      xint = 1
      call mpi_bcast(xint,1,mpi_integer,0,my_comm       ,ierr)
      if (ierr/=0) return
!   check results of broadcasting
      if (xreal /= 1.0 ) then
       print*,'fatal: MPI test failed on broadcasting reals for PE #',my_pe
       ierr=-1 ; return
      endif
      if (xint /= 1 ) then
       print*,'fatal: MPI test failed on broadcasting integer for PE #',my_pe
       ierr=-1 ; return
      endif
      call mpi_barrier(my_comm       , ierr)
      if (ierr/=0) return
!   try global sum
      xreal = 2.0
      call mpi_allreduce(xreal,xreal2,1,mpi_real8,MPI_SUM,my_comm       ,ierr)
      if (ierr/=0) return
      xint = 2.0
      call mpi_allreduce(xint,xint2,1,mpi_integer,MPI_SUM,my_comm       ,ierr)
      if (ierr/=0) return
!   check results 
      xreal = xreal2/all_pes
      if (xreal /= 2.0 ) then
       print*,'fatal: MPI test failed on global sum (real) for PE #',my_pe
       ierr=-1 ; return
      endif
      xint = xint2/all_pes
      if (xint /= 2.0 ) then
       print*,'fatal: MPI test failed on global sum (int) for PE #',my_pe
       ierr=-1 ; return
      endif
      call mpi_barrier(my_comm       , ierr)
      if (ierr/=0) return
      print*,' my_pe=',my_pe,' ierr = ',ierr
      return
 end subroutine my_mpi_test



 subroutine print_mpi_error(ierr)
!--------------------------------------------------------------
!     print out MPI error message
!--------------------------------------------------------------
      implicit none
      integer :: ierr,ierr2,len
      character*80 :: string
      if (ierr/=0) then
       call MPI_ERROR_STRING(ierr,string, LEN, ierr2)
       print*,' MPI ERROR:',ierr
       print*,'           ',string(1:len)
      endif
 end subroutine print_mpi_error



  subroutine exchg_prog3Dfield_complete(nx_,ny_,nz_,a)
      implicit none
      integer :: nx_,ny_,nz_
      real*8  :: a(nx_,ny_,nz_,3)
      call setcyclic3D(nx_,ny_,nz_,a(1,1,1,1) )
      call setcyclic3D(nx_,ny_,nz_,a(1,1,1,2) )
      call setcyclic3D(nx_,ny_,nz_,a(1,1,1,3) )
      call border_exchg3D(nx_,ny_,nz_,a(1,1,1,1),2)
      call border_exchg3D(nx_,ny_,nz_,a(1,1,1,2),2)
      call border_exchg3D(nx_,ny_,nz_,a(1,1,1,3),2)
  end subroutine exchg_prog3Dfield_complete

  subroutine exchg_prog2Dfield_complete(nx_,ny_,a)
      implicit none
      integer :: nx_,ny_
      real*8  :: a(nx_,ny_,3)
      call setcyclic2D(nx_,ny_,a(1,1,1) )
      call setcyclic2D(nx_,ny_,a(1,1,2) )
      call setcyclic2D(nx_,ny_,a(1,1,3) )
      call border_exchg2D(nx_,ny_,a(1,1,1),2)
      call border_exchg2D(nx_,ny_,a(1,1,2),2)
      call border_exchg2D(nx_,ny_,a(1,1,3),2)
  end subroutine exchg_prog2Dfield_complete

  subroutine exchg_3Dfield_complete(nx_,ny_,nz_,a)
      implicit none
      integer :: nx_,ny_,nz_
      real*8  :: a(nx_,ny_,nz_)
      call setcyclic3D(nx_,ny_,nz_,a)
      call border_exchg3D(nx_,ny_,nz_,a,2)
  end subroutine exchg_3Dfield_complete

  subroutine exchg_2Dfield_complete(nx_,ny_,a)
      implicit none
      integer :: nx_,ny_
      real*8  :: a(nx_,ny_)
      call setcyclic2D(nx_,ny_,a)
      call border_exchg2D(nx_,ny_,a,2)
  end subroutine exchg_2Dfield_complete



  subroutine setcyclic3D(nx_,ny_,nz_,p1)
!--------------------------------------------------------------
!       set cyclic boundary conditions for 3D array
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      real*8 :: p1(nx_,ny_,nz_)
      integer :: j,nx_,ny_,nz_
      include "mpif.h"
      integer  :: tag=0, ier, Sta(MPI_STATUS_SIZE),len
      if (enable_cyclic_x) then
        do j=max(1,js_pe-1),min(ny,je_pe+1)
          p1(1  ,j,:)=p1(nx-1,j,:); 
          p1(nx,j,:)=p1(2    ,j,:)
         enddo
      endif
      if (enable_cyclic_y) then
       if ( n_pes_j > 1) then
        len=nx*nz
        call mpi_barrier(my_comm       ,ier)
        if (my_pe ==0 ) then
         call mpi_send(p1(:,2,:),len,mpi_real8,n_pes-1,tag,my_comm       ,ier)
         call mpi_recv(p1(:,1,:),len,mpi_real8,n_pes-1,tag,my_comm       ,Sta,ier)
        endif
        if (my_pe == n_pes-1) then 
         call mpi_recv(p1(:,ny  ,:),len,mpi_real8,0,tag,my_comm       ,Sta,ier)
         call mpi_send(p1(:,ny-1,:),len,mpi_real8,0,tag,my_comm       ,ier)
        endif
        call mpi_barrier(my_comm       ,ier)
       else
        p1(:,1  ,:)=p1(:,ny-1,:); 
        p1(:,ny,:)=p1(:,2    ,:)
       endif
      endif
 end subroutine setcyclic3D



 subroutine setcyclic3D_j2(nx_,ny_,nz_,p1)
!--------------------------------------------------------------
!       set cyclic boundary conditions for 3D array
!       communicate 2 rows in meridional direction
!       needed for higher order advection schemes
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      real*8 :: p1(nx_,ny_,nz_)
      integer :: nx_,ny_,nz_
      include "mpif.h"
      integer  :: tag=0, ier, Sta(MPI_STATUS_SIZE),len
      if (enable_cyclic_y .and. n_pes_j > 1) then
        len=nx*nz
        call mpi_barrier(my_comm       ,ier)
        if (my_pe ==0 ) then 
         call mpi_send(p1(:,3   ,:),len,mpi_real8,n_pes-1,tag,my_comm       ,ier)
         call mpi_recv(p1(:,ny-2,:),len,mpi_real8,n_pes-1,tag,my_comm       ,Sta,ier)
        endif
        if (my_pe == n_pes-1) then 
         call mpi_recv(p1(:,3   ,:),len,mpi_real8,0,tag,my_comm       ,Sta,ier)
         call mpi_send(p1(:,ny-2,:),len,mpi_real8,0,tag,my_comm       ,ier)
        endif
        call mpi_barrier(my_comm       ,ier)
      endif
 end subroutine setcyclic3D_j2



 subroutine setcyclic2D(nx_,ny_,p1)
!--------------------------------------------------------------
!       set cyclic boundary conditions for 2D array
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      real*8 :: p1(nx_,ny_)
      integer :: j,nx_,ny_
      include "mpif.h"
      integer  :: tag=0, ier, Sta(MPI_STATUS_SIZE)
      if (enable_cyclic_x) then
        do j=max(1,js_pe-1),min(ny,je_pe+1)
         p1(1  ,j)=p1(nx-1,j); 
         p1(nx,j)=p1(2    ,j)
        enddo
      endif
      if (enable_cyclic_y) then
       if ( n_pes_j > 1) then
        call mpi_barrier(my_comm       ,ier)
        if (my_pe ==0 ) then
         call mpi_send(p1(:,2),nx,mpi_real8,n_pes-1,tag,my_comm       ,ier)
         call mpi_recv(p1(:,1),nx,mpi_real8,n_pes-1,tag,my_comm       ,Sta,ier)
        endif
        if (my_pe == n_pes-1) then 
         call mpi_recv(p1(:,ny  ),nx,mpi_real8,0,tag,my_comm       ,Sta,ier)
         call mpi_send(p1(:,ny-1),nx,mpi_real8,0,tag,my_comm       ,ier)
        endif
        call mpi_barrier(my_comm       ,ier)
       else
        p1(:,1  )=p1(:,ny-1); 
        p1(:,ny)=p1(:,2    )
       endif
      endif
 end subroutine setcyclic2D


 subroutine border_exchg3D(nx_,ny_,nz_,a,jx)
!--------------------------------------------------------------
!     Exchange overlapping areas of 3D array a in all PEs of sub 
!     domain. Number of overlapping indicees are given by jx.
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      integer :: jx,nx_,ny_,nz_,jj
      real*8  :: a(nx_,ny_,nz_)
      integer  ::  j, tag, ierr,len
      include "mpif.h"
      integer,dimension(MPI_STATUS_SIZE)  :: Status
      tag  = 0; len=nx*nz
      call mpi_barrier(my_comm       ,ierr)
      if ( n_pes_j > 1) then
!     Austausch von Norden nach Sueden
        if (my_blk_j /=1 ) then
         do j=2*jx-1,jx,-1
          jj=min(ny_, js_pe+j-jx)
          call mpi_send(a(:,jj,:),len,mpi_real8,my_pe-1,tag,my_comm,ierr)
         enddo
        endif
        if (my_blk_j /= n_pes_j) then 
         do j=0,jx-1
          jj=min(ny_, je_pe+jx-j)
          call mpi_recv(a(:,jj,:),len,mpi_real8,my_pe+1,tag,my_comm,Status,ierr)
         enddo
       endif
!      Austausch von Sueden nach Norden
       if (my_blk_j /= n_pes_j) then 
         do j=jx,2*jx-1
          jj=max(0, je_pe+jx-j)
          call mpi_send(a(:,jj,:),len,mpi_real8,my_pe+1,tag,my_comm,ierr)
         enddo
        endif
        if (my_blk_j /=1 ) then
         do j=jx-1,0,-1
          jj=max(0, js_pe-jx+j)
          call mpi_recv(a(:,jj,:),len,mpi_real8,my_pe-1,tag,my_comm,Status,ierr)
         enddo
        endif
      endif
      call mpi_barrier(my_comm,ierr)
 end subroutine border_exchg3D


 subroutine border_exchg2D(nx_,ny_,a,jx)
!--------------------------------------------------------------
!     Exchange overlapping areas of 2D array a in all PEs of sub 
!     domain. Number of overlapping indicees are given by jx.
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      integer :: jx,nx_,ny_,jj
      real*8  :: a(nx_,ny_)
      integer  ::  j, tag, ierr
      include "mpif.h"
      integer,dimension(MPI_STATUS_SIZE)  :: Status
      tag  = 0
      call mpi_barrier(my_comm ,ierr)
      if ( n_pes_j > 1) then
!       Austausch von Norden nach Sueden
        if (my_blk_j /=1 ) then
         do j=2*jx-1,jx,-1
          jj=min(ny_,js_pe+j-jx)
          call mpi_send(a(1,jj),nx,mpi_real8,my_pe-1,tag,my_comm ,ierr)
         enddo
        endif
        if (my_blk_j /= n_pes_j) then 
         do j=0,jx-1 
          jj=min(ny_, je_pe+jx-j )
          call mpi_recv(a(1,jj),nx,mpi_real8,my_pe+1,tag,my_comm ,Status,ierr)
         enddo
       endif
!      Austausch von Sueden nach Norden
       if (my_blk_j /= n_pes_j) then 
         do j=jx,2*jx-1
          jj=max(1, je_pe+jx-j )
          call mpi_send(a(1,jj),nx,mpi_real8,my_pe+1,tag,my_comm ,ierr)
         enddo
        endif
        if (my_blk_j /=1 ) then
         do j=jx-1,0,-1   
          jj=max(1, js_pe-jx+j)
          call mpi_recv(a(1,jj),nx,mpi_real8,my_pe-1,tag,my_comm ,Status,ierr)
         enddo
        endif
      endif
      call mpi_barrier(my_comm ,ierr)
 end subroutine border_exchg2D



 subroutine border_exchg_merid(ny_,nz_,a,jx)
!--------------------------------------------------------------
!     Exchange overlapping areas of array a in all PEs of sub 
!     domain. Number of overlapping indicees are given by jx.
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      integer :: jx,ny_,nz_,jj
      real*8  :: a(ny_,nz_)
      integer  ::  j, tag, ierr,len
      include "mpif.h"
      integer,dimension(MPI_STATUS_SIZE)  :: Status
      tag  = 0; len=nz
      call mpi_barrier(my_comm       ,ierr)
      if ( n_pes_j > 1) then
!    Austausch von Norden nach Sueden
        if (my_blk_j /=1 ) then
         do j=2*jx-1,jx,-1
          jj=min(ny_, js_pe+j-jx)
          call mpi_send(a(jj,:),len,mpi_real8,my_pe-1,tag,my_comm ,ierr)
         enddo
        endif
        if (my_blk_j /= n_pes_j) then 
         do j=0,jx-1
          jj=min(ny_, je_pe+jx-j)
          call mpi_recv(a(jj,:),len,mpi_real8,my_pe+1,tag,my_comm ,Status,ierr)
         enddo
       endif
!     Austausch von Sueden nach Norden
       if (my_blk_j /= n_pes_j) then 
         do j=jx,2*jx-1
          jj=max(1, je_pe+jx-j)
          call mpi_send(a(jj,:),len,mpi_real8,my_pe+1,tag,my_comm ,ierr)
         enddo
        endif
        if (my_blk_j /=1 ) then
         do j=jx-1,0,-1
          jj=max(1, js_pe-jx+j)
          call mpi_recv(a(jj,:),len,mpi_real8,my_pe-1,tag,my_comm ,Status,ierr)
         enddo
        endif
      endif
      call mpi_barrier(my_comm ,ierr)
 end subroutine border_exchg_merid


 subroutine pe0_recv_3D(nx_,ny_,nz_,a)
!--------------------------------------------------------------
!     all PEs send their data of a 3D array to PE0
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      integer :: nx_,ny_,nz_
      real*8  :: a(nx_,ny_,nz_)
      integer                     :: js,je,iproc
      integer                     :: tag=0, ierr,len
      include "mpif.h"
      integer, dimension(MPI_STATUS_SIZE) :: Status
      js=js_pe; je=je_pe
      do iproc=1,n_pes-1
       call mpi_barrier(my_comm       ,ierr)
       if ( my_pe == iproc ) then
        call mpi_send(js,1,mpi_integer,0,tag,my_comm       ,ierr)
        call mpi_send(je,1,mpi_integer,0,tag,my_comm       ,ierr)
        len=nx*(je-js+1)*nz
        call mpi_send(a(:,js:je,:),len,mpi_real8,0,tag,my_comm       ,ierr)
       endif
       if ( my_pe == 0 ) then
        call mpi_recv(js,1,mpi_integer,iproc,tag,my_comm       ,Status,ierr)
        call mpi_recv(je,1,mpi_integer,iproc,tag,my_comm       ,Status,ierr)
        len=nx*(je-js+1)*nz
        call mpi_recv(a(:,js:je,:),len,mpi_real8,iproc,tag,my_comm       ,Status,ierr)
       endif
       call mpi_barrier(my_comm       ,ierr)
      enddo
 end subroutine pe0_recv_3D


 subroutine pe0_send_3D(nx_,ny_,nz_,a)
!--------------------------------------------------------------
!     all PEs gets data of a 3D array from PE0
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      integer :: nx_,ny_,nz_
      real*8  :: a(nx_,ny_,nz_)
      integer                      :: js,je,iproc
      integer                      :: tag=0, ierr,len
      include "mpif.h"
      integer, dimension(MPI_STATUS_SIZE) :: Status
      js=js_pe; je=je_pe
      do iproc=1,n_pes-1
       call mpi_barrier(my_comm       ,ierr)
       if ( my_pe == iproc ) then
        call mpi_send(js,1,mpi_integer,0,tag,my_comm       ,ierr)
        call mpi_send(je,1,mpi_integer,0,tag,my_comm       ,ierr)
        len=nx*(je-js+1)*nz
        call mpi_recv(a(:,js:je,:),len,mpi_real8,0,tag,my_comm       ,status,ierr)
       endif
       if ( my_pe == 0 ) then
        call mpi_recv(js,1,mpi_integer,iproc,tag,my_comm       ,Status,ierr)
        call mpi_recv(je,1,mpi_integer,iproc,tag,my_comm       ,Status,ierr)
        len=nx*(je-js+1)*nz
        call mpi_send(a(:,js:je,:),len,mpi_real8,iproc,tag,my_comm       ,ierr)
       endif
       call mpi_barrier(my_comm       ,ierr)
      enddo
 end subroutine pe0_send_3D




 subroutine pe0_recv_2D(nx_,ny_,a)
!--------------------------------------------------------------
!     all PEs send their data of a 2D array to PE0
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      integer :: nx_,ny_
      real*8  :: a(nx_,ny_)
      integer                     :: js,je,iproc
      integer                     :: tag=0, ierr,len
      include "mpif.h"
      integer, dimension(MPI_STATUS_SIZE) :: Status
      js=js_pe; je=je_pe
      do iproc=1,n_pes-1
       call mpi_barrier(my_comm       ,ierr)
       if ( my_pe == iproc ) then
        call mpi_send(js,1,mpi_integer,0,tag,my_comm       ,ierr)
        call mpi_send(je,1,mpi_integer,0,tag,my_comm       ,ierr)
        len=nx*(je-js+1)
        call mpi_send(a(:,js:je),len,mpi_real8,0,tag,my_comm       ,ierr)
       endif
       if ( my_pe == 0 ) then
        call mpi_recv(js,1,mpi_integer,iproc,tag,my_comm       ,Status,ierr)
        call mpi_recv(je,1,mpi_integer,iproc,tag,my_comm       ,Status,ierr)
        len=nx*(je-js+1)
        call mpi_recv(a(:,js:je),len,mpi_real8,iproc,tag,my_comm       ,Status,ierr)
       endif
       call mpi_barrier(my_comm       ,ierr)
      enddo
 end subroutine pe0_recv_2D


 subroutine pe0_send_2D(nx_,ny_,a)
!--------------------------------------------------------------
!     all PEs gets data of a 2D array from PE0
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      integer :: nx_,ny_
      real*8  :: a(nx_,ny_)
      integer                      :: js,je,iproc
      integer                      :: tag=0, ierr,len
      include "mpif.h"
      integer, dimension(MPI_STATUS_SIZE) :: Status
      js=js_pe; je=je_pe
      do iproc=1,n_pes-1
       call mpi_barrier(my_comm       ,ierr)
       if ( my_pe == iproc ) then
        call mpi_send(js,1,mpi_integer,0,tag,my_comm       ,ierr)
        call mpi_send(je,1,mpi_integer,0,tag,my_comm       ,ierr)
        len=nx*(je-js+1)
        call mpi_recv(a(:,js:je),len,mpi_real8,0,tag,my_comm       ,status,ierr)
       endif
       if ( my_pe == 0 ) then
        call mpi_recv(js,1,mpi_integer,iproc,tag,my_comm       ,Status,ierr)
        call mpi_recv(je,1,mpi_integer,iproc,tag,my_comm       ,Status,ierr)
        len=nx*(je-js+1)
        call mpi_send(a(:,js:je),len,mpi_real8,iproc,tag,my_comm       ,ierr)
       endif
       call mpi_barrier(my_comm       ,ierr)
      enddo
 end subroutine pe0_send_2D


 subroutine pe0_recv_1D(nx_,a)
!--------------------------------------------------------------
!     all PEs send their data of a 1D array to PE0
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      integer :: nx_
      real*8  :: a(nx_)
      integer                     :: js,je,iproc
      integer                     :: tag=0, ierr,len
      include "mpif.h"
      integer, dimension(MPI_STATUS_SIZE) :: Status
      js=js_pe; je=je_pe
      do iproc=1,n_pes-1
       call mpi_barrier(my_comm       ,ierr)
       if ( my_pe == iproc ) then
        call mpi_send(js,1,mpi_integer,0,tag,my_comm       ,ierr)
        call mpi_send(je,1,mpi_integer,0,tag,my_comm       ,ierr)
        len=(je-js+1)
        call mpi_send(a(js:je),len,mpi_real8,0,tag,my_comm       ,ierr)
       endif
       if ( my_pe == 0 ) then
        call mpi_recv(js,1,mpi_integer,iproc,tag,my_comm       ,Status,ierr)
        call mpi_recv(je,1,mpi_integer,iproc,tag,my_comm       ,Status,ierr)
        len=(je-js+1)
        call  mpi_recv(a(js:je),len,mpi_real8,iproc,tag,my_comm       ,Status,ierr)
       endif
       call mpi_barrier(my_comm       ,ierr)
      enddo
 end subroutine pe0_recv_1D


 subroutine pe0_send_1D(nx_,a)
!--------------------------------------------------------------
!     all PEs gets data of a 1D array from PE0
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      integer :: nx_
      real*8  :: a(nx_)
      integer                      :: js,je,iproc
      integer                      :: tag=0, ierr,len
      include "mpif.h"
      integer, dimension(MPI_STATUS_SIZE) :: Status
      js=js_pe; je=je_pe
      do iproc=1,n_pes-1
       call mpi_barrier(my_comm       ,ierr)
       if ( my_pe == iproc ) then
        call mpi_send(js,1,mpi_integer,0,tag,my_comm       ,ierr)
        call mpi_send(je,1,mpi_integer,0,tag,my_comm       ,ierr)
        len=(je-js+1)
        call mpi_recv(a(js:je),len,mpi_real8,0,tag,my_comm       ,status,ierr)
       endif
       if ( my_pe == 0 ) then
        call mpi_recv(js,1,mpi_integer,iproc,tag,my_comm       ,Status,ierr)
        call mpi_recv(je,1,mpi_integer,iproc,tag,my_comm       ,Status,ierr)
        len=(je-js+1)
        call mpi_send(a(js:je),len,mpi_real8,iproc,tag,my_comm       ,ierr)
       endif
       call mpi_barrier(my_comm       ,ierr)
      enddo
 end subroutine pe0_send_1D

 subroutine pe0_recv_int(a)
!--------------------------------------------------------------
!     all PEs send integer to PE0
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      integer :: a
      include "mpif.h"
      integer :: iproc,ierr, status(MPI_STATUS_SIZE),tag=0
      do iproc=1,n_pes-1
       call mpi_barrier(my_comm       ,ierr)
       if ( my_pe == iproc ) then
        call mpi_send(a,1,mpi_integer,0,tag,my_comm       ,ierr)
       endif
       if ( my_pe == 0 ) then
        call mpi_recv(a,1,mpi_integer,iproc,tag,my_comm       ,Status,ierr)
       endif
       call mpi_barrier(my_comm       ,ierr)
      enddo
 end subroutine pe0_recv_int


 subroutine pe0_send_int(a)
!--------------------------------------------------------------
!     all PEs gets integer from PE0
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      integer :: a
      include "mpif.h"
      integer :: iproc,ierr, status(MPI_STATUS_SIZE),tag=0
      do iproc=1,n_pes-1
       call mpi_barrier(my_comm       ,ierr)
       if ( my_pe == iproc ) then
        call mpi_recv(a,1,mpi_integer,0,tag,my_comm       ,Status,ierr)
       endif
       if ( my_pe == 0 ) then
        call mpi_send(a,1,mpi_integer,iproc,tag,my_comm       ,ierr)
       endif
       call mpi_barrier(my_comm       ,ierr)
      enddo
 end subroutine pe0_send_int



 subroutine global_max(x)
!--------------------------------------------------------------
!     Get the max of real x over all PEs in sub domain
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      real*8    :: x, x_sym,x_sym2
      integer :: ierr
      include "mpif.h"
      x_sym = x
      call mpi_allreduce(x_sym,x_sym2,1,mpi_real8,MPI_MAX,my_comm       ,ierr)
      x = x_sym2
 end subroutine global_max



 subroutine global_max_int(x,len)
!--------------------------------------------------------------
!     Get the max of integer x over all PEs in sub domain
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      include "mpif.h"
      integer  :: x(len), x_sym(len),x_sym2(len)
      integer :: ierr,len
!     skip this copying overhead  here
      x_sym = x
      call mpi_allreduce(x_sym,x_sym2,len,mpi_integer,MPI_MAX,my_comm,ierr)
!      call print_mpi_error(ierr)
      x = x_sym2
 end subroutine global_max_int


 subroutine global_sum(x)
!--------------------------------------------------------------
!     Do a sum of real x over all PEs in sub domain
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      real*8    :: x,x_sym,x_sym2
      integer :: ierr
      include "mpif.h"
      x_sym = x
      call mpi_allreduce(x_sym,x_sym2,1,mpi_real8,MPI_SUM,my_comm       ,ierr)
      x = x_sym2
 end subroutine global_sum


 subroutine global_sum_vec(x,len)
!--------------------------------------------------------------
!     Do a sum of real vecter x(len) over all PEs in sub domain
!--------------------------------------------------------------
      use pyOM_module
      implicit none
      include "mpif.h"
      integer :: ierr,len
      real*8  :: x(len), x_sym(len),x_sym2(len)
      x_sym(1:len) = x(1:len)
      call mpi_allreduce(x_sym,x_sym2,len,mpi_real8,MPI_SUM,my_comm,ierr)
      x(1:len) = x_sym2(1:len)
end subroutine global_sum_vec


 subroutine bcast_integer(x,len,pe)
!--------------------------------------------------------------
!     Broadcast a integer vector to the local sub domain
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      integer len,ierr,pe
      integer x(len)
      include "mpif.h"
      call mpi_barrier(my_comm, ierr)
      call mpi_bcast(x,len,mpi_integer,pe,my_comm,ierr)
 end subroutine bcast_integer


 subroutine bcast_real(x,len,pe)
!--------------------------------------------------------------
!     Broadcast a real vector to all sub domains
!--------------------------------------------------------------
      use pyOM_module   
      implicit none
      integer len,ierr,pe
      real*8 x(len)
      include "mpif.h"
      call mpi_barrier(my_comm, ierr)
      call mpi_bcast(x,len,mpi_real8,pe,my_comm,ierr)
 end subroutine bcast_real

