#include "options.inc"

c#define test_exchg
c
c-----------------------------------------------------------------------
c     all the routines which serve for the communication
c     between the sub domains are located here
c     c.eden
c-----------------------------------------------------------------------
c


      subroutine nmax_exchg(nmax)
c
c-----------------------------------------------------------------------
c     exchange number of time steps before communicate
c     over sub domain
c-----------------------------------------------------------------------
c
      use spflame_module
      implicit none
      integer nmax
#ifndef no_mpp
#include "sub_mpif.h"
      integer,dimension(MPI_STATUS_SIZE)  :: Status
      integer ierr,tag,nchild,all_pes
      tag=0; if (mother<0) nmax=1
c     see if there is anybody out there
      call mpi_comm_size(mpi_comm_world,all_pes,ierr)
      if (all_pes == n_pes) return
      if (mother>0) then
       if (my_pe==0)
     &  call mpi_recv(nmax,1,impiinteger,sub_domain_pe0(mother),
     &                tag,mpi_comm_world,status,ierr)
       call mpi_bcast(nmax,1,impiinteger,0,my_comm,ierr)
       nmax=nmax*zoom_fac
      endif
      do nchild=1,nr_childs
        if (my_pe == 0)
     &   call mpi_send(nmax,1,impiinteger,
     &        sub_domain_pe0(childs(nchild)),
     &                 tag,mpi_comm_world,ierr)
      enddo
#else
      nmax=1
#endif
      end subroutine nmax_exchg

 
      subroutine index_send(is,ie,js,je)
c
c-----------------------------------------------------------------------
c     send starting and end indicees of this sub domain to mother
c-----------------------------------------------------------------------
c
      use spflame_module
      implicit none
      integer is,ie,js,je
#ifndef no_mpp
#include "sub_mpif.h"
      integer,dimension(MPI_STATUS_SIZE)  :: Status
      integer ierr,tag,all_pes

      tag=101

      if (my_pe == 0) then
       call mpi_send(is_zoom,1,impiinteger,sub_domain_pe0(mother),
     &               tag,mpi_comm_world,ierr)
       call mpi_send(ie_zoom,1,impiinteger,sub_domain_pe0(mother),
     &              tag,mpi_comm_world,ierr)
       call mpi_send(js_zoom,1,impiinteger,sub_domain_pe0(mother),
     &               tag,mpi_comm_world,ierr)
       call mpi_send(je_zoom,1,impiinteger,sub_domain_pe0(mother),
     &               tag,mpi_comm_world,ierr)
      endif
#endif
      end subroutine index_send


 
      subroutine index_get(is,ie,js,je,n_domain)
c
c-----------------------------------------------------------------------
c     get the starting and end indicees of sub domain
c-----------------------------------------------------------------------
c
      use spflame_module
      implicit none
      integer is,ie,js,je,n_domain
#ifndef no_mpp
#include "sub_mpif.h"
      integer,dimension(MPI_STATUS_SIZE)  :: Status
      integer ierr,tag,all_pes

      tag=101

      if (my_pe == 0) then
       call mpi_recv(is,1,impiinteger,sub_domain_pe0(n_domain),
     &               tag,mpi_comm_world,status,ierr)
       call mpi_recv(ie,1,impiinteger,sub_domain_pe0(n_domain),
     &               tag,mpi_comm_world,status,ierr)
       call mpi_recv(js,1,impiinteger,sub_domain_pe0(n_domain),
     &               tag,mpi_comm_world,status,ierr)
       call mpi_recv(je,1,impiinteger,sub_domain_pe0(n_domain),
     &               tag,mpi_comm_world,status,ierr)
      endif
      call mpi_bcast(is,1,impiinteger,0,my_comm,ierr)
      call mpi_bcast(ie,1,impiinteger,0,my_comm,ierr)
      call mpi_bcast(js,1,impiinteger,0,my_comm,ierr)
      call mpi_bcast(je,1,impiinteger,0,my_comm,ierr)
c      check for consistency
      if (is<1 .or. ie>imt .or. js<1 .or. je>jmt) then
        print*,' sub domain chosen by ',
     &         'is_zoom,ie_zoom,js_zoom,je_zoom=',is,ie,js,je
        print*,' must be inside mother domain given by imt,jmt=1,1,',
     &         imt,jmt
        print*,' adjust namelist input for sub_domain '
        call halt_stop(' in index_get')
      endif
#endif
      end subroutine index_get





      subroutine initial_exchg
c
c---------------------------------------------------------------
c     Exchange initial conditions over sub domains
c---------------------------------------------------------------
c
      use spflame_module
      implicit none
#ifndef no_mpp
#include "sub_mpif.h"
      integer,dimension(MPI_STATUS_SIZE)  :: Status
      integer ierr,tag,all_pes,len
      integer is,ie,js,je,n,nn,nchild
      integer i,j,ii,jj,ismooth,k,kk
      real, allocatable :: buf(:,:,:,:,:),buf2(:,:,:,:)
      real, allocatable :: mbuf(:,:),cbuf(:,:)

c  see if there is anybody out there
      call mpi_comm_size(mpi_comm_world,all_pes,ierr)
      if (all_pes == n_pes) return
      tag=0

      if (mother>0) then
c
c      send a flag if initial conditions are needed
c
       i=1; if (read_restart) i=0
       if (my_pe==0) call send_world_integer(i,1,
     &                      sub_domain_pe0(mother),0)

       if (read_restart) then
c
c       read the restart 
c
        call read_restart_file
       else

        if (my_pe==0) print*,
     &     ' receiving initial conditions from mother domain'

c
c       get initial conditions from mother domain
c
        call index_send(is,ie,js,je)
c
c       receive T and S
c
        allocate(buf(is_zoom:ie_zoom,km_mother,js_zoom:je_zoom,2,2))
        buf=0.
        len=(ie_zoom-is_zoom+1)*(je_zoom-js_zoom+1)*km_mother*2*2
        if (my_pe==0) 
     &   call mpi_recv(buf,len,impireal,sub_domain_pe0(mother),
     &                 tag,mpi_comm_world,status,ierr)
        call mpi_bcast(buf,len,impireal,0,my_comm,ierr)
c
c       transfer coarse grid data to fine grid
c
        do k=1,km_mother
         do kk=(k-1)*zoom_fac_k + 1,k*zoom_fac_k  
          do j=js_zoom,je_zoom
           do ii=-zoom_fac/2,zoom_fac/2
            do jj=-zoom_fac/2,zoom_fac/2
             js=(j-js_zoom)*zoom_fac +jj+1
             do i=is_zoom,ie_zoom
              is=(i-is_zoom)*zoom_fac +ii+1
              if (is>=is_pe.and.is<=ie_pe.and.
     &            js>=js_pe.and.js<=je_pe ) then
                 t(is,kk,js,1:2,taum1)=buf(i,k,j,1:2,1)
                 t(is,kk,js,1:2,tau  )=buf(i,k,j,1:2,2)
              endif
             enddo
            enddo
           enddo
          enddo
         enddo
        enddo
c
c       extrapolate into blank areas and smooth
c
        is=zoom_fac/2
        allocate( mbuf(is_pe-is:ie_pe+is,js_pe-is:je_pe+is) )
        mbuf=0.
        do n=1,nt
         do k=1,km
          mbuf(is_pe:ie_pe,js_pe:je_pe)= 
     &             t(is_pe:ie_pe,k,js_pe:je_pe,n,tau)
          call border_exchg(mbuf,1,is)
          call extrapolate_hor(mbuf,is,-1.e23,'T',k)
          call border_exchg(mbuf,1,is)
          call smooth_hor(mbuf,is,'T',k)
          t(is_pe:ie_pe,k,js_pe:je_pe,n,tau)=
     &          mbuf(is_pe:ie_pe,js_pe:je_pe) 
          mbuf(is_pe:ie_pe,js_pe:je_pe)= 
     &             t(is_pe:ie_pe,k,js_pe:je_pe,n,taum1)
          call border_exchg(mbuf,1,is)
          call extrapolate_hor(mbuf,is,-1.e23,'T',k)
          call border_exchg(mbuf,1,is)
          call smooth_hor(mbuf,is,'T',k)
          t(is_pe:ie_pe,k,js_pe:je_pe,n,taum1)=
     &          mbuf(is_pe:ie_pe,js_pe:je_pe) 
         enddo
        enddo
        deallocate( mbuf)
c
c       receive U and V
c
        len=(ie_zoom-is_zoom+1)*(je_zoom-js_zoom+1)
     &      *km_mother*2*2
        if (my_pe==0) 
     &   call mpi_recv(buf,len,impireal,sub_domain_pe0(mother),
     &                 tag,mpi_comm_world,status,ierr)
        call mpi_bcast(buf,len,impireal,0,my_comm,ierr)
c
c       Note: velocity is only projected onto finer grid
c             errors arising since necessary extrapolating
c             is avoided here. However the blank areas should be
c             zero which is good.
c
c
c       transfer coarse grid data to fine grid
c
        do k=1,km_mother
         do kk=(k-1)*zoom_fac_k + 1,k*zoom_fac_k  
          do j=js_zoom,je_zoom
           do ii=-zoom_fac/2,zoom_fac/2
            do jj=-zoom_fac/2,zoom_fac/2
             js=(j-js_zoom)*zoom_fac + jj+1+zoom_fac/2
             do i=is_zoom,ie_zoom
              is=(i-is_zoom)*zoom_fac + ii+1+zoom_fac/2
              if (is>=is_pe.and.is<=ie_pe.and.
     &            js>=js_pe.and.js<=je_pe ) then
                 u(is,kk,js,1:2,taum1)=buf(i,k,j,1:2,1)
                 u(is,kk,js,1:2,tau  )=buf(i,k,j,1:2,2)
              endif
             enddo
            enddo
           enddo
          enddo
         enddo
        enddo
c
c       smooth velocity components
c
        is=zoom_fac/2
        allocate( mbuf(is_pe-is:ie_pe+is,js_pe-is:je_pe+is) )
        mbuf=0.
        do k=1,km
         do n=1,2
          mbuf(is_pe:ie_pe,js_pe:je_pe)= 
     &             u(is_pe:ie_pe,k,js_pe:je_pe,n,tau)
          call border_exchg(mbuf,1,is)
          call smooth_hor(mbuf,is,'U',k)
          u(is_pe:ie_pe,k,js_pe:je_pe,n,tau)=
     &          mbuf(is_pe:ie_pe,js_pe:je_pe) 
          mbuf(is_pe:ie_pe,js_pe:je_pe)= 
     &             u(is_pe:ie_pe,k,js_pe:je_pe,n,taum1)
          call border_exchg(mbuf,1,is)
          call smooth_hor(mbuf,is,'U',k)
          u(is_pe:ie_pe,k,js_pe:je_pe,n,taum1)=
     &          mbuf(is_pe:ie_pe,js_pe:je_pe) 
         enddo
        enddo
        deallocate( buf,mbuf)
c
c       receive psi
c
        allocate(buf(is_zoom:ie_zoom,1,js_zoom:je_zoom,1,1) )
        buf=0.; len=(ie_zoom-is_zoom+1)*(je_zoom-js_zoom+1)
        if (my_pe==0) 
     &   call mpi_recv(buf,len,impireal,sub_domain_pe0(mother),
     &                 tag,mpi_comm_world,status,ierr)
        call mpi_bcast(buf,len,impireal,0,my_comm,ierr)
c
c       transfer coarse grid data to fine grid
c
        do j=js_zoom,je_zoom
         do ii=-zoom_fac/2,zoom_fac/2
          do jj=-zoom_fac/2,zoom_fac/2
           js=(j-js_zoom)*zoom_fac +jj+1
           do i=is_zoom,ie_zoom
              is=(i-is_zoom)*zoom_fac +ii+1
              if (is>=is_pe.and.is<=ie_pe.and.
     &            js>=js_pe.and.js<=je_pe ) then
                psi(is,js,1)=buf(i,1,j,1,1)
                psi(is,js,2)=buf(i,1,j,1,1)
              endif
           enddo
          enddo
         enddo
        enddo
        deallocate(buf)
c
c       smooth psi, we have to adjust the boundary conditions
c       here in principle. However, neglect this at the moment
c
        is=zoom_fac/2
        allocate( mbuf(is_pe-is:ie_pe+is,js_pe-is:je_pe+is) )
        mbuf=0.
        mbuf(is_pe:ie_pe,js_pe:je_pe)= 
     &             psi(is_pe:ie_pe,js_pe:je_pe,1)
        call border_exchg(mbuf,1,is)
        call smooth_hor(mbuf,is,'T',1)
        psi(is_pe:ie_pe,js_pe:je_pe,1)=
     &          mbuf(is_pe:ie_pe,js_pe:je_pe) 
        mbuf(is_pe:ie_pe,js_pe:je_pe)= 
     &             psi(is_pe:ie_pe,js_pe:je_pe,2)
        call border_exchg(mbuf,1,is)
        call smooth_hor(mbuf,is,'T',1)
        psi(is_pe:ie_pe,js_pe:je_pe,2)=
     &          mbuf(is_pe:ie_pe,js_pe:je_pe) 
        deallocate( mbuf)

c       I do not care about that
        if (enable_ktmix) dml=0.

        itt_restart=1 ! restart_stamp, etc is exchanged afterwards
                      ! but here we have to make sure that mixing time
                      ! steps are reproducable, so do not exhange
                      ! itt_restart read from a restart

        if (my_pe==0) print*, ' done'

       endif  ! read_restart
c
c      receive informations about viscosity in mother domain
c
       call visc_parameter_exchg(-1)
c 
c      receive time stepping related stuff from mother
c
       if (my_pe == 0) then
        call mpi_recv(restart_stamp,32,mpi_character,
     &        sub_domain_pe0(mother),tag,mpi_comm_world,status,ierr)
        call mpi_recv(time_step,1,impireal,
     &        sub_domain_pe0(mother),tag,mpi_comm_world,status,ierr)
        call mpi_recv(runlen,1,impireal,
     &        sub_domain_pe0(mother),tag,mpi_comm_world,status,ierr)
        call mpi_recv(snap_int,1,impireal,
     &        sub_domain_pe0(mother),tag,mpi_comm_world,status,ierr)
       endif
       call mpi_bcast(restart_stamp,32,mpi_character,0,my_comm,ierr)
       call mpi_bcast(time_step,1,impireal,0,my_comm,ierr)
       call mpi_bcast(runlen,1,impireal,0,my_comm,ierr)
       call mpi_bcast(snap_int,1,impireal,0,my_comm,ierr)
       time_step=time_step/zoom_fac

      endif  ! mother >0
c
c     send initial conditions to sub domains
c
      do nchild=1,nr_childs
c
c      receive the flag from child
c
       if (my_pe==0) call recv_world_integer(i,1,
     &                      sub_domain_pe0(childs(nchild)),0)
       call bcast_integer(i,1,0)

       if (i==1) then

        if (my_pe==0) print*,
     &    ' sending initial conditions to child domain #',nchild
c
c       the child needs its initial conditions
c
        call index_get(is,ie,js,je,childs(nchild))
        allocate(buf(is:ie,km,js:je,2,2)); buf=0.
        allocate(cbuf(imt,jmt));cbuf=0.
c       collect t and s in buf
        do k=1,km
         do n=1,2
          do j=max(js_pe,js),min(je_pe,je)
           do i=max(is,is_pe),min(ie,ie_pe)
            cbuf(i,j)=t(i,k,j,n,tau)             
c           send also a mask
            if (tmask(i,k,j)==0.)cbuf(i,j)=-1e23
           enddo
          enddo
          call pe0_recv_2D(cbuf);
          buf(is:ie,k,js:je,n,1)=cbuf(is:ie,js:je) ! send just tau
          buf(is:ie,k,js:je,n,2)=cbuf(is:ie,js:je) ! instead of tau and taum1
         enddo
        enddo
c       send this stuff
        len=(ie-is+1)*(je-js+1)*km*2*2; 
        if (my_pe==0) 
     &   call mpi_send(buf,len,impireal,sub_domain_pe0(childs(nchild)),
     &                 tag,mpi_comm_world,ierr)
c       collect U and V in buf
        do k=1,km
         do n=1,2
          do j=max(js_pe,js),min(je_pe,je)
           do i=max(is,is_pe),min(ie,ie_pe)
            cbuf(i,j)=u(i,k,j,n,tau)            
           enddo
          enddo
          call pe0_recv_2D(cbuf);
          buf(is:ie,k,js:je,n,1)=cbuf(is:ie,js:je) ! send just tau
          buf(is:ie,k,js:je,n,2)=cbuf(is:ie,js:je) ! instead of tau and taum1
         enddo
        enddo
c       send this stuff
        len=(ie-is+1)*(je-js+1)*km*2*2; 
        if (my_pe==0) 
     &   call mpi_send(buf,len,impireal,sub_domain_pe0(childs(nchild)),
     &                 tag,mpi_comm_world,ierr)
c       collect psi
        do j=max(js_pe,js),min(je_pe,je)
          do i=max(is,is_pe),min(ie,ie_pe)
            cbuf(i,j)=psi(i,j,1)            
          enddo
        enddo
        call pe0_recv_2D(cbuf);
        buf(is:ie,1,js:je,1,1)=cbuf(is:ie,js:je)
c       send this stuff
        len=(ie-is+1)*(je-js+1); 
        if (my_pe==0) 
     &   call mpi_send(buf(is:ie,1,js:je,1,1),len,impireal,
     &                 sub_domain_pe0(childs(nchild)),tag,
     &                 mpi_comm_world,ierr)
        deallocate(buf,cbuf)

        if (my_pe==0) print*,' done '

       endif ! i==1
c
c      send informations about viscosity in this domain
c
       call visc_parameter_exchg(childs(nchild))
c 
c      send time stepping related stuff to child
c
       if (my_pe==0) then
        call mpi_send(restart_stamp,32,mpi_character,
     &   sub_domain_pe0(childs(nchild)),tag,mpi_comm_world,ierr)
        call mpi_send(time_step,1,impireal,
     &   sub_domain_pe0(childs(nchild)),tag,mpi_comm_world,ierr)
        call mpi_send(runlen,1,impireal,
     &   sub_domain_pe0(childs(nchild)),tag,mpi_comm_world,ierr)
        call mpi_send(snap_int,1,impireal,
     &   sub_domain_pe0(childs(nchild)),tag,mpi_comm_world,ierr)
       endif
      enddo ! nchild
#endif
      end subroutine initial_exchg



      subroutine domain_exchg
c
c---------------------------------------------------------------
c     Exchange necessary data over sub domains
c---------------------------------------------------------------
c
      use spflame_module
      use blue_module
      implicit none
#ifndef no_mpp
#include "sub_mpif.h"
      integer,dimension(MPI_STATUS_SIZE)  :: Status
      integer :: ierr,tag,all_pes,nchild,pe_child
      integer :: is,ie,js,je,len,idoit
      integer :: i,j,ii,jj,n,ismooth,k,nn,kk
      integer :: send_rho
      real, allocatable :: p_s(:),p_n(:),p_e(:),p_w(:)
      real, allocatable :: buf(:,:,:),buf2(:,:),buf3(:,:,:)
      real, allocatable :: buf4(:,:,:,:)
      real, allocatable :: cbuf(:,:)

c  see if there is anybody out there
      call mpi_comm_size(mpi_comm_world,all_pes,ierr)
      if (all_pes == n_pes) return
      tag=0

#ifdef test_exchg
      if (my_pe==0) print*,' exchanging data across sub domains '
#endif

      if (mother>0) then
c
c       send/receive data to/from mother
c
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' index exchg'
#endif
        call index_send(is,ie,js,je)
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe, ' done'
#endif
        ismooth=zoom_fac/2
c       Note: if ismooth is greater than zoom_fac/2
c             umask/tmask must be also redimensioned
c
c       receive northern and southern streamfunction values
c
        allocate(p_s(is_zoom:ie_zoom),p_n(is_zoom:ie_zoom) )
        p_s=0.; p_n=0.; len=ie_zoom-is_zoom+1
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' receive psi 1'
#endif
        if (my_pe == 0) then
         call mpi_recv(p_s,len,impireal,sub_domain_pe0(mother),
     &                 tag,mpi_comm_world,status,ierr)
         call mpi_recv(p_n,len,impireal,sub_domain_pe0(mother),
     &                 tag,mpi_comm_world,status,ierr)
        endif
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe, ' done'
#endif
        call mpi_bcast(p_s,len,impireal,0,my_comm,ierr)
        call mpi_bcast(p_n,len,impireal,0,my_comm,ierr)
        do ii=-zoom_fac/2,zoom_fac/2
          do i=is_zoom,ie_zoom
           is=(i-is_zoom)*zoom_fac + ii+1
           if (is>=1.and.is<=imt) then
               psi_wall_south(is)=p_s(i)
               psi_wall_north(is)=p_n(i)
           endif
          enddo
        enddo
        deallocate( p_s,p_n)
c
c       smooth streamfunction at southern and northern border
c         
        is=ismooth
        call smooth_zonal(psi_wall_south(is_pe:ie_pe),is,2,1)
        call smooth_zonal(psi_wall_north(is_pe:ie_pe),is,jmt-1,1)
c
c       receive western and eastern streamfunction values
c
        allocate(p_w(js_zoom:je_zoom),p_e(js_zoom:je_zoom) )
        p_w=0.; p_e=0.; len=je_zoom-js_zoom+1
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' receive psi 2'
#endif
        if (my_pe == 0) then
         call mpi_recv(p_w,len,impireal,sub_domain_pe0(mother),
     &                 tag,mpi_comm_world,status,ierr)
         call mpi_recv(p_e,len,impireal,sub_domain_pe0(mother),
     &                 tag,mpi_comm_world,status,ierr)
        endif
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,' done'
#endif
        call mpi_bcast(p_w,len,impireal,0,my_comm,ierr)
        call mpi_bcast(p_e,len,impireal,0,my_comm,ierr)
        do jj=-zoom_fac/2,zoom_fac/2
           do j=js_zoom,je_zoom
            js=(j-js_zoom)*zoom_fac + jj+1
            if (js>=1.and.js<=jmt) then
               psi_wall_east(js)=p_e(j)
               psi_wall_west(js)=p_w(j)
            endif
           enddo
        enddo
        deallocate( p_e,p_w)
c
c       smooth streamfunction at eastern and western border
c         
        is=ismooth
        call smooth_merid(psi_wall_west(js_pe:je_pe),is,2,1)
        call smooth_merid(psi_wall_east(js_pe:je_pe),is,imt-1,1)
c
c       receive surface momentum fluxes values
c
        allocate(buf(is_zoom:ie_zoom,js_zoom:je_zoom,2) )
        buf=0.; len=(ie_zoom-is_zoom+1)*(je_zoom-js_zoom+1)*2
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' reiceive tau'
#endif
        if (my_pe == 0) then
         call mpi_recv(buf,len,impireal,sub_domain_pe0(mother),
     &                 tag,mpi_comm_world,status,ierr)
        endif
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,' done'
#endif
        call mpi_bcast(buf,len,impireal,0,my_comm,ierr)
        is=ismooth
        allocate(buf2(is_pe-is:ie_pe+is,js_pe-is:je_pe+is) )
        buf2=-1e23
        do n=1,2
c
c        transfer coarse grid wind stress to fine grid
c
         do j=js_zoom,je_zoom
          do ii=-zoom_fac/2,zoom_fac/2
           do jj=-zoom_fac/2,zoom_fac/2
             js=(j-js_zoom)*zoom_fac + jj+1+zoom_fac/2
             do i=is_zoom,ie_zoom
              is=(i-is_zoom)*zoom_fac + ii+1+zoom_fac/2
              if (is>=is_pe.and.is<=ie_pe.and.
     &            js>=js_pe.and.js<=je_pe ) buf2(is,js)=buf(i,j,n)
            enddo
           enddo
          enddo
         enddo
c
c        extrapolate wind stress into blank areas
c
         call border_exchg(buf2,1,ismooth)
         call extrapolate_hor(buf2,ismooth,-1.e23,'U',1)
c
c        now smooth surface momentum fluxes
c        which has nearly the same effect as interpolating
c
         call border_exchg(buf2,1,ismooth)
         call smooth_hor(buf2,ismooth,'U',1)
         smf(is_pe:ie_pe,js_pe:je_pe,n)=buf2(is_pe:ie_pe,js_pe:je_pe)

        enddo !n=1,2
        deallocate( buf,buf2)
c
c       receive restoring time scales and climatogical values
c       for the tracer fluxes 
c
        allocate(buf(is_zoom:ie_zoom,js_zoom:je_zoom,nt*2) )
        buf=0.
        len=(ie_zoom-is_zoom+1)*(je_zoom-js_zoom+1)*nt*2
c
c       get number of tracers for which the fluxes are meant
c
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' receive tracer fluxes'
#endif
        if (my_pe == 0) 
     &    call mpi_recv(i,1,impiinteger,sub_domain_pe0(mother),
     &                  tag,mpi_comm_world,status,ierr)
        call mpi_bcast(i,1,impiinteger,0,my_comm,ierr)
        if (i/=nt) then
         if (my_pe==0) then
           print*,' ERROR:'
           print*,' number of restoring time scales and clim.'
           print*,' values do not match the number of tracer'
           print*,' in sub domain '
         endif
         call halt_stop(' in domain_exchg ')
        endif
        if (my_pe == 0) 
     &   call mpi_recv(buf,len,impireal,sub_domain_pe0(mother),
     &                 tag,mpi_comm_world,status,ierr)
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,' done'
#endif
        call mpi_bcast(buf,len,impireal,0,my_comm,ierr)
c
c       transfer coarse grid data to fine grid
c
        do n=1,nt 
         do j=js_zoom,je_zoom
          do ii=-zoom_fac/2,zoom_fac/2
           do jj=-zoom_fac/2,zoom_fac/2
            js=(j-js_zoom)*zoom_fac + jj+1
            do i=is_zoom,ie_zoom
             is=(i-is_zoom)*zoom_fac + ii+1
             if (is>=is_pe.and.is<=ie_pe.and.
     &            js>=js_pe.and.js<=je_pe ) then
               stf_rest(is,js,n)=buf(i,j,n)
               stf_clim(is,js,n)=buf(i,j,n+nt)
             endif
            enddo
           enddo
          enddo
         enddo
        enddo
        deallocate( buf)
c
c       smooth tracer fluxes 
c
        is=zoom_fac/2
        allocate( buf2(is_pe-is:ie_pe+is,js_pe-is:je_pe+is) )
        buf2=0.
        do n=1,nt
         buf2(is_pe:ie_pe,js_pe:je_pe)= 
     &             stf_rest(is_pe:ie_pe,js_pe:je_pe,n)
         call border_exchg(buf2,1,is)
         call smooth_hor(buf2,is,'T',1)
         stf_rest(is_pe:ie_pe,js_pe:je_pe,n)= 
     &             buf2(is_pe:ie_pe,js_pe:je_pe)
         buf2(is_pe:ie_pe,js_pe:je_pe)= 
     &             stf_clim(is_pe:ie_pe,js_pe:je_pe,n)
         call border_exchg(buf2,1,is)
         call smooth_hor(buf2,is,'T',1)
         stf_clim(is_pe:ie_pe,js_pe:je_pe,n)= 
     &             buf2(is_pe:ie_pe,js_pe:je_pe)
        enddo
        deallocate( buf2)

        if (enable_ktmix.or.enable_tkemix) then
c
c        receive ustar fluxes
c
#ifdef test_exchg
         print*,' DOMAIN=',sub_domain, ' PE=',my_pe,' receive ktmix'
#endif
         allocate(buf(is_zoom:ie_zoom,js_zoom:je_zoom,1) )
         buf=0.; len=(ie_zoom-is_zoom+1)*(je_zoom-js_zoom+1)
c
c        shake hands with mother domain
c
         if (my_pe == 0) then; i=1
          call mpi_send(i,1,impiinteger,sub_domain_pe0(mother),
     &                  tag,mpi_comm_world,ierr)
         endif
         if (my_pe == 0) 
     &   call mpi_recv(buf,len,impireal,sub_domain_pe0(mother),
     &                 tag,mpi_comm_world,status,ierr)
         call mpi_bcast(buf,len,impireal,0,my_comm,ierr)
c
c        transfer coarse grid data to fine grid
c
         do j=js_zoom,je_zoom
          do ii=-zoom_fac/2,zoom_fac/2
           do jj=-zoom_fac/2,zoom_fac/2
            js=(j-js_zoom)*zoom_fac + jj+1
            do i=is_zoom,ie_zoom
             is=(i-is_zoom)*zoom_fac + ii+1
             if (is>=is_pe.and.is<=ie_pe.and.
     &            js>=js_pe.and.js<=je_pe ) then
               ustar(is,js)=buf(i,j,1)
             endif
            enddo
           enddo
          enddo
         enddo
         deallocate( buf)
c
c        smooth ustar 
c
         is=zoom_fac/2
         allocate( buf2(is_pe-is:ie_pe+is,js_pe-is:je_pe+is) )
         buf2=0.
         buf2(is_pe:ie_pe,js_pe:je_pe)=ustar(is_pe:ie_pe,js_pe:je_pe)
         call border_exchg(buf2,1,is)
         call smooth_hor(buf2,is,'T',1)
         ustar(is_pe:ie_pe,js_pe:je_pe)=buf2(is_pe:ie_pe,js_pe:je_pe)
         deallocate( buf2)

#ifdef test_exchg
         print*,' DOMAIN=',sub_domain, ' PE=',my_pe,' done'
#endif
        elseif (my_pe==0) then
c
c        send a signal that no ustar is wanted
c
         i=0; call mpi_send(i,1,impiinteger,sub_domain_pe0(mother),
     &                      tag,mpi_comm_world,ierr)

        endif ! enable_ktmix.or.enable_tkemix

#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' receive T.S. o.b.c.'
#endif
        allocate(buf(km_mother,js_zoom:je_zoom,2) ); buf=0.

        if (restore_TS_obc_west) then
c
c        receive western TS boundaries
c
         i=1; if (my_pe==0) 
     &    call mpi_send(i,1,impiinteger,sub_domain_pe0(mother),
     &                  tag,mpi_comm_world,ierr)
         len=(je_zoom-js_zoom+1)*2*km_mother
         if (my_pe == 0) 
     &    call mpi_recv(buf,len,impireal,sub_domain_pe0(mother),
     &                  tag,mpi_comm_world,status,ierr)
         call mpi_bcast(buf,len,impireal,0,my_comm,ierr)
c
c        transfer coarse grid data to fine grid
c
         do k=1,km_mother
          do kk=(k-1)*zoom_fac_k + 1,k*zoom_fac_k  
           do jj=-zoom_fac/2,zoom_fac/2
            do j=js_zoom,je_zoom
             js=(j-js_zoom)*zoom_fac + jj+1
             if (js>=js_pe.and.js<=je_pe ) then
              ts_obc_west(js,kk,1)=buf(k,j,1)
              ts_obc_west(js,kk,2)=buf(k,j,2)
             endif
            enddo
           enddo
          enddo
         enddo
c
c        smooth T and S
c
         ismooth=zoom_fac/2; is=ismooth
         call smooth_merid(ts_obc_west(:,:,1),is,2,km)
         call smooth_merid(ts_obc_west(:,:,2),is,2,km)

        else
         i=0   ! a signal that no western TS b.c. are needed
         if (my_pe==0) 
     &    call mpi_send(i,1,impiinteger,sub_domain_pe0(mother),
     &                  tag,mpi_comm_world,ierr)
        endif

        if (restore_TS_obc_east) then
c
c        receive eastern TS boundaries
c
         i=1; if (my_pe==0) 
     &    call mpi_send(i,1,impiinteger,sub_domain_pe0(mother),
     &                  tag,mpi_comm_world,ierr)
         len=(je_zoom-js_zoom+1)*2*km_mother
         if (my_pe == 0)
     &    call mpi_recv(buf,len,impireal,sub_domain_pe0(mother),
     &                  tag,mpi_comm_world,status,ierr)
         call mpi_bcast(buf,len,impireal,0,my_comm,ierr)
c
c        transfer coarse grid data to fine grid
c
         do k=1,km_mother
          do kk=(k-1)*zoom_fac_k + 1,k*zoom_fac_k  
           do jj=-zoom_fac/2,zoom_fac/2
            do j=js_zoom,je_zoom
             js=(j-js_zoom)*zoom_fac + jj+1
             if (js>=js_pe.and.js<=je_pe ) then
              ts_obc_east(js,kk,1)=buf(k,j,1)
              ts_obc_east(js,kk,2)=buf(k,j,2)
             endif
            enddo
           enddo
          enddo
         enddo
c
c        smooth T and S
c
         ismooth=zoom_fac/2; is=ismooth
         call smooth_merid(ts_obc_east(:,:,1),is,imt-1,km)
         call smooth_merid(ts_obc_east(:,:,2),is,imt-1,km)

        else
         i=0; if (my_pe==0) 
     &    call mpi_send(i,1,impiinteger,sub_domain_pe0(mother),
     &                  tag,mpi_comm_world,ierr)
        endif

        deallocate( buf)
        allocate(buf(is_zoom:ie_zoom,km_mother,2) ); buf=0.

        if (restore_TS_obc_south) then
c
c        receive southern TS boundaries
c
         i=1; if (my_pe==0) 
     &    call mpi_send(i,1,impiinteger,sub_domain_pe0(mother),
     &                  tag,mpi_comm_world,ierr)
         len=(ie_zoom-is_zoom+1)*2*km_mother
         if (my_pe == 0) 
     &     call mpi_recv(buf,len,impireal,sub_domain_pe0(mother),
     &                   tag,mpi_comm_world,status,ierr)
         call mpi_bcast(buf,len,impireal,0,my_comm,ierr)
c
c        transfer coarse grid data to fine grid
c
         do k=1,km_mother
          do kk=(k-1)*zoom_fac_k + 1,k*zoom_fac_k  
           do ii=-zoom_fac/2,zoom_fac/2
            do i=is_zoom,ie_zoom
             is=(i-is_zoom)*zoom_fac + ii+1
             if (is>=is_pe.and.is<=ie_pe ) then
              ts_obc_south(is,kk,1)=buf(i,k,1)
              ts_obc_south(is,kk,2)=buf(i,k,2)
             endif
            enddo
           enddo
          enddo
         enddo
c
c        smooth T and S
c
         ismooth=zoom_fac/2; is=ismooth
         call smooth_zonal(ts_obc_south(:,:,1),is,2,km)
         call smooth_zonal(ts_obc_south(:,:,2),is,2,km)

        else
         i=0; if (my_pe==0) 
     &    call mpi_send(i,1,impiinteger,sub_domain_pe0(mother),
     &                  tag,mpi_comm_world,ierr)
        endif

        if (restore_TS_obc_north) then
c
c        receive northern TS boundaries
c
         i=1; if (my_pe==0) 
     &    call mpi_send(i,1,impiinteger,sub_domain_pe0(mother),
     &                  tag,mpi_comm_world,ierr)
         len=(ie_zoom-is_zoom+1)*2*km_mother
         if (my_pe == 0) 
     &     call mpi_recv(buf,len,impireal,sub_domain_pe0(mother),
     &                   tag,mpi_comm_world,status,ierr)
         call mpi_bcast(buf,len,impireal,0,my_comm,ierr)
c
c        transfer coarse grid data to fine grid
c
         do k=1,km_mother
          do kk=(k-1)*zoom_fac_k + 1,k*zoom_fac_k  
           do ii=-zoom_fac/2,zoom_fac/2
            do i=is_zoom,ie_zoom
             is=(i-is_zoom)*zoom_fac + ii+1
             if (is>=is_pe.and.is<=ie_pe ) then
              ts_obc_north(is,kk,1)=buf(i,k,1)
              ts_obc_north(is,kk,2)=buf(i,k,2)
             endif
            enddo
           enddo
          enddo
         enddo
c
c        smooth T and S
c
         ismooth=zoom_fac/2; is=ismooth
         call smooth_zonal(ts_obc_north(:,:,1),is,jmt-1,km)
         call smooth_zonal(ts_obc_north(:,:,2),is,jmt-1,km)

        else
         i=0; if (my_pe==0) 
     &    call mpi_send(i,1,impiinteger,sub_domain_pe0(mother),
     &                  tag,mpi_comm_world,ierr)
        endif
        deallocate( buf)
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' done'
#endif
c
c       send density for BLUE
c
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' send blue '
#endif
        if (my_pe==0) 
     &   call mpi_recv(idoit,1,impiinteger,sub_domain_pe0(mother),
     &                 tag,mpi_comm_world,status,ierr)
        call mpi_bcast(idoit,1,impiinteger,0,my_comm,ierr)
        if (idoit==1) then
c
c        send flag whether rho or T and S is sended
c
#ifdef flame_density
         send_rho=1
#else
         send_rho=0
#endif
         if (my_pe == 0) call mpi_send(send_rho,1,impiinteger,
     &         sub_domain_pe0(mother), tag,mpi_comm_world,ierr)
         if (send_rho == 1) then
          allocate(buf4(is_zoom:ie_zoom,km_mother,js_zoom:je_zoom,1))
         else
          allocate(buf4(is_zoom:ie_zoom,km_mother,js_zoom:je_zoom,2))
         endif
         allocate(buf3(is_zoom:ie_zoom,km_mother,js_zoom:je_zoom) )
         allocate(buf2(is_zoom:ie_zoom,js_zoom:je_zoom) )
         buf2=0.; buf4=0.; buf3=0.
         do k=1,km_mother
          do i=is_zoom,ie_zoom
           do j=js_zoom,je_zoom
            n=0
            do ii=-zoom_fac/2,zoom_fac/2
             do jj=-zoom_fac/2,zoom_fac/2
              is=(i-is_zoom)*zoom_fac + ii+1
              js=(j-js_zoom)*zoom_fac + jj+1
              if (is>=is_pe.and.is<=ie_pe.and.
     &            js>=js_pe.and.js<=je_pe )  then
               if (send_rho==1) then
                buf4(i,k,j,1)=buf4(i,k,j,1)+rho(is,k,js)*tmask(is,k,js)
                n=n+tmask(is,k,js)
               else
                do kk=(k-1)*zoom_fac_k + 1,k*zoom_fac_k  
                 buf4(i,k,j,1)=buf4(i,k,j,1)+
     &                         t(is,kk,js,1,tau)*tmask(is,kk,js)
                 buf4(i,k,j,2)=buf4(i,k,j,2)+
     &                         t(is,kk,js,2,tau)*tmask(is,kk,js)
                 n=n+tmask(is,kk,js)
                enddo
               endif
              endif
             enddo
            enddo
            if (n>0) then
             buf4(i,k,j,:)=buf4(i,k,j,:)/n
             if (k==1) buf2(i,j)=1.
            endif
           enddo
          enddo
         enddo
         do n=1,n_pes-1
          if (my_pe==n) then
           len=(ie_zoom-is_zoom+1)*(je_zoom-js_zoom+1)
           call mpi_send(buf2,len,impireal,0,
     &                   tag,my_comm,ierr)
           len=(ie_zoom-is_zoom+1)*(je_zoom-js_zoom+1)*km_mother
           call mpi_send(buf4(:,:,:,1),len,impireal,0,
     &                   tag,my_comm,ierr)
           if (send_rho==0) then
            call mpi_send(buf4(:,:,:,2),len,impireal,0,
     &                    tag,my_comm,ierr)
           endif
          elseif (my_pe ==0) then
           len=(ie_zoom-is_zoom+1)*(je_zoom-js_zoom+1)
           call mpi_recv(buf2,len,impireal,n,
     &                   tag,my_comm,status,ierr)
           len=(ie_zoom-is_zoom+1)*(je_zoom-js_zoom+1)*km_mother
           call mpi_recv(buf3,len,impireal,n,
     &                   tag,my_comm,status,ierr)
           do k=1,km_mother
             where( buf2 == 1.) buf4(:,k,:,1)=buf3(:,k,:)
           enddo
           if (send_rho==0) then
            call mpi_recv(buf3,len,impireal,n,
     &                    tag,my_comm,status,ierr)
            do k=1,km_mother
             where( buf2 == 1.) buf4(:,k,:,2)=buf3(:,k,:)
            enddo
           endif
          endif
         enddo
         len=(ie_zoom-is_zoom+1)*(je_zoom-js_zoom+1)*km_mother
         if (send_rho==0) len=len*2
         if (my_pe == 0) 
     &     call mpi_send(buf4,len,impireal,sub_domain_pe0(mother),
     &                   tag,mpi_comm_world,ierr)
         deallocate(buf4,buf2,buf3)
        endif ! idoit

#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' done '
#endif

      endif ! mother>0


      do nchild=1,nr_childs
c
c       send/receive data to/from child
c
        allocate(cbuf(imt,jmt));cbuf=0.
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' domain_exchg, index_exchg'
#endif
        call index_get(is,ie,js,je,childs(nchild))
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' done'
#endif
        pe_child=sub_domain_pe0(childs(nchild))
c
c        send the streamfunction
c
        allocate(p_s(is:ie),p_n(is:ie),p_w(js:je),p_e(js:je) )
        p_s=0.; p_n=0.; p_w=0.; p_e=0.
c       get psi from other pes 
        do j=max(js_pe,js),min(je_pe,je)
          do i=max(is,is_pe),min(ie,ie_pe)
            cbuf(i,j)=psi(i,j,1)            
          enddo
        enddo
        call pe0_recv_2D(cbuf)
        p_s=cbuf(is:ie,js); p_n=cbuf(is:ie,je)
        p_w=cbuf(is,js:je); p_e=cbuf(ie,js:je)
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' send psi'
#endif
        if (my_pe == 0) then
         len=ie-is+1
         call mpi_send(p_s,len,impireal,pe_child,
     &                 tag,mpi_comm_world,ierr)
         call mpi_send(p_n,len,impireal,pe_child,
     &                 tag,mpi_comm_world,ierr)
         len=je-js+1
         call mpi_send(p_w,len,impireal,pe_child,
     &                 tag,mpi_comm_world,ierr)
         call mpi_send(p_e,len,impireal,pe_child,
     &                 tag,mpi_comm_world,ierr)
        endif
        deallocate( p_n,p_s,p_e,p_w)
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' done'
#endif
c
c       send the surface momentum fluxes
c
        allocate(buf(is:ie,js:je,2) );buf=0.
        do n=1,2
         do j=max(js_pe,js),min(je_pe,je)
          do i=max(is,is_pe),min(ie,ie_pe)
            cbuf(i,j)=smf(i,j,n)            
c           this is needed as a mask in sub domain
            if (umask(i,1,j)==0.) cbuf(i,j)=-1e23
          enddo
         enddo
         call pe0_recv_2D(cbuf)
         buf(is:ie,js:je,n)=cbuf(is:ie,js:je)
        enddo
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' send tau'
#endif
        if (my_pe == 0) then
         len=(ie-is+1)*(je-js+1)*2
         call mpi_send(buf,len,impireal,pe_child,
     &                 tag,mpi_comm_world,ierr)
        endif
        deallocate(buf)
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' done'
#endif
c
c       send restoring time scales and climatogical values
c       for the tracer fluxes also
c
        allocate(buf(is:ie,js:je,nt*2) );buf=0.
        do n=1,nt
         do j=max(js_pe,js),min(je_pe,je)
          do i=max(is,is_pe),min(ie,ie_pe)
            cbuf(i,j)=stf_rest(i,j,n)            
          enddo
         enddo
         call pe0_recv_2D(cbuf)
         buf(is:ie,js:je,n)=cbuf(is:ie,js:je)
         do j=max(js_pe,js),min(je_pe,je)
          do i=max(is,is_pe),min(ie,ie_pe)
            cbuf(i,j)=stf_clim(i,j,n)            
          enddo
         enddo
         call pe0_recv_2D(cbuf)
         buf(is:ie,js:je,n+nt)=cbuf(is:ie,js:je)
        enddo
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' send tracer fluxes'
#endif
        if (my_pe == 0) then
c        numer of tracer fluxes which are send
         call mpi_send(nt,1,impiinteger,pe_child,
     &                 tag,mpi_comm_world,ierr)
         len=(ie-is+1)*(je-js+1)*nt*2
         call mpi_send(buf,len,impireal,pe_child,
     &                 tag,mpi_comm_world,ierr)
        endif
        deallocate(buf)
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' done'
#endif
c
c       send ustar fluxes if requested
c
        if (my_pe == 0) call mpi_recv(i,1,impiinteger,pe_child,
     &                 tag,mpi_comm_world,status,ierr)
        call mpi_bcast(i,1,impiinteger,0,my_comm,ierr)
        if (i==1.and.(enable_ktmix.or.enable_tkemix)) then
#ifdef test_exchg
         print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &      ' send ktmix'
#endif
         allocate(buf(is:ie,js:je,1) ); buf=0.
         do j=max(js_pe,js),min(je_pe,je)
          do i=max(is,is_pe),min(ie,ie_pe)
            cbuf(i,j)=ustar(i,j)            
          enddo
         enddo
         call pe0_recv_2D(cbuf)
         buf(is:ie,js:je,1)=cbuf(is:ie,js:je)
         if (my_pe == 0) then
          len=(ie-is+1)*(je-js+1)
          call mpi_send(buf,len,impireal,pe_child,
     &                  tag,mpi_comm_world,ierr)
         endif
         deallocate(buf)
#ifdef test_exchg
         print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &      ' done'
#endif
        elseif (i==1 .and. .not. (enable_ktmix.or.enable_tkemix)) then
          print*,' cannot send ustar to sub domain '
          print*,' since sub domain #',sub_domain,
     &           ' has not Kraus Truner model or TKE scheme enabled'
          call halt_stop(' in domain_exchg ')
        endif
c
c       send TS at open boundaries
c
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' send TS o.b.c.'
#endif
        allocate(buf(km,js:je,2) );
        buf=0.; len=(je-js+1)*km*2
        if (my_pe == 0) call mpi_recv(i,1,impiinteger,pe_child,
     &                 tag,mpi_comm_world,status,ierr)
        call mpi_bcast(i,1,impiinteger,0,my_comm,ierr)
        if (i==1) then
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' send TS west'
#endif
         do n=1,2
          do k=1,km
           if (is>=is_pe.and.is<=ie_pe) then
            do j=max(js_pe,js),min(je_pe,je)
             cbuf(is,j)=t(is,k,j,n,tau)
            enddo
           endif
           call pe0_recv_2D(cbuf)
           buf(k,js:je,n)=cbuf(is,js:je)
          enddo
         enddo
         if (my_pe == 0) call mpi_send(buf,len,impireal,pe_child,
     &                   tag,mpi_comm_world,ierr)
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' done'
#endif
        endif
        if (my_pe == 0) call mpi_recv(i,1,impiinteger,pe_child,
     &                 tag,mpi_comm_world,status,ierr)
        call mpi_bcast(i,1,impiinteger,0,my_comm,ierr)
        if (i==1) then
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' send TS east'
#endif
         do n=1,2
          do k=1,km
           if (ie>=is_pe.and.ie<=ie_pe) then
            do j=max(js_pe,js),min(je_pe,je)
             cbuf(ie,j)=t(ie,k,j,n,tau)
            enddo
           endif
           call pe0_recv_2D(cbuf)
           buf(k,js:je,n)=cbuf(ie,js:je)
          enddo
         enddo
         if (my_pe == 0) call mpi_send(buf,len,impireal,pe_child,
     &                   tag,mpi_comm_world,ierr)
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' done'
#endif
        endif

        deallocate(buf); 
c        print*,'allocating',is,ie,km
        allocate(buf(is:ie,km,2) ); 
c        print*,' done'
        buf=0.; len=(ie-is+1)*km*2
        if (my_pe == 0) call mpi_recv(i,1,impiinteger,pe_child,
     &                 tag,mpi_comm_world,status,ierr)
        call mpi_bcast(i,1,impiinteger,0,my_comm,ierr)
        if (i==1) then
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' send TS southt'
#endif
         do n=1,2
          do k=1,km
           if (js>=js_pe.and.js<=je_pe) then
            do i=max(is_pe,is),min(ie_pe,ie)
             cbuf(i,js)=t(i,k,js,n,tau)
            enddo
           endif
           call pe0_recv_2D(cbuf)
           buf(is:ie,k,n)=cbuf(is:ie,js)
          enddo
         enddo
         if (my_pe == 0) call mpi_send(buf,len,impireal,pe_child,
     &                  tag,mpi_comm_world,ierr)
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' done'
#endif
        endif
        if (my_pe == 0) call mpi_recv(i,1,impiinteger,pe_child,
     &                 tag,mpi_comm_world,status,ierr)
        call mpi_bcast(i,1,impiinteger,0,my_comm,ierr)
        if (i==1) then
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' send TS north'
#endif
         do n=1,2
          do k=1,km
           if (je>=js_pe.and.je<=je_pe) then
            do i=max(is_pe,is),min(ie_pe,ie)
             cbuf(i,je)=t(i,k,je,n,tau)
            enddo
           endif
           call pe0_recv_2D(cbuf)
           buf(is:ie,k,n)=cbuf(is:ie,je)
          enddo
         enddo
         if (my_pe == 0) call mpi_send(buf,len,impireal,pe_child,
     &                  tag,mpi_comm_world,ierr)
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' done'
#endif
        endif
        deallocate(buf)
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' done'
#endif
#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' receive blue'
#endif
        if (enable_blue) then
c
c        receive density for BLUE
c
         idoit=1; if (my_pe==0) 
     &     call mpi_send(idoit,1,impiinteger,pe_child,
     &                   tag,mpi_comm_world,ierr)
c        receive flag whether rho or T/S is sended
c        i=0 TS, i=1 rho is coming
         if (my_pe==0) 
     &    call mpi_recv(i,1,impiinteger,pe_child,
     &                  tag,mpi_comm_world,status,ierr)
         call mpi_bcast(i,1,impiinteger,0,my_comm,ierr)
         if (i==0) then; allocate(buf4(is:ie,km,js:je,2) )
         elseif (i==1) then; allocate(buf4(is:ie,km,js:je,1) )
         else
          if (my_pe==0) print*,' expecting either 0 or 1 here'
          call halt_stop(' in domain_exchg')
         endif
         buf4=0.; len=(ie-is+1)*(je-js+1)*km
         if (i==0) len=len*2
         if (my_pe==0) call mpi_recv(buf4,len,impireal,pe_child,
     &                 tag,mpi_comm_world,status,ierr)
c        broadcast result
         call bcast_real(buf4,len,0)
c
         if (i==0) then
          do k=1,km
           do j=js+1,je-1
            call model_dens(buf4(is+1,k,j,1),buf4(is+1,k,j,2),
     &                      blue_rho(is+1,k,j,1),k,ie-is-1
#ifdef partial_cell
     &                       ,ztp(is+1,k,j)
#endif
     &                       )
           enddo
          enddo
         else
          blue_rho(is+1:ie-1,:,js+1:je-1,1)=
     &                       buf4(is+1:ie-1,:,js+1:je-1,1)
         endif
         blue_alpha(is+1:ie-1,:,js+1:je-1)=0.1
         blue_rho(:,:,:,2)=blue_rho(:,:,:,1)

         deallocate(buf4)

        else ! enable_blue

         idoit=0; if (my_pe==0) 
     &     call mpi_send(idoit,1,impiinteger,pe_child,
     &                   tag,mpi_comm_world,ierr)
        endif ! enable_blue


#ifdef test_exchg
        print*,' DOMAIN=',sub_domain, ' PE=',my_pe,
     &     ' done'
#endif

       deallocate(cbuf)
      enddo ! nchilds

#ifdef test_exchg
      if (my_pe==0) print*,' done exchanging data across sub domains '
#endif

#endif
      end subroutine domain_exchg





      subroutine exchg_sub_grid ()
c
c---------------------------------------------------------------
c     Exchange grid data over sub domains
c---------------------------------------------------------------
c
      use spflame_module
      implicit none
#ifndef no_mpp
# include "sub_mpif.h"
      integer ierr,tag,all_pes,nchild
      integer,dimension(MPI_STATUS_SIZE)  :: Status
      integer is,ie,js,je,len,ii,jj,kk,i,j,iii,jjj,k
      real yt_buf(js_zoom:je_zoom)
      real xt_buf(is_zoom:ie_zoom)
      real dyt_buf(js_zoom:je_zoom)
      real dxt_buf(is_zoom:ie_zoom)
      real dzt_buf(km_mother),sumu,sumt,tolr,dxubar,dyubar,dzwbar

c  see if there is anybody out there
      call mpi_comm_size(mpi_comm_world,all_pes,ierr)
      if (all_pes == n_pes) return
      tag=0
       
      if (my_pe==0) print*,' exchanging grid over sub domains'

      if (mother >0 ) then
c
c      get grid from mother domain first
c
       call index_send(is,ie,js,je)
       if (my_pe == 0) then
        len=ie_zoom-is_zoom+1
        call mpi_recv(xt_buf,len,impireal,sub_domain_pe0(mother),
     &                tag,mpi_comm_world,status,ierr)
        len=je_zoom-js_zoom+1
        call mpi_recv(yt_buf,len,impireal,sub_domain_pe0(mother),
     &                tag,mpi_comm_world,status,ierr)
        len=ie_zoom-is_zoom+1
        call mpi_recv(dxt_buf,len,impireal,sub_domain_pe0(mother),
     &                tag,mpi_comm_world,status,ierr)
        len=je_zoom-js_zoom+1
        call mpi_recv(dyt_buf,len,impireal,sub_domain_pe0(mother),
     &                tag,mpi_comm_world,status,ierr)
        call mpi_recv(dzt_buf,km_mother,impireal,
     &                sub_domain_pe0(mother),
     &                tag,mpi_comm_world,status,ierr)
        len=ie_zoom-is_zoom+1
        dxt_buf=dxt_buf/(zoom_fac*1.0)
        call gcell_sub_domain (imt, len, xt_buf, dxt_buf, 
     &                         ii, dxtdeg, dxudeg)
        if (ii /= imt-1) then
          print*,' error for xt in sub grid generation'
          print*,ii,imt
c          call flush(6,ierr)
          call halt_stop('stop')
        else
          dxtdeg(imt)=dxtdeg(imt-1)
          dxudeg(imt)=dxudeg(imt-1)
        endif
        len=je_zoom-js_zoom+1
        dyt_buf=dyt_buf/(zoom_fac*1.0)
        call gcell_sub_domain (jmt, len, yt_buf, dyt_buf, 
     &                         ii, dytdeg, dyudeg)
        if (ii /= jmt-1) then
          print*,' error for yt in sub grid generation'
          print*,ii,jmt
c          call flush(6,ierr)
          call halt_stop('stop')
        else
          dytdeg(jmt)=dytdeg(jmt-1)
          dyudeg(jmt)=dyudeg(jmt-1)
        endif
        if (km==km_mother) then
          dzt=dzt_buf
        else
         do ii=1,km_mother
          do kk=(ii-1)*zoom_fac_k + 1,ii*zoom_fac_k  
           dzt(kk)=dzt_buf(ii)/zoom_fac_k
          enddo
         enddo
        endif
       endif
       call mpi_bcast(dxtdeg,imt,impireal,0,my_comm,ierr)
       call mpi_bcast(dxudeg,imt,impireal,0,my_comm,ierr)
       call mpi_bcast(dytdeg,jmt,impireal,0,my_comm,ierr)
       call mpi_bcast(dyudeg,jmt,impireal,0,my_comm,ierr)
       call mpi_bcast(xt_buf,1,impireal,0,my_comm,ierr)
       call mpi_bcast(yt_buf,1,impireal,0,my_comm,ierr)
       call mpi_bcast(dzt,km,impireal,0,my_comm,ierr)
       xt(1)=xt_buf(is_zoom)
       xu(1)=xt_buf(is_zoom)+dxtdeg(1)/2.
       do ii=2,imt
c        xt(ii)=xt(ii-1)+dxtdeg(ii)
c        xu(ii)=xu(ii-1)+dxudeg(ii)
        xt(ii)=xt(ii-1)+dxudeg(ii-1)
        xu(ii)=xu(ii-1)+dxtdeg(ii)
       enddo
       yt(1)=yt_buf(js_zoom)
       yu(1)=yt_buf(js_zoom)+dytdeg(1)/2.
       do ii=2,jmt
c        yt(ii)=yt(ii-1)+dytdeg(ii)
c        yu(ii)=yu(ii-1)+dyudeg(ii)
        yt(ii)=yt(ii-1)+dyudeg(ii-1)
        yu(ii)=yu(ii-1)+dytdeg(ii)
       enddo 
       dzw(1) = dzt(1)
       sumt = dzt(1)
       sumu = 0.5*dzt(1) + dzw(1)
       do ii=2,km
        sumt = sumt + dzt(ii)
        dzw(ii) = 2*(sumt-sumu)
        sumu = sumu + dzw(ii)
       enddo
       zt(1) = 0.5*dzt(1)
       zw(1) = dzt(1)
       do ii=2,km
        zw(ii) = zw(ii-1) + dzt(ii)
        zt(ii) = zt(ii-1) + dzw(ii-1)
       enddo
       dzw(0)  = zt(1)
       dzw(km) = zw(km) - zt(km)

       if (my_pe==0) then
        print*,' sub grid generation results : '
        print*,' Longitude T grid:'
#ifdef notdef
        do i=is_zoom,ie_zoom
         do ii=-zoom_fac/2,zoom_fac/2
          iii=(i-is_zoom)*zoom_fac +1+ii
          if (iii>=1.and.iii<=imt) then
           print*,' i=',iii,' xt(i)=',xt(iii),' dxtdeg(i)=',dxtdeg(iii),
     &     ' ic=',i,' c_xt(ic)=', xt_buf(i),
     &     ' c_dxtdeg(ic)=', dxt_buf(i)*zoom_fac
          endif
         enddo
        enddo
#else
        print*,''
        print*,' this grid: '
        print*,' xt(1)    =',xt(1),     ' xt(imt)    =',xt(imt)
        print*,' dxtdeg(1)=',dxtdeg(1), ' dxtdeg(imt)=',dxtdeg(imt)
        print*,''
        print*,' mothers grid:'
        print*,' xt(1)    =',xt_buf(is_zoom),     
     &         ' xt(imt)    =',xt_buf(ie_zoom)
        print*,' dxtdeg(1)=',dxt_buf(is_zoom),
     &         ' dxtdeg(imt)=',dxt_buf(ie_zoom)
        print*,''
#endif
        print*,' Longitude U grid:'
#ifdef notdef
        do i=is_zoom,ie_zoom
         do ii=-zoom_fac/2,zoom_fac/2
          iii=(i-is_zoom)*zoom_fac +1+ii
          if (iii>=1.and.iii<=imt) then
           print*,' i=',iii,' xu(i)=',xu(iii),' dxudeg(i)=',dxudeg(iii),
     &     ' ic=',i
          endif
         enddo
        enddo
#else
        print*,''
        print*,' this grid: '
        print*,' xu(1)    =',xu(1),     ' xu(imt)    =',xu(imt)
        print*,' dxudeg(1)=',dxudeg(1), ' dxudeg(imt)=',dxudeg(imt)
        print*,''
#endif
        print*,' Latitude T grid:'
#ifdef notdef
        do j=js_zoom,je_zoom
         do jj=-zoom_fac/2,zoom_fac/2
          jjj=(j-js_zoom)*zoom_fac +1+jj
          if (jjj>=1.and.jjj<=jmt) then
           print*,' j=',jjj,' yt(j)=',yt(jjj),' dytdeg(j)=',dytdeg(jjj),
     &     ' jc=',j,' c_yt(jc)=', yt_buf(j),
     &     ' c_dytdeg(jc)=', dyt_buf(j)*zoom_fac
          endif
         enddo
        enddo
#else
        print*,''
        print*,' this grid: '
        print*,' yt(1)    =',yt(1),     ' yt(jmt)    =',yt(jmt)
        print*,' dytdeg(1)=',dytdeg(1), ' dytdeg(jmt)=',dytdeg(jmt)
        print*,''
        print*,' mothers grid:'
        print*,' yt(1)    =',yt_buf(js_zoom),     
     &         ' yt(jmt)    =',yt_buf(je_zoom)
        print*,' dytdeg(1)=',dyt_buf(js_zoom),
     &         ' dytdeg(jmt)=',dyt_buf(je_zoom)
        print*,''
#endif
        print*,' Latitude U grid:'
#ifdef notdef
        do j=js_zoom,je_zoom
         do jj=-zoom_fac/2,zoom_fac/2
          jjj=(j-js_zoom)*zoom_fac +1+jj
          if (jjj>=1.and.jjj<=jmt) then
           print*,' j=',jjj,' yu(j)=',yu(jjj),' dyudeg(j)=',dyudeg(jjj),
     &     ' jc=',j
          endif
         enddo
        enddo
#else
        print*,''
        print*,' this grid: '
        print*,' yu(1)    =',yu(1),     ' yu(jmt)    =',yu(jmt)
        print*,' dyudeg(1)=',dyudeg(1), ' dyudeg(jmt)=',dyudeg(jmt)
        print*,''
#endif
       endif
c
c-----------------------------------------------------------------------
c     Check if the T grid resolution is an average of the
c     U cell resolution. This insures more accurate advection of
c     tracers within a stretched grid.
c-----------------------------------------------------------------------
c
      tolr = 1.e-5
      do i=2,imt-1
        dxubar = 0.5*(dxudeg(i) + dxudeg(i-1))
        if (abs(dxubar-dxtdeg(i)) .gt. tolr) then
          if (my_pe==0) write (6,'(a,i5,a)')
     &    '=>Warning: T cell delta x at i=',i
     &,   ' is not an average of adjacent U cell delta x`s'     
        endif     
      enddo
c
      do j=2,jmt-1
        dyubar = 0.5*(dyudeg(j) + dyudeg(j-1))
        if (abs(dyubar-dytdeg(j)) .gt. tolr) then
          if (my_pe==0) write (6,'(a,i5,a)')
     &    '=>Warning: T cell delta y at j=',j
     &,   ' is not an average of adjacent U cell delta y`s' 
        endif     
      enddo
c
      tolr = 1.e0
      do k=2,km-1
        dzwbar = 0.5*(dzw(k) + dzw(k-1))
        if (abs(dzwbar-dzt(k)) .gt. tolr) then
          if (my_pe==0) write (6,'(a,i5,a)')
     &    '=>Warning: T cell delta z at k=',k
     &,   ' is not an average of adjacent W cell delta z`s'     
        endif     
      enddo


      endif ! mother >0
c
c     now send grid to children
c
      do nchild=1,nr_childs
        call index_get(is,ie,js,je,childs(nchild))
        if (my_pe == 0) then
          call mpi_send(xt(is:ie),ie-is+1,impireal,
     &                  sub_domain_pe0(childs(nchild)),
     &                  tag,mpi_comm_world,ierr)
          call mpi_send(yt(js:je),je-js+1,impireal,
     &                  sub_domain_pe0(childs(nchild)),
     &                  tag,mpi_comm_world,ierr)
          call mpi_send(dxtdeg(is:ie),ie-is+1,impireal,
     &                  sub_domain_pe0(childs(nchild)),
     &                  tag,mpi_comm_world,ierr)
          call mpi_send(dytdeg(js:je),je-js+1,impireal,
     &                  sub_domain_pe0(childs(nchild)),
     &                  tag,mpi_comm_world,ierr)
          call mpi_send(dzt,km,impireal,
     &                  sub_domain_pe0(childs(nchild)),
     &                  tag,mpi_comm_world,ierr)
        endif
      enddo

      if (my_pe==0) print*,' done'

#endif
      end subroutine exchg_sub_grid



      subroutine exchg_sub_topo 
c
c---------------------------------------------------------------
c     Exchange kmt mask data over sub domains
c---------------------------------------------------------------
c
      use spflame_module
      use island_module
      implicit none
#ifndef no_mpp
# include "sub_mpif.h"
      integer,dimension(MPI_STATUS_SIZE)  :: Status
      integer ierr,tag,all_pes,nchild
      integer is,ie,js,je,len,ii,i,j,jj,ismooth,k,kk,n
      integer kmt_buf(is_zoom:ie_zoom,js_zoom:je_zoom)
#ifdef partial_cell
      real :: htp_buf(is_zoom:ie_zoom,js_zoom:je_zoom)
      integer :: passes = 0
#endif
      real, allocatable :: buf(:,:)
      real z,tol

c  see if there is anybody out there
      call mpi_comm_size(mpi_comm_world,all_pes,ierr)
      if (all_pes == n_pes) return
      tag=0
      if (mother>0 ) then
c
c      get the kmt mask from mother domain first
c
       if (my_pe==0) print*,' receiving kmt mask from mother domain'

       call index_send(is,ie,js,je)
       len=(ie_zoom-is_zoom+1)*(je_zoom-js_zoom+1)
       if (my_pe == 0) 
     &   call mpi_recv(kmt_buf,len,impiinteger,sub_domain_pe0(mother),
     &                 tag,mpi_comm_world,status,ierr)
       call mpi_bcast(kmt_buf,len,impiinteger,0,my_comm,ierr)

#ifdef partial_cell
       if (my_pe == 0)  then
        print*,' receiving also htp mask'
        call mpi_recv(htp_buf,len,impireal,sub_domain_pe0(mother),
     &                tag,mpi_comm_world,status,ierr)
       endif
       call mpi_bcast(htp_buf,len,impireal,0,my_comm,ierr)
#endif

       do j=js_zoom,je_zoom
        do ii=-zoom_fac/2,zoom_fac/2
         do jj=-zoom_fac/2,zoom_fac/2
          js=(j-js_zoom)*zoom_fac + jj+1
          do i=is_zoom,ie_zoom
           is=(i-is_zoom)*zoom_fac + ii+1
           if (is>=1.and.is<=imt.and.
     &            js>=1.and.js<=jmt ) kmt_big(is,js)=kmt_buf(i,j)
          enddo
         enddo
        enddo
       enddo

#ifdef partial_cell
       do j=js_zoom,je_zoom
        do ii=-zoom_fac/2,zoom_fac/2
         do jj=-zoom_fac/2,zoom_fac/2
          js=(j-js_zoom)*zoom_fac + jj+1
          do i=is_zoom,ie_zoom
           is=(i-is_zoom)*zoom_fac + ii+1
           if (is>=1.and.is<=imt.and.
     &            js>=1.and.js<=jmt ) htp(is,js)=htp_buf(i,j)
          enddo
         enddo
        enddo
       enddo
#endif

       if (zoom_fac_k>1) then
        do i=1,imt
         do j=1,jmt
          kmt_big(i,j)=max(0,kmt_big(i,j)*zoom_fac_k-1)
         enddo
        enddo
#ifdef partial_cell
        call halt_stop(
     & ' use of zoom_fac_k>1 not possible with partial cells'//
     &                        ' -> error in topog_exchg ')
#endif
       endif

#ifdef sub_use_ref_topo_in_sub_domain
#ifdef partial_cell
      call halt_stop(' cannot use other kmt mask with partial cells'//
     &               ' -> error in topog_exchg ')
#endif
       call add_other_topo
#endif
c
c      do smoothing X times, but do not produce
c      deeper cells than coarse grid boxes
c      preserve also topography at open boundaries
c
       if (my_pe==0) then
         print*,''
         print*,' Note: smoothing topography in sub domain #',
     &    sub_domain,' ',topo_smooth_iterations,' times '
         print*,''
       endif

       allocate(buf(imt,jmt) ); buf=0
       do n=1,topo_smooth_iterations
        ismooth=zoom_fac/2
        do j=1,jmt
          do i=1,imt
           is=0; buf(i,j)=0.
           do jj=-ismooth,ismooth
            do ii=-ismooth,ismooth
             if (ii+i>=1.and.ii+i<=imt.and.
     &          jj+j>=1.and.jj+j<=jmt ) then
#ifdef partial_cell
              buf(i,j)=buf(i,j)+htp(i+ii,j+jj)
#else
              z=0.
              if (kmt_big(i+ii,j+jj).ge.1) z=zt(kmt_big(i+ii,j+jj))
              buf(i,j)=buf(i,j)+z
#endif
              is=is+1
             endif
            enddo
           enddo
           buf(i,j)=buf(i,j)/max(is,1)
          enddo
        enddo

#ifdef partial_cell

        do i=1,imt
         do j=1,jmt
          if ((enable_obc_west .and.i<5)    .or.
     &        (enable_obc_east .and.i>imt-4).or.
     &        (enable_obc_south.and.j<5)    .or.
     &        (enable_obc_north.and.j>jmt-4) ) then
           htp(i,j)=min(htp(i,j),buf(i,j))
          else
           htp(i,j)=buf(i,j)
          endif
         enddo
        enddo
#else
        tol = 1. ! cm
        do i=1,imt
         do j=1,jmt
           kk=0
           do k=1,km
            if (buf(i,j)+tol>=zt(k) ) kk=k
           enddo
           if (kmt_big(i,j) > 0 ) then
             if ((enable_obc_west .and.i<5)    .or.
     &           (enable_obc_east .and.i>imt-4).or.
     &           (enable_obc_south.and.j<5)    .or.
     &           (enable_obc_north.and.j>jmt-4) ) then
              kmt_big(i,j)=min(kk,kmt_big(i,j))
             else
              kmt_big(i,j)=kk
             endif
           endif
         enddo
        enddo
#endif

       enddo

#ifdef partial_cell
c
c      adjust htp and kmt masks
c
        passes = 0
 10     kk=0
        do j=1,jmt
         do i=1,imt
          if (kmt_big(i,j)==0) then
           htp(i,j)=0.  ! preserve coastline
          else
           if (zw(kmt_big(i,j)) < htp(i,j).and.kmt_big(i,j)<km ) then
            kmt_big(i,j)=kmt_big(i,j)+1 ! increase depth level by one
            kk=1
           endif
           if (zw(kmt_big(i,j)) < htp(i,j).and.kmt_big(i,j)==km ) then
            htp(i,j)=zw(kmt_big(i,j)) ! max depth reached
           endif
           if (htp(i,j) < zw(kmt_big(i,j)-1).and.kmt_big(i,j)>4 ) then
            kmt_big(i,j)=kmt_big(i,j)-1 ! decrease depth level by one
            kk=1
           endif
           if (htp(i,j) < zw(kmt_big(i,j)-1).and.kmt_big(i,j)==4) then
            htp(i,j)=zw(kmt_big(i,j)) ! min depth reached
           endif
          endif
         enddo
        enddo
        passes = passes +1       
        if (kk==1) goto 10

        if (my_pe==0) 
     & print*,' number of passes while adjusting htp/kmt mask:',passes

#endif
c
c     apply condition on topography for o.b.
c
       if (enable_obc_south) then
        j=2
        do i=1,imt
         kmt_big(i,j-1)=kmt_big(i,j)
#ifdef partial_cell
         htp(i,j-1)=htp(i,j)
#endif
         k=min(min(kmt_big(i,j),kmt_big(i,j+1)),
     &         min(kmt_big(i,j),kmt_big(i,j+2)))
         if (k /= kmt_big(i,j)) then
          kmt_big(i,j-1:j+2)=k
#ifdef partial_cell
          do jj=j-1,j+2; htp(i,jj) = zw(k); enddo
#endif
         endif
        enddo
       endif
       if (enable_obc_north) then
        j=jmt-1
        do i=1,imt
         kmt_big(i,j+1)=kmt_big(i,j)
#ifdef partial_cell
         htp(i,j+1)=htp(i,j)
#endif
         k=min(min(kmt_big(i,j),kmt_big(i,j-1)),
     &         min(kmt_big(i,j),kmt_big(i,j-2)))
         if (k /= kmt_big(i,j)) then
          kmt_big(i,j-2:j+1)=k
#ifdef partial_cell
          do jj=j-2,j+1; htp(i,jj) = zw(k); enddo
#endif
         endif
        enddo
       endif
       if (enable_obc_west) then
        i=2
        do j=1,jmt
         kmt_big(i-1,j)=kmt_big(i,j)
#ifdef partial_cell
         htp(i-1,j)=htp(i,j)
#endif
         k=min(min(kmt_big(i,j),kmt_big(i+1,j)),
     &         min(kmt_big(i,j),kmt_big(i+2,j)))
         if (k /= kmt_big(i,j)) then
          kmt_big(i-1:i+2,j)=k
#ifdef partial_cell
          do ii=i-1,i+2; htp(ii,j) = zw(k); enddo
#endif
         endif
        enddo
       endif
       if (enable_obc_east) then
        i=imt-1
        do j=1,jmt
         kmt_big(i+1,j)=kmt_big(i,j)
#ifdef partial_cell
         htp(i+1,j)=htp(i,j)
#endif
         k=min(min(kmt_big(i,j),kmt_big(i-1,j)),
     &         min(kmt_big(i,j),kmt_big(i-2,j)))
         if (k /= kmt_big(i,j)) then
          kmt_big(i-2:i+1,j)=k
#ifdef partial_cell
          do ii=i-2,i+1; htp(ii,j) = zw(k); enddo
#endif
         endif
        enddo
       endif
c
c      a test for blue to mother domain
c
       z=1.
       do i=is_zoom+1,ie_zoom-1
         do j=js_zoom+1,je_zoom-1
           kk=0
           do ii=-zoom_fac/2,zoom_fac/2
            do jj=-zoom_fac/2,zoom_fac/2
              is=(i-is_zoom)*zoom_fac + ii+1
              js=(j-js_zoom)*zoom_fac + jj+1
              if (is>=1.and.is<=imt.and.
     &            js>=1.and.js<=jmt ) then
c               if ( kmt_big(is,js) >= kmt_buf(i,j) ) kk=1
               if ( (kmt_big(is,js)+zoom_fac_k-1)/zoom_fac_k 
     &               >= kmt_buf(i,j) ) kk=1
              endif
            enddo
           enddo
           if (kk==0)  then
            if (my_pe == 0) then
             print*,''
             print*,
     &    ' WARNING: found no boxes with enough depth in sub domain #',
     &       sub_domain
             print*,' for (i,j)=(',i,',',j,') of coarse grid'
             print*,' possible reason: topo_smooth_iterations too high'
             print*,' coarse topography: kmt(i,j)=',kmt_buf(i,j),
     &            ' = ',zt(kmt_buf(i,j)),' cm'
             is=(i-is_zoom)*zoom_fac -zoom_fac/2+1
             js=(j-js_zoom)*zoom_fac -zoom_fac/2+1
             ie=(i-is_zoom)*zoom_fac +zoom_fac/2+1
             je=(j-js_zoom)*zoom_fac +zoom_fac/2+1
             do jj=js,je
              print*,' fine topography (at xt=',xt(is),':',xt(ie),
     &           ' yt=',yt(jj),')'
              print*,'  kmt(',is,':',ie,',',jj,')=',kmt_big(is:ie,jj)
              do ii=is,ie
            print*,'  zt(kmt(',ii,',',jj,')=',zt(max(1,kmt_big(ii,jj)))
              enddo
              print*,' =',buf(is:ie,jj), ' cm (undiscretized)'
             enddo
             print*,''
            endif  
            z=0.
           endif
         enddo
       enddo
c       if (z==0.) call halt_stop(' in exchg_topog')
c
c      another  test for the open boundary conditions
c
       if (enable_obc_south) then
        do i=is_zoom+1,ie_zoom-1
         kk=0
         do ii=-zoom_fac/2,zoom_fac/2
           is=(i-is_zoom)*zoom_fac + ii+1
           if (is>=1.and.is<=imt) then
c            if ( kmt_big(is,2) > kmt_buf(i,js_zoom) ) kk=1
            if ( (kmt_big(is,2)+zoom_fac_k-1)/zoom_fac_k 
     &            > kmt_buf(i,js_zoom) ) kk=1
           endif
         enddo
         if (kk==1) then
          if (my_pe==0) print*,' inconsistent southern obc '
          call barrier
          call halt_stop(' in exchg_topog')
         endif
        enddo
       endif
       if (enable_obc_north) then
        do i=is_zoom+1,ie_zoom-1
         kk=0
         do ii=-zoom_fac/2,zoom_fac/2
           is=(i-is_zoom)*zoom_fac + ii+1
           if (is>=1.and.is<=imt) then
c            if ( kmt_big(is,jmt-1) > kmt_buf(i,je_zoom) ) kk=1
            if ( (kmt_big(is,jmt-1)+zoom_fac_k-1)/zoom_fac_k 
     &            > kmt_buf(i,je_zoom) ) kk=1
           endif
         enddo
         if (kk==1) then
          if (my_pe==0) print*,' inconsistent northern obc '
          call barrier
          call halt_stop(' in exchg_topog')
         endif
        enddo
       endif
       if (enable_obc_west) then
        do j=js_zoom+1,je_zoom-1
         kk=0
         do jj=-zoom_fac/2,zoom_fac/2
           js=(j-js_zoom)*zoom_fac + jj+1
           if (js>=1.and.js<=jmt) then
c            if ( kmt_big(2,js) > kmt_buf(is_zoom,j) ) kk=1
            if ( (kmt_big(2,js)+zoom_fac_k-1)/zoom_fac_k 
     &           > kmt_buf(is_zoom,j) ) kk=1
           endif
         enddo
         if (kk==1) then
          if (my_pe==0) print*,' inconsistent western obc '
          call barrier
          call halt_stop(' in exchg_topog')
         endif
        enddo
       endif
       if (enable_obc_east) then
        do j=js_zoom+1,je_zoom-1
         kk=0
         do jj=-zoom_fac/2,zoom_fac/2
           js=(j-js_zoom)*zoom_fac + jj+1
           if (js>=1.and.js<=jmt) then
c            if ( kmt_big(imt-1,js) > kmt_buf(ie_zoom,j) ) kk=1
            if ( (kmt_big(imt-1,js)+zoom_fac_k-1)/zoom_fac_k 
     &        > kmt_buf(ie_zoom,j) ) kk=1
           endif
         enddo
         if (kk==1) then
          if (my_pe==0) print*,' inconsistent eastern obc '
          call barrier
          call halt_stop(' in exchg_topog')
         endif
        enddo
       endif

       deallocate( buf)

       if (my_pe==0) print*,' done'

      endif  ! mother >0
c
c     now send kmt to child domains
c
      do nchild=1,nr_childs
       if (my_pe==0) print*,' sending kmt-mask to child domain #',nchild
       call index_get(is,ie,js,je,childs(nchild))
       if (my_pe == 0) then
        len=(ie-is+1)*(je-js+1)
        call mpi_send(kmt_big(is:ie,js:je),len,
     &                impiinteger,sub_domain_pe0(childs(nchild)),
     &                tag,mpi_comm_world,ierr)
#ifdef partial_cell
        print*,' sending also htp mask'
        call mpi_send(htp(is:ie,js:je),len,
     &                impireal,sub_domain_pe0(childs(nchild)),
     &                tag,mpi_comm_world,ierr)
#endif
       endif
       if (my_pe==0) print*,' done'
      enddo


#endif
      end subroutine exchg_sub_topo 





      subroutine visc_parameter_exchg(n_domain)
c
c---------------------------------------------------------------
c     Exchange viscous parameter over sub domains
c     if n_domain <0 then pe will expect data from mother
c     if n_domain >=0 then pe expect to send data to child n_domain
c---------------------------------------------------------------
c
      use spflame_module
      implicit none
      integer :: n_domain
#ifndef no_mpp
# include "sub_mpif.h"
 
      integer,dimension(MPI_STATUS_SIZE)  :: Status
      integer ierr,tag,all_pes
      integer is,ie,js,je,i,j,ii,jj
      logical biha
      real visc,dx,my_dx,my_am,scal,scal_length,scal_fac

c  see if there is anybody out there
      call mpi_comm_size(mpi_comm_world,all_pes,ierr)
      if (all_pes == n_pes) return

      tag=0
      if (n_domain>=0) then
       call index_get(is,ie,js,je,n_domain)
       if (my_pe == 0) then
        biha=.false.; visc=am
        if (enable_friction_biharmonic) then
         biha=.true.; visc=ambi
        endif
        dx=dxu(1)*csu(js+(je-js)/2)
        call mpi_send(biha,1,mpi_logical,sub_domain_pe0(n_domain),
     &                tag,mpi_comm_world,ierr)
        call mpi_send(visc,1,impireal,sub_domain_pe0(n_domain),
     &                tag,mpi_comm_world,ierr)
        call mpi_send(dx,1,impireal,sub_domain_pe0(n_domain),
     &                tag,mpi_comm_world,ierr)
       endif
      elseif (n_domain<0) then
       call index_send(is,ie,js,je)
       if (my_pe == 0) then
        call mpi_recv(biha,1,mpi_logical,sub_domain_pe0(mother),
     &                tag,mpi_comm_world,status,ierr)
        call mpi_recv(visc,1,impireal,sub_domain_pe0(mother),
     &                tag,mpi_comm_world,status,ierr)
        call mpi_recv(dx,1,impireal,sub_domain_pe0(mother),
     &                tag,mpi_comm_world,status,ierr)
       endif
       call mpi_bcast(biha,1,mpi_logical,0,my_comm,ierr)
       call mpi_bcast(visc,1,impireal,0,my_comm,ierr)
       call mpi_bcast(dx,1,impireal,0,my_comm,ierr)
c
c      scale near open boundaries the hor. harm. viscosity
c      so that the frictional time scale on the grid scale
c      of the mother domain becomes the same in the sub domain
c
       if (.not. enable_friction_harmonic) then
        print*,' need harmonic friction for sub domain #',sub_domain
        call halt_stop(' in visc_parameter_exchg')
       endif
       am=max(1e-5,am); my_dx=dxu(1)*csu(jmt/2)
       my_am=visc; if (biha) my_am=visc/dx**2
       if (my_pe==0) then
         print*,''
         print*,' Harmonic viscosity scaling near open boundaries:'
         if (biha) then
          print*,' biharmonic viscosity in mother domain = ',visc
          print*,' which corresponds to harmonic viscosity'
          print*,' on the grid scale of  mother domain   = ',my_am
         else
          print*,' harmonic viscosity in mother domain = ',my_am
         endif
         print*,''
         print*,' harmonic viscosity in sub domain    = ',am
         if (enable_friction_biharmonic) then
          print*,' biharmonic viscosity in sub domain    = ',ambi
         endif
         print*,''
         print*,' friction time scale (on grid of sub domain) [h]'
         print*,' harmonic in interior     : ',my_dx**2/am/60./60.
         print*,' harmonic near boundaries : ',my_dx**2/my_am/60./60.
         if (enable_friction_biharmonic) then
          print*,' biharmonic in interior   : ',my_dx**4/ambi/60./60.
         endif
         print*,''
       endif
       scal_length=0.25; scal_fac=1.0
#ifdef sub_wide_obc_friction_zone
       scal_length=2.0
#endif
#ifdef sub_narrow_obc_friction_zone
       scal_length=0.10
#endif
#ifdef sub_high_obc_friction_zone
       scal_fac=5.0
#endif
#ifdef sub_low_obc_friction_zone
       scal_fac=0.2
#endif
       my_am=my_am*scal_fac
       if (my_pe==0 ) then
         print*,' using a length scale for high harm. viscosity '
         print*,' near the open boundaries of ',scal_length,' deg'
         print*,''
         print*,' scaling the harm. viscosity near the open'
         print*,' boundaries also with ',scal_fac
         print*,' such that friction time scale near boundaries '
         print*,' becomes ',my_dx**2/my_am/60./60.,' h'
         print*,''
       endif
       do j=js_pe-2,je_pe+2
        do i=is_pe-2,ie_pe+2
          scal=1e4; ii=min(max(1,i),imt); jj=min(max(1,j),jmt)
          if (enable_obc_north) scal=min(scal,yt(jmt-4)-yt(jj))
          if (enable_obc_south) scal=min(scal,yt(jj)    -yt(5))
          if (enable_obc_west)  scal=min(scal,xt(ii)    -xt(5))
          if (enable_obc_east)  scal=min(scal,xt(imt-4)-xt(ii))
c       without cosine dependency
c          scal=0.5-atan(scal/scal_length)/pi
c          am_scale(i,j) = 1./am*(am*(1.-scal)+my_am*scal)
c       account for cosine dependency and other scaling in am_scale
          scal=(0.5-atan(scal/scal_length)/pi)
#ifdef sub_quadratic_obc_friction_zone
          scal=scal**2
#endif
          am_scale(i,j) = 
     &   (1.-scal)*am_scale(i,j)+my_am/am*csu(jj)/csu(jmt/2)*scal
        enddo
       enddo

      endif ! n_domain
#endif
      end subroutine visc_parameter_exchg




      subroutine add_other_topo
      use spflame_module
      implicit none
      integer io,i,j,k,imt2,jmt2,km2,is,ie,js,je
      integer,allocatable :: other_kmt(:,:)
      real,allocatable :: other_depth(:,:),depth(:,:)

      real, allocatable :: oxt(:),oyt(:),ozt(:),oxt2(:)
      real dummy,tol
      integer lenw,kk,indp
      real,allocatable :: work(:)


      if (my_pe==0) then
       print*,''
       print*,' Note: using other topography for sub domain #'
     &          ,sub_domain
      endif

#if defined C90_host || defined T3E_host
      call assign('assign -F f77 -N ieee f:sub_other_kmt.dta',i)
      call assign('assign -F f77 -N ieee f:sub_other_grid.dta',i)
#endif

      call get_free_iounit(io)
      open(io,file='sub_other_kmt.dta',form='unformatted')
      read (io) ! no checks
      read (io) 
      read (io) imt2, jmt2, km2
      read (io) 
      allocate( other_kmt(imt2,jmt2),other_depth(imt2,jmt2) )
      read (io) other_kmt
      close (io)

      allocate( oxt(imt2),oyt(jmt2),ozt(km2),oxt2(imt2) )

      call get_free_iounit (io)
      open (io,file='sub_other_grid.dta',form='unformatted')
      read (io) ! no checks
      read (io) imt2, jmt2, km2
      read (io) (dummy,i=1,imt2)
     &,         (dummy,j=1,jmt2)
     &,         (dummy,i=1,imt2)
     &,         (dummy,j=1,jmt2)
     &,         (dummy,k=1,km2)
     &,         (dummy,k=0,km2)
     &,         (oxt(i),i=1,imt2)
     &,         (dummy,i=1,imt2)
     &,         (oyt(j),j=1,jmt2)
     &,         (dummy,j=1,jmt2)
     &,         (ozt(k),k=1,km2)
     &,         (dummy,k=1,km2)
      close (io)


      do i=1,imt2
       do j=1,jmt2
        if (other_kmt(i,j)>0) then
         other_depth(i,j)=ozt(other_kmt(i,j))
        else
         other_depth(i,j)=0.0
        endif
       enddo
      enddo

      call tranlon (other_depth,imt2, imt2-2, jmt2, 
     &              oxt, xt(2), imt-2, oxt2)

      lenw=10*imt2
      allocate(depth(imt,jmt),work(lenw))

      if (oxt2(2)-oxt2(1) >= xt(2)-xt(1) ) then
       if (my_pe==0) print*,' assuming coarser grid of ref. topogr.'
       call ctf (other_depth, imt2, jmt2, oxt2, oyt, 
     &           depth, imt, jmt, 2, imt-1, 1, jmt,
     &           xt, yt, 1, work, lenw)
      else
       if (my_pe==0) print*,' assuming finer grid of ref. topogr.'
       call ftc (other_depth, imt2, jmt2, oxt2, oyt, 
     &           depth, imt, jmt, 2, imt-1, 1, jmt,
     &           xt, yt, 1, work, lenw)
      endif
      tol = 1. ! cm
      is=5; ie=imt-4; js=5; je=jmt-4
      do i=is,ie
        do j=js,je
           kk=0
           do k=1,km
            if (depth(i,j)+tol>=zt(k) ) kk=k
           enddo
           if (kmt_big(i,j) > 0 ) then
            kmt_big(i,j)=max(4,kk)
c            kmt_big(i,j) =max(kmt_big(i,j),kk)
           endif
        enddo
      enddo

#ifdef sub_hand_tuning_topo_in_sub_domain
c     apply hand tuning of topography here

c      i=60,61,64
c      j=71,72,75
      kmt_big(60,71)=21
      kmt_big(61,72)=21
      kmt_big(64,75)=21

c      i=62,63
c      j=70,71
      kmt_big(62,70)=21
      kmt_big(63,71)=21
c      i=65,66,67
c      j=73,74,75
      kmt_big(65,73)=21
      kmt_big(66,74)=21
      kmt_big(67,75)=21

#endif

      deallocate(depth,work)
      deallocate(other_kmt,other_depth,oxt,oyt,ozt,oxt2)

      end subroutine add_other_topo




      subroutine extrapolate_hor(d,k,spval,grid,klevel)
      use spflame_module
      implicit none
      integer :: k       ! number of grids points overlapping in other PE domains
      integer :: klevel  ! the vertical level of this slice
      real    :: spval   ! special value in data
      character (len=1) :: grid ! which grid: can be U or T
      real    ::    d(is_pe-k:ie_pe+k,js_pe-k:je_pe+k) ! data to be extrapolated

      integer :: mask(is_pe-k:ie_pe+k,js_pe-k:je_pe+k)  
      integer :: isum(is_pe  :ie_pe  ,js_pe  :je_pe  )
      real    ::   dm(is_pe  :ie_pe  ,js_pe  :je_pe  )
      integer :: i,j,ii,jj
      integer :: pass 
      integer, parameter :: maxpass = 100
      real :: clean

      pass = 1
 10   continue

c
c     construct a mask
c
      mask =1
      where( d == spval ) mask = 0
c
c     number of valid points in the surrounding
c
      isum=0
      do ii=-1,1
       do jj=-1,1
        do j=js_pe,je_pe
         do i=is_pe,ie_pe
           isum(i,j)=isum(i,j)+mask(i+ii,j+jj)
         enddo
        enddo
       enddo
      enddo
c
c     mean of valid data in the surrounding
c
      dm=0.
      do ii=-1,1
       do jj=-1,1
        do j=js_pe,je_pe
         do i=is_pe,ie_pe
           dm(i,j)=dm(i,j)+d(i+ii,j+jj)*mask(i+ii,j+jj)
         enddo
        enddo
       enddo
      enddo

      where( isum == 0 ) 
        dm = spval 
      elsewhere 
        dm = dm / isum 
      end where
c
c     set missing data to mean value of surrounding
c
      do j=js_pe,je_pe
       do i=is_pe,ie_pe
           d(i,j)=d(i,j)*mask(i,j) + (1.-mask(i,j))*dm(i,j)
       enddo
      enddo
c
c     check for missing values
c
      clean = 0.
      if     (grid == 'U' .or. grid == 'u') then
       do j=js_pe,je_pe
        do i=is_pe,ie_pe
         if (umask(i,klevel,j) == 1. .and. d(i,j) == spval) then
          clean = 1.
         endif
        enddo
       enddo
      elseif (grid == 'T' .or. grid == 't') then
       do j=js_pe,je_pe
        do i=is_pe,ie_pe
         if (tmask(i,klevel,j) == 1. .and. d(i,j) == spval) then
          clean = 1.
         endif
        enddo
       enddo
      else
       if (my_pe == 0) print*,' cannot recognize type of grid '
       call halt_stop(' in extrapolate_hor ')
      endif

      call global_max(clean)

      if (clean > 0.) then
        call border_exchg(d,1,k)
        pass = pass+1
        if (pass > maxpass ) then
          if (my_pe==0) print*,' WARNING:'
          if (my_pe==0) print*,' number of maximal passes exceeded in'
          if (my_pe==0) print*,' extrapolate_hor'
        else
         goto 10
        endif
      endif

c      if (my_pe==0) print*,' number of passes in extrapol_hor : ',pass

      end subroutine extrapolate_hor



      subroutine smooth_hor(d,k,grid,klevel)
      use spflame_module
      implicit none
      integer :: k       ! number of grids points overlapping in other PE domains
                         ! which is also the smoothing length in grid points
      integer :: klevel  ! the vertical level of this slice
      character (len=1) :: grid ! which grid: can be U or T
      real    ::    d(is_pe-k:ie_pe+k,js_pe-k:je_pe+k) ! data to be extrapolated

      integer :: isum(is_pe  :ie_pe  ,js_pe  :je_pe  )
      real    ::   dm(is_pe  :ie_pe  ,js_pe  :je_pe  )
      integer :: i,j,ii,jj


      if (grid == 'U' .or. grid == 'u') then

       if ( ( lbound( umask, 1 ) >  is_pe-k ) .or.
     &      ( ubound( umask, 1 ) <  ie_pe+k ) ) then
        if (my_pe==0) print*,
     &   ' umask array in spflame_module is too small ',
     &   ' for smoothing operation of ',k,' grid points '
         call halt_stop(' in smooth_hor ')
       endif
c
c      number of valid points in the surrounding
c
       isum=0
       do ii=-k,k
        do jj=-k,k
         do j=js_pe,je_pe
          do i=is_pe,ie_pe
            isum(i,j)=isum(i,j)+umask(i+ii,klevel,j+jj)
          enddo
         enddo
        enddo
       enddo
c
c      mean of valid data in the surrounding
c
       dm=0.
       do ii=-k,k
        do jj=-k,k
         do j=js_pe,je_pe
          do i=is_pe,ie_pe
            dm(i,j)=dm(i,j)+d(i+ii,j+jj)*umask(i+ii,klevel,j+jj)
          enddo
         enddo
        enddo
       enddo

      elseif (grid == 'T' .or. grid == 't') then

       if ( ( lbound( tmask, 1 ) >  is_pe-k ) .or.
     &      ( ubound( tmask, 1 ) <  ie_pe+k ) ) then
        if (my_pe==0) print*,
     &   ' tmask array in spflame_module is too small ',
     &   ' for smoothing operation of ',k,' grid points '
         call halt_stop(' in smooth_hor ')
       endif
c
c      number of valid points in the surrounding
c
       isum=0
       do ii=-k,k
        do jj=-k,k
         do j=js_pe,je_pe
          do i=is_pe,ie_pe
            isum(i,j)=isum(i,j)+tmask(i+ii,klevel,j+jj)
          enddo
         enddo
        enddo
       enddo
c
c      mean of valid data in the surrounding
c
       dm=0.
       do ii=-k,k
        do jj=-k,k
         do j=js_pe,je_pe
          do i=is_pe,ie_pe
            dm(i,j)=dm(i,j)+d(i+ii,j+jj)*tmask(i+ii,klevel,j+jj)
          enddo
         enddo
        enddo
       enddo

      else
       if (my_pe == 0) print*,' cannot recognize type of grid '
       call halt_stop(' in smooth_hor ')
      endif

      where( isum /= 0 )  dm = dm/isum
c      elsewhere dm=0.
c      end where
c
c     set missing data to mean value of surrounding
c
      do j=js_pe,je_pe
       do i=is_pe,ie_pe
         d(i,j)=dm(i,j)
       enddo
      enddo

      end subroutine smooth_hor



      subroutine smooth_merid(d,m,i,kk)
      use spflame_module
      implicit none
      integer :: m  ! number of grid points to be smoothed
      integer :: i  ! zonal index of section
      integer :: kk ! vertical extend of section
      real :: d(js_pe:je_pe,kk) ! data to be smoothed

      real :: b(is_pe-m:ie_pe+m,js_pe-m:je_pe+m)
      integer :: j,jj,k,isum(js_pe:je_pe)

      if ( ( lbound( tmask, 1 ) >  is_pe-m ) .or.
     &      ( ubound( tmask, 1 ) <  ie_pe+m ) ) then
        if (my_pe==0) print*,
     &   ' tmask array in spflame_module is too small ',
     &   ' for smoothing operation of ',m,' grid points '
         call halt_stop(' in smooth_merid ')
      endif

      do k=1,kk
       b=0.
       if (i>=is_pe .and. i<=ie_pe)
     &     b(i,js_pe:je_pe)= d(js_pe:je_pe,k)

       call border_exchg(b,1,m)

       if (i>=is_pe .and. i<=ie_pe) then
        d(:,k)=0.
        isum=0
        do jj=-m,m
         do j=js_pe,je_pe
          d(j,k) = d(j,k)+b(i,j+jj)*tmask(i,k,j+jj)
          isum(j) = isum(j)+tmask(i,k,j+jj)
         enddo
        enddo
        where( isum /=0 ) d(:,k)=d(:,k)/isum
       endif

      enddo

      end subroutine smooth_merid



      subroutine smooth_zonal(d,m,j,kk)
      use spflame_module
      implicit none
      integer :: m  ! number of grid points to be smoothed
      integer :: j  ! meridional index of section
      integer :: kk ! vertical extend of section
      real :: d(is_pe:ie_pe,kk) ! data to be smoothed

      real :: b(is_pe-m:ie_pe+m,js_pe-m:je_pe+m)
      integer :: i,ii,k,isum(is_pe:ie_pe)

      if ( ( lbound( tmask, 1 ) >  is_pe-m ) .or.
     &      ( ubound( tmask, 1 ) <  ie_pe+m ) ) then
        if (my_pe==0) print*,
     &   ' tmask array in spflame_module is too small ',
     &   ' for smoothing operation of ',m,' grid points '
         call halt_stop(' in smooth_zonal ')
      endif

      do k=1,kk
       b=0.
       if (j>=js_pe .and. j<=je_pe)
     &     b(is_pe:ie_pe,j)= d(is_pe:ie_pe,k)

       call border_exchg(b,1,m)

       if (j>=js_pe .and. j<=je_pe) then
        d(:,k)=0.
        isum=0
        do ii=-m,m
         do i=is_pe,ie_pe
          d(i,k) = d(i,k)+b(i+ii,j)*tmask(i+ii,k,j)
          isum(i) = isum(i)+tmask(i+ii,k,j)
         enddo
        enddo
        where( isum /=0 ) d(:,k)=d(:,k)/isum
       endif

      enddo

      end subroutine smooth_zonal



      subroutine gcell_sub_domain(maxlen, n_bounds, bounds, 
     &                                d_bounds,num, deltat, deltau)
c
c=======================================================================
c     (changed version of gcell for grid construction for
c      sub domains )
c=======================================================================
c
      implicit none
      integer maxlen,n_bounds,num
      real deltat(maxlen), deltau(maxlen)
      real d_bounds(n_bounds), bounds(n_bounds)

      real avg_res, wid, del
      integer l,n,i,ierr,m
c
c     Do all regions, one at a time, to construct the domain
c
      num  = 1
      do l=1,n_bounds-1
c       avg_res = average resolution of T cells within region
c       wid     = width of region
        avg_res = .5*(d_bounds(l) + d_bounds(l+1))
        wid = abs(bounds(l+1) - bounds(l))
        n   = nint(wid/avg_res)
        del=wid/n
        do i=1,n
          deltau(num+i-1) = del
        enddo 
        num = num + n
      enddo
c
c     adjust "num" to reflect the total number of cells contained in
c     all regions
c
      num = num - 1
c
      do i=1,num
c
c       build resolution for T cells: "deltat". Note that
c       variable resolution (stretched grid) implies T points are
c       off center
c
        if (i .eq. 1) then
          deltat(i) = 0.5*(d_bounds(1) + deltau(i))
        else
          deltat(i) = 0.5*(deltau(i) + deltau(i-1))
        endif
      enddo     
      end subroutine gcell_sub_domain




