#include "options.inc"

c
c=======================================================================
c      solve:
c             A * dpsi = forc
c     SPFLAME version:   c.eden
c=======================================================================
c

      module congrad_module
      implicit none

      logical, allocatable :: imask(:)
      integer, allocatable :: iperm(:),jperm(:), iofs(:)
      integer, allocatable :: nippts(:),nippts_all(:)
      real, allocatable :: Zresisle(:),Zinv_isle(:),sum_isle(:)

      integer mnisle,nisle,maxipp
      integer imain,imain_pe

      end module congrad_module

      subroutine init_congrad()
      use spflame_module
      use congrad_module
      use island_module
      implicit none

      integer, allocatable :: iperm_all(:),jperm_all(:), iofs_all(:)
      logical, allocatable :: imask_all(:)
      integer map_big(imt,jmt),nn,isle,n,i,j

      if (my_pe==0) then
       print*,' Initialisation of Poisson solver, looking for islands'
      endif

      mnisle=500    ! max. number of islands
      maxipp=40000 ! max. number of island perimeter points

      allocate(imask_all(-mnisle:mnisle) )
      imask_all=.false.
      allocate(iperm_all(maxipp) )
      iperm_all=0
      allocate(jperm_all(maxipp) )
      jperm_all=0
      allocate(iofs_all(mnisle) )
      iofs_all=0
      allocate(nippts_all(mnisle) )
      nippts_all=0

      call isleperim (kmt_big, map_big, 
     &    iperm_all, jperm_all, iofs_all, nippts_all, nisle,
     &    imt, jmt,  mnisle, maxipp,my_pe,cyclic,
     &    enable_obc_north,enable_obc_south,
     &    enable_obc_west,enable_obc_east,.false.)

      if (my_pe==0 .and. enable_show_island_map) then
         call showmap(map_big,imt,jmt)
      endif
      
      map(is_pe:ie_pe,js_pe:je_pe)=map_big(is_pe:ie_pe,js_pe:je_pe)

      maxipp=nippts_all(1)
      do isle=2,nisle
       maxipp=maxipp+nippts_all(isle)
      enddo  

      if (my_pe==0) print*,' overall island perimeter points ',maxipp

      allocate(Zresisle(nisle))
      Zresisle=0.
      allocate(Zinv_isle(nisle))
      Zinv_isle=0.
      allocate(sum_isle(nisle))
      sum_isle=0.

      allocate(imask(-nisle:nisle) )
      imask=.false.
      allocate(iperm(maxipp) )
      iperm=0
      allocate(jperm(maxipp) )
      jperm=0
      allocate(iofs(nisle) )
      iofs=0
      allocate(nippts(nisle) )
      nippts=0
c
c     set mask for land mass perimeters on which to perform calculations
c     imask(-n) = .false.  [no equations ever on dry land mass n]
c     imask(0)  = .true.   [equations at all mid ocean points]
c     imask(n)  = .true./.false [controls whether there will be
c                                equations on the ocean perimeter of
c                                land mass n]
c
      do isle=-nisle,nisle
        if (isle .ge. 0 .and. isle .le. nisle) then
          imask(isle) = .true.
        else
          imask(isle) = .false.
        end if
      end do
c
c     user-specified changes to island mask
c       imask(1) = .true.
c       imask(2) = .true.
c
c     there are problems if imask is set .true. for a nonexistent
c     island.
c
c    Get the island perimeter points on the actual PE
c
      nn = 0
      do isle=1,nisle
        iofs(isle) = nn
        do n=1,nippts_all(isle)
          i = iperm_all(iofs_all(isle)+n)
          j = jperm_all(iofs_all(isle)+n)
          if(i.ge.is_pe.and.i.le.ie_pe.and.
     +       j.ge.js_pe.and.j.le.je_pe) then
            nn = nn+1
            iperm(nn) = i
            jperm(nn) = j
          endif
        end do
        nippts(isle) = nn - iofs(isle)
      end do

c     # of landmass to fix drifting streamfunction
      imain=1

c     determine on which PE this points is
      j = ( jperm_all(iofs(imain)+1) - 2 ) / j_blk + 1 
      i = ( iperm_all(iofs(imain)+1) - 2 ) / i_blk + 1
      imain_pe = ( j - 1 ) * n_pes_i + i - 1

      if (my_pe==0) then
        print*,''
        print*,' fix streamfunction to zero on landmass ',imain
        print*,' which is on PE #',imain_pe
        print*,''
      endif

      deallocate(imask_all, iperm_all, jperm_all, iofs_all )

      if (my_pe==0) print*,' done'

      end subroutine init_congrad


      subroutine congr  (dpsi, res, forc, cf, epsilon,islands)
      use spflame_module
      implicit none
c=======================================================================
c                            C O N G R A D
c      solve:
c             A * dpsi = forc
c      author:      Charles Goldberg        e-mail: chg@gfdl.gov
c
c=======================================================================

      real, dimension(is_pe:ie_pe,js_pe:je_pe,-1:1,-1:1) :: cf
      real, dimension(is_pe:ie_pe,js_pe:je_pe) :: forc

      real, dimension(is_pe-1:ie_pe+1,js_pe-1:je_pe+1) :: dpsi
      real, dimension(is_pe-1:ie_pe+1,js_pe-1:je_pe+1) :: Z,Zres,As
      real, dimension(is_pe-1:ie_pe+1,js_pe-1:je_pe+1) :: s
      real, dimension(is_pe-1:ie_pe+1,js_pe-1:je_pe+1) :: res
      real :: epsilon
      logical :: islands
      real dot2,absmax
      external dot2,absmax
      integer i,j,k
      real Zresmax, estimated_error
      real betakm1,betak,betak_min,alpha,betaquot
      logical diverged,converged
      integer key
      real smax,step,step1,cfactor
      real convergence_rate,s_dot_as
      integer is,ie,js,je
c
      is=max(is_pe,2); ie=min(ie_pe,imt-1)
      js=max(2,js_pe); je=min(je_pe,jmt-1)
 

      Z=0. ; Zres=0.; As=0.; s=0.      

c-----------------------------------------------------------------------
c     impose boundary conditions on guess
c     dpsi(0) = guess
c-----------------------------------------------------------------------
c
      call set_cyclic(dpsi,1,1)
c
c-----------------------------------------------------------------------
c     make approximate inverse operator Z (always even symmetry)
c-----------------------------------------------------------------------
c
      call make_inv (cf, Z, islands)
      call set_cyclic(Z,1,1)
c
c-----------------------------------------------------------------------
c     res(0)  = forc - A * dpsi(0)
c     impose cyclic and/or symmetry conditions on res(i,j)
c-----------------------------------------------------------------------
c

      call op9_vec(cf, dpsi, res)
      do j=js,je
        do i=is,ie
          res(i,j) = forc(i,j) - res(i,j)
        end do
      end do
      call set_cyclic(res,1,1)
c
c-----------------------------------------------------------------------
c     Zres(k-1) = Z * res(k-1)
c     see if guess is a solution, bail out to avoid division by zero
c-----------------------------------------------------------------------
c
      k = 0
      diverged=.false.

      call inv_op(1, Z, res, Zres, islands)


c
c     set borders of Zres using cyclic/symmetry, if defined.
c
      call set_cyclic(Zres,1,1)
c
      Zresmax = absmax(Zres)
c
c     Assume convergence rate of 0.99 to extrapolate error
c
      if (100.0 * Zresmax .lt. epsilon) then
	  estimated_error = 100.0 * Zresmax 
	  goto 101
      endif
c
c-----------------------------------------------------------------------
c     beta(0) = 1
c     s(0)    = zerovector()
c-----------------------------------------------------------------------
c
      betakm1 = 1.0
      s=0.
c
c-----------------------------------------------------------------------
c     begin iteration loop
c-----------------------------------------------------------------------
c
      do k = 1,max_itts_solver
c
c-----------------------------------------------------------------------
c       Zres(k-1) = Z * res(k-1)
c-----------------------------------------------------------------------
c
        key = 2;if(k==1) key=1
        call inv_op(key, Z, res, Zres, islands)


c        call set_cyclic(Zres,1,1)

c       set borders of Zres using cyclic/symmetry, if defined.
!kk     in case of T3E, perm_border is called within inv_op
c
c-----------------------------------------------------------------------
c       beta(k)   = res(k-1) * Zres(k-1)
c-----------------------------------------------------------------------
c
        betak = dot2(Zres, res)

c        if (my_pe==0) print*,'k=',k,betak,betakm1,betak/betakm1


        if (k .eq. 1) then
          betak_min = abs(betak)
	elseif (k .gt. 2) then
          betak_min = min(betak_min, abs(betak))
          if (abs(betak) .gt. 100.0*betak_min) then
	    write (6,'(/a/a/)')
     &      'WARNING: conjugate gradient terminated because correction'
     &,     '         steps are diverging. Probable cause...roundoff'
            smax = absmax(s)
            step = abs(alpha) * smax
	    estimated_error=step*convergence_rate/(1.0-convergence_rate)
            diverged=.true.
	    go to 101
	  endif
	endif
c
c-----------------------------------------------------------------------
c       s(k)      = Zres(k-1) + (beta(k)/beta(k-1)) * s(k-1)
c-----------------------------------------------------------------------
c
        betaquot = betak/betakm1
        do j=js,je
          do i=is,ie
            s(i,j) = Zres(i,j) + betaquot * s(i,j)
          end do
        end do
        call set_cyclic(s,1,1)
*
****    Distribute the borders of the domain of the actaul PE
****    to it`s neighbors
*
        call border_exchg(s,1,1)
c
c-----------------------------------------------------------------------
c       As(k)     = A * s(k)
c-----------------------------------------------------------------------
c
        call op9_vec(cf, s, As)
        call set_cyclic(As,1,1)
c
c-----------------------------------------------------------------------
c       If s=0 then the division for alpha(k) gives a float exception.
c       Assume convergence rate of 0.99 to extrapolate error.
c       Also assume alpha(k) ~ 1.
c-----------------------------------------------------------------------
c

        s_dot_As = dot2(s, As)

        if (abs(s_dot_As) .lt. abs(betak)*1.e-10) then
          smax = absmax(s)
	  estimated_error = 100.0 * smax 
	  goto 101
	endif
c
c-----------------------------------------------------------------------
c       alpha(k)  = beta(k) / (s(k) * As(k))
c-----------------------------------------------------------------------
c
        alpha = betak / s_dot_As
c
c-----------------------------------------------------------------------
c       update values:
c       dpsi(k)   = dpsi(k-1) + alpha(k) * s(k)
c       res(k)    = res(k-1) - alpha(k) * As(k)
c-----------------------------------------------------------------------
c
        do j=js_pe,je_pe
          do i=is_pe,ie_pe
            dpsi (i,j) = dpsi(i,j) + alpha * s(i,j)
            res  (i,j) = res (i,j) - alpha * As(i,j)
          end do
        end do

        if (islands) call avg_dist (res)
c        call set_cyclic(res,1,1)
c
        smax = absmax(s)
c        print*,' smax=',smax
c
c-----------------------------------------------------------------------
c       test for convergence
c       if (estimated_error) < epsilon) exit
c-----------------------------------------------------------------------
c
        step = abs(alpha) * smax
        if (k .eq. 1) then
          step1 = step
          estimated_error = step
          if (step .lt. epsilon) goto 101
        else if (step .lt. epsilon) then
          cfactor = log(step/step1)
          convergence_rate = exp(cfactor/(k-1))
	  estimated_error = step*convergence_rate/(1.0-convergence_rate)
          if (estimated_error .lt. epsilon) goto 101
        end if
c
        betakm1 = betak
c
      end do
c
c-----------------------------------------------------------------------
c     end of iteration loop
c-----------------------------------------------------------------------
c
  101 continue
      if ((k .gt. max_itts_solver).or.(diverged)) then
        cfactor = log(step/step1)
        convergence_rate = exp(cfactor/(k-1))
	estimated_error = step*convergence_rate/(1.0-convergence_rate)
        converged = .false.
        if (my_pe==0) then
          print*,' Poisson solver is not converged in sub domain # ',
     &             sub_domain
          print*,' after ',k,' iterations (max_itts_solver=',
     &             max_itts_solver,' )'
          print*,' estimated error= ',estimated_error,
     &             '( epsilon=',epsilon,' )'
        endif
        call  halt_stop(' solver not converged ')
      else
        converged = .true.
      end if
      itts_solver = k
c
c-----------------------------------------------------------------------
c     return the last increment of dpsi in the argument res
c-----------------------------------------------------------------------
c
      if (itts_solver .eq. 0) then
        do j=js_pe,je_pe
          do i=is_pe,ie_pe
            res(i,j) = Zres(i,j)
          end do
        end do
      else
        do j=js_pe,je_pe
          do i=is_pe,ie_pe
            res(i,j) = alpha * s(i,j)
          end do
        end do
      endif
      if (islands) call remove_drift(dpsi)

      end subroutine congr

      subroutine remove_drift(pp)
      use spflame_module
      use congrad_module
      implicit none
      real, dimension(is_pe-1:ie_pe+1,js_pe-1:je_pe+1) :: pp
      integer i,j
      real dpsi1
      if (my_pe==imain_pe) 
     &    dpsi1 = pp(iperm(iofs(imain)+1),jperm(iofs(imain)+1))
      call bcast_real(dpsi1,1,imain_pe)
      do j=js_pe,je_pe
          do i=is_pe,ie_pe
           if (map(i,j) .le. 0) then
             pp(i,j)=pp(i,j)-dpsi1
           endif
          enddo
      enddo
      end subroutine remove_drift

      function absmax(f)
      use spflame_module
      implicit none
      integer i,j
      real amax,absmax
      real, dimension(is_pe-1:ie_pe+1,js_pe-1:je_pe+1) :: f

      amax = 0.0
      do i=is_pe,ie_pe
       do j=js_pe,je_pe
        amax = max(amax, abs(f(i,j)))
       end do
      end do
      call global_max(amax)
      absmax = amax
      end function absmax


      function dot2 (dp_vec, res_vec)
      use spflame_module
      implicit none
c
c     this dot product produces the correct answers because for
c     ocean perimeter subscripts, ij=isle, the value on a
c     type(dpsi_type) vector, dp_vec(isle)=dp_vec(i,j), i.e., the true
c     value is replicated, and for a type(res_type) vector,
c     res_vec(isle) = sum (res_vec(i,j)), i.e., the true value is the
c     accumulation of the distributed values.
c
      real, dimension(is_pe-1:ie_pe+1,js_pe-1:je_pe+1) :: dp_vec,res_vec
      real sum,dot2
      integer i,j
      integer is,ie,js,je
c
      is=max(is_pe,2); ie=min(ie_pe,imt-1)
      js=max(2,js_pe); je=min(je_pe,jmt-1)

      sum = 0.
      do j=js,je
        do i=is,ie
          sum = sum + dp_vec(i,j) * res_vec(i,j)
        enddo
      enddo
      call global_sum(sum)
      dot2 = sum
      end function dot2



      subroutine op9_vec(cf, dpsi, ores)
      use spflame_module
      implicit none
c                       res = A * dpsi
c
c     this subroutine does not collect the terms of the true value
c     of res(isle) = sum (res(i,j)).  the contributions to the sum
c     remain distributed among the T cells (i,j) that form the
c     ocean perimeter of land_mass(isle).
c
c     at present, borders are not computed [i=1 or imt] [j=1 or jmt]
c
      real, dimension(is_pe:ie_pe,js_pe:je_pe,-1:1,-1:1) :: cf
      real, dimension(is_pe-1:ie_pe+1,js_pe-1:je_pe+1) :: dpsi,ores
      integer i,j
      integer is,ie,js,je
c
      is=max(is_pe,2); ie=min(ie_pe,imt-1)
      js=max(2,js_pe); je=min(je_pe,jmt-1)

      do j=js,je
        do i=is,ie
          ores(i,j) =
     &               cf(i,j,-1,-1) * dpsi(i-1,j-1) +
     &               cf(i,j, 0,-1) * dpsi(i  ,j-1) +
     &               cf(i,j, 1,-1) * dpsi(i+1,j-1)
        end do
        do i=is,ie
          ores(i,j) = ores(i,j) +
     &               cf(i,j,-1, 0) * dpsi(i-1,j  ) +
     &               cf(i,j, 0, 0) * dpsi(i  ,j  ) +
     &               cf(i,j, 1, 0) * dpsi(i+1,j  )
        end do
        do i=is,ie
          ores(i,j) = ores(i,j) +
     &               cf(i,j,-1, 1) * dpsi(i-1,j+1) +
     &               cf(i,j, 0, 1) * dpsi(i  ,j+1) +
     &               cf(i,j, 1, 1) * dpsi(i+1,j+1)
        end do
      end do
      end subroutine op9_vec


      subroutine inv_op(key, Z, ores, Zres, islands)
      use spflame_module
      use congrad_module
      implicit none
      integer key,i,j
      real, dimension(is_pe-1:ie_pe+1,js_pe-1:je_pe+1) :: Z,ores,Zres
      logical :: islands
c
c     apply and approximate inverse Z or the operator A
c
c     res is type(res_type), i.e., perimeter values res(isle)
c         are the sum of the distributed contributions res(i,j)
c     Zres is type(dpsi_type), i.e., perimeter values Zres(isle)
c         must be replicated at each perimeter point Zres(i,j)
c
c     borders  of Zres [i=1 or imt] [j=1 or jmt] must be defined
c     and must satisfy cyclic and/or symmetry, if defined.
c
c     currently, Z is diagonal:  Z(ij) = 1/A(ij)
c     and is stored in type(dpsi_type) format, i.e., Z(isle) is
c     replicated and stored in each Z(i,j) in the perimeter of
c     land_mass(isle).
c
      do j=js_pe,je_pe
        do i=is_pe,ie_pe
          Zres(i,j) = Z(i,j) * ores(i,j)
        end do
      end do
c
c     sum contributions to Zres(isle)
c     distribute Zres(isle) to all perimeter points
      if (islands) call sum_dist (key, Zres)
c
      end subroutine  inv_op


      subroutine make_inv (cf, Z, islands)
      use spflame_module
      use congrad_module
      implicit none
      real, dimension(is_pe-1:ie_pe+1,js_pe-1:je_pe+1) :: Z
      real, dimension(is_pe:ie_pe,js_pe:je_pe,-1:1,-1:1) :: cf
      integer i,j,isle,n
      integer is,ie,js,je
      logical :: islands
c
      is=max(is_pe,1); ie=min(ie_pe,imt)
      js=max(1,js_pe); je=min(je_pe,jmt)
c
c     construct an approximate inverse Z to A
c
c     Z will be diagonal:  Z(ij) = 1/A(ij)
c     and values for ocean perimeter entries Z(isle) will be replicated
c     at all T cells Z(i,j) in the ocean perimeter of land_mass(isle).
c
c     T cells (i,j) for which there is no diagonal coefficient
c     i.e., A(ij)=A(i,j)=0, are masked off by assigning Z(i,j)=0.
c     there are effectively no equations and no variables dpsi(i,j)
c     at these points.
c
c     copy diagonal coefficients of A to Z

      do j=js,je
        do i=is,ie
          Z(i,j) = cf(i,j,0,0)
        end do
      end do
c
c     for each land_mass(isle),
c     sum the contributions to cfdiag(isle)=A(isle,isle)
c     now stored in Z(i,j) at ocean perimeter T cells and replicate
c     the sum in all Z(i,j) for which (i,j) is in ocean perimeter
c     of land_mass(isle).
c
      if (islands) call sum_dist (1, Z)
c
c     now invert Z
c
      do j=js,je
        do i=is,ie
          if (Z(i,j) /= 0.0) then
            Z(i,j) = 1./Z(i,j)
          else
            Z(i,j) = 0.0
          end if
        end do
      end do
c
c     make inverse zero on island perimeters that are not integrated
c
      if (islands) then
       do isle=1,nisle
        if (.not. imask(isle)) then
          do n=1,nippts(isle)
            i = iperm(iofs(isle)+n)
            j = jperm(iofs(isle)+n)
            if(j >= js_pe-1 .and. j <= je_pe+1)   then
               Z(i,j) = 0.0
            end if
          end do
        end if
        if(Zresisle(isle) /= 0.0)  then
           Zinv_isle(isle) = 1./Zresisle(isle)
        end if
       end do
      endif
      end subroutine make_inv





      subroutine sum_dist (key,Zres)
      use spflame_module
      use congrad_module
      implicit none
      integer key,isle,n,i,j
      real, dimension(is_pe-1:ie_pe+1,js_pe-1:je_pe+1) :: Zres
c
c     sum contributions to Zres(isle)
c     distribute Zres(isle) to all perimeter points
c
c     this subroutine converts a type(res_type) vector with
c     distributed contributions to perimeter values
c        Zres(isle) = sum (Zres(i,j))
c     into a type (dpsi_type) vector with replicated values
c     for land_mass perimeters
c        Zres(isle) = Zres(i,j)
c     for all (i,j) in the ocean perimeter of land_mass(isle).
c
c     Ketelsens work is to prevent the communication 
c     of the island perimeter values of Zres during the iteration
c     However, I do not understand this code. Do you ?
c
c       if key == 1:
c           Islands sums over Zres are computed on PE domain
c           a sum over all PEs computed and
c           Zres is set to thesee sums on the islands perimeter points
c           the sum over 1 (sum_isle) is also computed and saved here for later use.
c
c       if key != 2:
c           Zres(isle) is multiplied with sum_isle(isle)
c            and Zresisle is multiplied with Zinv_isle and sum_isle(1)
c
c     sum contributions to Zres(isle)
c
      if(key == 1)   then
         do isle=1,nisle
           if (imask(isle)) then
             Zresisle(isle) = 0.0
             sum_isle(isle) = 0.0
             do n=1,nippts(isle)
               i = iperm(iofs(isle)+n)
               j = jperm(iofs(isle)+n)
               if(j >= js_pe-1 .and. j <= je_pe+1)   then
                  Zresisle(isle) = Zresisle(isle) + Zres(i,j)
                  sum_isle(isle) = sum_isle(isle) + 1.
               end if
             end do
           end if
         end do
         if ( nisle > 0 ) then
           call global_sum_vec(Zresisle,nisle)
           call global_sum_vec(sum_isle,nisle)
         endif
c
c     distribute Zres(isle) to all perimeter points
c
         do isle=1,nisle
           if (imask(isle)) then
             do n=1,nippts(isle)
               i = iperm(iofs(isle)+n)
               j = jperm(iofs(isle)+n)
               if(j >= js_pe-1 .and. j <= je_pe+1)   then
                  Zres(i,j) = Zresisle(isle)
               end if
             end do
           end if
         end do
      else
         do isle=1,nisle
           if (imask(isle)) then
#ifdef SX5_host
ce optimisation for sxf90, NEC cross compiler
c  yields a few percent perfomance, about 15% cpu time 
c  less in congr
!CDIR NODEP
#endif
             do n=1,nippts(isle)
               i = iperm(iofs(isle)+n)
               j = jperm(iofs(isle)+n)
               if(j >= js_pe-1 .and. j <= je_pe+1)   then
                  Zres(i,j) = Zres(i,j)*sum_isle(isle)
               end if
             end do
             Zresisle(isle) = Zresisle(isle) * Zinv_isle(isle) 
     &                                       * sum_isle(isle)
           end if
         end do
      end if
      call  perm_border (Zres)
      end subroutine sum_dist 






      subroutine avg_dist (Zres)
      use spflame_module
      use congrad_module
      use island_module
      implicit none
      real, dimension(is_pe-1:ie_pe+1,js_pe-1:je_pe+1) :: Zres
      integer isle,n,i,j
c
c     avg contributions to Zres(isle)
c     distribute Zres(isle) to all perimeter points
c
c     this subroutine converts a type(res_type) vector with
c     distributed contributions to perimeter values
c        Zres(isle) = avg (Zres(i,j))
c     into a type (dpsi_type) vector with replicated values
c     for land_mass perimeters
c        Zres(isle) = Zres(i,j)
c     for all (i,j) in the ocean perimeter of land_mass(isle).
c
c     avg contributions to Zres(isle)
c
      do isle=1,nisle
        if (imask(isle)) then
          Zresisle(isle) = 0.0
          do n=1,nippts(isle)
            i = iperm(iofs(isle)+n)
            j = jperm(iofs(isle)+n)
            Zresisle(isle) = Zresisle(isle) + Zres(i,j)
          end do
        end if
      end do
      if ( nisle > 0 ) then
          call global_sum_vec(Zresisle,nisle)
      endif
c
c     distribute Zres(isle) to all perimeter points
c
      do isle=1,nisle
        if (imask(isle)) then
          Zresisle(isle) = Zresisle(isle)/nippts_all(isle)
          do n=1,nippts(isle)
            i = iperm(iofs(isle)+n)
            j = jperm(iofs(isle)+n)
            if(j >= js_pe-1 .and. j <= je_pe+1)   then
               Zres(i,j) = Zresisle(isle)
            end if
          end do
        end if
      end do
      call  perm_border (Zres)
      end subroutine avg_dist 



      subroutine perm_border (Zres)
      use spflame_module
      use congrad_module
      implicit none
      real, dimension(is_pe-1:ie_pe+1,js_pe-1:je_pe+1) :: Zres
      integer :: isle,n,j
      integer, allocatable, save :: jp_upper(:),jp_lower(:)
      integer              ,save :: i_last_pe,i_first_pe
      logical ::  first = .true.

      if (cyclic) then

!     At first call, look for isles at boundary. Store isle number
!     in jp_upper and jp_upper, respectively. Send values to opposite
!     boundary PE.

        if(first)   then
         allocate( jp_upper(jmt), jp_lower(jmt) )

         i_last_pe  =  my_blk_j * n_pes_i - 1
         i_first_pe =  (my_blk_j - 1) * n_pes_i
         jp_upper   = 0
         jp_lower   = 0
         do isle=1,nisle
           if (imask(isle)) then
             do n=1,nippts(isle)
                if (my_pe .eq. i_first_pe) then
                   if(iperm(iofs(isle)+n) == 2)    then
                      jp_upper(jperm(iofs(isle)+n)) = isle
                   end  if
                end if
                if (my_pe .eq. i_last_pe) then
                   if(iperm(iofs(isle)+n) == imt-1)    then
                      jp_lower(jperm(iofs(isle)+n)) = isle
                   end  if
                end if
             end do
           end if
         end do

         if ( (nisle > 0) .and. (n_pes_i > 1)) then
          if (my_pe .eq. i_first_pe)
     &     call send_integer(jp_upper(1),jmt,i_last_pe,1)
          if (my_pe .eq. i_last_pe)
     &     call recv_integer(jp_upper(1),jmt,i_first_pe,1)
          if (my_pe .eq. i_last_pe)
     &     call send_integer(jp_lower(1),jmt,i_first_pe,2)
          if (my_pe .eq. i_first_pe)
     &     call recv_integer(jp_lower(1),jmt,i_last_pe,2)
         endif

         first = .false.
        end if

!     Set values at cyclic boundaries

        if (my_pe == i_first_pe .and. nisle > 0 ) then
         do j=js_pe,je_pe
            if(jp_lower(j) > 0)   then
               Zres (1,j) =  Zresisle (jp_lower(j))
            end if
         end do
        end if

        if (my_pe == i_last_pe .and. nisle > 0 ) then
         do j=js_pe,je_pe
            if(jp_upper(j) > 0)   then
               Zres (imt,j) = Zresisle (jp_upper(j))
            end if
         end do
        end if
      endif 
      end subroutine perm_border 




