Using petsc (parallel)

Here is a quick parallel code. Compile it as in using petsc, but run it like

mpirun -n <N procs> ./a.out <other options>
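
For example, a four-process run with the solver options used below (the process count here is just for illustration):

mpirun -n 4 ./a.out -ksp_type bcgs -pc_type sor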

On my workstation, n = 2 actually results in better performance than n = 1 (it has four cores, but multicore CPUs usually do little for petsc), though not if I bump the problem up to 1024x1024.

On bluecrystalp1, running a 1024x1024 problem with -pc_type sor -ksp_type bcgs, I get

1 node, 1 core: 70s
1 node, 2 cores: 45s
1 node, 4 cores: 32s
2 nodes, 4 cores each: 14s

These compare with the 49s required on my workstation with one core. Asking for 4 nodes seems to result in lengthy queuing.
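
To have petsc report its own timing breakdown, a run like the following should work (the process count is illustrative; -log_summary is petsc's built-in profiling option):

mpirun -n 8 ./a.out -pc_type sor -ksp_type bcgs -log_summary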

Using mumps instead (-pc_type lu -pc_factor_mat_solver_package mumps) I get

1 node, 1 core: 77s
1 node, 2 cores: 48s
1 node, 4 cores:
2 nodes, 1 core each: 47s
2 nodes, 2 cores each: 29s
2 nodes, 4 cores each:
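
For completeness, the full mumps run line looks something like this (the process count is again just for illustration):

mpirun -n 4 ./a.out -pc_type lu -pc_factor_mat_solver_package mumps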


!! solve a 2D n x n (a(x,y)u')' + b(x,y) u = c(x,y) 
!! with periodic boundary conditions in y, u(x=0,y) = 0 and u'(x=1,y)= d
!! a(x,y) = 1 for x < 1/2, h for x > 1/2
!! b(x,y) = 1 for x < 1/2, r for x > 1/2 
!! c(x,y) = 1 for x < 1/2, p for x > 1/2 

program main

  implicit none

#include "finclude/petsc.h"
#include "finclude/petscvec.h"
#include "finclude/petscmat.h"
#include "finclude/petscksp.h"
#include "finclude/petscpc.h"
 
  
  Mat A
  Vec x,b, res
  KSP ksp
  PetscInt ierr, ndof, n, m, i , j, lbegin, lend
  PetscScalar dfluxe(0:1), dfluxw(0:1), dfluxn(0:1), dfluxs(0:1), su(0:0), sc, &
       h, r, p , d, rnorm, bnorm, one
  PetscInt row(0:0), fcol(0:1)
  
  n = 256
  m = n/2
  ndof = n * n

  !x > 1/2 diffusion = h u'
  h = 1e-2
  !x > 1/2 dissipation = r u ; set r small to make the matrix hard to invert,
  !so that direct solvers win
  r = 1e-6
  !x > 1/2 source = p
  p = 0.5
  ! u' at x = 1
  d = 100
  

  ! this has to be the first petsc call 
  call PetscInitialize(PETSC_NULL_CHARACTER,ierr)
  
  !allocate space for matrix elements
  call MatCreateMpiAIJ(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, &
       ndof, ndof, 5, PETSC_NULL_INTEGER, 1,PETSC_NULL_INTEGER, A,ierr)
  ! allocate space for rhs, residual and solution
  call MatGetVecs(A, x, b, ierr)
  call MatGetVecs(A, x, res, ierr)
  

  !establish which dofs are on this processor
  call MatGetOwnershipRange(A, lbegin, lend, ierr)

  ! bulk
  dfluxe(0) = 1.0
  dfluxe(1) = -1.0
 
  su(0) = 1.0
  sc = 1.0

  do i = 1, n-2

     dfluxw(0) = dfluxe(0)
     dfluxw(1) = dfluxe(1)

     
     dfluxn(0) = 1.0
     dfluxn(1) = -1.0

  

     if (i > m) then
        su(0) = r
        sc = p
        dfluxe(0) = h
        dfluxe(1) = -h
        dfluxn(0) = h
        dfluxn(1) = -h
     endif
     
     dfluxs(0) = dfluxn(0)
     dfluxs(1) = dfluxn(1)
    
     do j = 1, n-2
          
        row(0) = j*n + i
        if ((lbegin .le. row(0)) .and. (lend .gt. row(0))) then
        
           ! source term
           call MatSetValues(A, 1 ,row, 1, row, su, ADD_VALUES,ierr)
           call VecSetValue( b, row(0) , sc , INSERT_VALUES, ierr)
           
           ! fluxes
           fcol(0) = row(0)
           
           fcol(1) = row(0) + 1
           call MatSetValues(A, 1 ,row, 2, fcol, dfluxe, ADD_VALUES,ierr)
           
           fcol(1) = row(0) - 1
           call MatSetValues(A, 1 ,row, 2, fcol, dfluxw, ADD_VALUES,ierr)
           
           fcol(1) = row(0) + n
           call MatSetValues(A, 1 ,row, 2, fcol, dfluxn, ADD_VALUES,ierr)
           
           fcol(1) = row(0) - n
           call MatSetValues(A, 1 ,row, 2, fcol, dfluxs, ADD_VALUES,ierr)
        end if
        
     end do

     ! periodic boundaries
     j = 0
     row(0) = j*n + i
     if ((lbegin .le. row(0)) .and. (lend .gt. row(0))) then
        ! source term
        call MatSetValues(A, 1 ,row, 1, row, su, ADD_VALUES,ierr)
        call VecSetValue( b, row(0) , sc , INSERT_VALUES, ierr)
        
        ! fluxes
        fcol(0) = row(0)
        
        fcol(1) = row(0) + 1
        call MatSetValues(A, 1 ,row, 2, fcol, dfluxe, ADD_VALUES,ierr)
        
        fcol(1) = row(0) - 1
        call MatSetValues(A, 1 ,row, 2, fcol, dfluxw, ADD_VALUES,ierr)
        
        fcol(1) = row(0) + n
        call MatSetValues(A, 1 ,row, 2, fcol, dfluxn, ADD_VALUES,ierr)
        
        fcol(1) = (n-1)*n + i
        call MatSetValues(A, 1 ,row, 2, fcol, dfluxs, ADD_VALUES,ierr)
     end if

     j = n-1
     row(0) = j*n + i
     if ((lbegin .le. row(0)) .and. (lend .gt. row(0))) then
        ! source term
        call MatSetValues(A, 1 ,row, 1, row, su, ADD_VALUES,ierr)
        call VecSetValue( b, row(0) , sc , INSERT_VALUES, ierr)
        
        ! fluxes
        fcol(0) = row(0)
        
        fcol(1) = row(0) + 1
        call MatSetValues(A, 1 ,row, 2, fcol, dfluxe, ADD_VALUES,ierr)
        
        fcol(1) = row(0) - 1
        call MatSetValues(A, 1 ,row, 2, fcol, dfluxw, ADD_VALUES,ierr)
        
        fcol(1) = row(0) - n
        call MatSetValues(A, 1 ,row, 2, fcol, dfluxn, ADD_VALUES,ierr)
        
        fcol(1) = i
        call MatSetValues(A, 1 ,row, 2, fcol, dfluxs, ADD_VALUES,ierr)
     end if
     
  end do
   
  !x-boundaries

  dfluxw(0) = h
  dfluxw(1) = -h

  do j = 0, n - 1
  
     i = 0
     row(0) = j*n + i
     if ((lbegin .le. row(0)) .and. (lend .gt. row(0))) then
        call MatSetValues(A, 1 ,row, 1, row, su, ADD_VALUES,ierr)
     end if
     
     
     i = n - 1
     row(0) = j*n + i
     if ((lbegin .le. row(0)) .and. (lend .gt. row(0))) then
        su = r
        sc = h*d
        call MatSetValues(A, 1 ,row, 1, row, su, ADD_VALUES,ierr)
        call VecSetValue( b, row(0) , sc , INSERT_VALUES, ierr)
        fcol(0) = row(0)
        fcol(1) = row(0) - 1
        call MatSetValues(A, 1 ,row, 2, fcol, dfluxw, ADD_VALUES,ierr) 
     end if
     


  end do


  !needs to be done before A and b can be used
  call MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY,ierr)
  call MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY,ierr)
  call VecAssemblyBegin(b,ierr)
  call VecAssemblyEnd(b,ierr)

 
  !KSP context: KSP is petsc's interface to linear solvers
  call KSPCreate(PETSC_COMM_WORLD,ksp,ierr)

  !A defines both the operator and the preconditioning matrix (they need not be the same)
  call KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN,ierr)

  !grab all the options from the command line
  call KSPSetFromOptions(ksp,ierr)

  ! solve
  call KSPSolve(ksp,b,x,ierr)

  !check results
  call MatMult(A, x, res, ierr)
  one = -1.0
  call VecAXPY(res , one , b, ierr)  ! res <- res - b = Ax - b
  call VecNorm(res , NORM_2, rnorm, ierr) 
  call VecNorm(b , NORM_2, bnorm, ierr)
  write(*,*) "||Ax - b||/||b|| = ", rnorm/bnorm


  !clean up
  call VecDestroy(x,ierr)
  call VecDestroy(b,ierr)
  call VecDestroy(res,ierr)
  call MatDestroy(A,ierr)
  call KSPDestroy(ksp,ierr)

  !last petsc call
  call PetscFinalize(ierr)

end program main
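
If the solve converges, the final line printed should be a small relative residual ||Ax - b||/||b||, typically of the order of the KSP relative tolerance (1e-5 by default, adjustable with -ksp_rtol).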