 High Performance Scientific Computing   AMath 483/583 Class Notes   Spring Quarter, 2011

#### Previous topic

Jacobi iteration using OpenMP with parallel do constructs

#### Next topic

Jacobi iteration using MPI

# Jacobi iteration using OpenMP with coarse-grain parallel block¶

The code below implements Jacobi iteration for solving the linear system arising from the steady state heat equation with a single parallel block. Work is split up manually between threads.

Compare to:

The code:

```! \$CLASSHG/codes/openmp/jacobi2_omp.f90
!
! Domain decomposition version of Jacobi iteration illustrating
! coarse grain parallelism with OpenMP.
!
! The grid points are split up into nthreads disjoint sets and each thread
! is assigned one set that it updates for all iterations.

program jacobi2
use omp_lib
implicit none
real(kind=8), dimension(:), allocatable :: x,u,uold,f
real(kind=8) :: alpha, beta, dx, tol, dumax, dumax_thread
real(kind=8), intrinsic :: exp
integer :: n, nthreads, points_per_thread,thread_num
integer :: i,iter,maxiter,istart,iend,nprint

! Specify number of threads to use:
nthreads = 1       ! need this value in serial mode
!\$ nthreads = 8
!\$ call omp_set_num_threads(nthreads)
!\$ print "('Using OpenMP with ',i3,' threads')", nthreads

nprint = 1000 ! print dumax every nprint iterations

print *, "Input n ... "
read *, n

! allocate storage for boundary points too:
allocate(x(0:n+1), u(0:n+1), uold(0:n+1), f(0:n+1))

open(unit=20, file="heatsoln.txt", status="unknown")

! grid spacing:
dx = 1.d0 / (n+1.d0)

! boundary conditions:
alpha = 20.d0
beta = 60.d0

! tolerance and max number of iterations:
tol = 0.1 * dx**2
print *, "Convergence tolerance: tol = ",tol
maxiter = 10000
print *, "Maximum number of iterations: maxiter = ",maxiter

! Determine how many points to handle with each thread.
! Note that dividing two integers and assigning to an integer will
! round down if the result is not an integer.
! This, together with the min(...) in the definition of iend below,
! insures that all points will get distributed to some thread.
points_per_thread = (n + nthreads - 1) / nthreads
print *, "points_per_thread = ",points_per_thread

! Start of the parallel block...
! ------------------------------

! This is the only time threads are forked in this program:

!\$omp parallel private(thread_num, iter, istart, iend, i, dumax_thread)

thread_num = 0     ! needed in serial mode
!\$ thread_num = omp_get_thread_num()    ! unique for each thread

! Determine start and end index for the set of points to be
! handled by this thread:
istart = thread_num * points_per_thread + 1
iend = min((thread_num+1) * points_per_thread, n)

!\$omp critical
print 201, thread_num, istart, iend
!\$omp end critical
201 format("Thread ",i2," will take i = ",i6," through i = ",i6)

! Initialize:
! -----------

! each thread sets part of these arrays:
do i=istart, iend
! grid points:
x(i) = i*dx
! source term:
f(i) = 100.*exp(x(i))
! initial guess:
u(i) = alpha + x(i)*(beta-alpha)
enddo

! boundary conditions need to be added:
u(0) = alpha
u(n+1) = beta

uold = u   ! initialize, including boundary values

! Jacobi iteratation:
! -------------------

do iter=1,maxiter

! initialize uold to u (note each thread does part!)
uold(istart:iend) = u(istart:iend)

!\$omp single
dumax = 0.d0     ! global max initialized by one thread
!\$omp end single

! Make sure all of uold is initialized before iterating
!\$omp barrier
! Make sure uold is consitent in memory:
!\$omp flush

dumax_thread = 0.d0   ! max seen by this thread
do i=istart,iend
u(i) = 0.5d0*(uold(i-1) + uold(i+1) + dx**2*f(i))
dumax_thread = max(dumax_thread, abs(u(i)-uold(i)))
enddo

!\$omp critical
! update global dumax using value from this thread:
dumax = max(dumax, dumax_thread)
!\$omp end critical

! make sure all threads are done with dumax:
!\$omp barrier

!\$omp single
! only one thread will do this print statement:
if (mod(iter,nprint)==0) then
print 203, iter, dumax
203         format("After ",i8," iterations, dumax = ",d16.6,/)
endif
!\$omp end single

! check for convergence:
! note that all threads have same value of dumax
! at this point, so they will all exit on the same iteration.

if (dumax .lt. tol) exit

! need to synchronize here so no thread resets dumax = 0
! at start of next iteration before all have done the test above.
!\$omp barrier

enddo

print 212, thread_num,iter,dumax
212 format("Thread number ",i2," finished after ",i9," iterations, dumax = ",&
e16.6)

!\$omp end parallel

! Write solution to heatsoln.txt:
write(20,*) "          x                  u"
do i=0,n+1
write(20,222), x(i), u(i)
enddo
222 format(2e20.10)

print *, "Solution is in heatsoln.txt"

close(20)

end program jacobi2
```