MPI stands for Message Passing Interface and is a standard approach for programming distributed memory machines such as clusters, supercomputers, or heterogeneous networks of computers. It can also be used on a single shared memory computer, although it is often more cumbersome to program in MPI than in OpenMP.
A number of different implementations are available, both open source and vendor-supplied for specific machines.
The VM has open-mpi partially installed.
You will need to do the following:
$ sudo apt-get update
$ sudo apt-get install openmpi-dev
On other Ubuntu installations you will also have to do:
$ sudo apt-get install openmpi-bin # Already on the VM
You should then be able to do the following:
$ cd $UWHPSC/codes/mpi
$ mpif90 test1.f90
$ mpiexec -n 4 a.out
and see output like:
Hello from Process number 1 of 4 processes
Hello from Process number 3 of 4 processes
Hello from Process number 0 of 4 processes
Hello from Process number 2 of 4 processes
The simple test code used above illustrates use of some of the basic MPI subroutines.
! $UWHPSC/codes/mpi/test1.f90

program test1

    use mpi
    implicit none
    integer :: ierr, numprocs, proc_num

    call mpi_init(ierr)

    call mpi_comm_size(MPI_COMM_WORLD, numprocs, ierr)
    call mpi_comm_rank(MPI_COMM_WORLD, proc_num, ierr)

    print *, 'Hello from Process number', proc_num, &
             ' of ', numprocs, ' processes'

    call mpi_finalize(ierr)

end program test1
The next example uses MPI_REDUCE to add up partial sums computed by independent processes.
! $UWHPSC/codes/mpi/pisum1.f90
! Computes pi using MPI.
! Compare to $UWHPSC/codes/openmp/pisum2.f90

program pisum1

    use mpi
    implicit none
    integer :: ierr, numprocs, proc_num, points_per_proc, n, &
               i, istart, iend
    real (kind=8) :: x, dx, pisum, pisum_proc, pi

    call mpi_init(ierr)
    call mpi_comm_size(MPI_COMM_WORLD, numprocs, ierr)
    call mpi_comm_rank(MPI_COMM_WORLD, proc_num, ierr)

    ! Ask the user for the number of points
    if (proc_num == 0) then
        print *, "Using ",numprocs," processors"
        print *, "Input n ... "
        read *, n
    end if

    ! Broadcast to all procs; everybody gets the value of n from proc 0
    call mpi_bcast(n, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)

    dx = 1.d0/n

    ! Determine how many points to handle with each proc
    points_per_proc = (n + numprocs - 1)/numprocs
    if (proc_num == 0) then   ! Only one proc should print to avoid clutter
        print *, "points_per_proc = ", points_per_proc
    end if

    ! Determine start and end index for this proc's points
    istart = proc_num * points_per_proc + 1
    iend = min((proc_num + 1)*points_per_proc, n)

    ! Diagnostic: tell the user which points will be handled by which proc
    print '("Process ",i2," will take i = ",i6," through i = ",i6)', &
          proc_num, istart, iend

    pisum_proc = 0.d0
    do i=istart,iend
        x = (i-0.5d0)*dx
        pisum_proc = pisum_proc + 1.d0 / (1.d0 + x**2)
    enddo

    call MPI_REDUCE(pisum_proc, pisum, 1, MPI_DOUBLE_PRECISION, MPI_SUM, 0, &
                    MPI_COMM_WORLD, ierr)

    if (proc_num == 0) then
        pi = 4.d0 * dx * pisum
        print *, "The approximation to pi is ", pi
    endif

    call mpi_finalize(ierr)

end program pisum1
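Note that only Process 0 ends up with the reduced value pisum above. If every process needed the final sum (for example, to continue a parallel computation with it), MPI_ALLREDUCE could be used in place of MPI_REDUCE. The following is a minimal stand-alone sketch, not part of the $UWHPSC codes, with a hypothetical file name:

! allreduce_demo.f90  (hypothetical example, not in $UWHPSC/codes/mpi)
! Each process contributes one value; MPI_ALLREDUCE sums them and
! returns the total to every process, unlike MPI_REDUCE.

program allreduce_demo

    use mpi
    implicit none
    integer :: ierr, numprocs, proc_num
    real(kind=8) :: myvalue, total

    call mpi_init(ierr)
    call mpi_comm_size(MPI_COMM_WORLD, numprocs, ierr)
    call mpi_comm_rank(MPI_COMM_WORLD, proc_num, ierr)

    myvalue = proc_num + 1.d0    ! this process's contribution

    call mpi_allreduce(myvalue, total, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
                       MPI_COMM_WORLD, ierr)

    ! every process now holds the same total:
    print *, "Process", proc_num, " sees total = ", total

    call mpi_finalize(ierr)

end program allreduce_demo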
In this example, a value is set in Process 0 and then passed to Process 1 and on to Process 2, etc. until it reaches the last process, where it is printed out.
! $UWHPSC/codes/mpi/copyvalue.f90
!
! Set value in Process 0 and copy this through a chain of processes
! and finally print result from Process numprocs-1.
!

program copyvalue

    use mpi
    implicit none
    integer :: i, proc_num, num_procs, ierr
    integer, dimension(MPI_STATUS_SIZE) :: status

    call MPI_INIT(ierr)
    call MPI_COMM_SIZE(MPI_COMM_WORLD, num_procs, ierr)
    call MPI_COMM_RANK(MPI_COMM_WORLD, proc_num, ierr)

    if (num_procs==1) then
        print *, "Only one process, cannot do anything"
        call MPI_FINALIZE(ierr)
        stop
    endif

    if (proc_num==0) then
        i = 55
        print '("Process ",i3," setting i = ",i3)', proc_num, i
        call MPI_SEND(i, 1, MPI_INTEGER, 1, 21, &
                      MPI_COMM_WORLD, ierr)

    else if (proc_num < num_procs - 1) then
        call MPI_RECV(i, 1, MPI_INTEGER, proc_num-1, 21, &
                      MPI_COMM_WORLD, status, ierr)
        print '("Process ",i3," receives i = ",i3)', proc_num, i
        print '("Process ",i3," sends i = ",i3)', proc_num, i
        call MPI_SEND(i, 1, MPI_INTEGER, proc_num+1, 21, &
                      MPI_COMM_WORLD, ierr)

    else if (proc_num == num_procs - 1) then
        call MPI_RECV(i, 1, MPI_INTEGER, proc_num-1, 21, &
                      MPI_COMM_WORLD, status, ierr)
        print '("Process ",i3," ends up with i = ",i3)', proc_num, i
    endif

    call MPI_FINALIZE(ierr)

end program copyvalue
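This code can be compiled and run in the same way as test1.f90, for example with 4 processes so the value travels through the chain 0 -> 1 -> 2 -> 3:

$ mpif90 copyvalue.f90
$ mpiexec -n 4 a.out

With a single process there is nowhere to send the value, which is why the code prints a message and stops in that case.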
The next two examples illustrate using Process 0 as a master process to farm work out to the other processes. In both cases the 1-norm of a matrix is computed, which is the maximum over j of the 1-norm of the jth column of the matrix.
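In symbols, for an m-by-n matrix A this is

    \|A\|_1 = \max_{1 \le j \le n} \sum_{i=1}^{m} |a_{ij}|,

so each worker computes the inner sum for one column and the master takes the maximum over the columns.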
In the first case we assume there are at least as many worker processes as columns in the matrix:
! $UWHPSC/codes/mpi/matrix1norm1.f90
!
! Compute 1-norm of a matrix using mpi.
! Process 0 is the master that sets things up and then sends a column
! to each worker (Processes 1, 2, ..., num_procs - 1).
!
! This version assumes there are at least as many workers as columns.

program matrix1norm1

    use mpi
    implicit none
    integer :: i, j, jj, nrows, ncols, proc_num, num_procs, ierr, nerr
    integer, dimension(MPI_STATUS_SIZE) :: status
    real(kind=8) :: colnorm
    real(kind=8), allocatable, dimension(:,:) :: a
    real(kind=8), allocatable, dimension(:) :: anorm, colvect

    call MPI_INIT(ierr)
    call MPI_COMM_SIZE(MPI_COMM_WORLD, num_procs, ierr)
    call MPI_COMM_RANK(MPI_COMM_WORLD, proc_num, ierr)

    nerr = 0
    if (proc_num==0) then
        print *, "Input nrows, ncols"
        read *, nrows, ncols
        if (ncols > num_procs-1) then
            print *, "*** Error, this version requires ncols < num_procs = ",&
                  num_procs
            nerr = 1
        endif

        allocate(a(nrows,ncols))   ! only master process 0 needs the matrix
        a = 1.d0                   ! initialize to all 1's for this test
        allocate(anorm(ncols))     ! to hold norm of each column in MPI_RECV
    endif

    ! if nerr == 1 then all processes must stop:
    call MPI_BCAST(nerr, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)

    if (nerr == 1) then
        ! Note that error message already printed by Process 0
        ! All processes must execute the MPI_FINALIZE
        ! (Could also just have "go to 99" here.)
        call MPI_FINALIZE(ierr)
        stop
    endif

    call MPI_BCAST(nrows, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
    call MPI_BCAST(ncols, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)

    if (proc_num > 0) then
        allocate(colvect(nrows))   ! to hold a column vector sent from master
    endif

    ! -----------------------------------------
    ! code for Master (Processor 0):
    ! -----------------------------------------

    if (proc_num == 0) then

        do j=1,ncols
            call MPI_SEND(a(1,j), nrows, MPI_DOUBLE_PRECISION,&
                          j, j, MPI_COMM_WORLD, ierr)
        enddo

        do j=1,ncols
            call MPI_RECV(colnorm, 1, MPI_DOUBLE_PRECISION, &
                          MPI_ANY_SOURCE, MPI_ANY_TAG, &
                          MPI_COMM_WORLD, status, ierr)
            jj = status(MPI_TAG)
            anorm(jj) = colnorm
        enddo

        print *, "Finished filling anorm with values... "
        print *, anorm
        print *, "1-norm of matrix a = ", maxval(anorm)
    endif

    ! -----------------------------------------
    ! code for Workers (Processors 1, 2, ...):
    ! -----------------------------------------

    if (proc_num /= 0) then
        if (proc_num > ncols) go to 99   ! no work expected

        call MPI_RECV(colvect, nrows, MPI_DOUBLE_PRECISION,&
                      0, MPI_ANY_TAG, &
                      MPI_COMM_WORLD, status, ierr)
        j = status(MPI_TAG)   ! this is the column number
                              ! (should agree with proc_num)
        colnorm = sum(abs(colvect))
        call MPI_SEND(colnorm, 1, MPI_DOUBLE_PRECISION, &
                      0, j, MPI_COMM_WORLD, ierr)
    endif

99  continue   ! might jump to here if finished early

    call MPI_FINALIZE(ierr)

end program matrix1norm1
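Since this version requires at least as many workers as columns, it must be run with at least ncols + 1 processes (one master plus one worker per column). A possible test run, with values chosen here only as an illustration:

$ mpif90 matrix1norm1.f90
$ mpiexec -n 6 a.out

Entering, say, 20 5 at the prompt for nrows, ncols lets the 5 workers each handle one column of a 20x5 matrix; since the test matrix is filled with 1's, every column norm equals nrows and the reported 1-norm should be 20.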
In the next case we consider the more realistic situation where there may be many more columns in the matrix than worker processes. In this case the master process must do more work to keep track of which columns have already been handled and farm out work as worker processes become free.
! $UWHPSC/codes/mpi/matrix1norm2.f90
!
! Compute 1-norm of a matrix using mpi.
! Process 0 is the master that sets things up and then sends a column
! to each worker (Processes 1, 2, ..., num_procs - 1).
!
! This version allows more columns than workers.

program matrix1norm2

    use mpi
    implicit none
    integer :: i, j, jj, nrows, ncols, proc_num, num_procs, ierr, nerr
    integer :: numsent, sender, nextcol
    integer, dimension(MPI_STATUS_SIZE) :: status
    real(kind=8) :: colnorm
    real(kind=8), allocatable, dimension(:,:) :: a
    real(kind=8), allocatable, dimension(:) :: anorm, colvect
    logical :: debug

    debug = .true.

    call MPI_INIT(ierr)
    call MPI_COMM_SIZE(MPI_COMM_WORLD, num_procs, ierr)
    call MPI_COMM_RANK(MPI_COMM_WORLD, proc_num, ierr)

    if (proc_num==0) then
        print *, "Input nrows, ncols"
        read *, nrows, ncols

        allocate(a(nrows,ncols))   ! only master process 0 needs the matrix
        a = 1.d0                   ! initialize to all 1's for this test
        allocate(anorm(ncols))     ! to hold norm of each column in MPI_RECV
    endif

    call MPI_BCAST(nrows, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
    call MPI_BCAST(ncols, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)

    if (proc_num > 0) then
        allocate(colvect(nrows))   ! to hold a column vector sent from master
    endif

    ! -----------------------------------------
    ! code for Master (Processor 0):
    ! -----------------------------------------

    if (proc_num == 0) then

        numsent = 0   ! keep track of how many columns sent

        ! send the first batch to get all workers working:
        do j=1,min(num_procs-1, ncols)
            call MPI_SEND(a(1,j), nrows, MPI_DOUBLE_PRECISION,&
                          j, j, MPI_COMM_WORLD, ierr)
            numsent = numsent + 1
        enddo

        ! as results come back, send out more work...
        ! the variable sender tells who sent back a result and ready for more
        do j=1,ncols
            call MPI_RECV(colnorm, 1, MPI_DOUBLE_PRECISION, &
                          MPI_ANY_SOURCE, MPI_ANY_TAG, &
                          MPI_COMM_WORLD, status, ierr)
            sender = status(MPI_SOURCE)
            jj = status(MPI_TAG)
            anorm(jj) = colnorm

            if (numsent < ncols) then
                ! still more work to do, the next column will be sent and
                ! this index also used as the tag:
                nextcol = numsent + 1
                call MPI_SEND(a(1,nextcol), nrows, MPI_DOUBLE_PRECISION,&
                              sender, nextcol, MPI_COMM_WORLD, ierr)
                numsent = numsent + 1
            else
                ! send an empty message with tag=0 to indicate this worker
                ! is done:
                call MPI_SEND(MPI_BOTTOM, 0, MPI_DOUBLE_PRECISION,&
                              sender, 0, MPI_COMM_WORLD, ierr)
            endif
        enddo

        print *, "Finished filling anorm with values... "
        print *, anorm
        print *, "1-norm of matrix a = ", maxval(anorm)
    endif

    ! -----------------------------------------
    ! code for Workers (Processors 1, 2, ...):
    ! -----------------------------------------

    if (proc_num /= 0) then
        if (proc_num > ncols) go to 99   ! no work expected

        do while (.true.)
            ! repeat until message with tag==0 received...

            call MPI_RECV(colvect, nrows, MPI_DOUBLE_PRECISION,&
                          0, MPI_ANY_TAG, &
                          MPI_COMM_WORLD, status, ierr)

            j = status(MPI_TAG)   ! this is the column number
                                  ! may not be proc_num in general

            if (debug) then
                print '("+++ Process ",i4," received message with tag ",i6)', &
                      proc_num, j
            endif

            if (j==0) go to 99    ! received "done" message

            colnorm = sum(abs(colvect))

            call MPI_SEND(colnorm, 1, MPI_DOUBLE_PRECISION, &
                          0, j, MPI_COMM_WORLD, ierr)
        enddo
    endif

99  continue   ! might jump to here if finished early

    call MPI_FINALIZE(ierr)

end program matrix1norm2
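This version can be run with any number of worker processes (at least one); when there are fewer workers than columns, each worker receives additional columns as it finishes. For example, with illustrative values chosen here:

$ mpif90 matrix1norm2.f90
$ mpiexec -n 4 a.out

Entering, say, 20 9 at the prompt makes the 3 workers handle 9 columns between them, and the debug prints show which tag (column number) each worker received on each message.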
Some other sample codes can also be found in the $UWHPSC/codes/mpi directory.
See also the references and tutorials listed below.
- MPI references section of the bibliography lists some books.
- Argonne tutorials
- Tutorial slides by Bill Gropp
- Livermore tutorials
- Open MPI
- The MPI Standard
- Some sample codes
- LAM MPI tutorials
- Google “MPI tutorial” to find more.
- Documentation on MPI subroutines