// Copyright (C) 2005-2006 Douglas Gregor <doug.gregor@gmail.com>.
// Copyright (C) 2004 The Trustees of Indiana University
// Use, modification and distribution is subject to the Boost Software
// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
// Authors: Douglas Gregor
//          Andrew Lumsdaine
// Message Passing Interface 1.1 -- Section 4.9.1. Reduce
#ifndef BOOST_MPI_REDUCE_HPP | |
#define BOOST_MPI_REDUCE_HPP | |
#include <boost/mpi/exception.hpp> | |
#include <boost/mpi/datatype.hpp> | |
// For (de-)serializing sends and receives | |
#include <boost/mpi/packed_oarchive.hpp> | |
#include <boost/mpi/packed_iarchive.hpp> | |
// For packed_[io]archive sends and receives | |
#include <boost/mpi/detail/point_to_point.hpp> | |
#include <boost/mpi/communicator.hpp> | |
#include <boost/mpi/environment.hpp> | |
#include <boost/mpi/detail/computation_tree.hpp> | |
#include <boost/mpi/operations.hpp> | |
#include <algorithm> | |
#include <exception> | |
#include <boost/assert.hpp> | |
#include <boost/scoped_array.hpp> | |
namespace boost { namespace mpi { | |
/************************************************************************ | |
* Implementation details * | |
************************************************************************/ | |
namespace detail { | |
/********************************************************************** | |
* Simple reduction with MPI_Reduce * | |
**********************************************************************/ | |
// We are reducing at the root for a type that has an associated MPI | |
// datatype and operation, so we'll use MPI_Reduce directly. | |
template<typename T, typename Op> | |
void | |
reduce_impl(const communicator& comm, const T* in_values, int n, | |
T* out_values, Op op, int root, mpl::true_ /*is_mpi_op*/, | |
mpl::true_/*is_mpi_datatype*/) | |
{ | |
BOOST_MPI_CHECK_RESULT(MPI_Reduce, | |
(const_cast<T*>(in_values), out_values, n, | |
boost::mpi::get_mpi_datatype<T>(*in_values), | |
(is_mpi_op<Op, T>::op()), root, comm)); | |
} | |
// We are reducing to the root for a type that has an associated MPI | |
// datatype and operation, so we'll use MPI_Reduce directly. | |
template<typename T, typename Op> | |
void | |
reduce_impl(const communicator& comm, const T* in_values, int n, Op op, | |
int root, mpl::true_ /*is_mpi_op*/, mpl::true_/*is_mpi_datatype*/) | |
{ | |
BOOST_MPI_CHECK_RESULT(MPI_Reduce, | |
(const_cast<T*>(in_values), 0, n, | |
boost::mpi::get_mpi_datatype<T>(*in_values), | |
(is_mpi_op<Op, T>::op()), root, comm)); | |
} | |
/********************************************************************** | |
* User-defined reduction with MPI_Reduce * | |
**********************************************************************/ | |
// We are reducing at the root for a type that has an associated MPI | |
// datatype but with a custom operation. We'll use MPI_Reduce | |
// directly, but we'll need to create an MPI_Op manually. | |
template<typename T, typename Op> | |
void | |
reduce_impl(const communicator& comm, const T* in_values, int n, | |
T* out_values, Op op, int root, mpl::false_ /*is_mpi_op*/, | |
mpl::true_/*is_mpi_datatype*/) | |
{ | |
user_op<Op, T> mpi_op(op); | |
BOOST_MPI_CHECK_RESULT(MPI_Reduce, | |
(const_cast<T*>(in_values), out_values, n, | |
boost::mpi::get_mpi_datatype<T>(*in_values), | |
mpi_op.get_mpi_op(), root, comm)); | |
} | |
// We are reducing to the root for a type that has an associated MPI | |
// datatype but with a custom operation. We'll use MPI_Reduce | |
// directly, but we'll need to create an MPI_Op manually. | |
template<typename T, typename Op> | |
void | |
reduce_impl(const communicator& comm, const T* in_values, int n, Op op, | |
int root, mpl::false_/*is_mpi_op*/, mpl::true_/*is_mpi_datatype*/) | |
{ | |
user_op<Op, T> mpi_op(op); | |
BOOST_MPI_CHECK_RESULT(MPI_Reduce, | |
(const_cast<T*>(in_values), 0, n, | |
boost::mpi::get_mpi_datatype<T>(*in_values), | |
mpi_op.get_mpi_op(), root, comm)); | |
} | |
/********************************************************************** | |
* User-defined, tree-based reduction for non-MPI data types * | |
**********************************************************************/ | |
// Commutative reduction | |
template<typename T, typename Op> | |
void | |
tree_reduce_impl(const communicator& comm, const T* in_values, int n, | |
T* out_values, Op op, int root, | |
mpl::true_ /*is_commutative*/) | |
{ | |
std::copy(in_values, in_values + n, out_values); | |
int size = comm.size(); | |
int rank = comm.rank(); | |
// The computation tree we will use. | |
detail::computation_tree tree(rank, size, root); | |
int tag = environment::collectives_tag(); | |
MPI_Status status; | |
int children = 0; | |
for (int child = tree.child_begin(); | |
children < tree.branching_factor() && child != root; | |
++children, child = (child + 1) % size) { | |
// Receive archive | |
packed_iarchive ia(comm); | |
detail::packed_archive_recv(comm, child, tag, ia, status); | |
T incoming; | |
for (int i = 0; i < n; ++i) { | |
ia >> incoming; | |
out_values[i] = op(out_values[i], incoming); | |
} | |
} | |
// For non-roots, send the result to the parent. | |
if (tree.parent() != rank) { | |
packed_oarchive oa(comm); | |
for (int i = 0; i < n; ++i) | |
oa << out_values[i]; | |
detail::packed_archive_send(comm, tree.parent(), tag, oa); | |
} | |
} | |
// Commutative reduction from a non-root. | |
template<typename T, typename Op> | |
void | |
tree_reduce_impl(const communicator& comm, const T* in_values, int n, Op op, | |
int root, mpl::true_ /*is_commutative*/) | |
{ | |
scoped_array<T> results(new T[n]); | |
detail::tree_reduce_impl(comm, in_values, n, results.get(), op, root, | |
mpl::true_()); | |
} | |
// Non-commutative reduction | |
template<typename T, typename Op> | |
void | |
tree_reduce_impl(const communicator& comm, const T* in_values, int n, | |
T* out_values, Op op, int root, | |
mpl::false_ /*is_commutative*/) | |
{ | |
int tag = environment::collectives_tag(); | |
int left_child = root / 2; | |
int right_child = (root + comm.size()) / 2; | |
MPI_Status status; | |
if (left_child != root) { | |
// Receive value from the left child and merge it with the value | |
// we had incoming. | |
packed_iarchive ia(comm); | |
detail::packed_archive_recv(comm, left_child, tag, ia, status); | |
T incoming; | |
for (int i = 0; i < n; ++i) { | |
ia >> incoming; | |
out_values[i] = op(incoming, in_values[i]); | |
} | |
} else { | |
// There was no left value, so copy our incoming value. | |
std::copy(in_values, in_values + n, out_values); | |
} | |
if (right_child != root) { | |
// Receive value from the right child and merge it with the | |
// value we had incoming. | |
packed_iarchive ia(comm); | |
detail::packed_archive_recv(comm, right_child, tag, ia, status); | |
T incoming; | |
for (int i = 0; i < n; ++i) { | |
ia >> incoming; | |
out_values[i] = op(out_values[i], incoming); | |
} | |
} | |
} | |
// Non-commutative reduction from a non-root. | |
template<typename T, typename Op> | |
void | |
tree_reduce_impl(const communicator& comm, const T* in_values, int n, Op op, | |
int root, mpl::false_ /*is_commutative*/) | |
{ | |
int size = comm.size(); | |
int rank = comm.rank(); | |
int tag = environment::collectives_tag(); | |
// Determine our parents and children in the commutative binary | |
// computation tree. | |
int grandparent = root; | |
int parent = root; | |
int left_bound = 0; | |
int right_bound = size; | |
int left_child, right_child; | |
do { | |
left_child = (left_bound + parent) / 2; | |
right_child = (parent + right_bound) / 2; | |
if (rank < parent) { | |
// Go left. | |
grandparent = parent; | |
right_bound = parent; | |
parent = left_child; | |
} else if (rank > parent) { | |
// Go right. | |
grandparent = parent; | |
left_bound = parent + 1; | |
parent = right_child; | |
} else { | |
// We've found the parent | |
break; | |
} | |
} while (true); | |
// Our parent is the grandparent of our children. This is a slight | |
// abuse of notation, but it makes the send-to-parent below make | |
// more sense. | |
parent = grandparent; | |
MPI_Status status; | |
scoped_array<T> out_values(new T[n]); | |
if (left_child != rank) { | |
// Receive value from the left child and merge it with the value | |
// we had incoming. | |
packed_iarchive ia(comm); | |
detail::packed_archive_recv(comm, left_child, tag, ia, status); | |
T incoming; | |
for (int i = 0; i < n; ++i) { | |
ia >> incoming; | |
out_values[i] = op(incoming, in_values[i]); | |
} | |
} else { | |
// There was no left value, so copy our incoming value. | |
std::copy(in_values, in_values + n, out_values.get()); | |
} | |
if (right_child != rank) { | |
// Receive value from the right child and merge it with the | |
// value we had incoming. | |
packed_iarchive ia(comm); | |
detail::packed_archive_recv(comm, right_child, tag, ia, status); | |
T incoming; | |
for (int i = 0; i < n; ++i) { | |
ia >> incoming; | |
out_values[i] = op(out_values[i], incoming); | |
} | |
} | |
// Send the combined value to our parent. | |
packed_oarchive oa(comm); | |
for (int i = 0; i < n; ++i) | |
oa << out_values[i]; | |
detail::packed_archive_send(comm, parent, tag, oa); | |
} | |
// We are reducing at the root for a type that has no associated MPI | |
// datatype and operation, so we'll use a simple tree-based | |
// algorithm. | |
template<typename T, typename Op> | |
void | |
reduce_impl(const communicator& comm, const T* in_values, int n, | |
T* out_values, Op op, int root, mpl::false_ /*is_mpi_op*/, | |
mpl::false_ /*is_mpi_datatype*/) | |
{ | |
detail::tree_reduce_impl(comm, in_values, n, out_values, op, root, | |
is_commutative<Op, T>()); | |
} | |
// We are reducing to the root for a type that has no associated MPI | |
// datatype and operation, so we'll use a simple tree-based | |
// algorithm. | |
template<typename T, typename Op> | |
void | |
reduce_impl(const communicator& comm, const T* in_values, int n, Op op, | |
int root, mpl::false_ /*is_mpi_op*/, | |
mpl::false_ /*is_mpi_datatype*/) | |
{ | |
detail::tree_reduce_impl(comm, in_values, n, op, root, | |
is_commutative<Op, T>()); | |
} | |
} // end namespace detail | |
template<typename T, typename Op> | |
void | |
reduce(const communicator& comm, const T* in_values, int n, T* out_values, | |
Op op, int root) | |
{ | |
if (comm.rank() == root) | |
detail::reduce_impl(comm, in_values, n, out_values, op, root, | |
is_mpi_op<Op, T>(), is_mpi_datatype<T>()); | |
else | |
detail::reduce_impl(comm, in_values, n, op, root, | |
is_mpi_op<Op, T>(), is_mpi_datatype<T>()); | |
} | |
template<typename T, typename Op> | |
void | |
reduce(const communicator& comm, const T* in_values, int n, Op op, int root) | |
{ | |
BOOST_ASSERT(comm.rank() != root); | |
detail::reduce_impl(comm, in_values, n, op, root, | |
is_mpi_op<Op, T>(), is_mpi_datatype<T>()); | |
} | |
template<typename T, typename Op> | |
void | |
reduce(const communicator& comm, const T& in_value, T& out_value, Op op, | |
int root) | |
{ | |
if (comm.rank() == root) | |
detail::reduce_impl(comm, &in_value, 1, &out_value, op, root, | |
is_mpi_op<Op, T>(), is_mpi_datatype<T>()); | |
else | |
detail::reduce_impl(comm, &in_value, 1, op, root, | |
is_mpi_op<Op, T>(), is_mpi_datatype<T>()); | |
} | |
template<typename T, typename Op> | |
void reduce(const communicator& comm, const T& in_value, Op op, int root) | |
{ | |
BOOST_ASSERT(comm.rank() != root); | |
detail::reduce_impl(comm, &in_value, 1, op, root, | |
is_mpi_op<Op, T>(), is_mpi_datatype<T>()); | |
} | |
} } // end namespace boost::mpi | |
#endif // BOOST_MPI_REDUCE_HPP |