Hi all,
I'm trying to learn how to parallelize simple tasks with HPX for the
first time, but I do not actually know how to proceed correctly, so I am
asking for your help.
In MatricesMultiplicationSerial.txt you find the matrices multiplication
function in serial mode:
the two matrices are split in "blocks", which are smaller matrices.
The corresponding smaller matrices are multiplied in serial (no
parallelization); the individual results are then put into a vector and
composed, through the function "totalSumElemWise2D", into the bigger matrix.
In MatricesMultiplicationAsyncParallel.txt I tried to asynchronously
parallelize the single small matrices multiplications, and then compose the
results into the bigger matrix.
Did I lay out the parallelization correctly? For example, I'm not sure
that unwrapping is necessary here, or that I used it correctly.
How to gather all the smaller matrices multiplications into a results
vector and then use the gathering function "totalSumElemWise2D" to combine
the results into the bigger matrix?
Looking forward to your kind help.
Marco
template
Tensor2 prod_hpx(Tensor2 A, Tensor2 B) { // dyadicproduct2D: matrix
x matrix multiplication
if(A.size().first!=B.size().second) {
std::cout << "prod_hpx : The number of columns of first matrix(left) must
be equal to the number of rows of the second matrix(right)";
std::cout << std::endl;
exit(1);
}
// page 25 of "Matrix Computations 3rd Edition" - Gene H. Golub
(MatrixComputations.pdf)
// IF THE TWO MATRICES T1 AND T2 AND BOTH AT LEAST ONE OF THE DIMENSIONS >
MATRIX_DIM_THRESHOLD, THEY MUST BOTH BE DECOMPOSED IN BLOCKS (MATRICES) :
// if (((t1.size().first > MATRIX_DIM_THRESHOLD) || (t1.size().second >
MATRIX_DIM_THRESHOLD)) && ((t2.size().first > MATRIX_DIM_THRESHOLD) ||
(t2.size().second > MATRIX_DIM_THRESHOLD))) {
// Matrix t1 and Matrix t2 must be decomposed in blocks (Matrices)
// Calculate the maximum common divisor of the rows dimension of t1 and of
the columns dimension of t2:
bool ABcheck = matricesDimsCheck(A,B);
Tensor2 AdyadicB(B.size().first,A.size().second);
if(ABcheck) {
std::cout << "prod_hpx : BLOCKS NEEDED" << std::endl;
// A || B :
// (ra x ca) (rb x cb) con ca==rb : AdyadicB = ra x cb
// (cb x rb) (ca x ra) con rb==ca : AdyadicB = cb x ra :
// Thus the partionsNumb to be used is the max common divisor of
A.size().second and B.size().first
std::vector> Ablocks = getDyadicBlocks(A,'l');
std::vector> Bblocks = getDyadicBlocks(B,'r');
std::vector> ApBVect;
using namespace hpx::parallel;
using hpx::dataflow;
using hpx::parallel::for_each;
using hpx::parallel::execution::par;
// I want to launch the single twoMatricesDyadicProd functions in
parallel and asynchroously
auto Op = unwrapping(twoMatricesDyadicProd);
for_each(par, [&](int i) {
Tensor2 ApB = dataflow(
hpx::launch::async, Op, Ablocks[i], Bblocks[i]
);
++i;
});
return hpx::when_all(
// }
else {
//std::cout << "operator || : blocks not needed" << std::endl;
AdyadicB = twoMatricesDyadicProd(A,B);
}
// return AdyadicB;
}
template
Tensor2 operator||(Tensor2 A, Tensor2 B) { // dyadicproduct2D:
matrix x matrix multiplication
if(A.size().first!=B.size().second) {
std::cout << "operator|| : The number of columns of first matrix(left)
must be equal to the number of rows of the second matrix(right)";
std::cout << std::endl;
exit(1);
}
// page 25 of "Matrix Computations 3rd Edition" - Gene H. Golub
(MatrixComputations.pdf)
// IF THE TWO MATRICES T1 AND T2 AND BOTH AT LEAST ONE OF THE DIMENSIONS >
MATRIX_DIM_THRESHOLD, THEY MUST BOTH BE DECOMPOSED IN BLOCKS (MATRICES) :
// if (((t1.size().first > MATRIX_DIM_THRESHOLD) || (t1.size().second >
MATRIX_DIM_THRESHOLD)) && ((t2.size().first > MATRIX_DIM_THRESHOLD) ||
(t2.size().second > MATRIX_DIM_THRESHOLD))) {
// Matrix t1 and Matrix t2 must be decomposed in blocks (Matrices)
// Calculate the maximum common divisor of the rows dimension of t1 and of
the columns dimension of t2:
bool ABcheck = matricesDimsCheck(A,B);
Tensor2 AdyadicB(B.size().first,A.size().second);
if(ABcheck) {
std::cout << "operator || : BLOCKS NEEDED" << std::endl;
// A || B :
// (ra x ca) (rb x cb) con ca==rb : AdyadicB = ra x cb
// (cb x rb) (ca x ra) con rb==ca : AdyadicB = cb x ra :
// Thus the partionsNumb to be used is the max common divisor of
A.size().second and B.size().first
std::vector> Ablocks = getDyadicBlocks(A,'l');
std::vector> Bblocks = getDyadicBlocks(B,'r');
std::vector> ApBVect;
for(int i=0;i ApB = twoMatricesDyadicProd(Ablocks[i],Bblocks[i]);
ApBVect.push_back(std::move(ApB));
}
AdyadicB = totalSumElemWise2D(std::move(ApBVect));
}