Hey,

I wanted to direct your attention to the following question on
Stack Overflow:

https://stackoverflow.com/questions/58071344/is-eigen-slow-at-multiplying-small-matrices#58071344

I also measured this on my machine and got the same result: Eigen is
always twice as slow as the simple for loop (I've attached the code from
the question and added some code for measuring).

What could be the reason for this?

Best regards
Janos

#include <algorithm>
#include <chrono>
#include <cstdio>
#include <vector>

#include <Eigen/Core>


// Side length of the square matrices used throughout this file.
constexpr int dimension = 10;
// Fixed-size (compile-time dimensioned) square matrix of doubles.
using StaticMatrix = Eigen::Matrix<double, dimension, dimension>;
// Vector of StaticMatrix whose storage is obtained through Eigen's aligned allocator.
using VectorOfMatrices = std::vector<StaticMatrix, Eigen::aligned_allocator<StaticMatrix >>;


/// Hand-written matrix product used as the baseline for the comparison.
///
/// Builds each column of the result as a linear combination of the columns
/// of `a`, weighted by the entries of the matching column of `b`.
///
/// @param a Left-hand operand.
/// @param b Right-hand operand.
/// @return The product a * b.
StaticMatrix CustomMultiply(const StaticMatrix& a, const StaticMatrix& b) {
  // Zero() without arguments takes its size from the fixed-size type, so the
  // accumulator stays consistent with `dimension` if that constant changes.
  StaticMatrix result = StaticMatrix::Zero();
  for (int bcol_idx = 0; bcol_idx < dimension; ++bcol_idx) {
    for (int brow_idx = 0; brow_idx < dimension; ++brow_idx) {
      result.col(bcol_idx).noalias() += a.col(brow_idx) * b(brow_idx, bcol_idx);
    }
  }
  return result;
}

/// Accumulates the Eigen product of every ordered pair of matrices in `input`.
///
/// The full pairwise sweep is repeated `num_repetitions` times and every
/// product is added into a single accumulator.
///
/// @param num_repetitions Number of times the pairwise sweep is executed.
/// @param input           Matrices to multiply with each other.
/// @return Sum of matrix_a * matrix_b over all pairs and all repetitions.
StaticMatrix PairwiseMultiplyEachMatrixNoAlias(int num_repetitions, const VectorOfMatrices& input) {
  // Zero() without arguments takes its size from the fixed-size type, so the
  // accumulator stays consistent with `dimension` if that constant changes.
  StaticMatrix acc = StaticMatrix::Zero();
  for (int i = 0; i < num_repetitions; ++i) {
    for (const auto& matrix_a : input) {
      for (const auto& matrix_b : input) {
        // noalias() tells Eigen that acc does not overlap the operands, so the
        // product can be accumulated without an intermediate temporary.
        acc.noalias() += matrix_a * matrix_b;
      }
    }
  }
  return acc;
}

/// Accumulates the CustomMultiply product of every ordered pair of matrices
/// in `input`.
///
/// The full pairwise sweep is repeated `num_repetitions` times and every
/// product is added into a single accumulator.
///
/// @param num_repetitions Number of times the pairwise sweep is executed.
/// @param input           Matrices to multiply with each other.
/// @return Sum of CustomMultiply(matrix_a, matrix_b) over all pairs and all
///         repetitions.
StaticMatrix PairwiseMultiplyEachMatrixCustom(int num_repetitions, const VectorOfMatrices& input) {
  // Zero() without arguments takes its size from the fixed-size type, so the
  // accumulator stays consistent with `dimension` if that constant changes.
  StaticMatrix acc = StaticMatrix::Zero();
  for (int i = 0; i < num_repetitions; ++i) {
    for (const auto& matrix_a : input) {
      for (const auto& matrix_b : input) {
        // CustomMultiply returns an already evaluated matrix, so a plain +=
        // suffices; noalias() only matters for matrix-product expressions.
        acc += CustomMultiply(matrix_a, matrix_b);
      }
    }
  }
  return acc;
}

// Clock used for the timing measurements in main().
using my_clock = std::chrono::steady_clock;
// Duration with a double representation and the default period (seconds).
using my_dur = std::chrono::duration<double>;
int main() {

    VectorOfMatrices v1(100);
    std::generate(v1.begin(), v1.end(), []{ return StaticMatrix::Random();});

    auto start = my_clock::now();
    auto r1 = PairwiseMultiplyEachMatrixCustom(100, v1);
    auto end = my_clock::now();
    my_dur d = end - start;
    printf("Custom GEMM took: %f\n", d.count());

    start = my_clock::now();
    auto r2 = PairwiseMultiplyEachMatrixNoAlias(100, v1);
    end = my_clock::now();
    d = end - start;
    printf("Eigen GEMM took: %f\n", d.count());

    return 0;
}

Reply via email to