I am running NHEP across 16 MPI processors over 16 nodes in a matrix of global 
size of 1,000,000x1,000,000 with approximately global 16,000,000 non-zeros. 
Each node has the 1D  row distribution of the matrix with exactly 62500 rows 
and 1 million columns with 1million non-zeros as CSR/COO matrix.

I am generating this graph as follows. It takes approximately 12 seconds to 
insert 25000 NNZ into petsc matrix with MatSetValues which means it is taking 
closer to 10 minutes to 1million NNZ’s in every processes. It takes 12 seconds 
for assembly. Is these times normal? Is there a faster way of doing it? I am 
unable to construct matrices of 1 billion global nnz’s in which each process 
has closer to 100 million entries.

  Generate_petsc_matrix(int n_rows, int n_cols, int n_nnz,
                PetscInt *row_idx, PetscInt *col_idx, PetscScalar *val,
                const MPICommunicator& communicator) {
    int *start_row = new int[communicator.size()];
    MPI_Allgather(&n_rows, 1, MPI_INT, all_proc_rows, 1, MPI_INT, 
MPI_COMM_WORLD);
    start_row[0] = 0;
    for (int i = 0; i < communicator.size(); i++) {
      if (i > 0) {
        start_row[i] = start_row[i - 1] + all_proc_rows[i];
      }
    }
    MatCreate(PETSC_COMM_WORLD, &A);
    MatSetType(A, MATMPIAIJ);
    MatSetSizes(A, n_rows, PETSC_DECIDE, global_rows, n_cols);
    MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, PETSC_NULL, PETSC_DEFAULT, 
PETSC_NULL);
    PetscInt local_row_idx;
    PetscInt local_col_idx;
    PetscScalar local_val;
    int my_start_row = start_row[MPI_RANK];
    int my_start_col = 0;
double petsc_insert_time=0.0;
    for (int i = 0; i < n_nnz; i++) {
      local_row_idx = my_start_row + row_idx[i];
      local_col_idx = my_start_col + col_idx[i];
      local_val = val[i];
tic();
      ierr = MatSetValues(A, 1, &local_row_idx, 1, &local_col_idx, &local_val, 
INSERT_VALUES);
      petsc_insert_time += toc();
      if (i % 25000 == 0){
        PRINTROOT("25000 time::" << petsc_insert_time);
        petsc_insert_time=0;
      }
      CHKERRV(ierr);
    }
    PRINTROOT("100000 time::" << petsc_insert_time);
    tic();
    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
    petsc_insert_time = toc();
    PRINTROOT("calling assembly to end::took::" << petsc_insert_time);
}
--
Regards,
Ramki

Reply via email to