Dear community,

   In my current simulations (with not more than 160k elements) system
   assembly takes around 90% of computational time and I am not
   happy with this fact. I clearly understand that my implementation
   (see below) is fully non-optimal but rather fully readable, it
   follows the tutorial style. But in any case, could you share your
   thoughts on how to speed up the system assembly?

   Here is my example:
=======================================================================
 void PhaseFieldEq::assemble_system ()
 {
   QGauss<2>   quadrature_formula(2);
   FEValues<2> fe_values (fe, quadrature_formula,
                          update_values | update_gradients |
                          update_quadrature_points | update_JxW_values);

   const unsigned int   dofs_per_cell = fe.dofs_per_cell;
   const unsigned int   n_q_points    = quadrature_formula.n_quadrature_points;

   FullMatrix<double>   cell_matrix (dofs_per_cell, dofs_per_cell);
   Vector<double>       cell_rhs (dofs_per_cell);

   std::vector<std::vector<Tensor<1,2> > >
          grad_solution  (n_q_points, std::vector<Tensor<1,2> >(2));
   std::vector<Vector<double> >
          value_solution (n_q_points, Vector<double>(2));

   const FEValuesExtractors::Scalar phi  (0);
   const FEValuesExtractors::Scalar conc (1);

   //Define temporary variables for the ease of notation
   double theta, DphiDx, DphiDy, phi_old, c_old, grad_phi_norm;
   double timestep_inv = 1.0/time_step;

   DoFHandler<2>::active_cell_iterator
     cell = dof_handler.begin_active(),
     endc = dof_handler.end();
   for (; cell!=endc; ++cell)
     {
       fe_values.reinit (cell);

       //Initialize all cell matricies
       cell_matrix = 0;
       cell_rhs = 0;

       //Calculate values and gradients of FE-functions on
       //the given cell in quadrature points
       fe_values.get_function_gradients (solution_old, grad_solution);
       fe_values.get_function_values (solution_old, value_solution);

       for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
       {
        DphiDy = grad_solution[q_point][0][1];
        DphiDx = grad_solution[q_point][0][0];

        theta = atan2(DphiDy,DphiDx);

        phi_old = value_solution[q_point](0);
        c_old   = value_solution[q_point](1);

        grad_phi_norm = grad_solution[q_point][0].norm();

        for (unsigned int i=0; i<dofs_per_cell; ++i)
         for (unsigned int j=0; j<dofs_per_cell; ++j)
         {
           cell_matrix(i,j) +=
              (timestep_inv * tau_func (theta) *
               fe_values[phi].value (i, q_point) *
               fe_values[phi].value (j, q_point)
               +
               timestep_inv *
               fe_values[conc].value (i, q_point) *
               fe_values[conc].value (j, q_point)
              ) *
              fe_values.JxW(q_point);

           //This includes anti-trapping term
           if (grad_phi_norm>params.machine_zero)
           cell_matrix(i,j) +=
               params.a_coef * params.W_coef * params.c_l0_coef *
               (1.0 - params.k_coef) * timestep_inv / grad_phi_norm *
               exp(u_func(phi_old,c_old)) *
               fe_values[phi].value (j, q_point) *
               fe_values[conc].gradient (i, q_point) *
               grad_solution[q_point][0]
               *
               fe_values.JxW(q_point);
         }


        for (unsigned int j=0; j<dofs_per_cell; ++j)
        {
          cell_rhs(j) +=
             (timestep_inv * tau_func(theta) *
              fe_values[phi].value (j, q_point) *
              phi_old
              -
              fe_values[phi].value (j, q_point) *
              DfDphi(phi_old)
              -
              params.lambda_coef/(1.0 - params.k_coef) *
              fe_values[phi].value (j, q_point) *
              DgDphi(phi_old) *
              (exp(u_func(phi_old,c_old)) - 1.0)
              -
              W_func(theta)*W_func(theta) *
              fe_values[phi].gradient (j, q_point) *
              grad_solution[q_point][0]
              +
              W_func(theta) * DWDtheta (theta) *
              (fe_values[phi].gradient (j, q_point)[0]*DphiDy
               - fe_values[phi].gradient (j, q_point)[1]*DphiDx)
              +
              timestep_inv *
              fe_values[conc].value (j, q_point) *
              c_old
              -
              params.D_coef * c_old * q_func(phi_old) *
              fe_values[conc].gradient (j, q_point) *
              (DuDphi(phi_old) * grad_solution[q_point][0]
               + DuDc(c_old) * grad_solution[q_point][1])
             ) *
             fe_values.JxW(q_point);

          //This includes anti-trapping term
          if (grad_phi_norm>params.machine_zero)
          cell_rhs(j) +=
               params.a_coef * params.W_coef * params.c_l0_coef *
               (1.0 - params.k_coef) * timestep_inv / grad_phi_norm *
               exp(u_func(phi_old,c_old)) *
               phi_old *
               fe_values[conc].gradient (j, q_point) *
               grad_solution[q_point][0]
               *
               fe_values.JxW(q_point);
         }
       }

       cell->distribute_local_to_global(cell_matrix, system_matrix);

       cell->distribute_local_to_global(cell_rhs, system_rhs);
     }
 }


Regards,
Slawa

_______________________________________________
dealii mailing list http://poisson.dealii.org/mailman/listinfo/dealii

Reply via email to