LifeV
lifev/eta/tutorials/8_ETA_block_manip/main.cpp
Go to the documentation of this file.
1 //@HEADER
2 /*
3 *******************************************************************************
4 
5  Copyright (C) 2004, 2005, 2007 EPFL, Politecnico di Milano, INRIA
6  Copyright (C) 2010 EPFL, Politecnico di Milano, Emory University
7 
8  This file is part of LifeV.
9 
10  LifeV is free software; you can redistribute it and/or modify
11  it under the terms of the GNU Lesser General Public License as published by
12  the Free Software Foundation, either version 3 of the License, or
13  (at your option) any later version.
14 
15  LifeV is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  Lesser General Public License for more details.
19 
20  You should have received a copy of the GNU Lesser General Public License
21  along with LifeV. If not, see <http://www.gnu.org/licenses/>.
22 
23 *******************************************************************************
24 */
25 //@HEADER
26 
27 /*!
28  @file
29  @brief Tutorial for a "better" usage of the blocks.
30 
31  @author Samuel Quinodoz <samuel.quinodoz@epfl.ch>
32  @date 02-07-2012
33 
34  In the previous tutorial, we saw how to use the ETA framework with
35  block structured matrices. This tutorial aims at investigating
36  that more in detail.
37 
38  In particular, 4 different strategies are proposed for the assembly
39  of the Stokes matrix. The first one is the most simple strategy, but
40  not the most efficient one. Two intermediate strategies are developed
41  to show the influence of the different points. The last strategy
42  represents the most efficient way of assembling the matrix.
43 
44  In practice, only the first (simplest) and last (fastest) strategy
45  should be considered since intermediate strategies have no advantages.
46 
47  Tutorials that should be read before: 1,4,7
48 
49  */
50 
51 // ---------------------------------------------------------------
52 // We reuse the same files as in the previous tutorial. We add the
53 // chrono to measure the timings and the file
54 // MatrixEpetraStructuredUtility to use the block copy features.
55 // ---------------------------------------------------------------
56 
57 
58 #include <Epetra_ConfigDefs.h>
59 #ifdef EPETRA_MPI
60 #include <mpi.h>
61 #include <Epetra_MpiComm.h>
62 #else
63 #include <Epetra_SerialComm.h>
64 #endif
65 
66 
67 #include <lifev/core/LifeV.hpp>
68 
69 #include <lifev/core/mesh/MeshPartitioner.hpp>
70 #include <lifev/core/mesh/RegionMesh3DStructured.hpp>
71 #include <lifev/core/mesh/RegionMesh.hpp>
72 
73 #include <lifev/core/array/MatrixEpetra.hpp>
74 #include <lifev/core/array/VectorEpetra.hpp>
75 
76 #include <lifev/core/array/MatrixEpetraStructured.hpp>
77 #include <lifev/core/array/VectorEpetraStructured.hpp>
78 #include <lifev/core/array/MatrixEpetraStructuredUtility.hpp>
79 
80 #include <lifev/eta/fem/ETFESpace.hpp>
81 #include <lifev/eta/expression/Integrate.hpp>
82 
83 #include <lifev/core/fem/FESpace.hpp>
84 
85 #include <lifev/core/util/LifeChrono.hpp>
86 
87 #include <boost/shared_ptr.hpp>
88 
89 
90 // ---------------------------------------------------------------
91 // We work in the LifeV namespace and define the mesh, matrix and
92 // vector types that we will need several times. We add new
93 // typedefs for the block matrices and block vectors.
94 // ---------------------------------------------------------------
95 
96 using namespace LifeV;
97 
99 typedef MatrixEpetra<Real> matrix_Type;
100 typedef VectorEpetra vector_Type;
101 typedef MatrixEpetraStructured<Real> blockMatrix_Type;
102 typedef VectorEpetraStructured blockVector_Type;
103 
104 
105 // ---------------------------------------------------------------
106 // As usual, we start by the MPI communicator, the definition of
107 // the mesh and its partitioning.
108 // ---------------------------------------------------------------
109 
110 int main ( int argc, char** argv )
111 {
112 
113 #ifdef HAVE_MPI
114  MPI_Init (&argc, &argv);
115  std::shared_ptr<Epetra_Comm> Comm (new Epetra_MpiComm (MPI_COMM_WORLD) );
116 #else
117  std::shared_ptr<Epetra_Comm> Comm (new Epetra_SerialComm);
118 #endif
119 
120  const bool verbose (Comm->MyPID() == 0);
121 
122 
123  if (verbose)
124  {
125  std::cout << " -- Building and partitioning the mesh ... " << std::flush;
126  }
127 
128  const UInt Nelements (10);
129 
130  std::shared_ptr< mesh_Type > fullMeshPtr (new mesh_Type ( Comm ) );
131 
132  regularMesh3D ( *fullMeshPtr, 1, Nelements, Nelements, Nelements, false,
133  2.0, 2.0, 2.0,
134  -1.0, -1.0, -1.0);
135 
136  std::shared_ptr< mesh_Type > meshPtr;
137  {
138  MeshPartitioner< mesh_Type > meshPart (fullMeshPtr, Comm);
139  meshPtr = meshPart.meshPartition();
140  }
141 
142  fullMeshPtr.reset();
143 
144  if (verbose)
145  {
146  std::cout << " done ! " << std::endl;
147  }
148 
149 
150  // ---------------------------------------------------------------
151  // We define now the ETFESpaces. We need one space for the
152  // velocity (vectorial, P2) and one space for the pressure
153  // (scalar, P1).
154  //
155  // For reasons explained hereafter, we also need a scalar
156  // ETFESpace for the velocity.
157  // ---------------------------------------------------------------
158 
159  if (verbose)
160  {
161  std::cout << " -- Building the spaces ... " << std::flush;
162  }
163 
164  std::shared_ptr<ETFESpace< mesh_Type, MapEpetra, 3, 3 > > ETuSpace
165  ( new ETFESpace< mesh_Type, MapEpetra, 3, 3 > (meshPtr, &feTetraP2, Comm) );
166 
167  std::shared_ptr<ETFESpace< mesh_Type, MapEpetra, 3, 1 > > ETpSpace
168  ( new ETFESpace< mesh_Type, MapEpetra, 3, 1 > (meshPtr, &feTetraP1, Comm) );
169 
170  std::shared_ptr<ETFESpace< mesh_Type, MapEpetra, 3, 1 > > ETuCompSpace
171  ( new ETFESpace< mesh_Type, MapEpetra, 3, 1 > (meshPtr, &feTetraP2, Comm) );
172 
173  if (verbose)
174  {
175  std::cout << " done ! " << std::endl;
176  }
177  if (verbose)
178  {
179  std::cout << " ---> Velocity dofs: " << ETuSpace->dof().numTotalDof() << std::endl;
180  }
181  if (verbose)
182  {
183  std::cout << " ---> Pressure dofs: " << ETpSpace->dof().numTotalDof() << std::endl;
184  }
185 
186 
187  // ---------------------------------------------------------------
188  // We recall here the implementation that we used for the
189  // Stokes system in the previous tutorial. It consists in defining
190  // a 2x2 structure on the matrix and assemble the different blocks
191  // separately.
192  // ---------------------------------------------------------------
193 
194  if (verbose)
195  {
196  std::cout << " -- Assembly of the Stokes matrix (I) ... " << std::flush;
197  }
198  LifeChrono chronoI;
199  chronoI.start();
200 
201  std::shared_ptr<blockMatrix_Type> ETsystemMatrixI (new blockMatrix_Type ( ETuSpace->map() | ETpSpace->map() ) );
202  *ETsystemMatrixI *= 0.0;
203 
204  {
205  using namespace ExpressionAssembly;
206 
207  integrate ( elements (ETuSpace->mesh() ),
208  quadRuleTetra4pt,
209  ETuSpace,
210  ETuSpace,
211 
212  dot ( grad (phi_i) , grad (phi_j) )
213 
214  )
215  >> ETsystemMatrixI->block (0, 0);
216 
217  integrate ( elements (ETuSpace->mesh() ),
218  quadRuleTetra4pt,
219  ETuSpace,
220  ETpSpace,
221 
222  phi_j * div (phi_i)
223 
224  )
225  >> ETsystemMatrixI->block (0, 1);
226 
227  integrate ( elements (ETuSpace->mesh() ),
228  quadRuleTetra4pt,
229  ETpSpace,
230  ETuSpace,
231 
232  phi_i * div (phi_j)
233 
234  )
235  >> ETsystemMatrixI->block (1, 0);
236  }
237 
238  chronoI.stop();
239 
240  ETsystemMatrixI->globalAssemble();
241 
242  if (verbose)
243  {
244  std::cout << " done! " << std::endl;
245  }
246  if (verbose)
247  {
248  std::cout << " Time: " << chronoI.diff() << std::endl;
249  }
250 
251 
252  // ---------------------------------------------------------------
253  // This implementation is however "suboptimal". Indeed, the
254  // (0,0)-block is itself block diagonal, but this is not taken
255  // into account. So, many zeros are added to the matrix, which
256  // might result in both a slower assembly and a longer time
257  // required to compute the preconditioner.
258  //
259  // To avoid this bottlneck, a first option is to define the
260  // structure to be 4x4 (3 velocity components and pressure).
261  // ---------------------------------------------------------------
262 
263  if (verbose)
264  {
265  std::cout << " -- Assembly of the Stokes matrix (II) ... " << std::flush;
266  }
267  LifeChrono chronoII;
268  chronoII.start();
269 
270  std::shared_ptr<blockMatrix_Type> ETsystemMatrixII
271  (new blockMatrix_Type ( ETuCompSpace->map() | ETuCompSpace->map() | ETuCompSpace->map() | ETpSpace->map() ) );
272  *ETsystemMatrixII *= 0.0;
273 
274  {
275  using namespace ExpressionAssembly;
276 
277  VectorSmall<3> e1 (1, 0, 0);
278  VectorSmall<3> e2 (0, 1, 0);
279  VectorSmall<3> e3 (0, 0, 1);
280 
281  integrate ( elements (ETuSpace->mesh() ),
282  quadRuleTetra4pt,
283  ETuCompSpace,
284  ETuCompSpace,
285 
286  dot ( grad (phi_i) , grad (phi_j) )
287 
288  )
289  >> ETsystemMatrixII->block (0, 0);
290 
291  integrate ( elements (ETuSpace->mesh() ),
292  quadRuleTetra4pt,
293  ETuCompSpace,
294  ETuCompSpace,
295 
296  dot ( grad (phi_i) , grad (phi_j) )
297 
298  )
299  >> ETsystemMatrixII->block (1, 1);
300 
301  integrate ( elements (ETuSpace->mesh() ),
302  quadRuleTetra4pt,
303  ETuCompSpace,
304  ETuCompSpace,
305 
306  dot ( grad (phi_i) , grad (phi_j) )
307 
308  )
309  >> ETsystemMatrixII->block (2, 2);
310 
311 
312 
313  integrate ( elements (ETuSpace->mesh() ),
314  quadRuleTetra4pt,
315  ETuCompSpace,
316  ETpSpace,
317 
318  phi_j * dot (grad (phi_i), e1)
319 
320  )
321  >> ETsystemMatrixII->block (0, 3);
322 
323  integrate ( elements (ETuSpace->mesh() ),
324  quadRuleTetra4pt,
325  ETuCompSpace,
326  ETpSpace,
327 
328  phi_j * dot (grad (phi_i), e2)
329 
330  )
331  >> ETsystemMatrixII->block (1, 3);
332 
333  integrate ( elements (ETuSpace->mesh() ),
334  quadRuleTetra4pt,
335  ETuCompSpace,
336  ETpSpace,
337 
338  phi_j * dot (grad (phi_i), e3)
339 
340  )
341  >> ETsystemMatrixII->block (2, 3);
342 
343 
344  integrate ( elements (ETuSpace->mesh() ),
345  quadRuleTetra4pt,
346  ETpSpace,
347  ETuCompSpace,
348 
349  phi_i * dot (grad (phi_j), e1)
350 
351  )
352  >> ETsystemMatrixII->block (3, 0);
353 
354  integrate ( elements (ETuSpace->mesh() ),
355  quadRuleTetra4pt,
356  ETpSpace,
357  ETuCompSpace,
358 
359  phi_i * dot (grad (phi_j), e2)
360 
361  )
362  >> ETsystemMatrixII->block (3, 1);
363 
364  integrate ( elements (ETuSpace->mesh() ),
365  quadRuleTetra4pt,
366  ETpSpace,
367  ETuCompSpace,
368 
369  phi_i * dot (grad (phi_j), e3)
370 
371  )
372  >> ETsystemMatrixII->block (3, 2);
373  }
374 
375  chronoII.stop();
376 
377  ETsystemMatrixII->globalAssemble();
378 
379  if (verbose)
380  {
381  std::cout << " done! " << std::endl;
382  }
383  if (verbose)
384  {
385  std::cout << " Time: " << chronoII.diff() << std::endl;
386  }
387 
388 
389  // ---------------------------------------------------------------
390  // The difference in timings should be quite large between the
391  // two first implementations. However, we can still remark that
392  // the blocks (0,0), (1,1) and (2,2) (in the latest
393  // implementation) are exact similar. So, instead of recomputing
394  // this block 3 times, we can assemble it simply once and then
395  // copy it where needed.
396  //
397  // The following code realizes this idea.
398  // ---------------------------------------------------------------
399 
400  if (verbose)
401  {
402  std::cout << " -- Assembly of the Stokes matrix (III) ... " << std::flush;
403  }
404  LifeChrono chronoIII;
405  chronoIII.start();
406 
407 
408  // ---------------------------------------------------------------
409  // First, we build the small matrix, assemble it and close it.
410  // ---------------------------------------------------------------
411 
412  std::shared_ptr<blockMatrix_Type> ETcomponentMatrixIII
413  (new blockMatrix_Type ( ETuCompSpace->map() ) );
414  *ETcomponentMatrixIII *= 0.0;
415 
416  {
417  using namespace ExpressionAssembly;
418 
419  integrate ( elements (ETuSpace->mesh() ),
420  quadRuleTetra4pt,
421  ETuCompSpace,
422  ETuCompSpace,
423 
424  dot ( grad (phi_i) , grad (phi_j) )
425 
426  )
427  >> ETcomponentMatrixIII->block (0, 0);
428  }
429 
430  ETcomponentMatrixIII->globalAssemble();
431 
432 
433  // ---------------------------------------------------------------
434  // We define then the large matrix.
435  // ---------------------------------------------------------------
436 
437  std::shared_ptr<blockMatrix_Type> ETsystemMatrixIII
438  (new blockMatrix_Type ( ETuCompSpace->map() | ETuCompSpace->map() | ETuCompSpace->map() | ETpSpace->map() ) );
439  *ETsystemMatrixIII *= 0.0;
440 
441 
442  // ---------------------------------------------------------------
443  // We copy the three blocks
444  // ---------------------------------------------------------------
445 
446  MatrixEpetraStructuredUtility::copyBlock (*ETcomponentMatrixIII->block (0, 0), *ETsystemMatrixIII->block (0, 0) );
447  MatrixEpetraStructuredUtility::copyBlock (*ETcomponentMatrixIII->block (0, 0), *ETsystemMatrixIII->block (1, 1) );
448  MatrixEpetraStructuredUtility::copyBlock (*ETcomponentMatrixIII->block (0, 0), *ETsystemMatrixIII->block (2, 2) );
449 
450  // ---------------------------------------------------------------
451  // Assemble the remaing blocks
452  // ---------------------------------------------------------------
453 
454  {
455  using namespace ExpressionAssembly;
456 
457  VectorSmall<3> e1 (1, 0, 0);
458  VectorSmall<3> e2 (0, 1, 0);
459  VectorSmall<3> e3 (0, 0, 1);
460 
461  integrate ( elements (ETuSpace->mesh() ),
462  quadRuleTetra4pt,
463  ETuCompSpace,
464  ETpSpace,
465 
466  phi_j * dot (grad (phi_i), e1)
467 
468  )
469  >> ETsystemMatrixIII->block (0, 3);
470 
471  integrate ( elements (ETuSpace->mesh() ),
472  quadRuleTetra4pt,
473  ETuCompSpace,
474  ETpSpace,
475 
476  phi_j * dot (grad (phi_i), e2)
477 
478  )
479  >> ETsystemMatrixIII->block (1, 3);
480 
481  integrate ( elements (ETuSpace->mesh() ),
482  quadRuleTetra4pt,
483  ETuCompSpace,
484  ETpSpace,
485 
486  phi_j * dot (grad (phi_i), e3)
487 
488  )
489  >> ETsystemMatrixIII->block (2, 3);
490 
491 
492  integrate ( elements (ETuSpace->mesh() ),
493  quadRuleTetra4pt,
494  ETpSpace,
495  ETuCompSpace,
496 
497  phi_i * dot (grad (phi_j), e1)
498 
499  )
500  >> ETsystemMatrixIII->block (3, 0);
501 
502  integrate ( elements (ETuSpace->mesh() ),
503  quadRuleTetra4pt,
504  ETpSpace,
505  ETuCompSpace,
506 
507  phi_i * dot (grad (phi_j), e2)
508 
509  )
510  >> ETsystemMatrixIII->block (3, 1);
511 
512  integrate ( elements (ETuSpace->mesh() ),
513  quadRuleTetra4pt,
514  ETpSpace,
515  ETuCompSpace,
516 
517  phi_i * dot (grad (phi_j), e3)
518 
519  )
520  >> ETsystemMatrixIII->block (3, 2);
521  }
522 
523  chronoIII.stop();
524 
525  ETsystemMatrixIII->globalAssemble();
526 
527  if (verbose)
528  {
529  std::cout << " done! " << std::endl;
530  }
531  if (verbose)
532  {
533  std::cout << " Time: " << chronoIII.diff() << std::endl;
534  }
535 
536 
537  // ---------------------------------------------------------------
538  // The variant three is usually a bit faster than the variant II,
539  // which shows that some more time can be gained.
540  //
541  // For the final variant, we exploit further the block structure
542  // by changing the structure of the matrix on the fly. The
543  // strategy consists in assembly the velocity-velocity block
544  // as in the variant III (by copy of a small block), while
545  // for the remain parts of the matrix, the structure is changed
546  // and only two blocks are assembled.
547  // ---------------------------------------------------------------
548 
549  if (verbose)
550  {
551  std::cout << " -- Assembly of the Stokes matrix (IV) ... " << std::flush;
552  }
553  LifeChrono chronoIV;
554  chronoIV.start();
555 
556 
557  // ---------------------------------------------------------------
558  // First, we build the small matrix, assemble it and close it.
559  // ---------------------------------------------------------------
560 
561  std::shared_ptr<blockMatrix_Type> ETcomponentMatrixIV
562  (new blockMatrix_Type ( ETuCompSpace->map() ) );
563  *ETcomponentMatrixIV *= 0.0;
564 
565  {
566  using namespace ExpressionAssembly;
567 
568  integrate ( elements (ETuSpace->mesh() ),
569  quadRuleTetra4pt,
570  ETuCompSpace,
571  ETuCompSpace,
572 
573  dot ( grad (phi_i) , grad (phi_j) )
574 
575  )
576  >> ETcomponentMatrixIV->block (0, 0);
577  }
578 
579  ETcomponentMatrixIV->globalAssemble();
580 
581 
582  // ---------------------------------------------------------------
583  // We define then the large matrix.
584  // ---------------------------------------------------------------
585 
586  std::shared_ptr<blockMatrix_Type> ETsystemMatrixIV
587  (new blockMatrix_Type ( ETuCompSpace->map() | ETuCompSpace->map() | ETuCompSpace->map() | ETpSpace->map() ) );
588  *ETsystemMatrixIV *= 0.0;
589 
590 
591  // ---------------------------------------------------------------
592  // We copy the three blocks
593  // ---------------------------------------------------------------
594 
595  MatrixEpetraStructuredUtility::copyBlock (*ETcomponentMatrixIV->block (0, 0), *ETsystemMatrixIV->block (0, 0) );
596  MatrixEpetraStructuredUtility::copyBlock (*ETcomponentMatrixIV->block (0, 0), *ETsystemMatrixIV->block (1, 1) );
597  MatrixEpetraStructuredUtility::copyBlock (*ETcomponentMatrixIV->block (0, 0), *ETsystemMatrixIV->block (2, 2) );
598 
599  // ---------------------------------------------------------------
600  // Now change the structure of the matrix back to the 2x2
601  // structure and assemble to two remaining parts.
602  // ---------------------------------------------------------------
603 
604  ETsystemMatrixIV->setBlockStructure (ETuSpace->map() | ETpSpace->map() );
605 
606  {
607  using namespace ExpressionAssembly;
608 
609  integrate ( elements (ETuSpace->mesh() ),
610  quadRuleTetra4pt,
611  ETuSpace,
612  ETpSpace,
613 
614  phi_j * div (phi_i)
615 
616  )
617  >> ETsystemMatrixIV->block (0, 1);
618 
619  integrate ( elements (ETuSpace->mesh() ),
620  quadRuleTetra4pt,
621  ETpSpace,
622  ETuSpace,
623 
624  phi_i * div (phi_j)
625 
626  )
627  >> ETsystemMatrixIV->block (1, 0);
628  }
629 
630  chronoIV.stop();
631 
632  ETsystemMatrixIV->globalAssemble();
633 
634  if (verbose)
635  {
636  std::cout << " done! " << std::endl;
637  }
638  if (verbose)
639  {
640  std::cout << " Time: " << chronoIV.diff() << std::endl;
641  }
642 
643 
644  // ---------------------------------------------------------------
645  // We finally compare the different matrices obtained.
646  // ---------------------------------------------------------------
647 
648  if (verbose)
649  {
650  std::cout << " -- Computing the error ... " << std::flush;
651  }
652 
653  std::shared_ptr<matrix_Type> checkMatrixIvsII (new matrix_Type ( ETuSpace->map() + ETpSpace->map() ) );
654  *checkMatrixIvsII *= 0.0;
655 
656  *checkMatrixIvsII += *ETsystemMatrixI;
657  *checkMatrixIvsII += (*ETsystemMatrixII) * (-1);
658 
659  checkMatrixIvsII->globalAssemble();
660 
661  Real errorNormIvsII ( checkMatrixIvsII->normInf() );
662 
663 
664  std::shared_ptr<matrix_Type> checkMatrixIvsIII (new matrix_Type ( ETuSpace->map() + ETpSpace->map() ) );
665  *checkMatrixIvsIII *= 0.0;
666 
667  *checkMatrixIvsIII += *ETsystemMatrixI;
668  *checkMatrixIvsIII += (*ETsystemMatrixIII) * (-1);
669 
670  checkMatrixIvsIII->globalAssemble();
671 
672  Real errorNormIvsIII ( checkMatrixIvsIII->normInf() );
673 
674 
675  std::shared_ptr<matrix_Type> checkMatrixIvsIV (new matrix_Type ( ETuSpace->map() + ETpSpace->map() ) );
676  *checkMatrixIvsIV *= 0.0;
677 
678  *checkMatrixIvsIV += *ETsystemMatrixI;
679  *checkMatrixIvsIV += (*ETsystemMatrixIV) * (-1);
680 
681  checkMatrixIvsIV->globalAssemble();
682 
683  Real errorNormIvsIV ( checkMatrixIvsIV->normInf() );
684 
685 
686 
687  if (verbose)
688  {
689  std::cout << " done ! " << std::endl;
690  }
691  if (verbose)
692  {
693  std::cout << " I vs II : " << errorNormIvsII << std::endl;
694  }
695  if (verbose)
696  {
697  std::cout << " I vs III : " << errorNormIvsIII << std::endl;
698  }
699  if (verbose)
700  {
701  std::cout << " I vs IV : " << errorNormIvsIV << std::endl;
702  }
703 
704 
705 #ifdef HAVE_MPI
706  MPI_Finalize();
707 #endif
708 
709  Real tolerance (1e-10);
710 
711  if ( (errorNormIvsII < tolerance)
712  && (errorNormIvsIII < tolerance)
713  && (errorNormIvsIV < tolerance)
714  )
715  {
716  return ( EXIT_SUCCESS );
717  }
718  return ( EXIT_FAILURE );
719 }
int main(int argc, char **argv)
MatrixEpetra< Real > matrix_Type
MatrixEpetraStructured< Real > blockMatrix_Type
double Real
Generic real data.
Definition: LifeV.hpp:175
RegionMesh< LinearTetra > mesh_Type
VectorEpetraStructured blockVector_Type