My Project
ChowPatelIlu.hpp
1 /*
2  Copyright 2020 Equinor ASA
3 
4  This file is part of the Open Porous Media project (OPM).
5 
6  OPM is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  OPM is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with OPM. If not, see <http://www.gnu.org/licenses/>.
18 */
19 
20 #ifndef CHOW_PATEL_ILU_HEADER_INCLUDED
21 #define CHOW_PATEL_ILU_HEADER_INCLUDED
22 
23 
24 #include <mutex>
25 
26 #include <opm/simulators/linalg/bda/opencl/opencl.hpp>
27 
28 // Variables CHOW_PATEL, CHOW_PATEL_GPU and CHOW_PATEL_GPU_PARALLEL are set by CMake
29 // Pass -DUSE_CHOW_PATEL_ILU=1 to cmake to define CHOW_PATEL and use the iterative ILU decomposition
30 // Pass -DUSE_CHOW_PATEL_ILU_GPU=1 to run the ILU decomposition sweeps on the GPU
31 // Pass -DUSE_CHOW_PATEL_ILU_GPU_PARALLEL=1 to use more parallelisation in the GPU kernel, see ChowPatelIlu.cpp
32 
33 // if CHOW_PATEL is 0, exact ILU decomposition is performed on CPU
34 // if CHOW_PATEL is 1, iterative ILU decomposition (FGPILU) is done, as described in:
35 // FINE-GRAINED PARALLEL INCOMPLETE LU FACTORIZATION, E. Chow and A. Patel, SIAM 2015, https://doi.org/10.1137/140968896
36 // if CHOW_PATEL_GPU is 0, the decomposition is done on CPU
37 // if CHOW_PATEL_GPU is 1, the decomposition is done by gpu_decomposition() on GPU
38 // the apply phase of the ChowPatelIlu uses two triangular matrices: L and U
39 // the exact decomposition uses a full matrix LU which is the superposition of L and U
40 // ChowPatelIlu could also operate on a full matrix LU when L and U are merged, but it is generally better to keep them split
41 
42 #if CHOW_PATEL
43 
44 namespace Opm
45 {
46 namespace Accelerator
47 {
48 
49 class BlockedMatrix;
50 
51 // This class implements a blocked version on GPU of the Fine-Grained Parallel ILU (FGPILU) by Chow and Patel 2015:
52 // FINE-GRAINED PARALLEL INCOMPLETE LU FACTORIZATION, E. Chow and A. Patel, SIAM 2015, https://doi.org/10.1137/140968896
53 // only block_size == 3 is supported
54 // decomposition() allocates the cl::Buffers on the first call, these are C++ objects that deallocate automatically
// Blocked implementation of the Fine-Grained Parallel ILU (FGPILU) of Chow and Patel (SIAM 2015),
// using OpenCL objects (cl::Buffer, cl::Event, cl::KernelFunctor) for the GPU side.
// block_size is the compile-time block dimension; per the comments in this header only block_size == 3 is supported.
// The cl::Buffer members are allocated by decomposition() on its first call (per the comments in this header)
// and are released automatically when this object is destroyed.
// NOTE(review): std::vector and std::unique_ptr are used below, but this header only includes <mutex> and
// opencl.hpp directly; presumably <vector>/<memory> arrive transitively - consider including them explicitly.
55 template <unsigned int block_size>
56 class ChowPatelIlu
57 {
58 private:
    // OpenCL buffer handles. The d_ prefix presumably marks device-side copies of the host arrays
    // with matching names in gpu_decomposition() (Ut_*, L_*, LU_*) - confirm in ChowPatelIlu.cpp.
59  cl::Buffer d_Ut_vals, d_L_vals, d_LU_vals;
60  cl::Buffer d_Ut_ptrs, d_Ut_idxs;
61  cl::Buffer d_L_rows, d_L_cols;
62  cl::Buffer d_LU_rows, d_LU_cols;
    // presumably scratch buffers for intermediate L and U values between sweeps - TODO confirm
63  cl::Buffer d_Ltmp, d_Utmp;
64 
    // OpenCL event handles; how they are used is defined in the .cpp, not visible here.
65  cl::Event event;
66  std::vector<cl::Event> events;
    // OpenCL status code. NOTE(review): no in-class initializer, so it is indeterminate until first assigned.
67  cl_int err;
    // std::once_flag guards for std::call_once. Judging by the names, one protects one-time setup and the other
    // the one-time upload of the sparsity pattern - verify against the implementation.
68  std::once_flag initialize_flag;
69  std::once_flag pattern_uploaded;
    // Verbosity level, 0 by default; changed through setVerbosity().
70  int verbosity = 0;
71 
    // Owning pointer to the kernel functor for the ILU sweep kernel. The functor is called with
    // 11 cl::Buffer references, one const int and two cl::LocalSpaceArg (local-memory) arguments.
    // It is default-constructed empty here; where it gets created is not visible in this header.
72  std::unique_ptr<cl::KernelFunctor<cl::Buffer&, cl::Buffer&, cl::Buffer&,
73  cl::Buffer&, cl::Buffer&, cl::Buffer&,
74  cl::Buffer&, cl::Buffer&, cl::Buffer&,
75  cl::Buffer&, cl::Buffer&,
76  const int, cl::LocalSpaceArg, cl::LocalSpaceArg> > chow_patel_ilu_sweep_k;
77 
78 public:
    // Entry point for the Chow-Patel ILU decomposition.
    // Per the comments in this header, the cl::Buffers are allocated on the first call, and the
    // work is done by gpu_decomposition() on the GPU when CHOW_PATEL_GPU is 1, otherwise on the CPU.
    //   queue, context          OpenCL command queue and context to use
    //   LUmat, Lmat, Umat       merged LU matrix and the split triangular L and U matrices
    //   invDiagVals, diagIndex  host-side arrays; presumably the inverted diagonal blocks and the
    //                           positions of the diagonal blocks - TODO confirm
    //   d_diagIndex, d_invDiagVals, d_Lvals/d_Lcols/d_Lrows, d_Uvals/d_Ucols/d_Urows
    //                           caller-provided OpenCL buffers; presumably they receive the uploaded
    //                           result used by the apply phase - verify against caller
83  void decomposition(
84  cl::CommandQueue *queue, cl::Context *context,
85  BlockedMatrix *LUmat, BlockedMatrix *Lmat, BlockedMatrix *Umat,
86  double *invDiagVals, std::vector<int>& diagIndex,
87  cl::Buffer& d_diagIndex, cl::Buffer& d_invDiagVals,
88  cl::Buffer& d_Lvals, cl::Buffer& d_Lcols, cl::Buffer& d_Lrows,
89  cl::Buffer& d_Uvals, cl::Buffer& d_Ucols, cl::Buffer& d_Urows);
90 
91 
    // Performs the decomposition on the GPU (used when CHOW_PATEL_GPU is 1, per the comments in this header).
    //   queue, context             OpenCL command queue and context to use
    //   Ut_ptrs, Ut_idxs, Ut_vals  host arrays for Ut; presumably the transposed U matrix as
    //                              pointers/indices/values - TODO confirm
    //   L_rows, L_cols, L_vals     host arrays for L (rows, columns, values)
    //   LU_rows, LU_cols, LU_vals  host arrays for the merged LU matrix (rows, columns, values)
    //   Ut_nnzbs, L_nnzbs, LU_nnzbs  presumably the number of nonzero blocks of each matrix - TODO confirm
    //   Nb                         presumably the number of block rows - TODO confirm
    //   num_sweeps                 presumably the number of sweeps to run - TODO confirm
110  void gpu_decomposition(
111  cl::CommandQueue *queue, cl::Context *context,
112  int *Ut_ptrs, int *Ut_idxs, double *Ut_vals, int Ut_nnzbs,
113  int *L_rows, int *L_cols, double *L_vals, int L_nnzbs,
114  int *LU_rows, int *LU_cols, double *LU_vals, int LU_nnzbs,
115  int Nb, int num_sweeps);
116 
    // Stores verbosity_ in the verbosity member.
118  void setVerbosity(int verbosity_) {
119  this->verbosity = verbosity_;
120  }
121 
122 };
123 
124 } // namespace Accelerator
125 } // namespace Opm
126 
127 #endif // CHOW_PATEL
128 
129 #endif // CHOW_PATEL_ILU_HEADER_INCLUDED
This file contains a set of helper functions used by VFPProd / VFPInj.
Definition: BlackoilPhases.hpp:27