% Journal article: shallow water model derived from INMOM, evaluated on CPUs and GPUs.
% Cleaned from an auto-exported record: HTML markup in the abstract converted to
% LaTeX, page range written with "--", month given as the standard macro.
% NOTE(review): the DOI suffix (jsfi210407) hints at a 2021 issue while year/month
% say Feb 2022 (possibly the online publication date) -- confirm against the journal.
@article{Chaplygin_Gusev_Diansky_2022,
  author   = {Chaplygin, Andrey V. and Gusev, Anatoly V. and Diansky, Nikolay A.},
  title    = {High-performance Shallow Water Model for Use on Massively Parallel and Heterogeneous Computing Systems},
  journal  = {Supercomputing Frontiers and Innovations},
  volume   = {8},
  number   = {4},
  pages    = {74--93},
  year     = {2022},
  month    = feb,
  doi      = {10.14529/jsfi210407},
  url      = {https://superfri.org/index.php/superfri/article/view/416},
  abstract = {This paper presents the shallow water model, formulated from the ocean general circulation sigma model INMOM (Institute of Numerical Mathematics Ocean Model). The shallow water model is based on software architecture, which separates the physics-related code from parallel implementation features, thereby simplifying the model's support and development. As an improvement of the two-dimensional domain decomposition method, we present the blocked-based decomposition proposing load-balanced and cache-friendly calculations on CPUs. We propose various hybrid parallel programming patterns in the shallow water model for effective calculation on massively parallel and heterogeneous computing systems and evaluate their scaling performances on the Lomonosov-2 supercomputer. We demonstrate that performance per a single grid point on GPUs dramatically decreases for small grid sizes starting from $2^{19}$ points per node, while performance on CPUs scales up to $2^{17}$ well. Although, calculations on GPUs outperform calculations on CPUs by a factor of 4.7 at 30 nodes using 60 GPUs and 360 CPU cores at $6100 \times 4460$ grid size. We demonstrate that overlapping kernel execution with data transfers on GPUs increases performance by 28\%. Furthermore, we demonstrate the advantage of using the load-balancing method in the Azov Sea model on CPUs and GPUs.},
}