% Journal article: Voevodin et al. (2019), "Supercomputer Lomonosov-2", Supercomputing Frontiers and Innovations 6(2), pp. 4--11.
@article{Voevodin_Antonov_Nikitenko_Shvets_Sobolev_Sidorov_Stefanov_Voevodin_Zhumatiy_2019,
  author   = {Voevodin, Vladimir V. and Antonov, Alexander S. and Nikitenko, Dmitry A. and Shvets, Pavel A. and Sobolev, Sergey I. and Sidorov, Igor Yu. and Stefanov, Konstantin S. and Voevodin, Vadim V. and Zhumatiy, Sergey A.},
  title    = {Supercomputer {Lomonosov-2}: Large Scale, Deep Monitoring and Fine Analytics for the User Community},
  journal  = {Supercomputing Frontiers and Innovations},
  volume   = {6},
  number   = {2},
  pages    = {4--11},
  year     = {2019},
  month    = jun,
  doi      = {10.14529/jsfi190201},
  url      = {https://superfri.org/index.php/superfri/article/view/278},
  abstract = {The huge number of hardware and software components, together with a large number of parameters affecting the performance of each parallel application, makes ensuring the efficiency of a large scale supercomputer extremely difficult. In this situation, all basic parameters of the supercomputer should be constantly monitored, as well as many decisions about its functioning should be made by special software automatically. In this paper we describe the tight connection between complexity of modern large high performance computing systems and special techniques and tools required to ensure their efficiency in practice. The main subsystems of the developed complex (Octoshell, DiMMoN, Octotron, JobDigest, and an expert software system to bring fine analytics on parallel applications and the entire supercomputer to users and sysadmins) are actively operated on the large supercomputer systems at Lomonosov Moscow State University. A brief description of the architecture of Lomonosov-2 supercomputer is presented, and questions showing both a wide variety of emerging complex issues and the need for an integrated approach to solving the problem of effectively supporting large supercomputer systems are discussed.},
}