<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.0 20120330//EN" "JATS-archivearticle1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta />
    <article-meta>
      <title-group>
        <article-title>IT-ECOSYSTEM OF THE HYBRILIT HETEROGENEOUS PLATFORM FOR HIGH-PERFORMANCE COMPUTING AND TRAINING OF IT-SPECIALISTS</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author">
          <string-name>Gheorge Adam</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff3">3</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Maxim Bashashin</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff3">3</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Dmitry Belyakov</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Margarit Kirakosyan</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Mikhail Matveev</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff3">3</xref>
          <xref ref-type="aff" rid="aff5">5</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Dmitry Podgainy</string-name>
          <email>podgainy@jinr.ru</email>
          <xref ref-type="aff" rid="aff3">3</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Tatiana Sapozhnikova</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Oksana Streltsova</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff3">3</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Shushanik Torosyan</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Martin Vala</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Lucia Valova</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Alexei Vorontsov</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Tatiana Zaikina</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Elena Zemlyanaya</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff3">3</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Maxim Zuev</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <aff id="aff0">
          <label>0</label>
          <institution>Dubna State University</institution>
          ,
          <addr-line>Dubna, Moscow region, 141980</addr-line>
          ,
          <country country="RU">Russia</country>
        </aff>
        <aff id="aff1">
          <label>1</label>
          <institution>Horia Hulubei National Institute for Physics and Nuclear Engineering (IFIN-HH)</institution>
          ,
          <addr-line>30, Reactorului St., Măgurele - Bucharest, 077125</addr-line>
          ,
          <country country="RO">Romania</country>
        </aff>
        <aff id="aff2">
          <label>2</label>
          <institution>Institute of experimental physics SAS</institution>
          ,
          <addr-line>040 01 Košice</addr-line>
          ,
          <country country="SK">Slovak Republic</country>
        </aff>
        <aff id="aff3">
          <label>3</label>
          <institution>Laboratory of Information Technologies, Joint Institute for Nuclear Research</institution>
          ,
          <addr-line>Dubna, Moscow region, 141980</addr-line>
          ,
          <country country="RU">Russia</country>
        </aff>
        <aff id="aff4">
          <label>4</label>
          <institution>Peoples' Friendship University of Russia (RUDN University)</institution>
          ,
          <addr-line>Moscow, 117198</addr-line>
          ,
          <country country="RU">Russia</country>
        </aff>
        <aff id="aff5">
          <label>5</label>
          <institution>Plekhanov Russian University of Economics</institution>
          ,
          <addr-line>Moscow, 117997</addr-line>
          ,
          <country country="RU">Russia</country>
        </aff>
        <aff id="aff6">
          <label>6</label>
          <institution>2018 Gheorge Adam</institution>
          ,
          <addr-line>Maxim Bashashin, Dmitry Belyakov, Margarit Kirakosyan, Mikhail Matveev, Dmitry Podgainy, Tatiana Sapozhnikova, Oksana Streltsova, Shushanik Torosyan, Martin Vala, Lucia Valova, Alexei Vorontsov, Tatiana Zaikina, Elena Zemlyanaya, Maxim Zuev</addr-line>
        </aff>
      </contrib-group>
      <pub-date>
        <year>2018</year>
      </pub-date>
      <fpage>638</fpage>
      <lpage>644</lpage>
      <abstract>
        <p>The HybriLIT heterogeneous platform is a component of the Multifunctional Information and Computing Complex (MICC) of the Joint Institute for Nuclear Research. HybriLIT includes GOVORUN supercomputer and education and testing polygon; its platform is based on the latest computation architectures (processors; co-processors; graphical accelerators), and also modern software such as Intel Cluster Studio, CUDA, MATLAB, etc.; thus, allowing to carry out extra-massive computations and reach sufficient acceleration, and also to provide training courses on parallel programming technologies and work on HPC-platforms for students, PhD students and young scientists [1]. In order to increase the efficiency of work on the heterogeneous platform HybriLIT, there appears a need to develop an information-computing environment for work with parallel programming technologies that provides users with the possibility to use the resources of the supercomputer for carrying out resource-intensive and massive parallel tasks, use the applied software packages adapted for hybrid architectures, and also develop and debug applications using modern IT solutions and frameworks such as machine learning approaches for accelerating experimental data analysis. Software and information environment are also a convenient tool for organization of tutorials. The developing software and information environment, together with a heterogeneous computing system and components for providing computations using application packages with a developed graphical interface, form an IT-ecosystem that allows you not only to carry out effective HPC computations, but it is also a fruitful platform for training IT-specialists in the HPC-sphere.</p>
      </abstract>
      <kwd-group>
        <kwd>High Performance Computing</kwd>
        <kwd>heterogeneous platforms</kwd>
        <kwd>parallel programming technologies</kwd>
        <kwd>IT Ecosystem</kwd>
        <kwd>education programs</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec id="sec-1">
      <title>1. HybriLIT platform</title>
      <p>
        The platform consists of the HybriLIT training and testing polygon and GOVORUN
supercomputer [
        <xref ref-type="bibr" rid="ref2">2</xref>
        ]. At the moment the supercomputer includes computing nodes with CPU, GPU
and Intel Xeon Phi processors, which allows users of the platform to use the main HPC solutions and
develop the following directions in the tasks of data processing and analysis in high energy physics:
1) create their own software for carrying out research that require resource intensive computations;
2) use applied software and mathematical libraries and frameworks adapted for hybrid
architectures;
3) develop parallel algorithms for different tasks using the heterogeneous computing paradigm.
      </p>
      <p>The heterogeneous structure of the platform allows developing software and carrying out computations
using parallel programming technologies: Message Passing Interface (MPI) – for multi-node
CPU-component, OpenMP – for computations on one node, CUDA – for computation using GPU,
MPI and OpenMP for computations on Intel Xeon Phi, and also for hybrid technologies:
MPI+OpenMP, MPI+CUDA, MPI+OpenMP+CUDA – for using different architectures in
computations.</p>
      <p>HybriLIT training and testing polygon contains 10 computing nodes with heterogeneous
architecture. Each computing node contains two 12-core processors Intel Xeon E5-2695v2
(and Intel Xeon E5-2695v3 14 cores). In total, the cluster contains 252 CPU-cores, 77184
CUDA-cores, 182 PHI-cores; 2,5 Tb RAM; 470 TB HDD. The peak performance of the polygon is 142
TFlops for single precision floating point computations and 50 TFlops for double precision floating
point computations.</p>
      <p>The GOVORUN supercomputer is a natural development of the HybriLIT cluster and preserves
the main idea of HybriLIT – the heterogeneous structure of the computational nodes. The putting into
operation of supercomputer has led to a significant increase in the performance of both the CPU and
the GPU components of the initial HybriLIT cluster and it provides new possibilities for
resource-intensive, massively parallel computations for the solution of tasks of JINR. The GPU component
includes 5 NVIDIA DGX-1 servers. Each server has 8 NVIDIA Tesla V100 GPUs based on the latest
NVIDIA Volta architecture. In addition, a single NVIDIA DGX-1 server includes 40,960 CUDA
cores, the performance of which is equal to 800 high-performance CPUs. It uses a set of new
technologies, including the NVLink 2.0 with a capacity of up to 300 Gbit/s.</p>
      <p>
        The new supercomputer includes a high-density and energy-efficient solution by the
“RSC Tornado” with liquid cooling system [
        <xref ref-type="bibr" rid="ref3">3</xref>
        ]. The compute nodes are based on Intel server products:
the most powerful 72-core Intel Xeon Phi 7290 server processors, Intel Xeon Scalable family
processors (Intel Xeon Gold 6154 models) and the latest high-speed Intel Solid State Drives SSD DC
P4511 with NVMe interface with a capacity of 1 TB. For high-speed data transfer between compute
nodes, the supercomputer uses advanced Intel Omni-Path technology, providing non-blocking
switching speeds up to 100 Gbit/s.
      </p>
      <p>The supercomputer GOVORUN has been added to the information environment of the
HybriLIT cluster and together they formed a HybriLIT heterogeneous platform (Figure 1). Currently,
the platform is used to solve problems that require massively parallel computations in various fields of
physics, biophysics and applied fields, in particular, in lattice quantum chromodynamics to study the
properties of hadronic matter at high energy density and baryon charge, also at strong electromagnetic
fields, modeling the dynamics of collisions of relativistic heavy ions, computations of Josephson
junctions, designing and optimizing cryogenic cells, and others.</p>
      <p>One of the important parts of the HybriLIT platform is a component that supports
computations using application packages with an advanced graphical user interface (GUI) based on
VDI (Virtual Desktop Infrastructure) technology. VDI is a special kind of visualization of several
systems in which all desktop elements are available remotely, and users connect to a virtual desktop
via a special client. Due to this technology, users have access to the platform both in console mode and
with GUI support. In addition, users can also access packages such as COMSOL Multiphysics,
MATLAB, Mathematica, Maple, Geant4, ROOT and perform resource-intensive computations on the
supercomputer GOVORUN.</p>
    </sec>
    <sec id="sec-2">
      <title>2. Software and information environment</title>
      <p>
        For the purposes of efficiency increase of application development and carrying out
computation by means of the resources of the HybriLIT platform, heterogeneous computation team
develops and supports information-software environment that can be divided into two parts [
        <xref ref-type="bibr" rid="ref4">4</xref>
        ]. The
first level is considered to be system one and it includes basic software: Scientific Linux 7.5 OS and
bootloader xCAT (Extreme Cluster Administration Tool); NFS4 and EOS file systems; SLURM
17.02.9 workload manager; Modules 3.2.10 package enables dynamic modification (interrogation,
allocation, and deallocation) of user’s environment and CernVM-FS software repository. The
following software for development, debugging and profiling of parallel applications and carrying out
resource-demanding computations are installed: OpenMPI 1.8.8, 2.1.2, 3.1.2; CUDA 8.0, 9.2, 10.0;
GNU 4.9, 6.2, 7.2; Intel Parallel Studio XE 2018, 2019.
      </p>
      <p>The second level includes a set of services by means of which users can arrange their work
more efficiently and have access to required and useful information while working on the platform.
Among services provided for users there are those, which contain necessary information about the
platform, and about upcoming events that will be held by the HybriLIT team. Such services include:
HybriLIT web-page; Indico system – it is used by the HybriLIT team in order to organize
conferences, seminars and meetings dedicated to parallel programming technologies; HybriLIT User
Support – it is a project developed in the Project Management Service system that allows answering the
questions of users, upload useful materials, publish news, etc.; GitLab – it is a service aimed at mutual
parallel development of applications.</p>
      <p>One of the most important parts of the software and information of the HybriLIT environment
is monitoring services that allow keeping track on the load of computing nodes, efficiency of their use,
monitoring of the storage system, monitoring of virtual machines, and network infrastructure. Such a
monitoring system makes it possible to swiftly react to changes in the computing platform and
inefficient use of resources, resolve errors of the system; as for users, they can keep track of the load
of computing resources. This allows increasing efficiency of using the resources of the computing
platform.</p>
      <p>The computational resources of the HybriLIT platform, unified by the software-information
environment, allow users of the platform to carry out efficient parallel computations for solution of tasks
in different fields of physics, biophysics and applied fields too. In particular:
1) for theoretical research in the field of quantum chromodynamics, for the NICA megaproject,
2) Josephson barrier computations,
3) designing and optimization of cryogenic cell,
4) investigation of the dynamics of Many-Body Theory of Bosonic systems in magneto-optical
traps,
5) modeling of structure and conducting machine of NMDA glutamate receptors,
6) mathematical modeling of interaction between antiprotons and protons with cores in the
computation environment of the future PANDA experiment on FAIR – PandaRoot by means
of DPM, FTF and UrQMD+SMM generators that are being developed in JINR and that are of
great interest for NICA-MPD experiment,
7) computation of matrix element correction in the first Born approximation in case of reaction
of direct ionization of a helium atom with proton with regard to different models of final state,
8) applied computations using application packages with a GUI, such as COMSOL Multiphysics,
MATLAB, LAMMPS, etc.</p>
    </sec>
    <sec id="sec-3">
      <title>3. Education program on HybriLIT platform</title>
      <p>
        HybriLIT platform is used not only for parallel computations, but also as a polygon for
training students, post-graduate students and young scientists in the field of HPC [
        <xref ref-type="bibr" rid="ref5">5</xref>
        ]. This is also an
important aspect of using the HybriLIT platform. The training is conducted both by the young staff
members of the Institute and its Member-states, and by the students of the Dubna University. The
main tool in the educational process is the training and testing polygon which is the basic platform for
the basic courses on “Computer Systems Architecture”, “High Performance Computing Technologies”
and “Mathematical Models in Physics”, the number of students is about 200 people per academic year.
The educational program can be divided into three directions.
      </p>
      <p>The main purpose of the first direction is to introduce students to the basics of parallel
programming technologies, as well as to the IT-solutions and tools necessary for the effective use of
HPC platforms. During 2014-2018, on the basis of the platform, there were held 41 tutorials and
lectures in which more than 341 people from JINR and 218 participants from JINR Member States
took part (Figure 2). Tutorials were carried out within the framework of conferences and schools held by LIT JINR
(MPANCS’2014, MMCP’2015 and 2017, NEC’2015 and 2017, GRID’2016 and 2018), within
programs on international cooperation at Sofia University in Bulgaria, the Mongolian State University.
In frames of the regular training courses on parallel programming technologies organized at LIT for
the JINR employees, students and post-graduates of the University “Dubna”, the heterogeneous
computations team HybriLIT held tutorials on C/C++ program languages, ROOT/PROOF program
packages, parallel programming technologies CUDA, OpenMP, OpenCL, MPI, as well as on the
user-friendly GitLab web-interface for mutual parallel development of applications.</p>
      <p>The second direction is related to the advanced learning of parallel programming
technologies as part of regular training courses conducted by the HybriLIT team for JINR staff,
students, PhD students of the Dubna University. The training courses are held on C/C++, MPI,
OpenMP, CUDA, OpenCL, and also hybrid technologies: MPI + OpenMP, MPI + CUDA, etc., as
well as specialized software, such as ROOT, PROOF, etc., used to solve tasks in high energy physics.
These courses are aimed at improving the skills and retraining of specialists.</p>
      <p>In order to better learn the latest parallel computing technologies and the usage of the latest
computing architectures, HybriLIT team provides tutorials on parallel programming by the software
developers. In particular, tutorials on MPI parallel programming and on tools for debugging and
profiling parallel applications (by the Intel Corporation), CUDA (by NVIDIA), COMSOL
Multiphysics, MATLAB applied programs were held on the basis of the HybriLIT platform.</p>
      <p>The third direction is a regular training of IT-specialists at the department of System
Analysis and Management of the Dubna University [6]. In that direction, the courses on “Computing
Systems Architectures”, “Architectures and Computing Systems Technologies”, “Mathematical
Models in Physics” are held for second year students (bachelor’s degree) and fifth year students
(master). During these courses, students also learn the tools for working on HPC platforms and
related IT services. Also, along with their regular studies, students of the Dubna University participate
in solving specific research tasks based on the HybriLIT platform and have possibility to choose topics
for undergraduate and master's theses on solving problems related to JINR’s research topics using
HPC technologies.</p>
      <p>This adaptive educational program based on the HybriLIT platform, allows training students
and specialists considering the rapidly developing HPC sector in the field of scientific computing.</p>
    </sec>
    <sec id="sec-4">
      <title>4. Conclusion</title>
      <p>For more efficient use of HPC-resources in solving scientific and applied problems facing
JINR, it required not only the provision of computing resources to users, but also the development of a
software and information environment that allows users to simplify work with the existing computing
resources. Another aspect that influenced the development of the information environment was the
integration with the HPC-resources of application packages supporting computing on the
HPC-platforms, which are increasingly being used to solve complex technical problems that are also
necessary for JINR. All this naturally led to the formation of the IT-ecosystem (see Figure 3), which
was not only a convenient means for carrying out resource-intensive calculations, but also became a
fruitful educational environment, allowing students to get acquainted with the latest computing
architectures, learn the latest technologies and tools of parallel programming, learn how to work with
complex packages of applied and mathematical programs. All this together allows us to prepare IT
specialists in the HPC-sphere, primarily to solve the problems facing JINR, which are already
involved in real JINR projects in the learning process and immediately after graduation are included in
the workflow.</p>
      <p>The developed IT-ecosystem is of great interest both in various laboratories and in Russian
and foreign universities and research centers. For this reason, the heterogeneous computing group
conducted many training courses at universities and research centers in the participating countries.</p>
    </sec>
    <sec id="sec-5">
      <title>5. Acknowledgement</title>
      <p>HybriLIT team expresses gratitude to Nikolay Mester, the enterprise development director in
CIS (Intel), and Intel branch in Nizhny Novgorod, in particular Olga Andriyanova, Gennady Fedorov,
Dmitry Sivkov and Dmitry Durnov for organizing and carrying out tutorials on parallel programming
technologies and on using Intel tools for deep learning.</p>
      <p>Works on development of components that allow carrying out computations with the use of
applied software packages are done in the framework of the Russian Science Foundation grant (project
№ 18-71-10095).</p>
    </sec>
  </body>
  <back>
    <ref-list>
      <ref id="ref1">
        <mixed-citation>
          [1]
          <string-name>
            <given-names>Gh.</given-names>
            <surname>Adam</surname>
          </string-name>
          ,
          <string-name>
            <given-names>V.</given-names>
            <surname>Korenkov</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Podgainy</surname>
          </string-name>
          ,
          <string-name>
            <given-names>O.</given-names>
            <surname>Streltsova</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T.</given-names>
            <surname>Strizh</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Zrelov</surname>
          </string-name>
          .
          <article-title>HybriLIT - the main component of the MICC for heterogeneous computations at JINR</article-title>
          .
          <source>CEUR Workshop Proceedings (CEUR-WS.org)</source>
          ,
          V.
          <volume>2023</volume>
          ,
          <year>2017</year>
          , Pp.
          <fpage>351</fpage>
          -
          <lpage>356</lpage>
        </mixed-citation>
      </ref>
      <ref id="ref2">
        <mixed-citation>
          [2]
          Heterogeneous platform “HybriLIT”, URL http://hlit.jinr.ru/en/
        </mixed-citation>
      </ref>
      <ref id="ref3">
        <mixed-citation>
          [3]
          <collab>RSC Group</collab>
          , URL http://www.rscgroup.ru/en/
        </mixed-citation>
      </ref>
      <ref id="ref4">
        <mixed-citation>
          [4]
          <string-name>
            <given-names>Gh.</given-names>
            <surname>Adam</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Vala</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L.</given-names>
            <surname>Valova</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T.</given-names>
            <surname>Zaikina</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Kirakosyan</surname>
          </string-name>
          ,
          <string-name>
            <given-names>Sh.</given-names>
            <surname>Torosyan</surname>
          </string-name>
          .
          <article-title>Information environment of the heterogeneous cluster HybriLIT</article-title>
          .
          <source>Proc. of ITTMM'2016</source>
          ,
          <year>2016</year>
          , Pp.
          <fpage>199</fpage>
          -
          <lpage>200</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref5">
        <mixed-citation>
          [5]
          <string-name>
            <given-names>V.V.</given-names>
            <surname>Korenkov</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.V.</given-names>
            <surname>Podgainy</surname>
          </string-name>
          ,
          <string-name>
            <given-names>O.I.</given-names>
            <surname>Streltsova</surname>
          </string-name>
          ,
          <article-title>Educational program on HPC technologies on the basic of the HybriLIT heterogeneous cluster (LIT JINR)</article-title>
          .
          <source>Modern Information Technology and ITeducation. V13, №4</source>
          ,
          <year>2017</year>
          ,
          P.
          <fpage>141</fpage>
          -
          <lpage>146</lpage>
        </mixed-citation>
      </ref>
    </ref-list>
  </back>
</article>