<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.0 20120330//EN" "JATS-archivearticle1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta />
    <article-meta>
      <title-group>
        <article-title>Data Science Platform Applied to Health in Contribution to the Brazilian Unified Health System</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author">
          <string-name>Marcel Pedroso</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Rebecca Salles</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff2">2</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Raphael Saldanha</string-name>
          <xref ref-type="aff" rid="aff4">4</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Vinicius Kreischer de Almeida</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Gabriel Souto</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Balthazar Paixão</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Sérgio Ricardo de Borba Cruz</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Carlos Cardoso</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff5">5</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Victor Ribeiro</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff5">5</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Raquel Gritz</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Carmen Bonifácio</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Matheus Miloski</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Carlos Augusto de Sousa</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Gizelton Pereira Alencar</string-name>
          <xref ref-type="aff" rid="aff7">7</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Ariane Alves</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Nelson Niero Neto</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Letícia Sabbadini</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Eduardo Ogasawara</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Christovam Barcellos</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Fabio Porto</string-name>
          <xref ref-type="aff" rid="aff5">5</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Lucas Zinato Carraro</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Jeferson Lima</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <aff id="aff0">
          <label>0</label>
          <institution>DTIES/FCM, Rio de Janeiro State University (UERJ)</institution>
          ,
          <country country="BR">Brazil</country>
        </aff>
        <aff id="aff1">
          <label>1</label>
          <institution>Data Science Platform applied to Health (PCDaS)/Lis/Icict</institution>
          ,
          <addr-line>Oswaldo Cruz Foundation (Fiocruz)</addr-line>
          ,
          <country country="BR">Brazil</country>
        </aff>
        <aff id="aff2">
          <label>2</label>
          <institution>Federal Center for Technological Education of Rio de Janeiro (CEFET/RJ)</institution>
          ,
          <country country="BR">Brazil</country>
        </aff>
        <aff id="aff3">
          <label>3</label>
          <institution>Laboratory of Health Information (LIS)/Icict</institution>
          ,
          <addr-line>Oswaldo Cruz Foundation (Fiocruz)</addr-line>
          ,
          <country country="BR">Brazil</country>
        </aff>
        <aff id="aff4">
          <label>4</label>
          <institution>National Institute for Research in Digital Science and Technology (INRIA)</institution>
          ,
          <country country="FR">France</country>
        </aff>
        <aff id="aff5">
          <label>5</label>
          <institution>National Laboratory for Scientific Computing (LNCC)</institution>
          ,
          <country country="BR">Brazil</country>
        </aff>
        <aff id="aff6">
          <label>6</label>
          <institution>PESC/COPPE, Federal University of Rio de Janeiro (UFRJ)</institution>
          ,
          <country country="BR">Brazil</country>
        </aff>
        <aff id="aff7">
          <label>7</label>
          <institution>School of Public Health, University of São Paulo (USP)</institution>
          ,
          <country country="BR">Brazil</country>
        </aff>
        <aff id="aff8">
          <label>8</label>
          <institution>The Data Science Platform applied to Health (“Plataforma de Ciência de Dados aplicada à Saúde” - PCDaS) is a research and technological development project of the Laboratory of Health Information (“Laboratório de Informação em Saúde” - LIS) from the Institute of Scientific and Technological Communication and Information in Health (“Instituto de Comunicação e Informação Científica e Tecnológica em Saúde” - ICICT), of the Oswaldo Cruz Foundation</institution>
          ,
          <addr-line>“Fundação Oswaldo Cruz” - Fiocruz</addr-line>
        </aff>
      </contrib-group>
      <abstract>
        <p>The Data Science Platform Applied to Health (PCDaS) is a research and technological development project that aims to develop and apply novel data analysis methods to public health data. It fills a technological gap between the variety of data sources available in legacy and unstandardized formats and the current needs and possibilities of Data Science applications to consume and explore data for the benefit of the Brazilian Health System. PCDaS provides democratic access to health-related datasets and information by requiring fewer technological abilities from its users while maintaining a continuously updated stack of technologies. As a data ecosystem, our primary goal is to provide secure and remote access to health data, technological tools, and a robust infrastructure provided by our platform to process and analyze a large amount of data that generally demand computational power often unavailable to researchers. The infrastructure consists of multi-region on-premise and cloud servers prepared to deal with the heavy analysis of Big Data from anywhere from multiple users simultaneously. Providing secure and remote access to health databases, whether in their original form or processed, is a daily breakthrough for a public health researcher. Knowing that there is a place where they can access integrated data in a standard format makes the research process much more manageable. To ensure quality, our data engineering and governance teams process these data sources following a gold standard based on cross-tables provided by the Health Ministry (the TabNET system) and decoding the original variables into meaningful names provided by the sources. It is very relevant to emphasize the comprehensive documentation of metadata, attributes, and the ETL (Extract, Transform, Load) process for databases. Every part of these steps is described in detail on the PCDaS website, ensuring the comprehension and reproducibility of the process. 
These features ensure that PCDaS users can effectively leverage the platform's resources and capabilities, enabling them to conduct research, perform data analysis, and collaborate within a secure and supportive environment to contribute to the Brazilian Health System.</p>
      </abstract>
      <kwd-group>
        <kwd>Public Health</kwd>
        <kwd>PaaS</kwd>
        <kwd>Data Science</kwd>
        <kwd>Data Ecosystem</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec id="sec-1">
      <title>1. Introduction</title>
      <p>oratório Nacional de Computação Científica” - LNCC). ments, trying to respond to different needs from
epidemiBoth Fiocruz and LNCC are two of the main research ological and administrative points of view. Most of those
institutions in Brazil. Fiocruz is the most prominent in- systems were implemented using 1990 legacy database
stitution of science and technology in health in Latin technologies, such as dBase and other DOS and early
America, while LNCC is the first Brazilian institution in versions of Windows applications. Its technological
modthe field of Scientific Computing having the most power- ernization is planned but taking place slowly by facing
ful IT resources in Latin America. the challenges of covering a country with continental</p>
      <p>PCDaS aims to develop and apply novel methods of extensions with inequalities in Internet access,
technidata analysis on public health data, filling a technological cal working force availability, political interference, and
gap between the variety of data sources of interest to funding.
the Public Health available in legacy and unstandardized The DataSUS made Brazilian health data publicly
availformats and the current needs and possibilities of Data able since its creation, fulfilling a mission of data
dissemScience applications to consume and explore data for the ination, being a pioneer government agency of open data
benefit of the Brazilian Health System. in the 90’s. Open data principles implemented on HIS</p>
      <p>
        Data science emerged as an interdisciplinary field, cov- and the citizen rights to information access are keystones
ering the study of structured and non-structured data of for the PCDaS creation.
different volume, complexity, variety, and other proper- Using the available data from DataSUS and other
ties, also called “Big Data” [
        <xref ref-type="bibr" rid="ref5">1</xref>
        ]. Public Health may benefit sources of relevant sociodemographic information, such
from Data Science techniques and paradigms, with condi- as Census and population thematic inquiries, usually
imtions to advance statistical and epidemiological practical poses steps of downloading and handling large amounts
applications to more complex and heterodox data sources. of files with storage needs and processing and filtering
      </p>
      <p>Like other research fields, Public Health has histor- the data for specific research needs. Due to the nature of
ically been guided by a theory-driven or hypothesis- the available data formats in its sources, this
Extractiondriven approach to science, with a priori assumptions. Transform-Load (ETL) process includes the use of legacy
The new challenges imposed by Big-Data go beyond software, undocumented sources, and handling of
largerscaling-up computer servers to use the same methods. than-memory data by non-technological savvy users,
New approaches are needed to overhaul the methods to such as social scientists, epidemiologists, geographers,
use better the full potential of available data in its diver- and others.
sity and complexity, leading to a data-driven approach to The current ecosystem of data sources of health data
science [2, 3]. For the context of PCDaS, Data Science is and relevant sources of information for Public Health
iminterpreted as a field of study that can aid the discovery poses on researchers and managers a long learning curve,
of knowledge of useful information from big or complex with non-standardized and shared practices that lead
databases and aid the decision-making guided by data to repeating the ETL processes among several research
[4]. groups to obtain similar but not comparable results.</p>
      <p>The Brazilian Health System is publicly funded and Aiming at providing a Data Science platform for Public
offers universal free health care coverage to the Brazilian Health, the PCDaS is structured as Platform-as-a-Service
population, known as the Unified Health System (“Sis- (PaaS) to cover aspects such as ETL, data analysis, data
tema Único de Saúde” – SUS). It was established in 1988, visualization, modeling, artificial intelligence, and
knowlalong with the re-democratization process of the country. edge dissemination. At PCDaS, we strive to create a
A component of the SUS is the Department of Informatics community of data scientists who collaborate with SUS
(DataSUS), which is responsible for gathering, organizing, to ofer advanced technology and scientific computing
and disseminating Brazilian health data. services. Our primary focus is to help manage, store,</p>
      <p>
        The health data at DataSUS is structured by several analyze, visualize, and share extensive data related to
Health Information Systems (HIS) dedicated to covering healthcare and its socio-environmental influences. Our
different aspects of a person’s life cycle, resulting in spe- services cater to researchers, professors, students,
educacific HIS like the SINASC for birth data, SIH for hospital tional and research institutions, as well as government
admissions data, and SIM for mortality data. There are officials. Our objective is to advocate for positive
adother dozens of HIS maintained by the DataSUS, cover- vancements in public health policies and society as a
ing aspects such as vaccinations, ambulatorial services, whole.
records of health professionals, health services, health Besides this introduction, Section 2 presents a
literaequipment, suspected cases of transmissible diseases, vio- ture review, a background on PaaS, and discusses related
lence, and other themes. The anonymized raw data from works. Section 3 further details the components of the
those systems are publicly available through the DataSUS PCDaS data ecosystem. Section 4 shares some of the
website. main research projects furthered by PCDaS as well as
In common, those HIS were created in different mo- their products and scientific results. Finally, Section 5
concludes and describes the plans for expansion and con- API (Application Programming Interface).
tinuous improvement of PCDaS. PaaS offers access to its resources through network
connections, delivering services to users. The concept
of an open platform relies on enabling third-party
devel2. Literature review opers to create internet-based value-added applications.
This integration is facilitated through Application
ProOver the past years, industry and academia have shown gramming Interfaces (APIs), granting third-party
devela growing interest in Big Data and analytics. Despite opers access to resources and services. Consequently,
advancements in computer systems, handling large-scale this accessibility yields tangible benefits for
organizadata remains an important challenge. Commonly, we en- tions, developers, and users.
counter hardware and time limitations, which drive the One plausible solution to address Big Data challenges is
improvement of data processing methods. Big Data refers through data ecosystems. Section 3 presents a more
accuto the vast amount of data created and exchanged. Its ap- rate description of our architecture, highlighting how we
plications are characterized by the “3Vs”: volume
(information volume), velocity (data generation and consump- provide PCDaS as a Platform-as-a-Service. In this context, we have implemented a data ecosystem that facilitates
tion time), and variety (heterogeneous data sources) [5]. collaborative efforts to enhance our understanding of
Additional dimensions such as veracity, validity, value, public health in Brazil.
variability, venue, vocabulary, and vagueness have been Data ecosystems transcend traditional production
proposed to complement the understanding of Big Data chains by incorporating three key characteristics:
net[
        <xref ref-type="bibr" rid="ref5">1</xref>
        ]. work, platform, and co-evolution [
        <xref ref-type="bibr" rid="ref37 ref70">10</xref>
        ]. As the mentioned
      </p>
      <p>Over the past decade, data science has emerged as a work describes, networks within data ecosystems are
highly relevant field. It encompasses a multidisciplinary formed by developers, providers, technology suppliers,
approach, gathering different knowledge areas [6]. The and infrastructure. Platforms serve as the means through
fundamental objective of data science is to extract valu- which ecosystem participants interact. Moreover, data
able insights and knowledge from data, leveraging ana- ecosystems provide resources that enable participants
lytical techniques and computational tools. Collaborative to evolve through interactions among stakeholders and
platforms such as Kaggle and OpenML perform a rel- across different knowledge fields. A more specific
disevant role as players on data science community. For cussion [8] about PaaS and the relation between their
instance, the mentioned platforms provide datasets, data components. A data ecosystem can also be understood as
documentation and some exploratory analysis. a complex set of interactions between heterogeneous</p>
      <p>In the context of public health, data science plays a agents and their environment, similar to a biological
crucial role. Goldsmith et al. [7] define data science in ecosystem [11].
public health as a discipline that focuses on formulating The increasing variety and volume of data have
preand answering questions related to public health and sented us with numerous challenges. Some solutions
well-being through data-centric approaches. In recent have been proposed to overcome time and hardware
years, public health researchers in Brazil have increas- restrictions. For integrating data from heterogeneous
ingly turned to data science tools and methodologies sources while ensuring data quality, Ramalli et al. [12]
to understand the Brazilian health system better and propose using SciExpeM, a framework designed to speed
improve healthcare outcomes. The analyses using the up and support the development of scientific models. This
information provided by DataSus present themselves as work was further extended [13], where a data ecosystem
a typical Big Data problem due to its volume. was proposed specifically for chemical engineering.</p>
      <p>DataSUS is the primary information system responsi- The increasing adoption of IoT sensors has led to the
ble for providing computational support to all instances continuous monitoring of daily activities. In Yu et al.
of SUS. Despite its crucial role, accessing data from Data- [14], a data ecosystem is proposed to address predictive
SUS can be challenging due to the fragmented nature of maintenance problems in an industrial context. In
mathe available data sources, which makes it difficult for terials science, Blaiszik et al. [15] emphasize enhancing
researchers and stakeholders to obtain a comprehensive data ecosystems to develop new technologies. The
paview of the public health data landscape. per presents two projects supporting machine learning</p>
      <p>According to [8] Platform-as-a-Service (PaaS) is an in- applications in materials science.
terface that provides access to a complex set of techno- Although the mentioned works primarily focus on
enlogical components. Today, PaaS plays a central role gineering applications, data ecosystems can be applied
in internet applications, abstracting architecture and in- across various domains. For instance, in an analysis of
frastructure complexities for users. PaaS exhibits three the efects of territorial politics and metropolitan
govsignificant characteristics [ 9]: it is internet resource cen- ernance, Kitchin and Moore-Cherry [16] discuss how
tered, is open to third-party developers, and utilizes web fragmented governance can reduce economies of scale
and limit the effectiveness of public policies. These pa- It is also important to mention the Global Health
Obserpers contribute to the growing body of knowledge on vatory (GHO) data repository, a World Health
Ordata ecosystems. They typically present domain-specific ganization initiative that aims to provide health-related
problems and strategies for solving Big Data challenges. statistics for all 194 Member States of the United Nations.
Table 1 provides a comprehensive list of data ecosystems, They provide access to more than 1000 indicators via an
and to the best of the author’s knowledge, PCDaS is the API interface on collective health topics, i.e.: mortality
first data ecosystem specialized in public health in Brazil. and burden of diseases, immunization, malaria among</p>
      <p>We selected some features for comparing some of the others.
existing data ecosystems: Notably, most of the listed data ecosystems do not
provide data from a centralized view. By centralizing data
• Educational Services: indicates if the given plat- access, PCDaS can guarantee the data science community
form provides training and technological literacy; with simpler usage. This decision allows us to enrich
• Teams: the data ecosystem provider explicitly al- data, add value and enable more complex analysis. By
locates teams for supporting data-driven projects; releasing microdata, PCDaS offers more flexibility for the
• ETL and Documentation: indicates if the plat- platform users. It is also worth mentioning that
form provides ETL process and data documenta- the choice of making our methodology public ensures
tion; reproducibility.
• Quality: indicates if the given data ecosystem</p>
      <p>provides some data quality discussion;
• Centralized View: data provided after cleaning 3. PCDaS</p>
      <p>and enrichment process;
• Funding: indicates how data ecosystems are</p>
      <p>funded; and
• Domain: application area.</p>
      <sec id="sec-1-1">
        <title>As discussed in the previous sections, the utilization of</title>
        <p>health data from Brazilian Health Information Systems
in research endeavors presents numerous complex
challenges that demand careful consideration. These
chal</p>
        <p>An important player in data ecosystems is the World lenges can manifest from various aspects, including
inBank1, an international organization dedicated to promot- trinsic characteristics of the data itself and the necessary
ing equity and reducing poverty worldwide. Among its infrastructure for handling and analyzing the vast
volobjectives is the support of countries in improving their umes of data available.
statistical capacity through advisory services, project sup- From a data perspective, several critical issues arise
port, partnership management, and financial resources. when dealing with health data, including the
integra</p>
        <p>Despite being a more mature data ecosystem when tion of data from diverse sources, data processing tasks
compared to the others presented in Table 1, Canadian In- such as cleaning and enrichment, the challenge of
workstitute for Health Information (CIHI) does not provide ing with pre-aggregated data lacking granularity,
nona centralized access for data. The institution releases standard file formats, managing the sheer volume of data,
frequent reports and metrics which can provide a wider and ensuring appropriate data modeling for various
recomprehension of the Canadian public health landscape, and search needs.
also offers training on how to access the information From an infrastructure perspective, having a robust
envimade available. ronment that can provide the necessary computational
power to acquire, process, and analyze large volumes
of data while ensuring privacy and security is crucial.</p>
      </sec>
      <sec id="sec-1-2">
        <title>1World Bank provides capacitation over mentoring and funding</title>
        <p>programs
In addition to meeting the hardware requirements, the a strong focus on high-performance computing. The
infrastructure must encompass the selection of suitable second part of the infrastructure is located at the Data
tools and services that can effectively support the re- Processing Center of Oswaldo Cruz Foundation (Fiocruz),
search objectives. This infrastructure entails considering in Rio de Janeiro, which is widely recognized as one of
factors such as data storage, processing capabilities, scal- the leading research centers in the field of public health.
ability, and the ability to implement robust privacy and By leveraging the complementary nature of
infrastrucsecurity measures. ture and staff members of these two institutions, PCDaS</p>
        <p>PCDaS was created by establishing a robust data can provide computational power and services capable of
ecosystem that effectively addresses several of the afore- meeting the specific needs of internal engineering teams
mentioned challenges. One of our primary focuses is to and users interested in using the platform.
provide services that facilitate the acquisition and explo- Regarding availability, the infrastructure located at
ration of data by users. By promoting data integration, Fiocruz complies with the ABNT NBR 15247 (Safe Storage
PCDaS aims to foster collaboration among different re- Environment with Fire Resistance Classification and Test
search groups, encouraging active participation in data Method) and NBR 60529 (Protection of Electrical
Equipsharing and promoting the reuse of valuable informa- ment) standards. Additionally, it has 2 uninterruptible
tion. This collaborative approach can potentially enhance power supplies (UPS) and its own power generator,
enknowledge exchange, accelerate research projects, and suring the operation of the equipment 24 hours a day and
ultimately contribute to data-driven healthcare decision- providing protection against humidity, corrosive gases,
making. magnetism, and high temperatures.</p>
        <p>
          The initial version of the platform, named PCDaS 1.0, Furthermore, since the services are located in different
became available in 2016 [17]. Despite its limitations, this places, any network or power related issues in one
locarelease marked the realization of the concept of offering tion only impact the services in that location, minimizing
users a unified platform for accessing and analyzing vast overall disruption to the platform’s services. Additionally,
amounts of HIS data. Subsequently, in 2019, PCDaS 1.5 having different teams responsible for maintaining the
was introduced [18], featuring enhancements to the user separate infrastructures and tools allows for a more
speinterface, comprehensive tutorials to aid platform utiliza- cialized focus on specific concerns. This fine-grained
spetion, and the addition of new and updated public datasets. cialization ensures that each team can efficiently address
PCDaS 2.0 was launched in 2021 [
          <xref ref-type="bibr" rid="ref42 ref49 ref64">19</xref>
          ], incorporating a maintenance tasks and any issues that arise, contributing
range of improvements, such as Single Sign-On (SSO) to the overall stability and reliability of the platform.
functionality for users, seamless integration with Google The platform prioritizes using free and open-source
Colab for notebooks and tutorials, and a broader selection software (FOSS) for its tools and services. This approach
of public datasets. More recently, a RESTful API was de- offers several advantages in terms of infrastructure
mainveloped and released to users of partner projects (such as tenance. FOSS solutions are cost-effective, providing a
the ones presented in Section 4), with the aim of simplify- more affordable alternative than commercial options.
Ading access to datasets hosted on the platform. In addition ditionally, FOSS software is transparent, allowing for
to enhancing the user experience, the platform’s backend greater visibility into the code and ensuring the
platundergoes continuous evolution through the expansion form’s operations are built on trustworthy foundations.
and optimization of its infrastructure, architecture, and Finally, the customizable nature of FOSS enables the
plattools. form to adapt and meet specific requirements efficiently.
        </p>
        <p>Currently, PCDaS has around 1,450 active users, sup- This way, the platform can function with minimal impact
porting dozens of research and technological develop- even when budget constraints arise.
ment projects from a variety of groups (academics and A simplified overview of the infrastructure and
archifrom the government), with particular interest in research tecture of the platform is presented in Figure 1. Despite
and analysis on Public Health and socio-environmental the specification of current infrastructure, tools, and
serdeterminants of health. The following subsections de- vices, the platform follows a tool-agnostic and
evoluscribe in detail the components of the PCDaS data ecosys- tionary approach, with planned updates and migration
tem. to new tools and infrastructure when favorable. It can
be seen that the platform provides a set of services
tai3.1. Infrastructure and Architecture lored to specific needs. Internal engineering teams of
the platform dispose of tools for ETL jobs (Apache
AirThe infrastructure of the platform is hosted in two com- flow and JupyterHub), as well as complete access (write,
putational environments, in different geographical loca- read) to a Data Warehouse solution (ElasticSearch).
Plattions. The first is located at the Data Processing Center form users’ services are focused on providing
compuof the National Laboratory for Scientific Computation tational power (JupyterHub and Google Colaboratory),
(LNCC), a renowned Brazilian research institution with data analysis tools (Kibana), and data consumption
interLNCC</p>
        <p>FIOCRUZ
NFS Server</p>
        <p>Read</p>
        <p>Read / Write</p>
        <p>Read / Write
Read / Write</p>
        <p>Logs
Engineering Teams
 Exclusive Service</p>
        <p>Authentication and Authorization</p>
        <p>Read</p>
        <p>Read / Write Read
faces (FastAPI). in TabNET.</p>
        <p>Finally, it is important to point out that, in addition to It is very relevant to emphasize the comprehensive
docthe mentioned infrastructure, PCDaS has the capability umentation of metadata, attributes, and the ETL (Extract,
to harness cloud solutions when faced with scenarios Transform, Load) process for databases. Every part of
that exceed the capacity or feasibility of our existing these steps is described in detail on the website, ensuring
infrastructure and tools. the comprehension and reproducibility of the process.
These features ensure that PCDaS users can effectively
3.2. Services leverage the platform’s resources and capabilities,
enabling them to conduct research, perform data analysis,
As a data ecosystem, our main goal is to provide secure and collaborate within a secure and supportive
environand remote access to health data, technological tools, and ment.
a robust infrastructure provided by our platform to pro- As a way to organize the use and access to the platform,
cess and analyze a large amount of data that researchers the users are organized into three categories: Basic Users,
often lack the computational power to handle. This ro- Academic Users, and Partner Users. What differentiates
bust infrastructure consists of multi-region on-premise them are the services available for each one. Table 2
beand cloud servers prepared to deal with the heavy anal- low summarizes the characteristics and features available
ysis of Big Data from anywhere from multiple users si- to each user type in the PCDaS platform.
multaneously. Basic Users have access to two key features: a) Support</p>
        <p>Providing secure and remote access to health databases for data mining and predictive analysis through tutorials
available in PCDaS, whether in their original form or pro- on Google Colab. Our team provides tutorials on how
cessed, is a daily breakthrough for a researcher. Knowing to utilize the data we have made available using
openthat there is a place where they can access integrated source tools. b) Access to the community of researchers,
data in an ordinary format like CSV (comma-separated data scientists, and PCDaS users in a public Slack Channel
values) makes the research process much easier. where they can interact and collaborate.</p>
        <p>To ensure quality, our data engineering and gover- Academic Users have access to two additional levels
nance teams process these data sources following a gold of infrastructure, building upon the features available to
standard based on cross-tables provided by the Health Basic Users: c) Secure and reliable remote access to PCDaS
Ministry in the TabNET system and decoding the original via JupyterHub (Python or R), which ensures researchers
variables into meaningful names provided by the sources. have complete access and support while utilizing our
By leveraging information provided by TabNET, we try technological infrastructure to prepare their analyses.
to minimize potential issues regarding data inconsisten- d) Promotion of academic and partner projects on the
cies by comparing the processed data against that found PCDaS website. This feature provides a dedicated space
on our website to showcase the project’s main goals and data retrieval processes.
the individuals involved.</p>
        <p>Finally, Partner Users receive the highest level of sup- 3.3.2. Data Transformation
port and benefit from three additional features: e) Basic
training in Python or R, offering classes on data usage, Following data collection, the subsequent phase involves
analysis, and manipulation with the chosen program- data processing. A significant challenge that users
enming language. f) Training of the research team for us- counter when working with data from various HIS is
ing PCDaS to its fullest potential, and providing com- the presence of diverse legacy and unstandardized file
prehensive support on leveraging our infrastructure and formats. Consequently, converting these files into
userdata sources. g) Extraction, Transformation, and Load- friendly formats, such as CSV, Parquet, and other
coming (ETL) of databases relevant to the research team’s patible file formats, is imperative. Although this task
interests. Our experienced engineering team assists in may appear straightforward, it often necessitates
extenextracting and structuring data, allowing researchers to sive research, understanding domain requirements, and
focus on their project’s core objectives without managing potentially demanding the development of specialized
the technological aspects. libraries capable of handling these conversion processes
effectively.</p>
        <p>Another very important step of data transformation
3.3. Data Management is data enriching. In the case of public HIS, data files
This section outlines the Data Management process often consist of numeric values representing categories
within PCDaS’ data ecosystem. Data management refers or amounts, with separate files containing mappings for
to the activities undertaken to ensure the accuracy, in- these categories to their respective string values. It is
estegrity, and accessibility of the datasets. It encompasses sential to perform data mappings that generate datasets
data collection, organization, processing, storage, inte- containing numeric category values and their
correspondgration, and governance. Through effective data manage- ing string representations. Such a process enhances the
ment practices, PCDaS strives to maintain the quality and usability of the data. It enables easier data
interpretareliability of data, enabling researchers to derive mean- tion and analysis, ensuring meaningful insights can be
ingful insights and make informed decisions based on derived from the enriched dataset.
reliable and comprehensive information. In certain situations, it becomes necessary to integrate
different datasets. This integration can be achieved by
3.3.1. Data Extraction utilizing a shared column to enrich the resulting dataset
with additional information. By joining datasets, valuable
Data extraction is facilitated by utilizing two approaches: insights can be gained from the merged data, providing
leveraging the orchestration capabilities of Apache Air- more comprehensive underlying information. This
prolfow or employing custom code on Jupyter Notebooks. cess enables researchers and analysts to leverage multiple
Apache Airflow is employed when there is a need for datasets’ combined knowledge and attributes, facilitating
automated data collection based on predefined sched- more informed analysis.
ules or events. On the other hand, Jupyter Notebooks In cases where the final dataset is stored in our data
are utilized for simpler data collection scenarios where warehouse, the data processing workflow includes the
a one-shot extraction is sufficient. This approach pro- crucial step of data modeling. Data modeling ensures the
vides flexibility and ease of use for ad-hoc data collection data and the destination storage system’s data model
comtasks. By employing both Apache Airflow and Jupyter patibility. It also ensures that the data is appropriately
Notebooks, the platform can accommodate various data formatted to facilitate analytical queries. By
performcollection requirements and ensure efficient and effective
1–16
and organized to support efficient and effective analysis,
enabling users to derive meaningful insights from the
dataset.</p>
        <sec id="sec-1-2-1">
          <title>3.3.3. Data Validation</title>
        </sec>
        <sec id="sec-1-2-2">
          <title>3.3.5. Data Access Management</title>
        </sec>
      </sec>
      <sec id="sec-1-3">
        <title>Data access is managed by leveraging the concept of</title>
        <p>Elasticsearch roles. A role associates a collection of users
with a set of permissions. These permissions can include
different types of access (read, write) to specific indices,
clusters, and objects in Elasticsearch and Kibana. Users
are granted the specific permissions they need to do their
jobs, without giving them access to more data than they
need. This helps to protect data from unauthorized access
and allows fine-tuning of the access that users have to
different artifacts managed by the PCDaS platform.
3.3.6. Example
Data validation is an essential step in the data processing
workflows of HIS for several reasons. One of the main
motivations is the reliance on manual human input for
data entry in many of these systems. Despite
verifications being performed in the source systems, it is not
uncommon to encounter inconsistencies in the provided
data. By conducting data validation, these inconsistencies
can be identified and addressed, ensuring the accuracy
and reliability of the processed data. Additionally, data
validation plays a vital role in maximizing trust in the
transformation process, as it helps prevent issues with
the data from propagating to downstream tasks.</p>
        <p>Data validation involves examining and verifying
transformed data to ensure its adherence to predefined
rules. These rules can be described in documents. It is the
case of various HIS systems, as domain specialists can
define them. Regardless of their origin, these rules serve
as guidelines for ensuring data accuracy and consistency.</p>
        <p>Figure 2 illustrates the data management process for the
System of Hospital Admissions (SIH) data. This
workflow represents the data management process for single
coverage of SIH, which releases data monthly.</p>
        <p>The process starts with creating the folder structure
to accommodate the downloaded and generated data.</p>
        <p>After this, a zip file containing the mappings used during
the transformation step is downloaded from DataSUS
FTP. The zip file decompression results in several files
with CNV and DBF formats. These files represent the
mappings and are parsed to CSV file format.
3.3.4. Data Load The next step is downloading data files representing
Once the data has been transformed, the subsequent step each state’s hospital admissions. As these files are in
involves loading the transformed data into the target DBC file format, an additional step is necessary to
consystems. In the case of public datasets provided by the vert them to a format compatible with popular processing
platform, this entails inserting the data into our Elas- tools. After format conversion, the dataset is validated
ticSearch cluster and loading the transformed CSV files by applying a set of rules provided in SIH documentation.
into a shared environment accessible to users. This load After the pre-validation, data is enriched by mapping
allows users to query the inserted data using Elastic- categorical values to respective strings and integrating
Search’s powerful analytics engine. Additionally, users them with other datasets. A post-validation step is
percan utilize custom code on Jupyter or Colaboratory note- formed after data enriching to verify the correctness of
books to perform more detailed analytics on the CSV the data, and if the data is valid, it is loaded into our data
files. warehouse.</p>
        <p>Another advantage of utilizing a distributed analyt- The loaded data is validated against cross-tables
availics system like ElasticSearch is its horizontal scalability. able at the TabNET, a system provided by DataSUS, in
By consistently monitoring the system’s resource usage, which users can consult different data information about
we can identify demand increases and, if needed, add several HIS of SUS. Finally, the enriched data are
gathadditional machines to the cluster. This scalability fea- ered in a zip file and made available in a shared space to
ture ensures the platform can handle growing workloads which platform users have access.
effectively and maintain optimal performance. It is worth emphasizing that this process is designed</p>
        <p>The data can be loaded into their preferred systems specifically for SIH data, although it can be adapted for
in any acceptable format for private datasets generated other datasets if needed. However, having distinct data
for Partner Users. However, since the platform provides management processes tailored to different datasets is
infrastructure that users may not possess, it is common more common.
for datasets to be loaded into a data warehouse. It
allows centralized storage, efficient data management, and 3.4. Data Analysis and Visualization
easy integration with the platform’s analytical tools and
services.</p>
      </sec>
      <sec id="sec-1-4">
        <title>As mentioned earlier, a primary objective of the platform is to facilitate users’ access to HIS data. In order to achieve this objective, it is essential to collect, enhance, and make the data readily available and offer different interfaces</title>
        <p>that support distinct needs for analyzing and visualizing cluding data collection, processing, and utilization. This
the information. documentation is a valuable resource for users and
col</p>
        <p>When client-side data manipulations are required, laborators, enabling them to understand how the data is
users can utilize our JupyterHub service or Google Colab obtained and handled.
notebooks to execute queries over our Data Warehouse Regarding private datasets, all code generated by the
or manipulate CSV or other formats made available by platform’s engineering teams is shareable with our
colthe platform. Additionally, an API developed using the laborators through simple notebooks and packaged
apFastAPI framework is provided to simplify the querying plications. Additionally, we employ docker containers
process by supporting SQL query language. Utilizing this to facilitate the sharing process. This approach ensures
API offers the advantage of logging user queries, enabling seamless collaboration and enables our collaborators to
monitoring of request-related issues, and providing sup- work efficiently with the code and access the necessary
port when necessary. data analysis and processing tools.</p>
        <p>Additionally, we offer a Kibana interface for users who
prefer a more visual approach to data analysis or have 3.6. Data Protection
limited familiarity with programming concepts. This
interface provides a user-friendly and intuitive way to Following Brazilian guidelines on research that includes
explore and analyze data stored in our ElasticSearch clus- data from humans, especially from the Ethics Committee
ter, enhancing and simplifying the overall data analysis from the Escola Politécnica de Saúde Joaquim Venâncio,
experience. we adopt specific legal documents as the Data Use
Com</p>
        <p>Examples of analysis of the platform’s public data are mitment Term, or “Termo de Compromisso de Utilização
available on our site, both in the form of Kibana dash- de dados” (TCUD), where are stated the project leaders,
boards and in tutorial examples that can be easily cloned objectives of the research, and how the data will be used
and opened in Google Colaboratory’s notebooks. and guarded. Projects that contain data with sensitive</p>
        <p>Various examples of analyses conducted on the plat- information follow specific guidelines from the Ethics
form’s public data are presented on the PCDaS website. Committee for data handling, storage, and retention
poliThese examples are available in Kibana dashboards and cies.
tutorial examples that can be effortlessly cloned and
opened in Google Colaboratory notebooks. These re- 3.7. Data FAIRness
sources serve as valuable references for users looking to
explore and learn from practical use cases of data analysis
on our platform.</p>
      </sec>
      <sec id="sec-1-5">
        <title>Data sharing is a core element of PCDaS activities. Thus,</title>
        <p>with the aim of improving its quality and expanding
the possibilities of reuse, it was decided to guide this
process through the FAIR principles (Findable, Accessible,
3.5. Data Transparency Interoperable, and Reusable).</p>
        <p>
          Data transparency is crucial for enabling users and col- Adopting FAIR principles makes it easier to discover
laborators to comprehend the complete data generation and access data, while interoperability makes it
possiprocess. By making the entire process transparent, there ble to automate processes and integrate with other
anare several advantages. Firstly, users can verify that the alytical tools, which can lead to new discoveries faster.
process aligns with their expectations. They can suggest In addition, there is the improvement of transparency
necessary modifications to meet their needs if any incon- and reliability, which can lead to greater reuse of data,
sistencies arise. This transparency empowers users to in addition to promoting collaboration and innovation
actively participate in shaping the data generation pro- [
          <xref ref-type="bibr" rid="ref2">13, 20, 21</xref>
          ], which is fundamental to the field of public
cess, fostering a collaborative and inclusive environment. health.
        </p>
        <p>For public datasets, to ensure data transparency, we At PCDaS, based on FAIR principles, information about
offer detailed documentation that provides clear and com- data provenance, descriptive metadata, and the record of
prehensive information about the entire data lifecycle, in- the data processing and dataset enrichment process are
shared. The data is shared in open formats such as CSV
and through an API, which makes the data readable by
humans and machines, which together with the shared
documentation, expands the possibility of
interoperability and reuse.</p>
      </sec>
    </sec>
    <sec id="sec-2">
      <title>4. Success cases</title>
      <sec id="sec-2-1">
        <title>This section presents the main research projects con</title>
        <p>ducted in partnership with PCDaS that provided
services of scientific training, technological infrastructure,
ETL, and data analysis and visualization. The described
projects presented high impact especially in the field of
public health as it can be observed by their achievements
and public adoption of their products and scientific
results. These projects exemplify, validate and represent
the realization of the PCDaS goal of creating a community
of data scientists and researchers, as well as government
officials who collaborate with SUS through the use of
advanced technology and scientific computing services,
furthering positive advancements in public health
policies and society as a whole.
4.1. GCE</p>
      </sec>
      <sec id="sec-2-2">
        <title>Created in 2003 by the Bill &amp; Melinda Gates Foundation,</title>
        <p>Grand Challenges Explorations is a program that invests
in impactful research to solve significant health and
development challenges worldwide [22]. Grand Challenges
Brazil is the result of a partnership between the
Department of Science and Technology (DECIT) of the Ministry
of Health, the National Council for Scientific and
Technological Development (CNPq), and the Bill &amp; Melinda
Gates Foundation. In Brazil, projects supported in data
science [23] used innovative approaches to data analysis
and modeling with the support of the PCDaS team in
providing databases, infrastructure compatible with
analysis in the Big Data scenario, teams dedicated to projects
funded, and many other useful resources, as can be seen
in Table 3.
seminars and conferences, which caught the attention of
researchers, journalists, and managers [25] [26] [27].
Accessibility to services and the reduction of
maternal and child mortality (ASSMI) Another project
with the support of the PCDaS team and infrastructure
named "Accessibility to services and the reduction of
maternal and child mortality" (ASSMI) focused on
studying the impact of the displacements of many pregnant
women when leaving their homes and cities and
traveling long distances to health establishments that offer
conditions for childbirth and neonatal care. Among the
conclusions, the research indicated that the distances
traveled for pregnant women to carry out deliveries and
neonatal care increased in the period analyzed — between
2006 and 2017 — and that infant deaths can be avoided by
reducing the distances between mothers and the place
of delivery or neonatal care [28]. A detailed summary
of other project’s contributions and support provided by
PCDaS can be found at [29, 30].</p>
      </sec>
      <sec id="sec-2-3">
        <title>Amplia Saúde The “Amplia Saúde” project [31], which</title>
        <p>also received support from PCDaS, offers an
unparalleled alternative for visual analysis of maternal and child
health data during the pre-and perinatal periods. The
project takes into consideration environmental and
climate factors. As part of this initiative, the First Big Data
View Workshop in Maternal and Neonatal Health was
developed during the second call of the Grand Challenges
Explorations – Brazil. The workshop offers interactive
tools for exploring and visually analyzing maternal and
early neonatal health data correlated with data on
environmental problems and extreme weather conditions.</p>
        <p>Vax*Sim and MATRECI Immunizing children under
five is highly effective and affordable in preventing
dangerous and potentially fatal illnesses like polio, measles,
diphtheria, tetanus, and pertussis. Sadly, around 20
million Brazilian children are still not receiving routine
vaccination services, which creates disparities in healthcare
coverage. With this scenario in mind, the project entitled
Brazilian Obstetric Observatory (OOBR) Among "The Role of social media, the Bolsa-Família Program and
the main results achieved by PCDaS partner projects in Primary Health Care in immunization coverage for
chilthe second call of the Grand Challenges Explorations dren under five in Brazil," — also called Vax*Sim, had the
– Brazil: Data Science to improve maternal and child support of PCDaS in relevant stages of the process, such
health, women’s and children’s health in Brazil, it is as monitoring the social network Twitter to analyze what
worth mentioning that the eforts and contributions of was being published and commented by Brazilians on
OOBr (Brazilian Obstetric Observatory) [24], which mon- the topic of vaccination, use of data from the Information
itors and analyzes the area of maternal and child health System on Live Births (SINASC) and the Information
Sysin Brazil. Remarkable advances have been achieved by tem on Mortality (SIM), in addition to sociodemographic
the initiative in maternal, child, and obstetric health in data from municipalities, such as population and basic
a brief timeframe. Many resources have been generated, sanitation coverage [32].
including articles, data visualization panels, indicators, Another project PCDaS supported is Breastfeeding in
and a book. Moreover, they have actively contributed to Brazil in the MATRECI Model: Mapping, Trend,
Clus- Dedicated servers at LNCC and Fiocruz;
- Using the PCDaS API, over 40 tokens were distributed, with over 1.6 million requests;
- The researchers had access to 8 Kibana Workspaces containing over 10 Kibana dashboards;
- Researchers created over 290 Jupyter Notebooks for data analysis and ETL, consisting of over 70
thousand lines of code.</p>
        <p>Communication and Management
- Efficient communication with 5,000 messages exchanged through 14 Slack channels;
- Task management in Workstreams.AI, with 10 Kanban boards and over 200 tasks in motion;
- Operations for GCE were consolidated into one tribe consisting of 7 squads and 17 professionals.</p>
        <p>Monitoring of projects</p>
        <p>Over 100 meetings with partners and 70 internal meetings with dedicated squads were conducted.</p>
        <p>Qualifications and training
- 3 introductory courses to the use of PCDaS, and the R and Python language for data analysis;
- Eight weeks of training, 18 synchronous classes recorded on Youtube;
- 54 entries: 21 for the R language and 33 for Python Courses;
- Five classes and 46 certified researchers;
- One year of access to 368 courses in data science offered through Datacamp.
4.2. PNS
tering, and Impact. This project aims to track the im- As a result of the PNS project, a website was generated
plementation and progress of breastfeeding initiatives in that gathers the previously mentioned information about
primary health care (PHC). The team identified success- the indicators in addition to dashboards with maps and
ful pro-breastfeeding programs and their impact. They figures, making it more accessible for researchers to carry
created a dataset on PHC infrastructure, breastfeeding out specific queries 2.
rates, and programs. They also qualified and verified From its launch on April 1, 2021, to May 23, 2023,
SISVAN breastfeeding information [33]. These last two search and access to the site reached 50k users, and the
projects yielded significant contributions in published ar- engagement rate that measures user interactions on the
ticles with their respective conclusions [34, 35, 36, 37, 38]. site in terms of navigation was 51.5% distributed in Brazil
(94.63%), United States (3.02%) and Portugal (0.41%). Table
4 presents the five most accessed pages.</p>
        <p>The National Health Survey (“Pesquisa Nacional de Table 4
Saúde” - PNS) is a survey of the Brazilian Ministry of The five most accessed pages in the PNS site
Health in partnership with the Brazilian Institute of
Geography and Statistics (IBGE) focusing on the health Page User access
conditions of the Brazilian population, obtained from a PNS (Home) 36.59%
telephone questionnaire with population samples applied Indicators Panel 29.03%
in distinct regions to provide subsidies to the formula- Data Bases 6.82%
tion of public policies. PCDaS teamed up with other RRepqruoegsrtasmbaNsoetebooks 34..9939%%
experienced researchers at Fiocruz to perform complex
sampling calculations of 183 indicators referring to the
surveys conducted in 2013 and 2019, make available the The PNS website has been referenced in articles such
data referring to each stage of processing of these indi- as [39], a study about the elderly, socioeconomic factors
cators, in addition to the metadata, the notebooks con- and functionality, [40] surveillance of chronic diseases,
taining reproducible codes in R language and thematic [41] a study of the relationship between smoking and the
dashboards, all open access. worsening of Covid-19, at the congress as [42]. Other</p>
        <p>These 183 indicators, organized into 15 modules and websites such as [43] are a precious resource for public
evaluated by statistical domains such as gender, region, health managers, thus contributing to the dissemination
age, color or race, household income, and level of educa- of information and support for new scientific
collaboration, seek to estimate eating habits, alcohol consumption, tions.
smoking, the practice of physical activities, chronic
diseases, perception of states of health, older adult health,
women’s health, disability, access and use of health
services and health care in the Brazilian population, among
others.</p>
      </sec>
      <sec id="sec-2-4">
        <title>2The PNS project website is available at https://www.pns.icict.fiocr</title>
        <p>
          uz.br/painel-de-indicadores-mobile-desktop/
4.3. MonitoraCovid-19
second-place award in the Covid-19 Challenges organized
by the National School of Public Administration (ENAP)
[
          <xref ref-type="bibr" rid="ref62">51</xref>
          ].
        </p>
      </sec>
      <sec id="sec-2-5">
        <title>The MonitoraCovid-19 was created within the scope of</title>
        <p>ICICT/Fiocruz. Its main objective was to overcome the
challenge of lacking open and centralized data from the
Federal Government on COVID-19 cases. The project 4.4. SUS Ombudsman
gathered data from different state governments to address In partnership with the “Instituto Aggeu Magalhães –
this issue. Such data were consolidated into a single Fiocruz Pernambuco” and with the Brazilian Health
Mindataset and made available through platforms such as istry Ombudsman, the PCDaS supports a data science
Kaggle and GitHub [44]. project that daily collects information about SUS health</p>
        <p>
          The project was crucial during many moments of the users’ demands and their profiles.
pandemic and served as the primary source of data for This anonymized information is received in an
aumedia outlets such as newspapers [45, 46, 47], news por- tomatic workflow, processed and stored at our
Elastictals [48, 49] and also a Public Civil Action by the Federal Search cluster, and visualized on Kibana dashboards.
CurPublic Ministry [
          <xref ref-type="bibr" rid="ref59">50</xref>
          ]. Throughout this period, the PCDaS rently, there are 4,993,669 user demands and 2,089,537
managed and maintained the infrastructure, ensuring its user profiles.
availability even when faced with thousands of simul- Those interactive data visualization panels are being
taneous accesses following its dissemination in major used by the Health Ministry Ombudsman to monitor the
media channels and social networks. number of requisitions, their topics, and user profile to
        </p>
        <p>Figure 3 depicts the interactive content structure ac- better organize its actions and guide the Health
Mincessible to the public. The left black panel displays var- istry’s actions through its policies.
ious sections, including Incidence, Accumulated Cases,
Growth Factor, Scenarios, Dashboards (Brazil, States, and
municipalities), an "About the Project" section, and the 4.5. GeoAcess
date of the last update. The central portion of the page
showcases the plots corresponding to the user’s selection.</p>
        <p>In the provided image, two graphs from the Brazil agency
are displayed. The upper graph represents new cases per
day, while the lower graph represents new daily deaths.</p>
      </sec>
      <sec id="sec-2-6">
        <title>The GeoAcess project, a partnership between Fiocruz</title>
        <p>and IBGE researchers, studies the geographical access
to health services in Brazil by using a validated
methodology and software (ACCESSMOD 5) developed by the
World Health Organization. This project is funded by a
research grant called “Fiocruz Inova: Geração de
Conhecimento - Novos Talentos” from Fiocruz.</p>
        <p>PCDaS provided its technical expertise to host the
software execution and the necessary databases, which
include massive raster data, helping the researchers to gain
insights from the results with custom data visualizations.
4.6. Academic researches</p>
      </sec>
      <sec id="sec-2-7">
        <title>Among the services provided by PCDaS, the platform</title>
        <p>also supports academic master’s and doctoral projects.
Building a network of partnerships, PCDaS provides
infrastructure and assistance for academic research and
provides a plan for academic users, whose services are
described in Table 2.</p>
        <p>Among the master and doctoral projects, to date,
PCDaS has a total of 7 supported projects, which have
generated dissertations and articles in the area of health
[52, 53].</p>
        <p>With graduate students from different educational
institutions, such as the National School of Public Health
Sérgio Arouca (ENSP), Federal Centers for Technological
Education of Rio de Janeiro (CEFET/RJ), Graduate
Program in Information and Communication in Health
(PPGICS – ICICT/Fiocruz), Federal University of Rio de Janeiro
(UFRJ) and National Laboratory of Scientific Computing</p>
      </sec>
      <sec id="sec-2-8">
        <p>The project’s impressive achievements throughout its
trajectory garnered significant attention and recognition.
It recorded over 300,000 accesses and nearly 800,000
sessions, highlighting its widespread usage and impact. As a
testament to its success, the project was honored with the</p>
      </sec>
    </sec>
    <sec id="sec-3">
      <title>5. Conclusions</title>
      <p>the SUS by aiding, with data and technological tools, the
implementation and monitoring of health policies for the
Brazilian population. To ensure the success of PCDaS, we
have planned to (i) expand our institutional partnerships,
(ii) provide ongoing training for health data scientists,
(iii) make new datasets available, and (iv) continuously
improve the software ecosystem that supports the
Platform.</p>
    </sec>
    <sec id="sec-4">
      <title>Acknowledgments</title>
      <sec id="sec-4-1">
        <p>This work was supported by the Institute for Scientific
and Technological Communication and Information on
Health (Icict/Fiocruz) in partnership with the National
Laboratory of Scientific Computing (LNCC). Research
Fund of the Rio de Janeiro State Research Foundation
(FAPERJ), National Council for Scientific and
Technological Development (CNPq), Bill and Melinda Gates
Foundation, INOVA Fiocruz Program, Higher Education
Personnel (CAPES). Various PCDaS partners provided additional
funding. We also thank the following contributors: Artur Ziviani (in
memoriam), João Vitor Maués Dias Carneiro, Jorge
Nundes, Leandro Zirondi, Marcelo Rabaço, Marcus Vinicius
Carneiro Magalhães, Matheus Ferreira. Special thanks
to Igor da Silva Morais, who materialized the ideas and
aspirations of the PCDaS, "live long and prosper"!</p>
      </sec>
    </sec>
  </body>
  <back>
    <ref-list>
      <ref id="ref1">
        <mixed-citation>
          <article-title>forma-publica-e-gratuita-de-dados-da-saude-gan [33] GCE, Breastfeeding in brazil in the matreci model:</article-title>
        </mixed-citation>
      </ref>
      <ref id="ref2">
        <mixed-citation>
          <source>ha-versao-20</source>
          . Mapping, trend, clustering and impact,
          <year>2021</year>
          . URL: [20]
          <string-name>
            <surname>M. D. Wilkinson</surname>
            ,
            <given-names>M.</given-names>
          </string-name>
          <string-name>
            <surname>Dumontier</surname>
            ,
            <given-names>I. J.</given-names>
          </string-name>
          <string-name>
            <surname>Aalbersberg</surname>
          </string-name>
          , https://gcgh.grandchallenges.org/grant/breastfee
        </mixed-citation>
      </ref>
      <ref id="ref3">
        <mixed-citation>
          <article-title>The fair guiding principles for scientific data man-</article-title>
          [34]
          <string-name>
            <surname>P. de Moraes Mello Boccolini</surname>
          </string-name>
          , L. Baroni,
        </mixed-citation>
      </ref>
      <ref id="ref4">
        <mixed-citation>
          <article-title>agement and stewardship</article-title>
          ,
          <source>Scientific data 3</source>
          (
          <string-name>
            <surname>2016) L. de Almeida</surname>
          </string-name>
          Relvas-Brandt, R. de Abreu
        </mixed-citation>
      </ref>
      <ref id="ref5">
        <mixed-citation>
          1-
          <fpage>9</fpage>
          .
          <string-name>
            <given-names>Junqueira</given-names>
            <surname>Gritz</surname>
          </string-name>
          , et al.,
          <article-title>Brazilian spatial</article-title>
          , demo[21]
          <string-name>
            <given-names>J.</given-names>
            <surname>Wise</surname>
          </string-name>
          ,
          <string-name>
            <surname>A. G. de Barron</surname>
            ,
            <given-names>A.</given-names>
          </string-name>
          <string-name>
            <surname>Splendiani</surname>
            ,
            <given-names>B.</given-names>
          </string-name>
          <article-title>Balali- graphic, and socioeconomic data from 1996 to</article-title>
        </mixed-citation>
      </ref>
      <ref id="ref6">
        <mixed-citation>
          <string-name>
            <surname>Mood</surname>
            ,
            <given-names>D.</given-names>
          </string-name>
          <string-name>
            <surname>Vasant</surname>
            , E. Little, G. Mellino,
            <given-names>I. Harrow</given-names>
          </string-name>
          ,
          <year>2020</year>
          .,
          <source>BMC Research Notes</source>
          <volume>15</volume>
          (
          <year>2022</year>
          )
          <fpage>159</fpage>
          -
          <lpage>159</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref7">
        <mixed-citation>
          <string-name>
            <given-names>I.</given-names>
            <surname>Smith</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J.</given-names>
            <surname>Taubert</surname>
          </string-name>
          , et al.,
          <source>Implementation</source>
          and rele- [35]
          <string-name>
            <given-names>R. F. S.</given-names>
            <surname>Alves</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C. S.</given-names>
            <surname>Boccolini</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L. R.</given-names>
            <surname>Baroni</surname>
          </string-name>
          , P. d. M. M.
        </mixed-citation>
      </ref>
      <ref id="ref8">
        <mixed-citation>
          r&amp;d, Drug discovery today
          <volume>24</volume>
          (
          <year>2019</year>
          )
          <fpage>933</fpage>
          -
          <lpage>938</lpage>
          .
          <article-title>a dataset from 1998 to 2020</article-title>
          , BMC Research Notes [22]
          <string-name>
            <given-names>G. C.</given-names>
            <surname>Brazil</surname>
          </string-name>
          , Quem somos, ????. URL: https://www. 16 (
          <year>2023</year>
          )
          <fpage>63</fpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref9">
        <mixed-citation>
          grandchallengesbrazil.org/quem-somos/. [36]
          <string-name>
            <given-names>P.</given-names>
            <surname>d. M. M. Boccolini</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C. S.</given-names>
            <surname>Boccolini</surname>
          </string-name>
          , L. de Almeida [23]
          <string-name>
            <given-names>G. C.</given-names>
            <surname>Brazil</surname>
          </string-name>
          , Ciência de dados - chamadas, ????. URL:
          <string-name>
            <surname>Relvas-Brandt</surname>
            ,
            <given-names>R. F. S.</given-names>
          </string-name>
          <string-name>
            <surname>Alves</surname>
          </string-name>
          , Dataset on child vac-
        </mixed-citation>
      </ref>
      <ref id="ref10">
        <mixed-citation>https://www.grandchallengesbrazil.org/chamada/ cination in brazil from 1996 to 2021, Scientific Data</mixed-citation>
      </ref>
      <ref id="ref11">
        <mixed-citation>
          <string-name>
            <surname>ciencia-</surname>
          </string-name>
          de-dados/.
          <volume>10</volume>
          (
          <year>2023</year>
          )
          <fpage>23</fpage>
          . [24]
          <string-name>
            <surname>PCDaS</surname>
          </string-name>
          , Observatório obstétrico brasileiro,
          <year>2022</year>
          . [37]
          <string-name>
            <given-names>C. L.</given-names>
            <surname>Szwarcwald</surname>
          </string-name>
          , C. S. e. a. Boccolini, Covid-
          <fpage>19</fpage>
        </mixed-citation>
      </ref>
      <ref id="ref12">
        <mixed-citation>
          URL: https://pcdas.icict.fiocruz.br/rede-de-par mortality in brazil, 2020-
          <fpage>21</fpage>
          : consequences of the
        </mixed-citation>
      </ref>
      <ref id="ref13">
        <mixed-citation>
          <article-title>cerias/observatorio-obstetrico-brasileiro/. pandemic inadequate management</article-title>
          ,
          <source>Archives of [25] Fiocruz, Conheça o observatório obstétrico Public Health</source>
          <volume>80</volume>
          (
          <year>2022</year>
          )
          <fpage>255</fpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref14">
        <mixed-citation>
          <string-name>
            <surname>brasileiro</surname>
          </string-name>
          covid-19
          <source>(oobr covid-19)</source>
          ,
          <year>2021</year>
          . URL: [38]
          <string-name>
            <given-names>G. R. e. a. Salles R</given-names>
            ,
            <surname>Ribeiro</surname>
          </string-name>
          <string-name>
            <surname>VPD</surname>
          </string-name>
          , A comprehen-
        </mixed-citation>
      </ref>
      <ref id="ref15">
        <mixed-citation>
          <article-title>o-mulher/conheca-o-observatorio-obstetrico-bra from 2005 to 2021</article-title>
          ., PREPRINT available at Research
        </mixed-citation>
      </ref>
      <ref id="ref16">
        <mixed-citation>
          <string-name>
            <surname>sileiro-covid-</surname>
          </string-name>
          19
          <string-name>
            <surname>-</surname>
          </string-name>
          oobr-covid-
          <volume>19</volume>
          /. Square (
          <year>2022</year>
          ) (????). doi:https://doi.org/10 [26]
          <string-name>
            <surname>OOBr</surname>
          </string-name>
          , Oobr srag,
          <year>2021</year>
          . URL: https://observatorio .21203/rs.3.rs-
          <volume>2358225</volume>
          /v1.
        </mixed-citation>
      </ref>
      <ref id="ref17">
        <mixed-citation>
          obstetrico.shinyapps.io/covid_gesta_puerp_br/. [39]
          <string-name>
            <given-names>R. J.P.</given-names>
            , G. Oliveira, N. J.
            <surname>C.D.</surname>
          </string-name>
          ,
          <string-name>
            <surname>B. A.M.G.</surname>
          </string-name>
          ,
          <string-name>
            <surname>B. Â.J.G</surname>
          </string-name>
          , [
          <volume>27</volume>
          ]
          <string-name>
            <surname>PCDaS</surname>
          </string-name>
          ,
          <article-title>Com o apoio da pcdas, disponibilização de Impact of clinical and socio-economic factors and</article-title>
        </mixed-citation>
      </ref>
      <ref id="ref18">
        <mixed-citation>
          mães brasileiras são alvo de pesquisa,
          <year>2023</year>
          . URL: elderly,
          <source>Geriatr Gerontol Aging</source>
          <volume>11</volume>
          (
          <year>2017</year>
          )
          <fpage>124</fpage>
          -
          <lpage>132</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref19">
        <mixed-citation>https://pcdas.icict.fiocruz.br/publicacoes/com-o-a doi:https://doi.org/10.5327/Z2447-211</mixed-citation>
      </ref>
      <ref id="ref20">
        <mixed-citation>
          <article-title>poio-da-pcdas-disponibilizacao-de-dados-e-anali 520171700051</article-title>
          .
        </mixed-citation>
      </ref>
      <ref id="ref21">
        <mixed-citation>
          <article-title>ses-sobre-mortalidade-e-morbidade-das-</article-title>
          <string-name>
            <surname>maes-</surname>
          </string-name>
          b [40]
          <string-name>
            <surname>M. D.C.</surname>
            ,
            <given-names>S. A. da.</given-names>
          </string-name>
          , G. C.S.,
          <string-name>
            <surname>S. S.R.</surname>
          </string-name>
          ,
          <string-name>
            <surname>O. M.</surname>
          </string-name>
          <year>de</year>
          .,
          <string-name>
            <surname>S. L.M.V.</surname>
          </string-name>
          ,
        </mixed-citation>
      </ref>
      <ref id="ref22">
        <mixed-citation>
          <article-title>rasileiras-sao-</article-title>
          <string-name>
            <surname>alvo-</surname>
            de-pesquisa/. C. R.B.,
            <given-names>P. C.A.</given-names>
          </string-name>
          ,
          <string-name>
            <surname>R.-N. E.L.G.</surname>
          </string-name>
          ,
          <article-title>Monitoring the [28] PCDaS, Pesquisa analisa o impacto das distâncias goals of the plans for coping with chronic non-</article-title>
        </mixed-citation>
      </ref>
      <ref id="ref23">
        <mixed-citation>
          <source>do parto</source>
          ,
          <year>2023</year>
          . URL: https://pcdas.icict.fiocruz.br/ health survey, brazil,
          <year>2013</year>
          and 2019, Epidemiolo-
        </mixed-citation>
      </ref>
      <ref id="ref24">
        <mixed-citation>
          <article-title>publicacoes/pesquisa-analisa-o-impacto-das-dis gia e</article-title>
          <source>Serviços de Saúde</source>
          <volume>31</volume>
          (
          <year>spe1</year>
          ) (
          <year>2022</year>
          )
          <article-title>e2021364</article-title>
          .
        </mixed-citation>
      </ref>
      <ref id="ref25">
        <mixed-citation>
          <article-title>tancias-percorridas-por-gestantes-brasileiras-par doi</article-title>
          :https://doi.org/10.1590/SS2237-962
        </mixed-citation>
      </ref>
      <ref id="ref26">
        <mixed-citation>
          <article-title>a-realizacao-do-parto/</article-title>
          . 2202200008.especial. [29]
          <string-name>
            <surname>PCDaS</surname>
          </string-name>
          , Comunidade de analistas de dados utiliza [41]
          <string-name>
            <surname>C. M. Peixer</surname>
            ,
            <given-names>T. R.</given-names>
          </string-name>
          <string-name>
            <surname>Camargo</surname>
            ,
            <given-names>L. L. L.</given-names>
          </string-name>
          <string-name>
            <surname>Silva</surname>
            ,
            <given-names>L. A.</given-names>
          </string-name>
        </mixed-citation>
      </ref>
      <ref id="ref27">
        <mixed-citation>
          <article-title>a pcdas em oficina presencial</article-title>
          ,
          <year>2023</year>
          . URL: https: Colnago,
          <string-name>
            <given-names>L. L.</given-names>
            <surname>Ferronatto</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G. M.</given-names>
            <surname>Lindenberg</surname>
          </string-name>
          , O
        </mixed-citation>
      </ref>
      <ref id="ref28">
        <mixed-citation>
          //pcdas.icict.fiocruz.br/publicacoes/comunidade uso de tabaco e
          <article-title>o desenvolvimento do covid-19 em</article-title>
        </mixed-citation>
      </ref>
      <ref id="ref29">
        <mixed-citation>
          <string-name>
            <surname>-de-</surname>
          </string-name>
          analistas-de
          <article-title>-dados-utiliza-a-pcdas-em-oficin adultos de 18 a 59 anos</article-title>
          , uma breve revisão de litera-
        </mixed-citation>
      </ref>
      <ref id="ref30">
        <mixed-citation>
          <article-title>a-presencial/. ture / tobacco use and the development</article-title>
          of covid-
          <volume>19</volume>
          [30]
          <string-name>
            <surname>PCDaS</surname>
          </string-name>
          ,
          <article-title>Pcdas encerra segundo ciclo de apoio ao in adults aged 18 to 59 years, a brief literature re-</article-title>
        </mixed-citation>
      </ref>
      <ref id="ref31">
        <mixed-citation>
          <article-title>gce/gates com avanços e conquistas, 2023. URL: ht view</article-title>
          .,
          <source>Brazilian Journal of Development</source>
          <volume>8</volume>
          (
          <issue>3</issue>
          ) (
          <year>2022</year>
          )
        </mixed-citation>
      </ref>
      <ref id="ref32">
        <mixed-citation>
          tps://pcdas.icict.fiocruz.br/publicacoes/pcdas-enc 19226-
          <fpage>19246</fpage>
          . doi:https://doi.org/10.34117
        </mixed-citation>
      </ref>
      <ref id="ref33">
        <mixed-citation>
          <article-title>erra-segundo-ciclo-de-apoio-ao-gce-gates-com-a /bjdv8n3-253.</article-title>
        </mixed-citation>
      </ref>
      <ref id="ref34">
        <mixed-citation>
          <string-name>
            <surname>vancos-</surname>
            e-conquistas/. [42]
            <given-names>A. MELO MENDONÇA</given-names>
          </string-name>
          , M.;
          <string-name>
            <surname>SIQUEIRA</surname>
            <given-names>ROCHA</given-names>
          </string-name>
          , [
          <volume>31</volume>
          ]
          <string-name>
            <surname>AmpliaSaude</surname>
          </string-name>
          , Amplia saude site,
          <year>2021</year>
          . URL: https: Expectativa de vida com e sem doença crônica de
        </mixed-citation>
      </ref>
      <ref id="ref35">
        <mixed-citation>
          //ampliasaude.org/en/. coluna no brasil:
          <article-title>Estudo comparativo a partir da [32] PCDaS</article-title>
          , Vax*sim,
          <year>2021</year>
          . URL: https://pcdas.icict.fio pesquisa nacional de saúde, nos anos de
          <year>2013</year>
          a
          <year>2019</year>
          .,
        </mixed-citation>
      </ref>
      <ref id="ref36">
        <mixed-citation>
          cruz.br/rede-de-parcerias/covac/. Refas - Revista
          <source>Fatec Zona Sul</source>
          <volume>9</volume>
          , n.
          <volume>2</volume>
          (
          <year>2022</year>
          )
          <fpage>49</fpage>
          -
          <lpage>62</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref37">
        <mixed-citation>doi:10.26853/Refas_ISSN-2359-182X_</mixed-citation>
      </ref>
      <ref id="ref38">
        <mixed-citation>
          v09n02_
          <fpage>08</fpage>
          . [43]
          <string-name>
            <given-names>A.</given-names>
            <surname>Ogata</surname>
          </string-name>
          , Rh pra você,
          <year>2022</year>
          . URL: https://rhpravoc
        </mixed-citation>
      </ref>
      <ref id="ref39">
        <mixed-citation>
          <article-title>a-os-gestores-em-saude-corporativa/</article-title>
          . [44] R. d. Saldanha,
          <string-name>
            <given-names>D. R.</given-names>
            <surname>Xavier</surname>
          </string-name>
          , M. d. Magalhães,
          <string-name>
            <surname>P. R.</surname>
          </string-name>
        </mixed-citation>
      </ref>
      <ref id="ref40">
        <mixed-citation>
          <string-name>
            <given-names>Souza</given-names>
            <surname>Junior</surname>
          </string-name>
          , M. d. Pedroso, C. Barcellos,
          <volume>14</volume>
        </mixed-citation>
      </ref>
      <ref id="ref41">
        <mixed-citation>
          <string-name>
            <surname>-</surname>
          </string-name>
          monitoracovid-19: Informação e disseminação
        </mixed-citation>
      </ref>
      <ref id="ref42">
        <mixed-citation>
          <article-title>Covid-19 no Brasil: cenários epidemiológicos e vig-</article-title>
        </mixed-citation>
      </ref>
      <ref id="ref43">
        <mixed-citation>
          <article-title>ilância em saúde (</article-title>
          <year>2021</year>
          )
          <fpage>229</fpage>
          -
          <lpage>249</lpage>
          . doi:
          <volume>10</volume>
          .7476/97
        </mixed-citation>
      </ref>
      <ref id="ref44">
        <mixed-citation>
          86557081211.
          <fpage>0015</fpage>
          . [45]
          <string-name>
            <given-names>F.</given-names>
            <surname>Grandin</surname>
          </string-name>
          , Ferramentas criadas por pesquisadores
        </mixed-citation>
      </ref>
      <ref id="ref45">
        <mixed-citation>
          <source>avírus no brasil</source>
          ,
          <year>2020</year>
          . URL: https://g1.globo.com/b
        </mixed-citation>
      </ref>
      <ref id="ref46">
        <mixed-citation>emestar/coronavirus/noticia/2020/04/01/ferrame</mixed-citation>
      </ref>
      <ref id="ref47">
        <mixed-citation>
          rasil.ghtml. [46]
          <string-name>
            <surname>CONASS</surname>
          </string-name>
          , Monitoracovid-
          <volume>19</volume>
          : ferramenta online per-
        </mixed-citation>
      </ref>
      <ref id="ref48">
        <mixed-citation>
          <string-name>
            <surname>dia</surname>
          </string-name>
          ,
          <year>2020</year>
          . URL: https://www.conass.org.br/monito
        </mixed-citation>
      </ref>
      <ref id="ref49">
        <mixed-citation>
          <string-name>
            <surname>racovid-</surname>
          </string-name>
          19
          <article-title>-ferramenta-online-permite-monitorar</article-title>
        </mixed-citation>
      </ref>
      <ref id="ref50">
        <mixed-citation>
          <article-title>-avanco-da-epidemia-no-brasil-dia-a-dia/</article-title>
          . [47]
          <string-name>
            <given-names>A. L.</given-names>
            <surname>Azevedo</surname>
          </string-name>
          ,
          <article-title>Sistema que monitora covid-19</article-title>
        </mixed-citation>
      </ref>
      <ref id="ref51">
        <mixed-citation>
          <source>em xeque redução do isolamento</source>
          ,
          <year>2020</year>
          . URL: https:
        </mixed-citation>
      </ref>
      <ref id="ref52">
        <mixed-citation>
          <article-title>ue-monitora-covid-19-mostra-avanco-do-coron</article-title>
        </mixed-citation>
      </ref>
      <ref id="ref53">
        <mixed-citation>
          <fpage>lamento</fpage>
          -
          <lpage>24359218</lpage>
          . [48]
          <string-name>
            <given-names>A.</given-names>
            <surname>Bezerra</surname>
          </string-name>
          , Monitoracovid-19 avalia desigualdades
        </mixed-citation>
      </ref>
      <ref id="ref54">
        <mixed-citation>
          no processo de vacinação,
          <year>2021</year>
          . URL: https://agen
        </mixed-citation>
      </ref>
      <ref id="ref55">
        <mixed-citation>
          cia.fiocruz.br/monitoracovid-19
          <string-name>
            <surname>-</surname>
          </string-name>
          avalia-desiguald
        </mixed-citation>
      </ref>
      <ref id="ref56">
        <mixed-citation>
          <article-title>ades-no-</article-title>
          <string-name>
            <surname>processo-</surname>
          </string-name>
          de-vacinacao. [49]
          <string-name>
            <surname>Plox</surname>
          </string-name>
          ,
          <article-title>Covid-19 ganha força com a vacinação estag-</article-title>
        </mixed-citation>
      </ref>
      <ref id="ref57">
        <mixed-citation>
          <string-name>
            <surname>nada</surname>
          </string-name>
          ,
          <year>2022</year>
          . URL: https://plox.com.br/noticia/01/07/
        </mixed-citation>
      </ref>
      <ref id="ref58">
        <mixed-citation>
          2022/covid-19
          <string-name>
            <surname>-</surname>
          </string-name>
          ganha
          <article-title>-forca-com-a-vacinacao-est</article-title>
        </mixed-citation>
      </ref>
      <ref id="ref59">
        <mixed-citation>agnada. [50] MPF, Ação civil pública com pedido de tutela pro-</mixed-citation>
      </ref>
      <ref id="ref60">
        <mixed-citation>
          visória de urgência,
          <year>2021</year>
          . URL: https://www.mpf.
        </mixed-citation>
      </ref>
      <ref id="ref61">
        <mixed-citation>mp.br/df/sala-de-imprensa/docs/copy2_of_ACP_</mixed-citation>
      </ref>
      <ref id="ref62">
        <mixed-citation>
          DF.pdf. [51]
          <string-name>
            <surname>ENAP</surname>
          </string-name>
          ,
          <article-title>Saíram os vencedores do desafios covid-19,</article-title>
        </mixed-citation>
      </ref>
      <ref id="ref63">
        <mixed-citation>2020. URL: https://www.enap.gov.br/pt/acontece/</mixed-citation>
      </ref>
      <ref id="ref64">
        <mixed-citation>
          -
          <fpage>19</fpage>
          . [52]
          <string-name>
            <surname>Tarini de Souza Faria</surname>
          </string-name>
          ,
          <source>Painel unificador covid-19</source>
        </mixed-citation>
      </ref>
      <ref id="ref65">
        <mixed-citation>
          do rio de janeiro,
          <year>2022</year>
          . URL: https://pcdas.icict.fio
        </mixed-citation>
      </ref>
      <ref id="ref66">
        <mixed-citation>cruz.br/wp-content/uploads/2022/09/Projeto-d</mixed-citation>
      </ref>
      <ref id="ref67">
        <mixed-citation>
          <article-title>e-mestrado-</article-title>
          <string-name>
            <surname>Tarini-de-</surname>
            Souza-Faria.pdf. [53]
            <given-names>L.</given-names>
          </string-name>
          <string-name>
            <surname>Baroni</surname>
            ,
            <given-names>B.</given-names>
          </string-name>
          <string-name>
            <surname>Paixão</surname>
            ,
            <given-names>A.</given-names>
          </string-name>
          <string-name>
            <surname>Chrispino</surname>
          </string-name>
          , G. Guedes, C. Bar-
        </mixed-citation>
      </ref>
      <ref id="ref68">
        <mixed-citation>
          <string-name>
            <surname>SBC</surname>
          </string-name>
          ,
          <string-name>
            <surname>Porto</surname>
            <given-names>Alegre</given-names>
          </string-name>
          ,
          <string-name>
            <surname>RS</surname>
          </string-name>
          , Brasil,
          <year>2019</year>
          . URL: https://so
        </mixed-citation>
      </ref>
      <ref id="ref69">
        <mixed-citation>l.sbc.org.br/index.php/bresci/article/view/10025.</mixed-citation>
      </ref>
      <ref id="ref70">
        <mixed-citation>
          <source>doi:10</source>
          .5753/bresci.
          <year>2019</year>
          .
          <volume>10025</volume>
          . [54]
          <string-name>
            <given-names>Letícia</given-names>
            <surname>Ange</surname>
          </string-name>
          <string-name>
            <surname>Pozza</surname>
          </string-name>
          , Projeto basis: Estudo de fer-
        </mixed-citation>
      </ref>
      <ref id="ref71">
        <mixed-citation>
          <article-title>experimentação e entendimento dos dados</article-title>
          ,
          <year>2022</year>
          .
        </mixed-citation>
      </ref>
    </ref-list>
  </back>
</article>