<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.0 20120330//EN" "JATS-archivearticle1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta />
    <article-meta>
      <title-group>
        <article-title>Overview of iDPP@CLEF 2024: The Intelligent Disease Progression Prediction Challenge</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author">
          <string-name>Giovanni Birolo</string-name>
          <xref ref-type="aff" rid="aff5">5</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Pietro Bosoni</string-name>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Guglielmo Faggioli</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Helena Aidos</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Roberto Bergamaschi</string-name>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Paola Cavalla</string-name>
          <xref ref-type="aff" rid="aff5">5</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Adriano Chiò</string-name>
          <xref ref-type="aff" rid="aff5">5</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Arianna Dagliati</string-name>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Mamede de Carvalho</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Giorgio Maria Di Nunzio</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Piero Fariselli</string-name>
          <xref ref-type="aff" rid="aff5">5</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Jose Manuel García Dominguez</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Marta Gromicho</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Alessandro Guazzo</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Enrico Longato</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Sara C. Madeira</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Umberto Manera</string-name>
          <xref ref-type="aff" rid="aff5">5</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Stefano Marchesin</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Laura Menotti</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Gianmaria Silvello</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Eleonora Tavazzi</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Erica Tavazzi</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Isotta Trescato</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Martina Vettoretti</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Barbara Di Camillo</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Nicola Ferro</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <aff id="aff0">
          <label>0</label>
          <institution>Gregorio Marañon Hospital in Madrid</institution>
          ,
          <country country="ES">Spain</country>
        </aff>
        <aff id="aff1">
          <label>1</label>
          <institution>IRCCS Foundation C. Mondino in Pavia</institution>
          ,
          <country country="IT">Italy</country>
        </aff>
        <aff id="aff2">
          <label>2</label>
          <institution>University of Lisbon</institution>
          ,
          <addr-line>Lisbon</addr-line>
          ,
          <country country="PT">Portugal</country>
        </aff>
        <aff id="aff3">
          <label>3</label>
          <institution>University of Padua</institution>
          ,
          <country country="IT">Italy</country>
        </aff>
        <aff id="aff4">
          <label>4</label>
          <institution>University of Pavia</institution>
          ,
          <country country="IT">Italy</country>
        </aff>
        <aff id="aff5">
          <label>5</label>
          <institution>University of Turin</institution>
          ,
          <country country="IT">Italy</country>
        </aff>
        <aff id="aff6">
          <label>6</label>
          <institution>Città della Salute e della Scienza</institution>
          ,
          <addr-line>Turin</addr-line>
          ,
          <country country="IT">Italy</country>
        </aff>
      </contrib-group>
      <pub-date>
        <year>2024</year>
      </pub-date>
      <abstract>
        <p>Multiple Sclerosis (MS) and Amyotrophic Lateral Sclerosis (ALS) are neurodegenerative diseases characterized by progressive or fluctuating impairments in motor, sensory, visual, and cognitive functions. Patients with these diseases endure significant physical, psychological, and economic burdens due to hospitalizations and home care while grappling with uncertainty about their conditions. AI tools hold promise for aiding patients and clinicians by identifying the need for intervention and suggesting personalized therapies throughout disease progression. The objective of iDPP@CLEF is to develop AI-based approaches to describe the progression of these diseases. The ultimate goal is to enable patient stratification and predict disease progression, thereby assisting clinicians in providing timely care. iDPP@CLEF 2024 continues the work of the previous editions, iDPP@CLEF 2022 and 2023. The 2022 edition focused on predicting ALS progression and utilizing explainable AI. The 2023 edition expanded on this by including environmental data and introduced a new task for predicting MS progression. This edition extends the MS dataset with environmental data and introduces two new ALS tasks aimed at predicting disease progression using data from wearable devices. This marks the first iDPP edition to utilize prospective data directly collected from patients involved in the BRAINTEASER project.</p>
      </abstract>
    </article-meta>
  </front>
  <body>
    <sec id="sec-1">
      <title>1. Introduction</title>
      <p>Amyotrophic Lateral Sclerosis (ALS) and Multiple Sclerosis (MS) are two severe and impactful diseases
that cause progressive neurological impairment in individuals living with them. The progression of
these diseases is typically heterogeneous, resulting in significant variability in aspects such as treatment,
outcomes, quality of life, and overall patient needs. This variability presents challenges not only for
patients but also for clinicians and caregivers.</p>
      <p>For example, patients with ALS often need specific treatments like Non-Invasive Ventilation (NIV) or
Percutaneous Endoscopic Gastrostomy (PEG) at certain stages of their disease progression. Similarly, MS
patients may experience debilitating relapses that severely impact their quality of life. Therefore, it
would be highly beneficial to anticipate the needs of individuals affected by these diseases to provide
them with the most timely and effective care. However, the heterogeneous nature of these conditions
makes it challenging to develop effective prognostic tools that work the same and are effective for every
patient.</p>
      <p>This underscores the importance of creating automatic tools to assist clinicians in decision-making
throughout disease progression, facilitating personalized therapeutic choices. In particular, developing
new automatic predictive approaches based on AI requires a proper framework for designing and
evaluating different tasks, such as:
• Stratifying patients according to their phenotype throughout disease evolution.
• Predicting disease progression in a probabilistic, time-dependent manner.
• Providing a better and more explainable understanding of the mechanisms underlying MS and
ALS.</p>
      <p>A key aspect is that these approaches should rely on shared resources that enable proper benchmarking
and comparable, reproducible experimentation. In fact, only by properly measuring and comparing the
effectiveness of the various developed tools can we understand how to improve them. The Intelligent
Disease Progression Prediction at CLEF (iDPP@CLEF) Lab aims to provide an evaluation infrastructure for
developing such AI algorithms. iDPP proposes to go beyond the current state of the art by systematically
addressing issues related to applying AI in clinical practice for ALS and MS. In addition to defining risk
scores based on the probability of short- or long-term events, iDPP@CLEF also focuses on providing
clinicians with structured and understandable data.</p>
      <p>iDPP@CLEF 2024 is the final iteration of an evaluation cycle begun in 2022, comprising three
challenges aimed at fostering reproducible and comparable evaluation of AI-based approaches for
predicting the progression of ALS and MS. The first edition, iDPP@CLEF 2022, focused exclusively
on ALS, challenging participants to predict the probability that patients would need specific medical
treatments based on their medical history. The second edition, iDPP@CLEF 2023, not only built upon
iDPP@CLEF 2022 by extending its dataset with environmental data to determine the impact of the
environment on patient needs, but it also introduced a new task to predict the risk for patients living
with MS to undergo deterioration.</p>
      <p>This final edition, iDPP@CLEF 2024, further extends the 2023 dataset by including environmental
data for MS patients to measure the impact of pollution and external environmental factors on MS
progression. Additionally, two new tasks have been introduced: predicting the progression of ALS,
measured by the ALSFRS-R scale, based on the patient’s clinical history and data obtained from wearable
devices and sensors.</p>
      <p>The paper is organized as follows: Section 2 presents related challenges; Section 3 describes its tasks;
Section 4 discusses the developed dataset; Section 5 explains the setup of the Lab and introduces the
participants; Section 6 introduces the evaluation measures adopted to score the runs; Section 7 analyzes
the experimental results for the different tasks; finally, Section 8 draws some conclusions and outlines
some future work.</p>
      <p>
        This is an extended version of the condensed overview for the iDPP@CLEF 2024 Lab [
        <xref ref-type="bibr" rid="ref1">1</xref>
        ].
      </p>
    </sec>
    <sec id="sec-2">
      <title>2. Related Challenges</title>
      <p>There have been no other Labs on this or similar topics within CLEF before the start of iDPP@CLEF.
iDPP@CLEF 2022 and 2023 were the first two iterations of the Lab and the current is the third.</p>
      <p>
        While no major challenges – besides iDPP@CLEF 2023 – regarding MS have been carried out yet, more
interest has been shown toward ALS. In particular, three major challenges were organized on this topic:
the DREAM 7 ALS Prediction challenge<sup>1</sup> in 2012, the DREAM ALS Stratification challenge<sup>2</sup> in 2015,
and a Kaggle challenge<sup>3</sup> in 2021. The DREAM 7 ALS Prediction challenge consisted of using 3 months
of ALS clinical trial information (months 0–3) to predict the future progression of the disease (months
3–12), expressed as the slope of change in ALS Functional Rating Scale Revised (ALSFRS-R) [
        <xref ref-type="bibr" rid="ref2">2</xref>
        ]. Later on,
the DREAM ALS Stratification challenge [
        <xref ref-type="bibr" rid="ref3">3</xref>
        ] required participants to stratify ALS into subgroups based
on their characteristics, to understand patient profiles better and provide personalized ALS treatments.
Finally, the Kaggle challenge employed clinical and genomic data to obtain a better understanding
of the mechanisms underlying ALS and determine why some people with ALS tend to have a faster
progression of the disease compared to others.
      </p>
      <p>
        At the current time, most of the datasets used to evaluate AI algorithms for MS are based on closed
and proprietary datasets. In this sense, iDPP@CLEF paved the way for a reproducible and effectively
open science in the research domain of the AI used for predicting the progression of MS.
2.1. iDPP@CLEF 2022
iDPP@CLEF 2022<sup>4</sup> [
        <xref ref-type="bibr" rid="ref4 ref5">4, 5</xref>
        ] was the first edition of the Lab and concerned exclusively the ALS disease
progression prediction. Being the pilot Lab, a large share of effort was devoted to understanding the
challenges and limitations linked to the shared evaluation campaigns, when it comes to AI applied in
the medical domain. iDPP@CLEF 2022 was organized into 3 tasks:
• Pilot Task 1 - Ranking Risk of Impairment: The focus of the first task of iDPP@CLEF
2022 was on ranking patients based on the risk of impairment, defined as the need for specific
medical treatments, such as NIV, PEG, or death. Participants were given information on the
motor functioning of the patients, measured according to the ALSFRS-R scale [
        <xref ref-type="bibr" rid="ref2">2</xref>
        ], in time and
were asked to rank patients based on the time-to-event risk of experiencing impairment in each
specific domain.
• Pilot Task 2 - Predicting Time of Impairment: it refined Task 1 by asking participants to
predict when specific impairments will occur (i.e. in the correct time window). In this regard, the
task focused on assessing model calibration in terms of the ability of the proposed algorithms to
estimate the probability of an event close to the true probability within a specified time window.
• Position Paper Task 3 - Explainability of Artificial Intelligence (AI) algorithms: The task
focused on the evaluation and discussion of AI-based explainable frameworks for intelligent
disease progression prediction able to explain the multivariate nature of the data and the model
predictions.
      </p>
      <p>
        One of the major outputs of iDPP@CLEF 2022 was the 3 datasets released. In particular, the datasets
contain data for the prediction of specific events related to ALS. Such datasets are fully anonymized
retrospective details about 2250 real patients. The patients were recruited from two medical institutions
in Turin, Italy, and Lisbon, Portugal. The datasets contain static data about patients (e.g. age, onset
date, gender) and event data (i.e. 18,512 ALSFRS-R questionnaires and 4,015 spirometries). 6 groups
participated in iDPP@CLEF 2022 and submitted a total of 120 runs.
2.2. iDPP@CLEF 2023
Similarly to iDPP@CLEF 2022, iDPP@CLEF 2023<sup>5</sup> [
        <xref ref-type="bibr" rid="ref6 ref7">6, 7</xref>
        ] was organized into three tasks, focusing
on either ALS or MS. More in detail, Tasks 1 and 2 of iDPP@CLEF 2023 concerned MS, while Task
3 built upon iDPP@CLEF 2022 and extended the ALS tasks of the previous iteration of the Lab. To
summarize iDPP@CLEF 2023 tasks:
• Task 1: Predicting Risk of Disease Worsening (MS) This task focused on predicting the
probability that, given the history of the patient, they would undergo a worsening, according to
two different definitions of worsening.
4https://brainteaser.health/open-evaluation-challenges/idpp-2022/
5https://brainteaser.dei.unipd.it/challenges/idpp2023/
• Task 2: Predicting Cumulative Probability of Worsening (MS) The second task had a similar
objective to Task 1, with the major difference that, instead of predicting the risk at an absolute
level, participants were required to predict the cumulative probability of worsening over 10 years.
• Task 3: Position Papers on the Impact of Exposition to Pollutants (ALS) The third task
extended the first task of iDPP@CLEF 2022 and concerned the ranking of the patients based on
the risk of impairment. The major difference to iDPP@CLEF 2022 was that participants were
given environmental data to determine if such data was a good predictor of the risk of impairment.
iDPP@CLEF 2023 extended the iDPP@CLEF 2022 datasets with three 2 datasets for MS. In particular,
such datasets contained static data about patients, MS-related details (e.g., the EDSS score, results of
MRIs, evoked potentials measures), and a label indicating if the patient underwent a worsening, based
on the worsening definitions of Task 1 and 2. 10 teams submitted a total of 163 runs at the end of
iDPP@CLEF 2023.
      </p>
    </sec>
    <sec id="sec-3">
      <title>3. Tasks</title>
      <p>In the remainder of this section, we describe each task in more detail.</p>
      <sec id="sec-3-1">
        <title>3.1. Task 1: Predicting ALSFRS-R Score from Sensor Data (ALS)</title>
        <p>Task 1 focuses on predicting the twelve scores of the ALSFRS-R (ALS Functional Rating Scale - Revised),
assigned by medical doctors roughly every three months, from the sensor data collected via the app.
The ALSFRS-R is a somewhat “subjective” evaluation usually performed by a medical doctor and this
task will help in answering a currently open question in the research community, i.e. whether it could
be derived from objective factors.</p>
        <p>Participants were given the ALSFRS-R questionnaire at the first visit with the scores for each
question together with the time (number of days from diagnosis) at which the questionnaire was taken.
Participants were also given the time of the second visit (number of days from diagnosis) together with all
the sensor data up to the time of the second visit.</p>
        <p>Participants had to predict the values of the ALSFRS-R sub-scores at the second visit.</p>
      </sec>
      <sec id="sec-3-2">
        <title>3.2. Task 2: Predicting Patient Self-assessment Score from Sensor Data (ALS)</title>
        <p>The second task concerning ALS focuses on predicting the self-assessment score assigned by patients
from the sensor data collected via the app. Self-assessment scores correspond to each of the ALSFRS-R
scores but, while the latter ones are assigned by medical doctors during visits, these scores are
assigned via auto-evaluation by patients themselves using the provided app.</p>
        <p>If the self-assessment performed by patients, more frequently than the assessment performed by
medical doctors every three months or so, can be reliably predicted by sensor and app data, we can
imagine a proactive application which, monitoring the sensor data, alerts the patient if an assessment is
needed.</p>
        <p>Participants were given the first set of self-assessed scores together with the time (number of days
from diagnosis) at which the questionnaire was taken. Participants were also given the time of the
second auto-evaluation (number of days from diagnosis) together with all the sensor data up to the
time of the second auto-evaluation. Participants had to predict the values of the self-assessed scores at
the second auto-evaluation, happening one or two months after the first one.</p>
      </sec>
      <sec id="sec-3-3">
        <title>3.3. Task 3: Predicting Relapses from EDSS Sub-scores and Environmental Data (MS)</title>
        <p>The third task focuses on predicting a relapse using environmental data and EDSS (Expanded Disability
Status Scale) sub-scores. This task allows us to assess if exposure to different pollutants is a useful
variable in predicting a relapse.</p>
        <p>Participants were asked to predict the week of the first relapse after the baseline considering
environmental data based on a weekly granularity, given the status of the patient at the baseline, which is
the first visit available in the considered time span (after January 1, 2013). For each patient, the date of
the baseline will be week 0 and all the other weeks will be relative to it.</p>
        <p>Participants were given all the environmental data about a patient, i.e. also observations which may
happen after the relapse to be predicted. All the patients are guaranteed to experience, at least, one
relapse after the baseline.</p>
      </sec>
    </sec>
    <sec id="sec-4">
      <title>4. Dataset</title>
      <p>For iDPP@CLEF 2024 we release three datasets: two completely new datasets for ALS and an extension
of the iDPP@CLEF 2023 dataset concerning MS. More in detail, the two new ALS datasets comprise
a common training part with 52 training patients, whose ALSFRS-R scores were both annotated by
the clinicians and self-assessed. Concerning the test sets, 21 and 11 patients were included in them for
Task 1 and Task 2, respectively. Regarding MS, the part of the dataset concerning static variables and
MS-related information is the same as the one used for iDPP@CLEF 2023. The major improvement
regards environmental data that have been added to the dataset.</p>
      <sec id="sec-4-1">
        <title>4.1. Tasks 1 and 2: ALS Dataset with Clinical or self-assessed ALSFRS-R</title>
        <p>The datasets for Task 1 and Task 2 were collected from ALS-diagnosed patients recruited during the
BRAINTEASER project from three centers in Lisbon, Madrid, and Turin. At recruitment, patients were
given a commercial fitness tracker (the Garmin VivoActive 4 smartwatch), and data from its sensors
was collected during a follow-up period with a median duration of 270 days. Patients were encouraged
to wear the watch as much as they were comfortable with, ideally all the time, both while awake and
sleeping. Each day of data for each patient was summarized into a vector of 90 statistics related to
heart rate and beat-to-beat interval, respiration rate, and nocturnal pulse oximetry. Sensor data was not
available every day for each patient.</p>
        <p>During the same period, disease progression was assessed by their clinician using the ALSFRS-R
questionnaire (roughly every three months, following standard clinical practice). Patients also used the
same questionnaire to self-assess their progression through a smartphone app developed specifically by
the BRAINTEASER project. They were prompted for the assessment once per month, though the actual
frequency varied and depended on patient compliance.</p>
        <sec id="sec-4-1-1">
          <title>4.1.1. Creation of the datasets</title>
          <p>Patients with insufficient data were excluded from the challenge dataset. Specifically, this included
those with less than three months of follow-up data, those with more than 50% of sensor data missing,
and those without at least two clinical or self-assessed ALSFRS-R evaluations. After applying these
criteria, a dataset of 83 patients was obtained, with a median of 254 days of sensor data per patient.
These patients and their data were then divided into a training group (common to both Tasks 1 and 2)
and two task-specific testing groups.</p>
        </sec>
        <sec id="sec-4-1-2">
          <title>4.1.2. Split into training and test</title>
          <p>The patients were split into three groups:
training patients with at least two clinical and two self-assessed ALSFRS-R evaluations;
test-ct patients with at least two clinical but without two self-assessed ALSFRS-R evaluations;
test-app patients with at least two self-assessed but without two clinical ALSFRS-R evaluations.
The training set thus included 52 patients with a median of 3.5 clinical and 5 self-assessed ALSFRS-R
evaluations (189 and 301 in total, respectively). The test-ct set (the test set for Task 1) included 21
patients, whose first clinical ALSFRS-R evaluations were included as features and the second evaluations
were the prediction target. The test-app set (the test set for Task 2) included 11 patients and was built
in the same way using the self-assessed ALSFRS-R evaluations. The full available sensor data for all
patients was included in both the training and test datasets, while only the clinical (resp. self-assessed)
ALSFRS-R evaluations were included for Task 1 (resp. Task 2). A comparative description of the datasets
is shown in Table 1.</p>
        </sec>
      </sec>
      <sec id="sec-4-2">
        <title>4.2. Task 3: MS Dataset</title>
        <p>The dataset used for Task 3 in iDPP@CLEF 2024 is structured similarly to those from iDPP@CLEF 2023,
though some features (e.g., evoked potentials, MRIs) were not included, and certain records have been
filtered based on the purpose of the task.</p>
        <sec id="sec-4-2-1">
          <title>4.2.1. Updates over iDPP@CLEF 2023</title>
          <p>In the 2024 dataset, EDSS data before January 1, 2013 (aligned with the start of environmental data
collection) were filtered, and patients without EDSS follow-ups were removed. Additionally, patients
who did not experience a relapse after their first non-filtered EDSS follow-up (i.e., the baseline for each
patient) were excluded.</p>
          <p>The dataset has been expanded to incorporate environmental data, which includes information on
patients’ exposure to various air pollutants identified as significant public health risks in the latest
World Health Organization (WHO) global air quality guidelines [8], such as particulate matter (PM)
– encompassing both PM<sub>2.5</sub> (particles with an aerodynamic diameter of 2.5 micrometers or less) and PM<sub>10</sub>
(particles with an aerodynamic diameter of 10 micrometers or less) – as well as ozone (O<sub>3</sub>), nitrogen
dioxide (NO<sub>2</sub>), sulfur dioxide (SO<sub>2</sub>), carbon monoxide (CO), and several weather factors (including wind
speed, relative humidity, sea level pressure, global radiation, precipitation, and average, minimum, and
maximum temperatures).</p>
          <p>Air pollutant data from public monitoring stations were collected daily from the European Air
Quality Portal using the DiscoMap tool<fn><label>6</label><p><ext-link ext-link-type="uri" xlink:href="https://discomap.eea.europa.eu/Index">https://discomap.eea.europa.eu/Index</ext-link></p></fn>. The geographical coordinates (longitude and latitude) of
each monitoring station were matched to specific postcodes, identifying the nearest station to each
patient’s residence postcode. Weather data were instead gathered daily from the European Climate
Assessment and Dataset station network, which provides access to the E-OBS dataset, a daily gridded
land-only observational dataset over Europe<fn><label>7</label><p><ext-link ext-link-type="uri" xlink:href="https://www.ecad.eu/download/ensembles/download.php">https://www.ecad.eu/download/ensembles/download.php</ext-link></p></fn>. Each grid was matched with the nearest monitoring
station using Euclidean distance based on geographical coordinates. This approach ensured that
air pollution and weather data were aligned with the same spatial and temporal granularity. Daily
environmental measurements were aggregated into weekly averages from each patient’s baseline. As
additional features, the number of days per week spent over the respective WHO recommended air
quality guideline levels for short-term (24 hours) exposure was computed for each air pollutant [8].</p>
          <p>Finally, a subset of 380 MS patients from the Turin and Pavia research centers was selected for Task 3
in iDPP@CLEF 2024, compared to 550 patients for Task 1 and 638 for Task 2 in iDPP@CLEF 2023. The
resulting MS dataset<sup>8</sup> includes static variables with demographic and clinical information, EDSS scores
with corresponding Functional System (FS) sub-scores, environmental measurements, and the outcome
time, representing the week of the first relapse occurrence after the baseline for each patient. EDSS
follow-ups are reported between the baseline and the outcome time, while environmental measurements
span from January 1, 2013, to December 30, 2023. It is important to note that environmental data may
have gaps due to availability. When considering only environmental data preceding the outcome time,
the median number of weeks available for each patient is 59, with an interquartile range of 103.25
weeks. The distributions of air pollutant concentrations (measured in micrograms per cubic meter),
averaged across patients over these weeks, are depicted in the boxplots of Figure 1, where the red stars
indicate the WHO recommended air quality guideline levels for 24-hour exposure [8].</p>
        </sec>
        <sec id="sec-4-2-2">
          <title>4.2.2. Split into training and test</title>
          <p>The dataset was split into a training set (70%) and a test set (30%), with subjects stratified by outcome time
to ensure an even distribution across both sets. The distribution of static data, including demographic
and clinical information, and EDSS were verified to be similar in both training and test sets. Additionally,
since environmental exposure is considered, the distribution of patients from the two clinical centres
and their residence classification (Cities, Rural Areas, and Towns) was checked to be balanced.
8https://brainteaser.dei.unipd.it/challenges/idpp2024/assets/other/ms/ms-variables-description.txt</p>
          <p>Occurrence of MS in pediatric age FALSE</p>
          <p>TRUE
Variable
Sex
Ethnicity
Centre
Residence classification
Age at onset
Age at baseline
Diagnostic delay
Spinal cord symptom
Brainstem symptom
Eye symptom
Supratentorial symptom
Other symptoms
EDSS
Outcome time</p>
          <p>Level
Female
Male
Caucasian
Hispanic
Black African
NA
Cities
Rural Area
Towns
Pavia
Turin
median (IQR)
median (IQR)
median (IQR)
FALSE
TRUE
FALSE
TRUE
FALSE
TRUE
FALSE
TRUE
FALSE
Sensory
Epilepsy
median (IQR)
NA
median (IQR)</p>
          <p>Statistical tests, including the Kruskal-Wallis test for continuous variables and the Chi-squared test
for categorical and ordinal variables, were performed to assess the appropriateness of the stratification.
Special attention was given to sparsely observed levels in categorical variables to ensure rare levels
appeared only in the training set if at all. Table 2 provides a comparison of variable distributions between
the training and test sets, confirming that the split meets the best-practice quality standards.</p>
        </sec>
      </sec>
    </sec>
    <sec id="sec-5">
      <title>5. Lab Setup and Participation</title>
      <p>In the remainder of this section, we detail the guidelines the participants had to comply with to submit
their runs and the submissions received by iDPP@CLEF.</p>
      <sec id="sec-5-1">
        <title>5.1. Guidelines</title>
        <p>Participating teams should satisfy the following guidelines:
• The runs should be submitted in the textual format described below;
• Each group can submit a maximum of 30 runs for each of Task 1, Task 2, and Task 3.</p>
        <sec id="sec-5-1-1">
          <title>5.1.1. Task 1 Run Format</title>
          <p>
            Runs should be submitted as a text file (.txt) with the following format:
• The first column is the patient ID, a hashed version of the original patient ID (should be considered
just as a string);
• Columns from 2 to 13 represent the predicted ALSFRS-R sub-score. Each column corresponds to
an ALSFRS-R question (e.g. column 2 to Q1, column 3 to Q2, and so on). Each value is expected
to be an integer in the range [0, 4];
• The last column is the run identifier, according to the format described below. It must uniquely
identify the participating team and the submitted run.
          </p>
        </sec>
        <sec id="sec-5-1-2">
          <title>5.1.2. Task 2 Run Format</title>
          <p>
            Runs should be submitted as a text file (.txt) with the following format:
10061925618906738677 1 2 3 4 1 2 3 4 1 2 3 4 upd_T1_myDesc
10160033396142711519 1 2 3 4 1 2 3 4 1 2 3 4 upd_T1_myDesc
10287479530859953248 1 2 3 4 1 2 3 4 1 2 3 4 upd_T1_myDesc
12398828804459792214 1 2 3 4 1 2 3 4 1 2 3 4 upd_T1_myDesc
10038199677222038201 1 2 3 4 1 2 3 4 1 2 3 4 upd_T1_myDesc
...
• Columns are separated by a white space;
• The first column is the patient ID, a hashed version of the original patient ID (should be considered
just as a string);
• Columns from 2 to 13 represent the predicted self-assessed sub-score. Each column corresponds to
an ALSFRS-R question (e.g. column 2 to Q1, column 3 to Q2, and so on). Each value is expected
to be an integer in the range [0, 4];
• The last column is the run identifier, according to the format described below. It must uniquely
identify the participating team and the submitted run.
          </p>
          <p>It is important to include all the columns and have a white space delimiter between the columns. No
specific ordering is expected among patients (rows) in the submission file.</p>
        </sec>
        <sec id="sec-5-1-3">
          <title>5.1.3. Task 3 Run Format</title>
          <p>Runs should be submitted as a text file (.txt) with the following format:
10061925618906738677 10 upd_T3_myDesc
10160033396142711519 47 upd_T3_myDesc
10287479530859953248 13 upd_T3_myDesc
12398828804459792214 1 upd_T3_myDesc
10038199677222038201 9 upd_T3_myDesc
...
• The first column is the patient ID, a hashed version of the original patient ID (should be considered
just as a string);
• The second column is the predicted week at which the first relapse after the baseline happens.
The value is expected to be an integer starting from 1;
• The third column is the run identifier, according to the format described below. It must uniquely
identify the participating team and the submitted run.</p>
          <p>It is important to include all the columns and have a white space delimiter between the columns. No
specific ordering is expected among patients (rows) in the submission file.</p>
        </sec>
        <sec id="sec-5-1-4">
          <title>5.1.4. Submission Upload</title>
          <p>Runs should be uploaded to the repository provided by the organizers. Following the repository
structure discussed above, for example, a run submitted for the first task should be included in
submission/task1.</p>
          <p>Runs should be uploaded using the following name convention for their identifiers:
&lt;teamname&gt;_T&lt;1|2|3&gt;_&lt;freefield&gt;, where:
• teamname is the name of the participating team;
• T&lt;1|2|3&gt; is the identifier of the task the run is submitted to, e.g. T1 for Task 1;
• freefield is a free field that participants can use as they prefer to further distinguish among
their runs. Please, keep it short and informative.</p>
          <p>For example, a complete run identifier may look like upd_T1_myDesc, where:
• upd is the University of Padua team;
• T1 means that the run is submitted for Task 1;
• myDesc suggests an appropriate description for the run.</p>
          <p>The name of the text file containing the run must be the identifier of the run followed by the txt
extension. In the above example, the file name is upd_T1_myDesc.txt.</p>
        </sec>
      </sec>
      <sec id="sec-5-2">
        <title>5.2. Participants</title>
        <p>A total of 28 teams registered for iDPP@CLEF 2024, out of which eight teams were able to submit at
least one run in at least one task. Table 3 reports the details about teams that managed to submit at least one
run. Furthermore, Table 4 outlines which tasks each team participated in and how many runs they
were able to submit. In total, 97 runs were submitted to iDPP@CLEF 2024. The most participated task
was Task 1 with 59 runs and six teams participating. Subsequently, Task 2 had 31 runs submitted by six
different teams. Finally, only two teams participated in Task 3, with a total of 7 runs submitted. The
most prolific participant was UNIPD, with a total of 20 runs.</p>
        <p>Affiliation</p>
        <p>Country</p>
        <p>Repository
IEETA/DETI, LASI, Uni- Portugal
versity of Aveiro
CompBiomedUniTO</p>
        <p>University of Torino</p>
        <p>Italy
LASIGE, Faculty of Sci- Portugal
ences, University of
Lisbon
Georgia Institute of
Technology, Atlanta,
GA
University of Bucharest Romania</p>
        <p>United States
University of Pavia, Italy
BMI lab "Mario
Stefanelli"
University of Botswana</p>
        <p>Botswana
University of Padova</p>
        <p>Italy
https://bitbucket.org/
brainteaser-health/
idpp2024-bitua
https://bitbucket.org/
brainteaser-health/
idpp2024-compbiomedunito
https://bitbucket.org/
brainteaser-health/
idpp2024-fcool
https://bitbucket.org/
brainteaser-health/
idpp2024-idppexplorers
https://bitbucket.org/
brainteaser-health/
idpp2024-mandatory
https://bitbucket.org/
brainteaser-health/
idpp2024-stefagroup
https://bitbucket.org/
brainteaser-health/
idpp2024-ubcs
https://bitbucket.org/
brainteaser-health/
idpp2024-unipd</p>
        <p>Paper
Silva and
Oliveira [9]
Barducci et.
al. [10]
Martins et. al.
[11]
Mehta
al. [12]
—</p>
        <p>et.</p>
        <p>Bosoni et. al.
[13]
Okere et. al.
[14]
Martinello et.
al. [15]
Team Name
BIT.UA
FCOOL
iDPPExplorers
Mandatory</p>
      </sec>
    </sec>
    <sec id="sec-6">
      <title>6. Evaluation Measures</title>
      <p>
        In both Tasks 1 and 2, the prediction targets were the future scores of the ALSFRS-R evaluation, which
are integers in the [0-4] range. Since the scores are discrete, we could have framed the predictive task
as a classification problem. However, we opted for a regression problem to be able to penalize larger
errors more (e.g., with a target score of 3, predicting 1 should be worse than predicting 2). Task 3, where
the target was the week of the relapse, was also framed quite naturally as a regression task for similar
reasons. Thus, we evaluated all tasks using the same two state-of-the-art evaluation measures to assess
the performance of regression models: the Root Mean Square Error (RMSE) and the Mean Absolute
Error (MAE). The formulas for RMSE and MAE are shown in Equation 1 and Equation 2, respectively,
where $n$ represents the number of observations, $y_i$ is the actual value of the dependent variable for the
$i$-th observation, and $\hat{y}_i$ is the predicted value of the dependent variable for the $i$-th observation.
      </p>
      <p>Both metrics can explain the performance of a model in an interpretable manner since their units are
the same as the target variable (e.g., weeks); together, they can provide a comprehensive evaluation of
the three prediction tasks, with smaller values indicating better simulation results. The RMSE measures
how much, on average, the model’s predictions deviate from the actual values. By squaring the errors
before averaging them, RMSE gives higher weight to large errors. MAE represents the average absolute
difference between actual and predicted values. Unlike RMSE, MAE treats all errors equally, regardless
of their magnitude. Therefore, it provides a clear representation of the average error, is less sensitive to
outliers, but does not emphasize large errors as much as RMSE.</p>
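      <p>$$\mathrm{RMSE} = \sqrt{\frac{1}{n} \sum_{i=1}^{n} \left( y_i - \hat{y}_i \right)^2} \qquad (1)$$</p>
      <p>$$\mathrm{MAE} = \frac{1}{n} \sum_{i=1}^{n} \left| y_i - \hat{y}_i \right| \qquad (2)$$</p>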
      <p>Both metrics can explain the performance of a model in an interpretable manner since their units are
the same as the target variable (e.g., weeks); together, they can provide a comprehensive evaluation of
the three prediction tasks.</p>
      <p>The RMSE measures how much, on average, the model’s predictions deviate from the actual values.
This statistical index ranges from 0 to ∞, with smaller values indicating better simulation results. By
squaring the errors before averaging them, RMSE gives higher weight to large errors. MAE represents
the average absolute difference between actual and predicted values. Unlike RMSE, MAE treats all
errors equally, regardless of their magnitude. Therefore, it provides a clear representation of the average
error, is less sensitive to outliers, but does not emphasize large errors as much as RMSE.</p>
    </sec>
    <sec id="sec-7">
      <title>7. Results</title>
      <p>For each task, we report the analysis of the performance of the runs submitted by the Lab’s participants
according to the measures described in Section 6.</p>
      <sec id="sec-7-1">
        <title>7.1. Task 1: Predicting ALSFRS-R Score from Sensor Data (ALS)</title>
        <p>Clinicians monitor ALS progression through frequent visits, typically every two to three months, to
promptly detect any worsening of symptoms. Consequently, ALSFRS-R scores usually remain fairly
stable between these appointments, making the most recent score a reliable predictor for the next
assessment. While some deterioration in at least one score is not uncommon, using the last observed
value as a predictive measure is both simple and effective, as most scores will not change. This approach,
which we will call “naive” since it does not use sensor data, is particularly useful for bulbar and
respiratory scores, which show more stability in the challenge dataset, and where sensor data might
not be as effective in detecting eventual changes. The distribution of ALSFRS-R scores and the amount
of worsening between consecutive visits in the training set is shown in Figure 2.</p>
        <p>Four teams—iDPPExplorers, Mandatory, FCOOL, and UNIPD—employed this strategy in one of their
runs for Task 1, achieving the lowest errors with both metrics (0.20 MAE and 0.49 RMSE) and securing
joint first place. The full error scores and rankings for all submitted runs are reported in Table 5.</p>
        <p>Note that other runs, which also utilize sensor data, demonstrate performance very close to the first
place. Due to the small size of the test set, error estimates exhibit large standard deviations, making it
impossible to assert significant differences in the top scores.</p>
        <p>The rankings are obtained considering the average of the performance for all twelve ALSFRS-R
scores and show how the naive predictors that propagate the last observed score are globally optimal.
However, this is not the case for each single ALSFRS-R score, where other runs often have lower errors,
as can be seen in Figure 3. Again, given the small size of the test sets, these differences in performance
are not statistically significant. However, it is also reasonable that the data collected by the sensors can
be more helpful in prediction for some scores than others: this is especially evident for Q9 and Q11 in
Task 1 and for Q4 and Q12 in Task 2.</p>
      </sec>
      <sec id="sec-7-2">
        <title>7.2. Task 2: Predicting Patient Self-assessment Score from Sensor Data (ALS)</title>
        <p>Task 2 is very similar to Task 1, with several teams employing the same methods as they did for Task 1.
However, in Task 2, the ALSFRS-R assessments by patients are less regular in timing and less consistent
in scoring compared to assessments by clinicians, although they are generally more closely spaced.</p>
        <p>The predict-the-last-scores approach remains the top performer, albeit with slightly higher errors
(0.29 MAE and 0.58 RMSE), placing the UNIPD and FCOOL teams in joint first place again. Full results
are reported in Table 6.</p>
      </sec>
      <sec id="sec-7-3">
        <title>7.3. Task 3: Predicting Relapses from EDSS Sub-scores and Environmental Data (MS)</title>
      </sec>
      <sec id="sec-7-4">
        <title>7.4. Approaches</title>
        <p>7.4.1. Tasks 1 and 2
In this section, we provide a short summary of the approaches adopted by participants in iDPP@CLEF.
There are two separate sub-sections, one for Task 1 and 2 – focused on ALS progression prediction –
and one for Task 3 – which concerns the MS relapse prediction, using environmental data.
Silva and Oliveira [9] (Team BIT.UA) focus on Tasks 1 and 2. Their proposed approaches employ
machine learning techniques that rely on RF ensembles. They observed that the most effective solutions
are based on temporal analysis, with the maximization strategy being the top-performing approach.
Additionally, they emphasize the importance of proper handling of missing data. The authors noted
inconsistent performance across the two tasks. Specifically, their approaches tended to be more effective
on Task 1, while performance on Task 2 was less satisfactory. Silva and Oliveira attribute this behavior
to the variability of the underlying data: Task 1 data, produced by clinicians, was more stable, whereas
Task 2 data, produced directly by patients, appeared to be less stable.</p>
        <p>Barducci et al. [10] (Team CompBiomedUniTO) tested different approaches to preselect the sensor
features to be fed to an RF Classifier. The first solution exploits the mono window approach, which keeps
only sensor data recorded within seven days before the considered questionnaire. The other approach
instead considers two windows: the first window is the same as before, and the second window instead
team
fcool
unipd
CBMUnito
bitua
fcool
bitua
unipd
fcool
unipd
bitua
unipd
fcool
unipd
fcool
unipd
fcool
ubcs
fcool
ubcs</p>
        <p>MAE
UNIPD</p>
        <p>Run
UNIPV_t3_rf
UNIPV_t3_lmer_first
UNIPV_t3_lmer_last
UNIPD_t3_ridge_noenv
UNIPD_t3_average
UNIPD_t3_rf_reg
UNIPD_t3_ridge
considers sensor data recorded when the previously available questionnaire occurred. The second
approach aims to provide the model with more information about the changes over time. However, the
irregularity of sensor data penalizes the two-windows approach. Indeed, 20 out of 54 patients did not
have two 7-day periods with a minimum of three days of sensor data. As a result, only the model using
the mono window approach was submitted. In general, the results vary significantly depending on
the questionnaire and showed better performance for the first task. The lower error in Task 1 may be
due to the questionnaire being completed by clinical staff, whose responses are typically more reliable
and objective compared to the subjective opinions provided by patients. To address the raised issue,
data augmentation is proposed as a possible solution to increase the number of questionnaires in the
training set. In this way, deep learning models could be tested to improve predictions and leverage
longer sensor data sequences.</p>
        <p>Martins et al. [11] (Team FCOOL) proposed a methodology consisting of independent multi-class
models, each predicting a distinct ALSFRS-R question. The authors tested four classification models:
Logistic Regression, RF, XGBoost, and Support Vector Machine. To manage sensor data, they first
derived static features from the longitudinal data via summarization techniques, and then reduced
the feature set using three methods: top-k selection across questions, top-k selection by question, and
biclustering. In both tasks, RF achieved top performance among the considered models, but failed to
outperform the Last Observation Carried Forward (LOCF) baseline, except for a few individual questions.
Moreover, no consensus was found about the best feature selection or extraction approach: top-k
selection by question was the best approach in Task 1, while biclustering was the best in Task 2.</p>
        <p>Mehta et al. [12] (Team iDPPExplorers) submitted runs only for Task 1 but analyze the approaches for
Task 2 in their working notes paper. Their work focuses on handling the temporal aspect of the sensor
data, by studying how to compress it via statistical methods that provide interpretability. Among the
set of approaches tested in their work, Mehta et al. observe that the optimal performance is achieved
by both a naive baseline and ElasticNet regression. Nevertheless, the authors also observe that, despite
the similar performance, the ElasticNet model is more robust and allows a better understanding of the
contribution of various features. While they did not take part in Task 2, they observed that the proposed
approach is able to achieve better results on self-assessed data provided by the patients. Finally, their
conclusive remark hints that, while this preliminary analysis did not highlight any major benefit of
using sensor data, a larger dataset with a more diverse set of patients might lead to different conclusions.</p>
        <p>In Tasks 1 and 2, Martinello et al. [15] (Team UNIPD) developed a broad set of predictive models
based on different methodological approaches using different subsets of the provided variables. The
aim of their study was to evaluate whether considering wearable data to predict ALS disability leads to
better performance with respect to models that only consider disease-specific variables collected during
routine visits. They observe that collecting data from wearable devices can improve the prediction of
ALS disability status. However, patients must be properly trained to use the sensors correctly in order
to acquire high-quality data leading to significant datasets. Otherwise, if the quality of the acquired
wearable data is poor, predicting the next visit ALSFRS-R score by simply holding the current one
seems to be a better approach. This is especially true when predicting scores that are self-assigned by
patients (task 2), who seem to be more stable and conservative with respect to their clinician during the
disability evaluation process over time.</p>
        <p>Okere et al. [14] (Team UBCS) explore different deep-learning techniques to process data, especially
to handle missing values. In particular, the authors exploit auto-encoders and multiple imputation
techniques to handle missing values and use an RF algorithm to select relevant features. Subsequently,
four deep neural networks, namely Multi-Layer Perceptron (MLP), Feed Forward Neural Network (FFNN),
Recurrent Neural Network (RNN), and Long Short-Term Memory (LSTM), were trained to perform the two
tasks. Experimental results revealed that ensemble predictive models, such as the XGBoost algorithm,
show better performance than deep learning models. The authors link the low performance of the
models with the small size of the training data.
7.4.2. Task 3
Bosoni et al. [13] (Team Stefagroup) used Topological Data Analysis to compute personal exposure
patterns and then employed two predictive approaches. The former relied on applying Linear Regression,
RF, and XGBoost to the last follow-up data. The latter used Mixed-Effects modeling on longitudinal data
from first to last follow-up. The results showed that incorporating environmental variables provides
information statistically significant for predicting relapses. This outcome underlined the need for better
methods to compute personal pollution exposure patterns, thereby enhancing the precision of MS
progression predictions.</p>
        <p>In Task 3, Martinello et al. [15] (Team UNIPD) developed a broad set of predictive models based
on different methodological approaches using different subsets of the provided variables. The aim of
their study was to evaluate whether considering environmental data to predict MS relapses leads to
better performance with respect to models that only consider disease-specific variables collected during
routine visits. They observe that environmental data can be beneficial for predicting the occurrence
of MS relapses; however, better solutions should be explored to refine the data collection and variable
extraction process in order to obtain more precise and focused predictions.
8. Conclusions and Future Work
iDPP@CLEF 2024 is the third and last iteration of the iDPP@CLEF evaluation campaign. The focus of
this evaluation campaign was on developing AI models capable of preemptively estimating the risks
that patients affected by ALS and MS will need medical support and to describe the progression of their
disease, to foster patient stratification and aid clinicians in providing the due care in the most effective
and rapid way.</p>
        <p>iDPP@CLEF 2024 operated in continuation with iDPP@CLEF 2022 and iDPP@CLEF 2023, expanding
previously proposed tasks, but also identifying novel tasks. In particular, iDPP@CLEF was organized
into 3 tasks. The first two tasks focused on predicting the ALSFRS-R for patients affected by ALS, using
data collected via environmental sensors and wearable devices. This makes iDPP@CLEF 2024 the first
edition to make use of data collected on patients currently involved in the BRAINTEASER project.
The third task of iDPP@CLEF 2024 built upon the results of iDPP@CLEF 2023, by focusing on the
prediction of the disease progression of patients affected by MS. More in detail, this task focused on
predicting when an MS patient will experience a relapse. As an improvement over the previous iDPP
edition, this year participants were also provided with environmental data that could have been used to
improve the AI models.</p>
        <p>In terms of participation, 28 teams registered in the Lab, suggesting overall interest in the topic from
the research community, and 8 teams were able to submit their results for a total of 97 submitted runs.
The task that received the most interest was the first, with 59 submissions alone.</p>
        <p>While this cycle concludes the evaluation campaign of iDPP@CLEF, we envision several possible
research paths for which iDPP@CLEF paved the way. First of all, novel and more effective AI approaches
can be developed in the future, by using iDPP@CLEF data as training and evaluation sets. Secondly,
iDPP@CLEF has identified several guidelines and good practices that can be adapted to devise novel
shared tasks and evaluation campaigns in the future, either concerning ALS and MS, other neurological
diseases, or the medical domain at large.</p>
      </sec>
      <sec id="sec-7-5">
        <title>Acknowledgments</title>
        <p>The work reported in this paper has been partially supported by the BRAINTEASER<sup>9</sup> project (contract
n. GA101017598), as a part of the European Union’s Horizon 2020 research and innovation programme.
to 21st, 2023, volume 3497 of CEUR Workshop Proceedings, CEUR-WS.org, 2023, pp. 1123–1164. URL:
https://ceur-ws.org/Vol-3497/paper-095.pdf.
7. G. Faggioli, A. Guazzo, S. Marchesin, L. Menotti, I. Trescato, H. Aidos, R. Bergamaschi, G. Birolo,
P. Cavalla, A. Chiò, A. Dagliati, M. de Carvalho, G. M. D. Nunzio, P. Fariselli, J. M. G. Dominguez,
M. Gromicho, E. Longato, S. C. Madeira, U. Manera, G. Silvello, E. Tavazzi, E. Tavazzi, M. Vettoretti,
B. D. Camillo, N. Ferro, Intelligent disease progression prediction: Overview of idpp@clef 2023,
in: A. Arampatzis, E. Kanoulas, T. Tsikrika, S. Vrochidis, A. Giachanou, D. Li, M. Aliannejadi,
M. Vlachos, G. Faggioli, N. Ferro (Eds.), Experimental IR Meets Multilinguality, Multimodality,
and Interaction - 14th International Conference of the CLEF Association, CLEF 2023, Thessaloniki,
Greece, September 18-21, 2023, Proceedings, volume 14163 of Lecture Notes in Computer Science,
Springer, 2023, pp. 343–369. URL: https://doi.org/10.1007/978-3-031-42448-9_24. doi:10.1007/
978-3-031-42448-9_24.
8. World Health Organization, WHO global air quality guidelines: Particulate matter (PM2.5 and PM10),
ozone, nitrogen dioxide, sulfur dioxide and carbon monoxide, World Health Organization, Geneva,
2021. Review.
9. J. Silva, J. Oliveira, Bit.ua at idpp: Predictive analytics on als disease progression using sensor data
with machine learning, in: CLEF 2024 Working Notes, 2024.
10. G. Barducci, F. Sartori, G. Birolo, T. Sanavia, P. Fariselli, Alsfrs-r score prediction for amyotrophic
lateral sclerosis, in: CLEF 2024 Working Notes, 2024.
11. A. Martins, D. Amaral, E. Castanho, D. Soares, R. Branco, S. Madeira, H. Aidos, Predicting the
functional rating scale and self-assessment status of als patients with sensor data, in: CLEF 2024
Working Notes, 2024.
12. R. Mehta, A. Pramov, S. Verma, Machine learning for alsfrs-r score prediction: Making sense of the
sensor data, in: CLEF 2024 Working Notes, 2024.
13. P. Bosoni, M. Vazifehdan, D. Pala, E. Tavazzi, R. Bergamaschi, R. Bellazzi, A. Dagliati, Predicting
multiple sclerosis relapses using patient exposure trajectories, in: CLEF 2024 Working Notes, 2024.
14. C. Okere, E. Thuma, G. Mosweunyane, Ubcs at idpp: Predicting patient self-assessment score from
sensor data using machine learning algorithms, in: CLEF 2024 Working Notes, 2024.
15. E. Marinello, A. Guazzo, E. Longato, E. Tavazzi, I. Trescato, M. Vettoretti, B. D. Camillo, Using
wearable and environmental data to improve the prediction of amyotrophic lateral sclerosis and
multiple sclerosis progression: an explorative study, in: CLEF 2024 Working Notes, 2024.</p>
      </sec>
    </sec>
  </body>
  <back>
    <ref-list>
      <ref id="ref1">
        <mixed-citation>
          1.
          <string-name>
            <given-names>G.</given-names>
            <surname>Birolo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Bosoni</surname>
          </string-name>
          , G. Faggioli,
          <string-name>
            <given-names>H.</given-names>
            <surname>Aidos</surname>
          </string-name>
          ,
          <string-name>
            <given-names>R.</given-names>
            <surname>Bergamaschi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Cavalla</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Chiò</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Dagliati</surname>
          </string-name>
          , M. de Carvalho, G. Di Nunzio,
          <string-name>
            <given-names>P.</given-names>
            <surname>Fariselli</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J. García</given-names>
            <surname>Dominguez</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A. G.</given-names>
            <surname>Marta Gromicho</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Longato</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Madeira</surname>
          </string-name>
          ,
          <string-name>
            <given-names>U.</given-names>
            <surname>Manera</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Marchesin</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L.</given-names>
            <surname>Menotti</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Silvello</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Tavazzi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Tavazzi</surname>
          </string-name>
          , I. Trescato,
          <string-name>
            <given-names>M.</given-names>
            <surname>Vettoretti</surname>
          </string-name>
          ,
          <string-name>
            <given-names>B. D.</given-names>
            <surname>Camillo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Ferro</surname>
          </string-name>
          ,
          <article-title>Intelligent Disease Progression Prediction: Overview of iDPP@CLEF 2024</article-title>
          , in:
          <source>Experimental IR Meets Multilinguality, Multimodality, and Interaction - 15th International Conference of the CLEF Association, CLEF 2024, Grenoble, France, September 9-12, 2024, Proceedings</source>
          ,
          <year>2024</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref2">
        <mixed-citation>
          2.
          <string-name>
            <given-names>J. M.</given-names>
            <surname>Cedarbaum</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Stambler</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Malta</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C.</given-names>
            <surname>Fuller</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Hilt</surname>
          </string-name>
          ,
          <string-name>
            <given-names>B.</given-names>
            <surname>Thurmond</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Nakanishi</surname>
          </string-name>
          ,
          <article-title>The ALSFRS-R: a revised ALS functional rating scale that incorporates assessments of respiratory function</article-title>
          ,
          <source>Journal of the Neurological Sciences</source>
          <volume>169</volume>
          (
          <year>1999</year>
          )
          <fpage>13</fpage>
          -
          <lpage>21</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref3">
        <mixed-citation>
          3.
          <string-name>
            <given-names>R.</given-names>
            <surname>Küffner</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Zach</surname>
          </string-name>
          ,
          <string-name>
            <given-names>R.</given-names>
            <surname>Norel</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J.</given-names>
            <surname>Hawe</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Schoenfeld</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L.</given-names>
            <surname>Wang</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Li</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L.</given-names>
            <surname>Fang</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L.</given-names>
            <surname>Mackey</surname>
          </string-name>
          ,
          <string-name>
            <given-names>O.</given-names>
            <surname>Hardiman</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Cudkowicz</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Sherman</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Ertaylan</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Grosse-Wentrup</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T.</given-names>
            <surname>Hothorn</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J.</given-names>
            <surname>van Ligtenberg</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J. H.</given-names>
            <surname>Macke</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T.</given-names>
            <surname>Meyer</surname>
          </string-name>
          ,
          <string-name>
            <given-names>B.</given-names>
            <surname>Schölkopf</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L.</given-names>
            <surname>Tran</surname>
          </string-name>
          ,
          <string-name>
            <given-names>R.</given-names>
            <surname>Vaughan</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Stolovitzky</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M. L.</given-names>
            <surname>Leitner</surname>
          </string-name>
          ,
          <article-title>Crowdsourced analysis of clinical trial data to predict amyotrophic lateral sclerosis progression</article-title>
          ,
          <source>Nature Biotechnology</source>
          <volume>33</volume>
          (
          <year>2015</year>
          )
          <fpage>51</fpage>
          -
          <lpage>57</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref4">
        <mixed-citation>
          4.
          <string-name>
            <given-names>A.</given-names>
            <surname>Guazzo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>I.</given-names>
            <surname>Trescato</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Longato</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Hazizaj</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Dosso</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Faggioli</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G. M.</given-names>
            <surname>Di Nunzio</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Silvello</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Vettoretti</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Tavazzi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C.</given-names>
            <surname>Roversi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Fariselli</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S. C.</given-names>
            <surname>Madeira</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>de Carvalho</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Gromicho</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Chiò</surname>
          </string-name>
          ,
          <string-name>
            <given-names>U.</given-names>
            <surname>Manera</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Dagliati</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Birolo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>H.</given-names>
            <surname>Aidos</surname>
          </string-name>
          ,
          <string-name>
            <given-names>B.</given-names>
            <surname>Di Camillo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Ferro</surname>
          </string-name>
          ,
          <article-title>Intelligent Disease Progression Prediction: Overview of iDPP@CLEF 2022</article-title>
          , in:
          <string-name>
            <given-names>A.</given-names>
            <surname>Barrón-Cedeño</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Da San Martino</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Degli Esposti</surname>
          </string-name>
          ,
          <string-name>
            <given-names>F.</given-names>
            <surname>Sebastiani</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C.</given-names>
            <surname>Macdonald</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Pasi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Hanbury</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Potthast</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Faggioli</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Ferro</surname>
          </string-name>
          (Eds.),
          <source>Experimental IR Meets Multilinguality, Multimodality, and Interaction. Proceedings of the Thirteenth International Conference of the CLEF Association (CLEF 2022), Lecture Notes in Computer Science (LNCS) 13390</source>
          , Springer, Heidelberg, Germany,
          <year>2022</year>
          , pp.
          <fpage>395</fpage>
          -
          <lpage>422</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref5">
        <mixed-citation>
          5.
          <string-name>
            <given-names>A.</given-names>
            <surname>Guazzo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>I.</given-names>
            <surname>Trescato</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Longato</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Hazizaj</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Dosso</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Faggioli</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G. M.</given-names>
            <surname>Di Nunzio</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Silvello</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Vettoretti</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Tavazzi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C.</given-names>
            <surname>Roversi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Fariselli</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S. C.</given-names>
            <surname>Madeira</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>de Carvalho</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Gromicho</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Chiò</surname>
          </string-name>
          ,
          <string-name>
            <given-names>U.</given-names>
            <surname>Manera</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Dagliati</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Birolo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>H.</given-names>
            <surname>Aidos</surname>
          </string-name>
          ,
          <string-name>
            <given-names>B.</given-names>
            <surname>Di Camillo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Ferro</surname>
          </string-name>
          ,
          <article-title>Overview of iDPP@CLEF 2022: The Intelligent Disease Progression Prediction Challenge</article-title>
          , in:
          <string-name>
            <given-names>G.</given-names>
            <surname>Faggioli</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Ferro</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Hanbury</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Potthast</surname>
          </string-name>
          (Eds.),
          <source>CLEF 2022 Working Notes, CEUR Workshop Proceedings (CEUR-WS.org)</source>
          , ISSN
          <issn>1613-0073</issn>
          .
          <ext-link ext-link-type="uri" xlink:href="http://ceur-ws.org/Vol-3180/">http://ceur-ws.org/Vol-3180/</ext-link>
          ,
          <year>2022</year>
          , pp.
          <fpage>1130</fpage>
          -
          <lpage>1210</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref6">
        <mixed-citation>
          6.
          <string-name>
            <given-names>G.</given-names>
            <surname>Faggioli</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Guazzo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Marchesin</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L.</given-names>
            <surname>Menotti</surname>
          </string-name>
          ,
          <string-name>
            <given-names>I.</given-names>
            <surname>Trescato</surname>
          </string-name>
          ,
          <string-name>
            <given-names>H.</given-names>
            <surname>Aidos</surname>
          </string-name>
          ,
          <string-name>
            <given-names>R.</given-names>
            <surname>Bergamaschi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Birolo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Cavalla</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Chiò</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Dagliati</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>de Carvalho</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G. M. D.</given-names>
            <surname>Nunzio</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Fariselli</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J. M. G.</given-names>
            <surname>Dominguez</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Gromicho</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Longato</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S. C.</given-names>
            <surname>Madeira</surname>
          </string-name>
          ,
          <string-name>
            <given-names>U.</given-names>
            <surname>Manera</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Silvello</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Tavazzi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Tavazzi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Vettoretti</surname>
          </string-name>
          ,
          <string-name>
            <given-names>B. D.</given-names>
            <surname>Camillo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Ferro</surname>
          </string-name>
          ,
          <article-title>Overview of idpp@clef 2023: The intelligent disease progression prediction challenge</article-title>
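          , in:
          <string-name>
            <given-names>M.</given-names>
            <surname>Aliannejadi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Faggioli</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Ferro</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Vlachos</surname>
          </string-name>
          (Eds.),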
          <source>Working Notes of the Conference and Labs of the Evaluation Forum (CLEF</source>
          <year>2023</year>
          ), Thessaloniki, Greece, September 18th
        </mixed-citation>
      </ref>
    </ref-list>
  </back>
</article>