<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.0 20120330//EN" "JATS-archivearticle1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta />
    <article-meta>
      <title-group>
        <article-title>Overview of iDPP@CLEF 2023: The Intelligent Disease Progression Prediction Challenge</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author">
          <string-name>Guglielmo Faggioli</string-name>
          <email>guglielmo.faggioli@unipd.it</email>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Alessandro Guazzo</string-name>
          <email>alessandro.guazzo@phd.unipd.it</email>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Stefano Marchesin</string-name>
          <email>stefano.marchesin@unipd.it</email>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Laura Menotti</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Isotta Trescato</string-name>
          <email>isotta.trescato@phd.unipd.it</email>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Helena Aidos</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Roberto Bergamaschi</string-name>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Giovanni Birolo</string-name>
          <xref ref-type="aff" rid="aff5">5</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Paola Cavalla</string-name>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Adriano Chiò</string-name>
          <xref ref-type="aff" rid="aff5">5</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Arianna Dagliati</string-name>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Mamede de Carvalho</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Giorgio Maria Di Nunzio</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Piero Fariselli</string-name>
          <xref ref-type="aff" rid="aff5">5</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Jose Manuel García Dominguez</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Marta Gromicho</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Enrico Longato</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Sara C. Madeira</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Umberto Manera</string-name>
          <xref ref-type="aff" rid="aff5">5</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Gianmaria Silvello</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Eleonora Tavazzi</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Erica Tavazzi</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Martina Vettoretti</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Barbara Di Camillo</string-name>
          <email>barbara.dicamillo@unipd.it</email>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Nicola Ferro</string-name>
          <email>nicola.ferro@unipd.it</email>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <aff id="aff0">
          <label>0</label>
          <institution>Gregorio Marañon Hospital in Madrid</institution>
          ,
          <country country="ES">Spain</country>
        </aff>
        <aff id="aff1">
          <label>1</label>
          <institution>IRCCS Foundation C. Mondino in Pavia</institution>
          ,
          <country country="IT">Italy</country>
        </aff>
        <aff id="aff2">
          <label>2</label>
          <institution>University of Lisbon</institution>
          ,
          <country country="PT">Portugal</country>
        </aff>
        <aff id="aff3">
          <label>3</label>
          <institution>University of Padua</institution>
          ,
          <country country="IT">Italy</country>
        </aff>
        <aff id="aff4">
          <label>4</label>
          <institution>University of Pavia</institution>
          ,
          <country country="IT">Italy</country>
        </aff>
        <aff id="aff5">
          <label>5</label>
          <institution>University of Turin</institution>
          ,
          <country country="IT">Italy</country>
        </aff>
        <aff id="aff6">
          <label>6</label>
          <institution>“Città della Salute e della Scienza”</institution>
          ,
          <addr-line>Turin</addr-line>
          ,
          <country country="IT">Italy</country>
        </aff>
      </contrib-group>
      <abstract>
        <p>Amyotrophic Lateral Sclerosis (ALS) and Multiple Sclerosis (MS) are chronic diseases that cause progressive or alternating neurological impairments in motor, sensory, visual, and cognitive functions. Affected patients must manage hospital stays and home care while facing uncertainty and significant psychological and economic burdens that also affect their caregivers. To ease these challenges, clinicians need automatic tools to support them in all phases of patient treatment, suggest personalized therapeutic paths, and preemptively indicate urgent interventions. iDPP@CLEF aims at developing an evaluation infrastructure for AI algorithms to describe ALS and MS mechanisms, stratify patients based on their phenotype, and predict disease progression in a probabilistic, time-dependent manner. iDPP@CLEF 2023 was organised into three tasks, two of which (Tasks 1 and 2) pertained to Multiple Sclerosis (MS), and one (Task 3) concerned the evaluation of the impact of environmental factors in the progression of Amyotrophic Lateral Sclerosis (ALS), and how to use environmental data at prediction time. 10 teams took part in the iDPP@CLEF 2023 Lab, submitting a total of 163 runs with multiple approaches to the disease progression prediction task, including Survival Random Forests and Coxnets.</p>
      </abstract>
    </article-meta>
  </front>
  <body>
    <sec id="sec-1">
      <title>1. Introduction</title>
      <p>Both Amyotrophic Lateral Sclerosis (ALS) and Multiple Sclerosis (MS) are severe chronic diseases
that cause progressive neurological impairment. They exhibit high heterogeneity in terms of
symptoms and disease progression, leading to differing needs for patients. The heterogeneity
of these diseases partly explains the lack of effective prognostic tools and the current lack of
therapies that can effectively slow or reverse their course. This poses challenges for patients,
caregivers and clinicians alike. Furthermore, the timing of worsening or significant events –
such as the need for specific treatments, as the Non-Invasive Ventilation (NIV) or Percutaneous
Endoscopic Gastrostomy (PEG) in the case of ALS – is uncertain and hard to predict. Being able
to preemptively recognize signals of the worsening of the disease as well as the need for specific
medical treatments would have significant implications for the quality of life of patients.</p>
      <p>Therefore, it is of utmost importance to devise automatic tools that could aid clinicians in
their decision-making in all phases of disease progression and facilitate personalized therapeutic
choices.</p>
      <p>To address these challenges and develop Artificial Intelligence (AI) predictive algorithms,
researchers need a framework to design and evaluate approaches to:
• stratify patients according to their phenotype all over the disease evolution;
• predict the progression of the disease in a probabilistic, time-dependent way;
• describe better and in an explainable fashion the mechanisms underlying MS and ALS
diseases.</p>
      <p>In this context, it is crucial to develop shared approaches, promote common benchmarks,
and foster experiment comparability and replicability, which is currently not so common in
this domain. The Intelligent Disease Progression Prediction at CLEF (iDPP@CLEF) lab<sup>1</sup> aims to
provide an evaluation infrastructure for the development of such AI algorithms. Unlike previous
challenges in the field, iDPP@CLEF systematically addresses issues related to the application of
AI in clinical practice for ALS and MS. Apart from defining risk scores based on the probability
of events occurring in the short or long term, iDPP@CLEF also deals with providing clinicians
with structured and understandable data.</p>
      <p>
        iDPP@CLEF 2023 [
        <xref ref-type="bibr" rid="ref1">1</xref>
        ] encompasses three primary tasks, with two focused on MS and one
centred around ALS. Concerning MS, these tasks revolve around predicting the risk of incurring
a disease worsening, either in terms of probability or as a cumulative probability over increasing
time periods. Furthermore, each MS task is further divided into two subtasks, each with its
specific definition of worsening.
      </p>
      <p>The outcomes for the MS tasks have been notably promising, as participating teams achieved
remarkable results, including an impressive AUC of up to 92.4% and an O/E ratio of 0.946.</p>
      <p>The third task is dedicated to ALS and builds upon the tasks explored in iDPP@CLEF 2022.
Specifically, participants were asked to predict the occurrence of two essential medical
treatments, namely NIV and PEG, as well as the predicted time of death. Each prediction was
addressed as a distinct subtask. Notably, for this year’s ALS task, participants were provided
with environmental data, allowing them to investigate whether incorporating such information
could lead to improved predictive models.<fn><label>1</label><p><uri>https://brainteaser.health/open-evaluation-challenges/</uri></p></fn></p>
      <p>However, despite the inclusion of environmental data, the models submitted by participants
did not demonstrate a statistically significant improvement. This suggests that further
exploration and investigation in this domain are necessary to fully understand the potential impact
of environmental factors on ALS prediction models.</p>
      <p>The paper is organized as follows: Section 2 presents related challenges; Section 3 describes
iDPP@CLEF 2023 tasks; Section 4 discusses the developed dataset; Section 5 explains the setup
of the lab and introduces the participants; Section 6 introduces the evaluation measures adopted
to score the runs; Section 7 analyzes the experimental results for the different tasks; finally,
Section 8 draws some conclusions and outlines some future work.</p>
    </sec>
    <sec id="sec-2">
      <title>2. Related Challenges</title>
      <p>Within CLEF, there have been no other labs on this or similar topics before the start of
iDPP@CLEF. iDPP@CLEF 2022, whose details are summarized below, was the first iteration of
the lab and the current is the second one.</p>
      <p>
        Outside CLEF, there has been a recent challenge on Kaggle<sup>2</sup> in 2021 and some older ones,
the DREAM 7 ALS Prediction challenge<sup>3</sup> in 2012 and the DREAM ALS Stratification challenge<sup>4</sup>
in 2015. The Kaggle challenge used a mix of clinical and genomic data to seek insights about the
mechanisms of ALS and the difference between people with ALS who progress faster versus
those who develop it more slowly. The DREAM 7 ALS Prediction challenge [
        <xref ref-type="bibr" rid="ref2">2</xref>
        ] asked to use
3 months of ALS clinical trial information (months 0–3) to predict the future progression of
the disease (months 3–12), expressed as the slope of change in ALS Functional Rating Scale
Revised (ALSFRS-R) [
        <xref ref-type="bibr" rid="ref3">3</xref>
        ], a functional scale that ranges between 0 and 40. The DREAM ALS
Stratification challenge asked participants to stratify ALS patients into meaningful subgroups, to
enable better understanding of patient profiles and application of personalized ALS treatments.
Differently from these previous challenges, iDPP@CLEF focuses on explainable AI and on
temporal progression of the disease.
      </p>
      <p>
        Finally, when it comes to Multiple Sclerosis (MS), studies are mostly conducted on closed and
proprietary datasets and iDPP@CLEF represents one of the first attempts to create a public and
shared dataset.
      </p>
      <sec id="sec-2-1">
        <title>2.1. iDPP@CLEF 2022</title>
        <p>
          iDPP@CLEF 2022 ran as a pilot lab for the first time in CLEF 2022<sup>5</sup> [
          <xref ref-type="bibr" rid="ref4 ref5">4, 5</xref>
          ] and focused on
activities aimed at ALS progression prediction as well as at an understanding of the challenges
and limitations to refine and tune the lab itself for future iterations. iDPP@CLEF 2022 consisted
of the following tasks:
          <fn><label>2</label><p><uri>https://www.kaggle.com/alsgroup/end-als</uri></p></fn>
          <fn><label>3</label><p><uri>https://dreamchallenges.org/dream-7-phil-bowen-als-prediction-prize4life/</uri></p></fn>
          <fn><label>4</label><p><uri>https://dx.doi.org/10.7303/syn2873386</uri>.</p></fn>
          <fn><label>5</label><p><uri>https://brainteaser.health/open-evaluation-challenges/idpp-2022/</uri></p></fn>
• Pilot Task 1 - Ranking Risk of Impairment: it focused on ranking patients based on
the risk of impairment. We used the ALSFRS-R scale [
          <xref ref-type="bibr" rid="ref3">3</xref>
          ] to monitor speech, swallowing,
handwriting, dressing/hygiene, walking and respiratory ability in time and asked
participants to rank patients based on the time-to-event risk of experiencing impairment in
each specific domain.
• Pilot Task 2 - Predicting Time of Impairment: it refined Task 1 by asking participants
to predict when specific impairments will occur (i.e. in the correct time-window). In this
regard, we assessed model calibration in terms of the ability of the proposed algorithms
to estimate a probability of an event close to the true probability within a specified
time-window.
• Position Paper Task 3 - Explainability of AI algorithms: we evaluated proposals of
different frameworks able to explain the multivariate nature of the data and the model
predictions.
        </p>
        <p>iDPP@CLEF 2022 created 3 datasets, for the prediction of specific events related to ALS,
consisting of fully anonymized data from 2,250 real patients from medical institutions in Turin,
Italy, and Lisbon, Portugal. The datasets contain both static data about patients, e.g. age, onset
date, gender, …, and event data, i.e. 18,512 ALSFRS-R questionnaires and 4,015 spirometries. 6
groups participated in iDPP@CLEF 2022 and submitted a total of 120 runs.</p>
      </sec>
    </sec>
    <sec id="sec-3">
      <title>3. Tasks</title>
      <p>iDPP@CLEF 2023 is the second iteration of the lab, expanding its scope to include both ALS
and MS in the study of disease progression. The activities in iDPP@CLEF 2023 focus on two
objectives: exploring the prediction of MS worsening and conducting a more in-depth analysis
of ALS compared to iDPP@CLEF 2022, with the addition of environmental data.</p>
      <p>Following iDPP@CLEF 2022, iDPP@CLEF 2023 targets three tasks:
• Pilot tasks (Task 1 and Task 2) on predicting the progression of the MS, focusing on its
worsening;
• Position papers (Task 3) on the impact that environmental data can have on the progression
of the ALS.</p>
      <p>In the remainder of this section, we describe each task more in detail.</p>
      <sec id="sec-3-1">
        <title>3.1. Task 1: Predicting Risk of Disease Worsening (MS)</title>
        <p>Task 1 focuses on MS and requires ranking subjects based on the risk of worsening, setting
the problem as a survival analysis task. More specifically the risk of worsening predicted by
the algorithm should reflect how early a patient experiences the “worsening” event and should
range between 0 and 1.</p>
        <p>
          Worsening is defined on the basis of the Expanded Disability Status Scale (EDSS) [
          <xref ref-type="bibr" rid="ref6">6</xref>
          ],
according to clinical standards. In particular, we consider two different definitions of worsening
corresponding to two different sub-tasks:
• Task1a: the patient crosses the threshold EDSS ≥ 3 at least twice within a one-year
interval;
• Task1b: the second definition of worsening depends on the first recorded value, according
to current clinical protocols:
– if the baseline is EDSS &lt; 1, then the worsening event occurs when an increase of
EDSS by 1.5 points is first observed;
– if the baseline is 1 ≤ EDSS &lt; 5.5, then the worsening event occurs when an increase
of EDSS by 1 point is first observed;
– if the baseline is EDSS ≥ 5.5, then the worsening event occurs when an increase of
EDSS by 0.5 points is first observed.
        </p>
        <p>For each sub-task, participants are given a dataset containing 2.5 years of visits, with the
occurrence of the worsening event and the time of occurrence pre-computed by the challenge
organizers.</p>
      </sec>
      <sec id="sec-3-2">
        <title>3.2. Task 2: Predicting Cumulative Probability of Worsening (MS)</title>
        <p>Task 2 refines Task 1 by asking participants to explicitly assign the cumulative probability of
worsening at different time windows, i.e., between years 0 and 2, 0 and 4, 0 and 6, 0 and 8, 0 and
10. In particular, as in Task 1, we consider two different definitions of worsening corresponding
to two different sub-tasks:
• Task2a: the patient crosses the threshold EDSS ≥ 3 at least twice within a one-year
interval;
• Task2b: the second definition of worsening depends on the first recorded value, according
to current clinical protocols:
– if the baseline is EDSS &lt; 1, then the worsening event occurs when an increase of
EDSS by 1.5 points is first observed;
– if the baseline is 1 ≤ EDSS &lt; 5.5, then the worsening event occurs when an increase
of EDSS by 1 point is first observed;
– if the baseline is EDSS ≥ 5.5, then the worsening event occurs when an increase of
EDSS by 0.5 points is first observed.</p>
        <p>For each sub-task, participants are given a dataset containing 2.5 years of visits, with the
occurrence of the worsening event and the time of occurrence pre-computed by the challenge
organizers.</p>
      </sec>
      <sec id="sec-3-3">
        <title>3.3. Task 3: Position Papers on the Impact of Exposition to Pollutants (ALS)</title>
        <p>Participants in Task 3 are required to propose approaches to assess if exposure to different
pollutants is a useful variable to predict time to PEG, NIV, and death in ALS patients. This task
is based on the same design as Task 1 in iDPP@CLEF 2022 and employs the same data as well.
Therefore, both training and test data are available immediately. Compared to iDPP@CLEF 2022,
the dataset is complemented with environmental data to investigate the impact of exposure to
pollutants on the prediction of disease progression. The task consists in ranking subjects based
on the risk of early occurrence of:
• Task3a: NIV or (competing event) death, whichever occurs first;
• Task3b: PEG or (competing event) death, whichever occurs first;
• Task3c: Death.</p>
        <p>Since test data were already released at the end of iDPP@CLEF 2022 it is impossible to
produce a fair leaderboard. Therefore, participants are required to produce position papers in
which they describe their approaches and findings concerning the link between environmental
factors and ALS progression.</p>
      </sec>
    </sec>
    <sec id="sec-4">
      <title>4. Dataset</title>
      <p>For iDPP@CLEF 2023, we provided 5 datasets, two for MS and three for ALS, using data from
three clinical institutions in Turin and Pavia, Italy, and Lisbon, Portugal. The datasets are fully
anonymized: identifiers and pseudo-identifiers, e.g. place of birth or city of residence, have been
removed; dates are reported as relative spans in days with respect to a Time 0, i.e., a reference
moment in time that depends on the considered disease. For MS, Time 0 was defined as the
time of the last EDSS recorded before the date of the first recorded EDSS plus 2.5 years. Patients
that were not diagnosed with MS within the time window going from the first EDSS date to
2.5 years after it had a different definition of Time 0: specifically, the first EDSS for which the
patient had an MS diagnosis within 2.5 years was considered instead of the first recorded one for
their Time 0 definition. Patients for which it was not possible to find suitable EDSS according
to this scheme were excluded from the analysis as it was not possible to correctly define a
Time 0 for them. In the context of ALS, Time 0 represents the date of the first ALSFRS-R
questionnaire.</p>
      <sec id="sec-4-1">
        <title>4.1. Prefiltering</title>
        <sec id="sec-4-1-1">
          <title>4.1.1. MS Data</title>
          <p>The original MS data contained minor inconsistencies and typos. Therefore, to avoid introducing
noise and spurious information within datasets, we first processed the data removing records that
were likely wrong or did not provide enough information for AI methods to perform predictions.
In terms of patients, we removed those where the following pieces of information were absent or
out of range: onset date; first visit date; functional systems scores and corresponding EDSS scores.
For each removed patient, we discarded all their records related to EDSS, evoked potentials,
MRIs, and MS courses. As for relapses, we removed those records where no information about
the relapse was given. We removed MRI records not reporting information about T1 and T2
lesions. Finally, where needed, we removed duplicated records, records associated with patients
without demographic and onset data, or records with missing dates. In particular, we removed
patients with no visits about EDSS. Having at least one visit about EDSS was an inclusion
criterion for patients in retrospective data.</p>
        </sec>
        <sec id="sec-4-1-2">
          <title>4.1.2. ALS Data</title>
          <p>
            ALS datasets are the same as the ones provided for iDPP@CLEF 2022. Their description
is available at [
            <xref ref-type="bibr" rid="ref4 ref5">4, 5</xref>
            ]. Compared to iDPP@CLEF 2022, the ALS datasets used for Task 3 in
iDPP@CLEF 2023 have been updated as follows: i) records associated with invalid event date
(i.e., patients with censoring time equal to 0) have been removed; ii) environmental data has
been added.
          </p>
        </sec>
      </sec>
      <sec id="sec-4-2">
        <title>4.2. Task 1 and Task 2: MS Datasets</title>
        <p>Tasks 1 and 2 share the same datasets – each MS dataset corresponds to a specific sub-task (a
and b). As training features, we provide:
– Static data, containing information on patient’s demographics, diagnostic delay, and
symptoms at the onset;
– Dynamic data (2.5 years), containing information on: relapses, EDSS scores, evoked
potentials, MRIs, and MS course.</p>
        <sec id="sec-4-2-1">
          <title>The following data are available as ground-truth:</title>
          <p>– The worsening occurrence, as defined in Section 3, expressed as a Boolean variable with
0 meaning “not occurred” and 1 meaning “occurred”.
– The time-of-occurrence, expressed as relative delta with respect to Time 0 in years (also
fractions).</p>
          <p>Each dataset contains the following groups of variables:
– static vars., representing static variables associated with a patient. The complete
list of available static variables is available at http://brainteaser.dei.unipd.it/challenges/
idpp2023/assets/other/ms/static-vars.txt.
– MS type, containing information about the MS type and the (relative) date when the MS
type has been observed.
– relapses consisting of the (relative) initial dates of relapses.
– EDSS, containing EDSS scores and the (relative) date when they were recorded.
– evoked potentials, reporting the results of evoked potential tests. The complete list
of variables for each evoked potential test is available at http://brainteaser.dei.unipd.it/
challenges/idpp2023/assets/other/ms/evoked-potentials.txt.
– MRI, containing the data involving MRIs; e.g., the area on which MRIs have been performed
and the observed lesions. The complete list of variables about MRIs is available at
http://brainteaser.dei.unipd.it/challenges/idpp2023/assets/other/ms/mri.txt.</p>
          <p>Training</p>
          <p>Test
– outcomes, detailing the patients’ worsening occurrence, together with the time of
occurrence. More in detail, outcomes contain one record for each patient where:
– The first column is the patient ID;
– The second column indicates if the worsening occurred (1) or not (0).
– The third column is the time of occurrence, defined as a floating point number in
the range [0,15].</p>
          <p>Table 1 reports the number of records for each group of variables for training and test sets
for each sub-task.
4.2.1. Creation of the datasets
To obtain the iDPP@CLEF 2023 MS datasets, we processed the data provided by two research
centres in Turin and Pavia, Italy, respectively. To remove minor inconsistencies and typos
present in the original data, we applied the prefiltering step described in 4.1.1. Besides removing
records that contained inconsistent or insufficient information, to construct iDPP datasets we
restricted visits data to a 2.5 years window prior to Time 0. Moreover, patients for whom it
was not possible to define Time 0 – i.e., patients who died or experienced the outcome within
the 2.5 years window – were excluded. Finally, following the definition of outcome for sub-task
a, patients were excluded if it was not possible to confirm the crossing of the EDSS = 3 threshold
within one year – that is, if a patient had an EDSS ≥ 3 but no other recorded EDSS within one
year after it.
4.2.2. Split into training and test
Each of the two MS datasets underwent a division into a training set and a test set, with
proportions of 80% and 20% respectively. In order to ensure a well-stratified distribution of
variables across the datasets and to avoid any biases during the splitting process, the data were
randomly partitioned 100 times using 100 different random seeds. To assess the appropriateness
of the stratification, a comparison of variable distributions was conducted for each training/test
pair. Statistical tests were performed on each variable based on its type: the Kruskal-Wallis
test [7] was applied to continuous variables, while the Chi-squared test [8] was employed for
categorical and ordinal variables. A variable was considered well-stratified depending on the
test result. For each split, the percentage of well-stratified variables was calculated using Eq. 1.</p>
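          <p>
            <disp-formula>
              <label>(1)</label>
              <tex-math><![CDATA[\%\ \text{well-stratified variables} = \frac{\#\ \text{well-stratified variables}}{\#\ \text{variables}} \times 100]]></tex-math>
            </disp-formula>
          </p>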
          <p>To identify the split that achieved the best stratification between those that achieved the
highest percentage, equal to 97%, a visual inspection was then conducted. Density plots were
used for continuous variables, bar plots for categorical and ordinal variables, and Kaplan-Meier
curves [9] for the outcome time in the survival setting. A careful examination of the outcome
occurrence and time was performed to ensure that the models’ performance would not be
influenced by the data splitting. For each variable, we enforced the test set to not contain levels
that were not observed in the training set for the same variable. Tables 2 and 3 show the comparison of
the variables’ distributions in the training and test sets for sub-task a, while Tables 4 and 5 show
them for sub-task b. Since the distributions are similar, we concluded that the training/test split
provided to the participants met best-practice quality standards.</p>
        </sec>
      </sec>
      <sec id="sec-4-3">
        <title>4.3. Task 3: ALS Dataset</title>
        <p>
          The datasets used for Task 3 in iDPP@CLEF 2023 have the same structure and most of the
records as the one used in iDPP@CLEF 2022. There are three datasets concerning patients
affected by ALS: Dataset ALSa, Dataset ALSb, and Dataset ALSc. Each dataset concerns a
specific type of event that might occur to patients affected by ALS. Datasets ALSa and ALSb
regard respectively the moment in which a patient undergoes NIV or PEG, while dataset ALSc
concerns the death of the patient. For a detailed description of the data, cleaning procedures,
and additional statistics, please refer to [
          <xref ref-type="bibr" rid="ref4 ref5">4, 5</xref>
          ].
        </p>
        <p>iDPP@CLEF 2023 dataset extends the previous version by providing participants with
environmental data. Furthermore, due to its release at the end of iDPP@CLEF 2022, the ground
truth is available to the challenge participants since the beginning of the challenge.
4.3.1. Updates over iDPP@CLEF 2022
In the 2023 version of the dataset, a small subset of patients (less than 50) has been removed from
the dataset used for iDPP@CLEF 2022. Indeed, such patients were characterized by the absence
of relevant events (i.e., NIV, PEG or death), but did not receive further ALSFRS-R assessments
after the first. Therefore, such patients were annotated with the censoring event happening
at time 0 making it impossible to provide a sensible prediction. Such patients were removed
from the 2023 version of the iDPP@CLEF ALS dataset. Table 6 reports the number of removed
patients compared to the original iDPP@CLEF ALS dataset. Notice that, by construction, all the
removed patients were labelled with event NONE. Spirometries and ALSFRS-R questionnaires
associated with dropped patients have been removed as well.
4.3.2. Environmental Data
One of the primary objectives of iDPP@CLEF 2023 is to promote research on the influence of
environmental factors on the progression of ALS disease. Task 3, which specifically focuses on
this aspect, requires participants to submit position papers investigating the impact of exposure
to pollutants.</p>
        <p>Variable
sex
ethnicity</p>
        <p>To address this objective, the iDPP@CLEF 2022 datasets have been expanded to include
information about patients’ exposure to environmental agents. This includes various environmental
factors such as daily mean, minimum, and maximum temperatures, daily precipitation, daily
averaged sea level pressure and relative humidity, daily mean wind speed, and daily mean
global radiation. Additionally, the iDPP@CLEF 2023 ALS datasets also provide information on
the concentration of seven pollutants: PM<sub>10</sub>, PM<sub>2.5</sub>, O<sub>3</sub>, C<sub>6</sub>H<sub>6</sub>, CO, SO<sub>2</sub>, and NO<sub>2</sub>. For each
environmental parameter, both the raw observations collected each day and the calibrated
version of the observations, following best practices [10, 11], are made available.</p>
        <p>It is important to note that not all patients have the same amount of environmental information
due to varying diagnosis times and data availability. Several patients could not be associated
with environmental data, as their disease progression occurred before public environmental
data repositories were established. Approximately 20% of the iDPP@CLEF 2023 ALS datasets,
corresponding to 434 to 574 patients, are linked to environmental data.</p>
        <p>Considering that the impact of environmental factors may occur well before the diagnosis,
we include the maximum amount of available information before Time 0 for all patients with
historical records. Depending on the patient, this corresponds to a maximum of 4 to 6 years of
data. However, no more than 6 months of data after Time 0 are considered. If a patient has
more than 180 days of information after the first ALSFRS-R assessment, the subsequent days
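are excluded from the released dataset.</p>
        <table-wrap>
          <label>Table 6</label>
          <caption>
            <p>Patients removed from the iDPP@CLEF ALS dataset 2023 due to having an unrealistic censoring event
time. Between parentheses, the original number of patients available in the dataset.</p>
          </caption>
          <table>
            <thead>
              <tr>
                <th />
                <th>Dataset ALSa</th>
                <th>Dataset ALSb</th>
                <th>Dataset ALSc</th>
              </tr>
            </thead>
            <tbody>
              <tr>
                <td>Train</td>
                <td>22 (orig. 1454)</td>
                <td>36 (orig. 1715)</td>
                <td>40 (orig. 1756)</td>
              </tr>
              <tr>
                <td>Test</td>
                <td>4 (orig. 350)</td>
                <td>8 (orig. 430)</td>
                <td>8 (orig. 494)</td>
              </tr>
              <tr>
                <td>Total</td>
                <td>26 (orig. 1806)</td>
                <td>44 (orig. 2145)</td>
                <td>48 (orig. 2250)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>Test
Train
Train</p>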
        <p>Test
50
40
s
t
ien30
t
a
p20
n
10
0
60
s
t
ien40
t
a
p
n20
0
20
ts15
n
e
i
t
a10
p
n
5
0
30
s
t
n20
e
i
t
a
p
n10
0
60
s
t
n
ie40
t
a
p
n
20
0
30
ts20
n
e
i
t
a
p
n10
0
0 0.5k 1k 1.5k 2k 2.5k
n observations
0 0.5k 1k 1.5k 2k 2.5k
n observations
0 0.5k 1k 1.5k 2k 2.5k
n observations
0 0.5k 1k 1.5k 2k 2.5k
n observations
(a) Dataset ALSa
Train</p>
        <p>Test</p>
        <p>(b) Dataset ALSb
Dataset ALSa
Train Test</p>
        <p>Dataset ALSb
Train Test</p>
        <p>Dataset ALSc</p>
        <p>Train Test
n. patients 356
n. obs. (q3) 318
n. obs. (median) 588
n. obs. (q1) 911
n. obs. (mean) 732
(d) Number of patients with at least one
environmental observation and statistics on the number
of observations per patient.
0 0.5k 1k 1.5k 2k 2.5k
n observations
0 0.5k 1k 1.5k 2k 2.5k</p>
        <p>n observations
(c) Dataset ALSc
number of records of environmental observations available. It is possible to observe that on
average, on the training set, there are 732, 799 and 856 days of observations in the case of
Datasets ALSa, ALSb, and ALSc, respectively. Patients within the test set have slightly lower
numbers of records.</p>
        <p>Figure 2 shows the proportion of patients (among those with environmental data) having
observations for a given day in (their) history. For example, it is possible to observe that roughly
80% of the patients have a record of their Time 0; this number grows to approximately 95%
if we consider Time 180, the last day for which we release information. Going back in
time, we observe that, for roughly 40% of the patients, we have at least 2 years (Time -730) of
information before their Time 0.</p>
        <p>1.0
its0.8
n
e
tap0.6
f
o
n
ito0.4
r
o
p
o
r
p0.2</p>
      </sec>
    </sec>
    <sec id="sec-5">
      <title>5. Lab Setup and Participation</title>
      <p>In the remainder of this section, we detail the guidelines the participants had to comply with to
submit their runs and the submissions received by iDPP@CLEF. We start by describing
the guidelines provided to participating teams.</p>
      <sec id="sec-5-1">
        <title>5.1. Guidelines</title>
        <p>• The runs should be submitted in the textual format described below;
• Each group can submit a maximum of 10 runs for each subtask, thus amounting to
a maximum of 20 runs for each of Task 1 and Task 2 and 30 runs for Task 3.</p>
      </sec>
      <sec id="sec-5-2">
        <title>Task 1 Run Format</title>
        <p>Runs should be submitted as a text file (.txt) with the following format:
100619256189067386770484450960632124211 0.897 upd_T1a_survRF
101600333961427115125266345521826407539 0.773 upd_T1a_survRF
102874795308599532461878597137083911508 0.773 upd_T1a_survRF
123988288044597922158182615705447150224 0.615 upd_T1a_survRF
100381996772220382021070974955176218231 0.317 upd_T1a_survRF
...
• Columns are separated by a white space;
• The first column is the patient ID, a hashed version of the original patient ID (should
be considered just as a string);</p>
        <p>It is important to include all the columns and have a white space delimiter between the
columns. No specific ordering is expected among patients (rows) in the submission file.</p>
      </sec>
      <sec id="sec-5-3">
        <title>Task 2 Run Format</title>
        <p>
          Runs should be submitted as a text file (.txt) with the following format:
• Columns are separated by a white space;
• The first column is the patient ID, a hashed version of the original patient ID (should
be considered just as a string);
• The second column is the cumulative probability of worsening between years 0 and 2. It
is expected to be a floating point number in the range [0, 1].
• The third column is the cumulative probability of worsening between years 0 and 4. It is
expected to be a floating point number in the range [0, 1].
• The fourth column is the cumulative probability of worsening between years 0 and 6. It
is expected to be a floating point number in the range [0, 1].
• The fifth column is the cumulative probability of worsening between years 0 and 8. It is
expected to be a floating point number in the range [0, 1].
• The sixth column is the cumulative probability of worsening between years 0 and 10. It is
expected to be a floating point number in the range [0, 1].
• The seventh column is the run identifier, according to the format described below. It must
uniquely identify the participating team and the submitted run.
        </p>
        <p>It is important to include all the columns and have a white space delimiter between the
columns. No specific ordering is expected among patients (rows) in the submission file.</p>
      </sec>
      <sec id="sec-5-4">
        <title>Task 3 Run Format</title>
        <p>Runs should be submitted as a text file (.txt) with the following format:
0x4bed50627d141453da7499a7f6ae84ab 0.897 upd_T3a_EW6_survRF
0x4d0e8370abe97d0fdedbded6787ebcfc 0.773 upd_T3a_EW6_survRF
0x5bbf2927feefd8617b58b5005f75fc0d 0.773 upd_T3a_EW6_survRF
0x814ec836b32264453c04bb989f7825d4 0.615 upd_T3a_EW6_survRF
0x71dabb094f55fab5fc719e348dffc85x 0.317 upd_T3a_EW6_survRF
...
• Columns are separated by a white space;
• The first column is the patient ID, a 128 bit hex number (should be considered just as
a string);</p>
        <p>It is important to include all the columns and have a white space delimiter between the columns.
No specific ordering is expected among patients (rows) in the submission file. Since different
time windows may be considered, participants are allowed to submit predictions for a variable
number of patients. We encourage participants to submit predictions for as many patients
as possible. To avoid favoring runs that consider only a few patients, submitted runs will be
evaluated based on their correctness as well as the number of patients included. The number of
patients included is also reported in the output of the evaluation scripts.</p>
      </sec>
      <sec id="sec-5-5">
        <title>Submission Upload</title>
        <p>Runs should be uploaded in the repository provided by the organizers. Following the repository
structure discussed above, for example, a run submitted for the first task should be included in
submission/task1.</p>
        <p>Runs should be uploaded using the following name convention for their identifiers:
&lt;teamname&gt;_T&lt;1|2|3&gt;&lt;a|b|c&gt;_[type_]&lt;freefield&gt;
where:
• teamname is the name of the participating team;
• T&lt;1|2|3&gt;&lt;a|b|c&gt; is the identifier of the task the run is submitted to, e.g. T1b for Task 1,
subtask b;
• type describes the type of run only in the case of Task 3 (it can be omitted for Tasks
1 and 2). It should be one among:
– base for a baseline run;
– EW6 when using environmental data in a time window of 6 months before and after
Time 0;
– EWP when using environmental data in a time window chosen by the participant; in
this case it is suggested to use freefield to provide information about the adopted
time window;
• freefield is a free field that participants can use as they prefer to further distinguish
among their runs. Please, keep it short and informative.</p>
        <p>For example, a complete run identifier may look like:
upd_T3a_EW6_survRF
• upd is the University of Padua team;
• T3a means that the run is submitted for Task 3, subtask a;
• EW6 means that environmental data in a time window of 6 months before and after Time
0 have been used;
• survRF suggests that participants have used survival random forests as a prediction
method.</p>
        <p>The name of the text file containing the run must be the identifier of the run followed by the
.txt extension. In the above example:
upd_T3a_EW6_survRF.txt</p>
      </sec>
      <sec id="sec-5-6">
        <title>Run Scores</title>
        <p>Performance scores for the submitted runs will be returned by the organizers in the score
folder, which follows the same structure as the submission folder.</p>
        <p>For each submitted run, participants will find a file named
&lt;teamname&gt;_T&lt;1|2|3&gt;&lt;a|b|c&gt;_[type_]&lt;freefield&gt;.score.txt,
where &lt;teamname&gt;_T&lt;1|2|3&gt;&lt;a|b|c&gt;_[type_]&lt;freefield&gt; matches the corresponding
run. The file will contain performance scores for each of the evaluation measures described
below. In the above example:
upd_T3a_EW6_survRF.score.txt</p>
      </sec>
      <sec id="sec-5-7">
        <title>5.2. Participants</title>
        <p>Overall, 45 teams registered for participating in iDPP@CLEF but only 10 of them actually
managed to submit runs for at least one of the offered tasks. Table 7 reports the details about
the participating teams.</p>
        <p>Table 8 provides a breakdown of the number of runs submitted by each participant for each
task and sub-task. Overall, we have received 163 runs with a prevalence of submissions for Task
1 (76 runs), followed by Task 2 (48 runs), and lastly, Task 3 (49 runs).</p>
      </sec>
    </sec>
    <sec id="sec-6">
      <title>6. Evaluation Measures</title>
      <p>iDPP@CLEF adopted several state-of-the-art evaluation measures to assess the performance of
the prediction algorithms, among which:</p>
      <p>• Area Under the ROC Curve (AUC) [22] to show the trade-off between clinical sensitivity
and specificity for every possible cut-off of the risk scores;
• Harrell’s Concordance Index (C-index) [23] to summarize how well a predicted risk score
describes an observed sequence of events;
• O/E ratio to assess whether or not the observed event rates match expected event rates in
subgroups of the model population.</p>
      <p>To ease the computation and reproducibility of the results, scripts for computing the measures
are available in the following repository:
https://bitbucket.org/brainteaser-health/idpp2023-performance-computation.</p>
      <sec id="sec-6-1">
        <title>6.1. Task 1: Measures to evaluate the Prediction of the Risk of Disease Worsening (MS)</title>
        <p>For Task 1, the effectiveness of the submitted runs is evaluated using Harrell’s Concordance
Index (C-index) [23]. This score quantifies the model’s ability in ranking pairs of observations
based on their predicted outcomes. A C-index value of 1 indicates perfect concordance, meaning
the model can accurately distinguish between higher and lower-risk individuals. Conversely, a
value of 0.5 suggests random guessing, while values below 0.5 indicate a counter-correlation.
6.2. Task 2: Measures to evaluate the Prediction of the Cumulative Probability
of Worsening (MS)
The effectiveness of the submitted runs is evaluated with the following measures:
• Area Under the ROC curve (AUROC) at each of the time intervals (0-2, 0-4, 0-6, 0-8, 0-10
years);
• O/E Ratio: the ratio of observed to expected events at each of the time intervals (0-2, 0-4,
0-6, 0-8, 0-10 years).</p>
        <p>The Receiver Operating Characteristic (ROC) curve is a graphical representation of the model’s
true positive rate (sensitivity) against the false positive rate (1 - specificity) at different
classification thresholds. The AUROC ranges from 0 to 1, where a value of 1 indicates a perfect
model that can accurately distinguish between individuals who will experience worsening and
those who will not. An AUROC value of 0.5 suggests a model that performs no better than
random chance. Therefore, a higher AUROC reflects a better ability of the model to discriminate
between different outcomes.</p>
        <p>The O/E (Observed-to-Expected) ratio provides a measure of calibration for the model’s
predictions. It compares the actual number of observed worsening events to the number of
events expected based on the model’s predictions. Ideally, the O/E ratio should be close to 1,
indicating good calibration and alignment between predicted and observed outcomes. A ratio
significantly above 1 means that more events were observed than expected, i.e., the model
underestimates the number of worsening events, while a ratio below 1 indicates an
overestimation. Monitoring the O/E ratio at each time interval
allows for assessing the model’s calibration performance over time.</p>
        <p>To compute the AUROC and O/E Ratio, we applied censoring to the ground truth values
using the following schema. Let A, B, C, and D be four subjects, where:
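• A experienced the outcome at <inline-formula><tex-math>t_A</tex-math></inline-formula>;
• B was censored at <inline-formula><tex-math>t_B</tex-math></inline-formula>;
• C experienced the outcome at <inline-formula><tex-math>t_3</tex-math></inline-formula>;
• D was censored at <inline-formula><tex-math>t_3</tex-math></inline-formula>.</p>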
        <sec id="sec-6-2-1">
          <title>The scenario is represented in Figure 3.</title>
          <p>Table 9 reports the outcome occurrence label and outcome time for each possible scenario of
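censoring time, which we refer to as <inline-formula><tex-math>t_1</tex-math></inline-formula>, <inline-formula><tex-math>t_2</tex-math></inline-formula>, and <inline-formula><tex-math>t_3</tex-math></inline-formula>. When <inline-formula><tex-math>t_1</tex-math></inline-formula> is considered as censoring time,
all four example subjects have yet to experience the event or be censored; as a result, their
outcome occurrence label at this time is set to 0 as shown in the first column of Table 9. When
<inline-formula><tex-math>t_2</tex-math></inline-formula> is considered to perform censoring (second column of Table 9), instead, only subjects C and
D have yet to experience either the event or the censoring, and their outcome label is then set
to 0. In this scenario, subject A had the event before <inline-formula><tex-math>t_2</tex-math></inline-formula> and its outcome label is then set to
1. Subject B was censored before <inline-formula><tex-math>t_2</tex-math></inline-formula> and, as its outcome at this time is unknown, it must be
excluded from performance evaluation. Finally, when <inline-formula><tex-math>t_3</tex-math></inline-formula> is considered to perform censoring
(third column of Table 9), outcome labels of subjects A and B are equal to those considered
for <inline-formula><tex-math>t_2</tex-math></inline-formula> since their situation at this time is unchanged compared to the previous one. However,
subject C experienced the event at <inline-formula><tex-math>t_3</tex-math></inline-formula> and now its outcome label must be set to 1 and subject D
was censored at <inline-formula><tex-math>t_3</tex-math></inline-formula> and its outcome label is then set to 0.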
6.3. Task 3: Measures to evaluate the Impact of Exposition to Pollutants (ALS)
A
B
C
D
A
B
C
D
t1
tA
t2</p>
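          <p>t3</p>
          <table-wrap>
            <label>Table 9</label>
            <table>
              <thead>
                <tr>
                  <th rowspan="2">Subject</th>
                  <th colspan="2">Censoring at <inline-formula><tex-math>t_1</tex-math></inline-formula></th>
                  <th colspan="2">Censoring at <inline-formula><tex-math>t_2</tex-math></inline-formula></th>
                  <th colspan="2">Censoring at <inline-formula><tex-math>t_3</tex-math></inline-formula></th>
                </tr>
                <tr>
                  <th>outcome time</th>
                  <th>outcome occurred</th>
                  <th>outcome time</th>
                  <th>outcome occurred</th>
                  <th>outcome time</th>
                  <th>outcome occurred</th>
                </tr>
              </thead>
              <tbody>
                <tr>
                  <td>A</td>
                  <td><inline-formula><tex-math>t_1</tex-math></inline-formula></td>
                  <td>0</td>
                  <td><inline-formula><tex-math>t_A</tex-math></inline-formula></td>
                  <td>1</td>
                  <td><inline-formula><tex-math>t_A</tex-math></inline-formula></td>
                  <td>1</td>
                </tr>
                <tr>
                  <td>B</td>
                  <td><inline-formula><tex-math>t_1</tex-math></inline-formula></td>
                  <td>0</td>
                  <td>NA</td>
                  <td>NA*</td>
                  <td>NA</td>
                  <td>NA*</td>
                </tr>
                <tr>
                  <td>C</td>
                  <td><inline-formula><tex-math>t_1</tex-math></inline-formula></td>
                  <td>0</td>
                  <td><inline-formula><tex-math>t_2</tex-math></inline-formula></td>
                  <td>0</td>
                  <td><inline-formula><tex-math>t_3</tex-math></inline-formula></td>
                  <td>1</td>
                </tr>
                <tr>
                  <td>D</td>
                  <td><inline-formula><tex-math>t_1</tex-math></inline-formula></td>
                  <td>0</td>
                  <td><inline-formula><tex-math>t_2</tex-math></inline-formula></td>
                  <td>0</td>
                  <td><inline-formula><tex-math>t_3</tex-math></inline-formula></td>
                  <td>0</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>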
          <p>• AUROC: the area under the receiver operating characteristic curve at each of the time
intervals (6, 12, 18, 24, 30, 36 months);
• C-index.</p>
        </sec>
      </sec>
    </sec>
    <sec id="sec-7">
      <title>7. Results</title>
      <p>For each task, we report the analysis of the performance of the runs submitted by the Lab’s
participants according to the measures described in Section 6.</p>
      <sec id="sec-7-1">
        <title>7.1. Task 1: Predicting Risk of Disease Worsening (Multiple Sclerosis)</title>
        <p>Figure 4 shows the C-index with its 95% confidence intervals computed for all runs submitted
for Task 1 sub-task a and for the random classifier (last row). Discrimination performance varies
across the different submitted runs ranging from 0.4 to above 0.8. Runs submitted by the UWB
team [21] lead the pack (C-index &gt; 0.8), followed by CompBioMed (CBMUniTO) [12], and
FCOOL [14]. The best-performing approach for UWB, FCOOL, and SisInfLab_AIBio [20]
is Survival Random Forests. CompBioMed [12], HULAT [15], and SBB [18] achieve the best
performance with Cox regression and CoxNets.</p>
        <p>Figure 5 shows the C-index with its 95% confidence intervals computed for all runs submitted
for Task 1 sub-task b and for the random classifier (last row). Also for this sub-task discrimination
performance varies across the different submitted runs ranging from 0.4 to above 0.7. Runs
submitted by the FCOOL team [14] lead the pack (C-index ∼ 0.7), followed by CompBioMed
(CBMUniTO) [12], and UWB [21]. The best-performing approach for FCOOL is a survival
SVM. CompBioMed [12] and SBB [18] achieve the best performance with Cox regression and
CoxNets. Other methodologic approaches such as gradient boosting or survival random forest
show lower performance in this sub-task.</p>
        <p>Model performance was overall lower in sub-task b with respect to sub-task a. This
observation suggests that, from a model-based perspective and with the available data, the prediction
of the crossing of an EDSS threshold (EDSS=3 in this study) may be simpler than the prediction
of the worsening of the disease as defined by medical guidelines.</p>
      </sec>
      <sec id="sec-7-2">
        <title>7.2. Task 2: Predicting Cumulative Probability of Worsening (Multiple Sclerosis)</title>
        <p>Appendix A presents the AUROC and the O/E ratios, along with their 95% confidence intervals,
computed for all runs submitted for task 2. Specifically, Tables 10, 11, 12, 13, and 14 refer to
sub-task a at two, four, six, eight, and ten years, respectively. Tables 15, 16, 17, 18, and 19 report
the results for sub-task b at two, four, six, eight, and ten years, respectively. In the following
paragraph, a short analysis of the obtained performance is reported.
7.2.1. Sub-task a
Table 10 presents the AUROC and O/E ratio values for a two-year time window. In this time
span, the run identified as uwb_T2a_survRFmri achieved the highest AUROC value (0.924).
The best O/E ratio of 0.946 is obtained by uwb_T2a_survGB_minVal, indicating a good balance
between observed and expected events.</p>
        <p>Table 11 shows the performance measures for the same runs but with a four-year time window.
Also in this case, uwb_T2a_survRFmri obtains the best AUROC score of 0.907. Regarding the O/E
ratio, sisinflab-aibio_T2a_RF2 demonstrates the best balance between observed and expected
events with a value of 0.927.</p>
        <p>Table 12 displays the performance over a six-year time span. HULATUC3M_T2a_survcoxnet
achieves the highest AUROC score of 0.938, while uhu-etsi-1_T2a_04 (0.825) has the best O/E
ratio.</p>
        <p>Table 13 provides the performance measures at eight years. HULATUC3M_T2a_survcoxnet
reaches the highest AUROC value of 0.859. In terms of the O/E ratio, uhu-etsi-1_T2a_04 (0.900)
achieves the best balance between observed and expected events.</p>
        <p>Table 14 reports the performance on the longest time span considered, i.e., at ten years. In
this scenario, uwb_T2a_survRFmri (0.839) demonstrates the highest AUROC value among the
submitted runs. The identifier with the highest O/E ratio is uhu-etsi-1_T2a_05 (0.816), indicating
good calibration.</p>
        <p>In Sub-task a, the identifier uwb_T2a_survRFmri consistently achieves the highest AUROC
values across multiple time windows, indicating strong predictive performance. Notably,
HULATUC3M_T2a_survcoxnet also demonstrates good AUROC scores in longer time spans.</p>
        <p>When considering the balance between observed and expected events (O/E ratio), the
identifiers uwb_T2a_survGB_minVal and sisinflab-aibio_T2a_RF2 stand out by achieving good
equilibrium.
7.2.2. Sub-task b
Tables 15 and onwards present the AUROC and O/E ratio values for all submissions in Task 2,
sub-task b.</p>
        <p>Within the two-year time frame, the run denoted as CBMUniTO_T2b_coxnet (0.676) achieved
the highest AUROC value. The best O/E ratio, equal to 1.019, is obtained by
HULATUC3M_T2b_survRF, signifying a favourable balance between observed and expected
events.</p>
        <p>Table 16 showcases the performance with a four-year time window. In this case,
sisinflab-aibio_T2b_GB2 achieves the highest AUROC score of 0.639. Regarding the O/E ratio,
sisinflab-aibio_T2b_RF2 maintains the optimal balance between observed and expected events, with a
value of 1.005.</p>
        <p>Table 17 displays the performance over a six-year time span. CBMUniTO_T2b_coxnet attains
the highest AUROC score of 0.635, while uhu-etsi-1_T2b_03 (0.985) demonstrates the best O/E
ratio.</p>
        <p>Table 18 provides the performance measures at eight years. CBMUniTO_T2b_cwgbsa achieves
the highest AUROC value of 0.673. In terms of the O/E ratio, uhu-etsi-1_T2b_03 (1.001) achieves
the most desirable balance between observed and expected events.</p>
        <p>Table 19 reports the performance over the longest time span considered, i.e., ten years. In this
scenario, CBMUniTO_T2b_cwgbsa (0.709) demonstrates the highest AUROC value among the
submitted runs. The identifier with the highest O/E ratio is uhu-etsi-1_T2b_03 (1.054), indicating
good calibration.</p>
        <p>In Sub-task b, the identifier CBMUniTO_T2b_coxnet consistently achieves the highest AUROC
values across multiple time windows, indicating its effectiveness in prediction. Additionally,
CBMUniTO_T2b_cwgbsa demonstrates strong AUROC scores in eight and ten years time spans.</p>
        <p>Regarding the O/E ratio, the runs identified as HULATUC3M_T2b_survRF and
uhu-etsi-1_T2b_03 exhibit a favourable balance between observed and expected events in the considered
time windows.</p>
        <p>Figure 6 shows the C-index and 95% confidence intervals achieved on Task 3 sub-task a by the
submitted runs and for the random classifier (last row). As observed by Karray [16] and Branco
et al. [13] runs including environmental data (runs tagged with EWP and EW6) tend to perform
worse than their counterpart that does not rely on the environmental data. The best-performing
approach is provided by the NeuroTN team [16] and corresponds to the classifier ensemble (see
subsection 7.4).</p>
        <p>neurotn_T3a_base_ClassifEnsemble
neurotn_T3a_base_survRFOpt
neurotn_T3a_EW6_survRFOpt
neurotn_T3a_EW6_ClassifEnsemble
fcool_T3a_base_GradientBoostingSurvivalAnalysis
fcool_T3a_base_RandomSurvivalForest
fcool_T3a_base_FastSurvivalSVM
fcool_T3a_EWP_CoxPHSurvivalAnalysis
fcool_T3a_EW6_CoxPHSurvivalAnalysis
fcool_T3a_EW6_GradientBoostingSurvivalAnalysis
fcool_T3a_EWP_GradientBoostingSurvivalAnalysis
fcool_T3a_EW6_RandomSurvivalForest
fcool_T3a_EWP_RandomSurvivalForest
random_classifier
0.45 0.475 0.5 0.525 0.55 0.575 0.6 0.625 0.65 0.675 0.7 0.725 0.75 0.775 0.8</p>
        <p>Figure 7 shows the C-index and 95% confidence intervals achieved on Task 3 sub-task b by
the submitted runs and for the random classifier (last row). In this sub-task only runs including
environmental data (runs tagged with EWP and EW6) of FCOOL [13] tend to perform worse than
their counterpart that does not rely on the environmental data. Instead, the best-performing
approach is provided by the NeuroTN team [16] and corresponds to a survival random forest
trained on EW6 data.</p>
        <p>Similarly to sub-task a runs including environmental data (EWP, EW6) submitted by both
participating teams (FCOOL, NeuroTN) tend to perform worse than their counterpart that does
not rely on the environmental data. The best-performing approach is once more provided by
the NeuroTN team [16] and corresponds to a survival random forest.</p>
      </sec>
      <sec id="sec-7-4">
        <title>7.4. Approaches</title>
        <p>In this section, we provide a short summary of the approaches adopted by participants in
iDPP@CLEF. There are two separate sub-sections, one for Task 1 and 2 – focused on MS
worsening prediction – and one for Task 3 – which concerns the impact of exposition to
pollutants on the ALS progression.
0.475 0.5 0.525 0.55 0.575 0.6 0.625 0.65 0.675 0.7 0.725 0.75 0.775 0.8 0.825
Tasks 1 and 2
CompBioMed [12] experiments with CoxNet, Component-wise Gradient Boosting Survival
Analysis (CWGBSA), and a hybrid method where the most important features selected by
CWGBSA are used to build a CoxNet model (EvilCox). They also test non-linear methods such
as Random Survival Forest and Gradient Boosting Survival Analysis, observing a tendency to
overfit the training data. To assess the importance of the features, Rossi et al. [12] perform
Permutation-based Feature Importance Analysis. In general, they observe that CoxNet is the
best-performing approach for all tasks and subtasks. Nevertheless, they also observed that
CWGBSA is resistant to over-fitting and aggressive in eliminating features. The cross-validated
performance of CWGBSA is almost on par with that of CoxNet, despite using a smaller set of
features.</p>
        <p>FCOOL [14] explores several survival prediction methods to rank MS patients according to
the risk of worsening. The considered methods are Random Survival Forest, Gradient Boosting,
Fast Survival SVM, Fast Kernel Survival SVM, and the Cox Proportional-Hazards model. A data
preprocessing phase is conducted prior to training to manage the temporal nature of patient
data by choosing relevant features and by computing additional ones – which capture the
temporal progression of the disease. Overall, Random Survival Forest performs best on subtask
1a, whereas Fast Kernel Survival SVM on subtask 1b. Subtask 1b was found to be more complex
because of the different definition of the worsening event.</p>
        <p>HULAT [15] investigates the effectiveness of Random Survival Forest and Cox regression
with Elastic Net regularization (CoxNet) methods on MS worsening prediction. As well as
other groups, Ramos et al. [15] perform a data preprocessing phase involving data cleaning,
format transformation, normalization, and outliers removal. In particular, the preprocessing
step removes all the dynamic features containing a high number of missing values.</p>
        <p>Onto-Med [17] develops a Maximum Likelihood Estimation approach to predict MS progression.
The proposed method relies on patients’ covariates and employs a multi-layer perceptron to
approximate the optimal distribution parameters. To handle both tasks, Asamov et al. [17] used
the whole training data to build a model and estimate a maximum likelihood distribution for
each patient given their features. The method uses a cumulative probability estimate instead of
coherent risk measures to accommodate the requirements of both tasks.</p>
        <p>SBB [18] develops different machine-learning approaches to predict a worsening in patient
disability caused by MS. Specifically, they consider the following well-known survival analysis
approaches: Cox model, random survival forests, and survival support vector machine. They conclude
that these approaches achieve modest performance and that employing non-linear methods does
not lead to a discernible advantage with respect to the gold standard Cox model. Nonetheless,
they observe that improving data pre-processing may be a key operation to perform in order
to obtain more relevant input features and augment model discrimination with the aim of
obtaining satisfactory results.</p>
        <p>Stefagroup [19] explores two post-hoc model-agnostic XAI methods, namely SHAP and
AraucanaXAI, to provide insights about the most predictive factors of worsening in MS patients.
Buonocore et al. [19] evaluate the proposed XAI approaches using commonly adopted measures
in XAI for healthcare such as identity, fidelity, separability and time. By leveraging SHAP and
AraucanaXAI, the authors gained a deeper understanding of the shortcomings and limitations
of their classifiers through feature importance and navigable decision trees.</p>
        <p>SisInfLab_AIBio [20] uses Random Survival Forests, an extension of random forests
specifically designed for survival analysis, and Boosting Machines for time-to-event analysis. To
assess the importance of features for both ML models, the permutation feature importance
is computed as well. Lombardi et al. [20] observe that, if the definition of worsening is more
complex and condition-dependent (tasks 1b and 2b), their approach performs significantly
worse than with a simpler definition of worsening (tasks 1a and 2a).</p>
        <p>UWB [21] evaluates various ML methods – such as Random Forest and Gradient Boosting – for
survival analysis, as well as a Deep Learning survival analysis method based on the Transformer
architecture: SurvTRACE. Among the different methods, the authors report top performance
with Random Forest. Hanzl and Picek [21] observe that three aspects are instrumental to
achieving good performance: (i) data preprocessing, (ii) hyper-parameter tuning, and (iii)
validation.</p>
        <p>Task 3
FCOOL [13] investigates four models to assess the importance of environmental data in
predicting the risk of early occurrence of NIV, PEG or death: Cox Proportional-Hazards, Random
Survival Forest, Survival SVM, and Gradient Boosting. Without the introduction of
environmental data, the models perform reasonably well. Nevertheless, Branco et al. [13] observe an
evident degradation in performance when providing the model with environmental and clinical
data in all three tasks. For task A, they observe an even larger degradation when unconstrained
amounts of environmental data are provided, compared to what was observed with only 6
months of data. This pattern does not hold for Tasks B and C, where the amount of data does not
harm the results, which are, in any case, lower than what was observed without environmental
data.</p>
        <p>NeuroTN [16] proposes an approach to stratify patients relying on the disease progression
patterns according to features extracted from applying staging systems on visits data. Clusters
of patients are then profiled to determine their common characteristics: clinical, demographic
and environmental. A second clustering procedure is carried out to detect clusters of patients
with similar exposure concentrations to 3 different air pollutants. Then, Karray [16] performs
risk prediction on each cluster separately and combines the predictions. In particular, Karray
[16] relies on two ensembles of classifiers trained on different data representations (data with
Environmental Features and data without Environmental Features). Furthermore, they also
explored Survival Random Forests. As for Branco et al. [13], the introduction of environmental
features does not seem to benefit either model and causes performance deterioration.</p>
      </sec>
    </sec>
    <sec id="sec-8">
      <title>8. Conclusions and Future Work</title>
      <p>The second iteration of iDPP@CLEF focuses on predicting the temporal progression of MS and
ALS. In particular, iDPP@CLEF 2023 comprises three tasks. The first two tasks concern MS and
participants were provided clinical data and had the objective of predicting the risk of worsening.
The third task centres around ALS and builds upon the foundation laid by iDPP@CLEF 2022.
This task follows a similar design, involving the prediction of NIV, PEG, or death, but with the
addition of environmental data to explore the impact of pollutant exposure on the progression
of ALS.</p>
      <p>We developed 5 datasets, two for MS and three for ALS, based on the anonymized data
provided by three medical institutions in Turin, Lisbon, and Pavia. Out of 45 registered participants,
10 managed to submit a total of 163 runs with a prevalence of submissions for Tasks 1 and 2.
Participants adopted a range of approaches, such as Survival Random Forests and Coxnets.</p>
      <p>The next iteration of iDPP@CLEF will maintain its dual focus on both ALS and MS. We will
extend the amount of available information by also considering time series concerning patients’
vital parameters produced by wearable devices.</p>
      <sec id="sec-8-1">
        <title>Acknowledgments</title>
        <p>The work reported in this paper has been partially supported by the BRAINTEASER<sup>6</sup> project
(contract n. GA101017598), as a part of the European Union’s Horizon 2020 research and
innovation programme.
[7] P. E. McKight, J. Najab, Kruskal-Wallis test, The Corsini Encyclopedia of Psychology (2010)
1–1.
[8] R. J. Tallarida, R. B. Murray, Chi-square test, Manual of
pharmacologic calculations: with computer programs (1987) 140–142.
[9] J. T. Rich, J. G. Neely, R. C. Paniello, C. C. Voelker, B. Nussenbaum, E. W. Wang, A practical
guide to understanding Kaplan-Meier curves, Otolaryngology—Head and Neck Surgery
143 (2010) 331–336.
[10] M. Vogt, P. Schneider, N. Castell, P. Hamer, Assessment of low-cost particulate matter
sensor systems against optical and gravimetric methods in a field co-location in Norway,
Atmosphere 12 (2021) 961. doi:10.3390/atmos12080961.
[11] D. H. Hagan, G. Isaacman-VanWertz, J. P. Franklin, L. M. M. Wallace, B. D. Kocar, C. L.
Heald, J. H. Kroll, Calibration and assessment of electrochemical air quality sensors by
co-location with regulatory-grade instruments, Atmospheric Measurement Techniques 11
(2018) 315–328. doi:10.5194/amt-11-315-2018.
[12] I. Rossi, G. Birolo, P. Fariselli, idpp@clef 2023 results from dsm-compbio unito, in:
M. Aliannejadi, G. Faggioli, N. Ferro, M. Vlachos (Eds.), CLEF 2023 Working Notes, 2023.
[13] R. Branco, D. Soares, A. Martins, J. Valente, E. Castanho, S. Madeira, H. Aidos, Investigating
the impact of environmental data on als prognosis with survival analysis, in: M. Aliannejadi,
G. Faggioli, N. Ferro, M. Vlachos (Eds.), CLEF 2023 Working Notes, 2023.
[14] R. Branco, J. Valente, A. Martins, D. Soares, E. Castanho, S. Madeira, H. Aidos, Survival
analysis for multiple sclerosis: Predicting risk of disease worsening, in: M. Aliannejadi,
G. Faggioli, N. Ferro, M. Vlachos (Eds.), CLEF 2023 Working Notes, 2023.
[15] A. Ramos, P. Martínez, I. González-Carrasco, Hulat@iddp clef 2023: Intelligent prediction
of disease progression in multiple sclerosis patients, in: M. Aliannejadi, G. Faggioli,
N. Ferro, M. Vlachos (Eds.), CLEF 2023 Working Notes, 2023.
[16] M. Karray, Air pollution profiling through patient stratification: Study of als staging
systems usefulness in facilitating data-driven disease subtyping and discovery of hazardous
ambient air pollutants., in: M. Aliannejadi, G. Faggioli, N. Ferro, M. Vlachos (Eds.), CLEF
2023 Working Notes, 2023.
[17] T. Asamov, A. Aksenova, P. Ivanov, S. Boytcheva, D. Taskov, Maximum likelihood
estimation with deep learning for multiple sclerosis progression prediction, in: M. Aliannejadi,
G. Faggioli, N. Ferro, M. Vlachos (Eds.), CLEF 2023 Working Notes, 2023.
[18] A. Guazzo, I. Trescato, E. Longato, E. Tavazzi, M. Vettoretti, B. Camillo, Baseline machine
learning approaches to predict multiple sclerosis disease progression, in: M. Aliannejadi,
G. Faggioli, N. Ferro, M. Vlachos (Eds.), CLEF 2023 Working Notes, 2023.
[19] T. Buonocore, P. Bosoni, G. Nicora, M. Vazifehdan, R. Bellazzi, E. Parimbelli, A. Dagliati,
Predicting and explaining risk of disease worsening using temporal features in multiple
sclerosis notebook for the idpp lab on intelligent disease progression prediction at clef
2023, in: CLEF 2023 Working Notes, 2023.
[20] A. Lombardi, L. De Bonis, G. Fasano, A. Sportelli, T. Colafiglio, D. Lofù, P. Sorino, F.
Narducci, E. Di Sciascio, T. Di Noia, Time-to-event interpretable machine learning for multiple
sclerosis worsening prediction: Results from idpp@clef 2023, in: M. Aliannejadi, G.
Faggioli, N. Ferro, M. Vlachos (Eds.), CLEF 2023 Working Notes, 2023.
[21] M. Hanzl, L. Picek, Predicting risk of multiple sclerosis worsening, in: M. Aliannejadi,
G. Faggioli, N. Ferro, M. Vlachos (Eds.), CLEF 2023 Working Notes, 2023.
[22] J. A. Hanley, B. J. McNeil, The meaning and use of the area under a receiver operating
characteristic (roc) curve., Radiology 143 (1982) 29–36. PMID: 7063747.
[23] F. E. Harrell Jr., R. M. Califf, D. B. Pryor, K. L. Lee, R. A. Rosati, Evaluating the Yield of
Medical Tests, JAMA 247 (1982) 2543–2546.
sisinflab-aibio_T2a_GB1
sisinflab-aibio_T2a_GB2
sisinflab-aibio_T2a_GB3
sisinflab-aibio_T2a_RF1
sisinflab-aibio_T2a_RF2
uhu-etsi-1_T2a_03
uhu-etsi-1_T2a_04
uhu-etsi-1_T2a_05
uwb_T2a_CGBSA
uwb_T2a_survGB
uwb_T2a_survGB_minVal
uwb_T2a_survRF
uwb_T2a_survRFmri</p>
      </sec>
    </sec>
    <sec id="sec-9">
      <title>A. Task 2 results</title>
      <p>identifier</p>
      <p>identifier
CBMUniTO_T2a_coxnet
CBMUniTO_T2a_cwgbsa
CBMUniTO_T2a_evilcox
HULATUC3M_T2a_survcoxnet
HULATUC3M_T2a_survRF
AUROC</p>
      <p>O/E ratio
onto-med_T2a_0.01.1.0e-5.10000.100.adj 0.804 (0.600, 1.000) 0.228 (-0.068, 0.525)
onto-med_T2a_0.2.1.0e-5.10000.100 0.733 (0.522, 0.944) 0.360 (-0.012, 0.732)
onto-med_T2a_0.2.1.0e-5.10000.200 0.760 (0.540, 0.980) 0.316 (-0.033, 0.664)
onto-med_T2a_0.2.1.0e-5.5000.100 0.627 (0.426, 0.827) 0.487 (0.055, 0.920)
onto-med_T2a_0.2.1.0e-5.5000.200 0.622 (0.409, 0.835) 0.460 (0.040, 0.881)
sbb_T2a_Cox
sbb_T2a_RSF
sbb_T2a_SSVM
sisinflab-aibio_T2a_GB1
sisinflab-aibio_T2a_GB2
sisinflab-aibio_T2a_GB3
sisinflab-aibio_T2a_RF1
sisinflab-aibio_T2a_RF2
uhu-etsi-1_T2a_03
uhu-etsi-1_T2a_04
uhu-etsi-1_T2a_05
uwb_T2a_CGBSA
uwb_T2a_survGB
uwb_T2a_survGB_minVal
uwb_T2a_survRF
uwb_T2a_survRFmri</p>
      <p>identifier
CBMUniTO_T2a_coxnet
CBMUniTO_T2a_cwgbsa
CBMUniTO_T2a_evilcox
HULATUC3M_T2a_survcoxnet
HULATUC3M_T2a_survRF
AUROC</p>
      <p>O/E ratio
onto-med_T2a_0.01.1.0e-5.10000.100.adj 0.687 (0.495, 0.880) 0.284 (-0.005, 0.574)
onto-med_T2a_0.2.1.0e-5.10000.100 0.655 (0.451, 0.859) 0.352 (0.029, 0.675)
onto-med_T2a_0.2.1.0e-5.10000.200 0.702 (0.495, 0.909) 0.317 (0.011, 0.623)
onto-med_T2a_0.2.1.0e-5.5000.100 0.538 (0.351, 0.726) 0.469 (0.097, 0.842)
onto-med_T2a_0.2.1.0e-5.5000.200 0.558 (0.370, 0.746) 0.458 (0.090, 0.826)
sbb_T2a_Cox
sbb_T2a_RSF
sbb_T2a_SSVM
sisinflab-aibio_T2a_GB1
sisinflab-aibio_T2a_GB2
sisinflab-aibio_T2a_GB3
sisinflab-aibio_T2a_RF1
sisinflab-aibio_T2a_RF2
uhu-etsi-1_T2a_03
uhu-etsi-1_T2a_04
uhu-etsi-1_T2a_05
uwb_T2a_CGBSA
uwb_T2a_survGB
uwb_T2a_survGB_minVal
uwb_T2a_survRF
uwb_T2a_survRFmri</p>
      <p>identifier
CBMUniTO_T2a_coxnet
CBMUniTO_T2a_cwgbsa
CBMUniTO_T2a_evilcox
HULATUC3M_T2a_survcoxnet
HULATUC3M_T2a_survRF
AUROC</p>
      <p>O/E ratio
onto-med_T2a_0.01.1.0e-5.10000.100.adj 0.626 (0.446, 0.805) 0.38 (0.068, 0.692)
onto-med_T2a_0.2.1.0e-5.10000.100 0.636 (0.447, 0.825) 0.397 (0.078, 0.715)
onto-med_T2a_0.2.1.0e-5.10000.200 0.664 (0.477, 0.852) 0.366 (0.060, 0.671)
onto-med_T2a_0.2.1.0e-5.5000.100 0.538 (0.355, 0.722) 0.503 (0.144, 0.862)
onto-med_T2a_0.2.1.0e-5.5000.200 0.449 (0.267, 0.630) 0.499 (0.141, 0.856)
sbb_T2a_Cox
sbb_T2a_RSF
sbb_T2a_SSVM
sisinflab-aibio_T2a_GB1
sisinflab-aibio_T2a_GB2
sisinflab-aibio_T2a_GB3
sisinflab-aibio_T2a_RF1
sisinflab-aibio_T2a_RF2
uhu-etsi-1_T2a_03
uhu-etsi-1_T2a_04
uhu-etsi-1_T2a_05
uwb_T2a_CGBSA
uwb_T2a_survGB
uwb_T2a_survGB_minVal
uwb_T2a_survRF
uwb_T2a_survRFmri</p>
      <p>identifier
CBMUniTO_T2a_coxnet
CBMUniTO_T2a_cwgbsa
CBMUniTO_T2a_evilcox
HULATUC3M_T2a_survcoxnet
HULATUC3M_T2a_survRF
AUROC</p>
      <p>O/E ratio
onto-med_T2a_0.01.1.0e-5.10000.100.adj 0.631 (0.429, 0.834) 0.366 (0.078, 0.653)
onto-med_T2a_0.2.1.0e-5.10000.100 0.682 (0.490, 0.875) 0.383 (0.089, 0.677)
onto-med_T2a_0.2.1.0e-5.10000.200 0.702 (0.518, 0.886) 0.361 (0.075, 0.647)
onto-med_T2a_0.2.1.0e-5.5000.100 0.557 (0.344, 0.770) 0.465 (0.141, 0.789)
onto-med_T2a_0.2.1.0e-5.5000.200 0.404 (0.189, 0.618) 0.456 (0.135, 0.776)
sbb_T2a_Cox
sbb_T2a_RSF
sbb_T2a_SSVM
sisinflab-aibio_T2a_GB1
sisinflab-aibio_T2a_GB2
sisinflab-aibio_T2a_GB3
sisinflab-aibio_T2a_RF1
sisinflab-aibio_T2a_RF2
uhu-etsi-1_T2a_03
uhu-etsi-1_T2a_04
uhu-etsi-1_T2a_05
uwb_T2a_CGBSA
uwb_T2a_survGB
uwb_T2a_survGB_minVal
uwb_T2a_survRF
uwb_T2a_survRFmri</p>
      <p>identifier
CBMUniTO_T2b_coxnet
CBMUniTO_T2b_cwgbsa
HULATUC3M_T2b_survRF
onto-med_T2b_0.2.1.0e-5.10000.100
onto-med_T2b_0.2.1.0e-5.10000.200
onto-med_T2b_0.2.1.0e-5.5000.100
onto-med_T2b_0.2.1.0e-5.5000.200
sbb_T2b_Cox
sbb_T2b_RSF
sbb_T2b_SSVM
sisinflab-aibio_T2b_GB1
sisinflab-aibio_T2b_GB2
sisinflab-aibio_T2b_RF1
sisinflab-aibio_T2b_RF2
uhu-etsi-1_T2b_03
uhu-etsi-1_T2b_05
uhu-etsi-1_T2b_s02
uwb_T2b_CGBSA
uwb_T2b_survGB
uwb_T2b_survGB_minVal
uwb_T2b_survRF
uwb_T2b_survRFmri</p>
      <p>identifier
CBMUniTO_T2b_coxnet
CBMUniTO_T2b_cwgbsa
HULATUC3M_T2b_survRF
onto-med_T2b_0.2.1.0e-5.10000.100
onto-med_T2b_0.2.1.0e-5.10000.200
onto-med_T2b_0.2.1.0e-5.5000.100
onto-med_T2b_0.2.1.0e-5.5000.200
sbb_T2b_Cox
sbb_T2b_RSF
sbb_T2b_SSVM
sisinflab-aibio_T2b_GB1
sisinflab-aibio_T2b_GB2
sisinflab-aibio_T2b_RF1
sisinflab-aibio_T2b_RF2
uhu-etsi-1_T2b_03
uhu-etsi-1_T2b_05
uhu-etsi-1_T2b_s02
uwb_T2b_CGBSA
uwb_T2b_survGB
uwb_T2b_survGB_minVal
uwb_T2b_survRF
uwb_T2b_survRFmri</p>
      <p>identifier
CBMUniTO_T2b_coxnet
CBMUniTO_T2b_cwgbsa
HULATUC3M_T2b_survRF
onto-med_T2b_0.2.1.0e-5.10000.100
onto-med_T2b_0.2.1.0e-5.10000.200
onto-med_T2b_0.2.1.0e-5.5000.100
onto-med_T2b_0.2.1.0e-5.5000.200
sbb_T2b_Cox
sbb_T2b_RSF
sbb_T2b_SSVM
sisinflab-aibio_T2b_GB1
sisinflab-aibio_T2b_GB2
sisinflab-aibio_T2b_RF1
sisinflab-aibio_T2b_RF2
uhu-etsi-1_T2b_03
uhu-etsi-1_T2b_05
uhu-etsi-1_T2b_s02
uwb_T2b_CGBSA
uwb_T2b_survGB
uwb_T2b_survGB_minVal
uwb_T2b_survRF
uwb_T2b_survRFmri</p>
      <p>identifier
CBMUniTO_T2b_coxnet
CBMUniTO_T2b_cwgbsa
HULATUC3M_T2b_survRF
onto-med_T2b_0.2.1.0e-5.10000.100
onto-med_T2b_0.2.1.0e-5.10000.200
onto-med_T2b_0.2.1.0e-5.5000.100
onto-med_T2b_0.2.1.0e-5.5000.200
sbb_T2b_Cox
sbb_T2b_RSF
sbb_T2b_SSVM
sisinflab-aibio_T2b_GB1
sisinflab-aibio_T2b_GB2
sisinflab-aibio_T2b_RF1
sisinflab-aibio_T2b_RF2
uhu-etsi-1_T2b_03
uhu-etsi-1_T2b_05
uhu-etsi-1_T2b_s02
uwb_T2b_CGBSA
uwb_T2b_survGB
uwb_T2b_survGB_minVal
uwb_T2b_survRF
uwb_T2b_survRFmri</p>
    </sec>
  </body>
  <back>
    <ref-list>
      <ref id="ref1">
        <mixed-citation>
          [1]
          <string-name>
            <given-names>G.</given-names>
            <surname>Faggioli</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Guazzo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Marchesin</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L.</given-names>
            <surname>Menotti</surname>
          </string-name>
          , I. Trescato,
          <string-name>
            <given-names>H.</given-names>
            <surname>Aidos</surname>
          </string-name>
          ,
          <string-name>
            <given-names>R.</given-names>
            <surname>Bergamaschi</surname>
          </string-name>
          , G. Birolo,
          <string-name>
            <given-names>P.</given-names>
            <surname>Cavalla</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Chiò</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Dagliati</surname>
          </string-name>
          , M. de Carvalho,
          <string-name>
            <given-names>G. M.</given-names>
            <surname>Di Nunzio</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Fariselli</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J. M.</given-names>
            <surname>García Dominguez</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Gromicho</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Longato</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S. C.</given-names>
            <surname>Madeira</surname>
          </string-name>
          , U. Manera,
          <string-name>
            <given-names>G.</given-names>
            <surname>Silvello</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Tavazzi</surname>
          </string-name>
          , E. Tavazzi,
          <string-name>
            <given-names>M.</given-names>
            <surname>Vettoretti</surname>
          </string-name>
          ,
          <string-name>
            <given-names>B. Di</given-names>
            <surname>Camillo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Ferro</surname>
          </string-name>
          ,
          <article-title>Intelligent disease progression prediction: Overview of idpp@clef 2023</article-title>
          , in:
          <string-name>
            <given-names>A.</given-names>
            <surname>Arampatzis</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Kanoulas</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T.</given-names>
            <surname>Tsikrika</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Vrochidis</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Giachanou</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Li</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Aliannejadi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Vlachos</surname>
          </string-name>
          , G. Faggioli, N. Ferro (Eds.),
          <source>Experimental IR Meets Multilinguality, Multimodality, and Interaction</source>
          , Springer Nature Switzerland, Cham,
          <year>2023</year>
          , pp.
          <fpage>343</fpage>
          -
          <lpage>369</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref2">
        <mixed-citation>
          [2]
          <string-name>
            <given-names>R.</given-names>
            <surname>Küffner</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Zach</surname>
          </string-name>
          ,
          <string-name>
            <given-names>R.</given-names>
            <surname>Norel</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J.</given-names>
            <surname>Hawe</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Schoenfeld</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L.</given-names>
            <surname>Wang</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Li</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L.</given-names>
            <surname>Fang</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L.</given-names>
            <surname>Mackey</surname>
          </string-name>
          ,
          <string-name>
            <given-names>O.</given-names>
            <surname>Hardiman</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Cudkowicz</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Sherman</surname>
          </string-name>
          , G. Ertaylan,
          <string-name>
            <given-names>M.</given-names>
            <surname>Grosse-Wentrup</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T.</given-names>
            <surname>Hothorn</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J.</given-names>
            <surname>van Ligtenberg</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J. H.</given-names>
            <surname>Macke</surname>
          </string-name>
          , T. Meyer,
          <string-name>
            <given-names>B.</given-names>
            <surname>Schölkopf</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L.</given-names>
            <surname>Tran</surname>
          </string-name>
          ,
          <string-name>
            <given-names>R.</given-names>
            <surname>Vaughan</surname>
          </string-name>
          , G. Stolovitzky,
          <string-name>
            <given-names>M. L.</given-names>
            <surname>Leitner</surname>
          </string-name>
          ,
          <article-title>Crowdsourced analysis of clinical trial data to predict amyotrophic lateral sclerosis progression</article-title>
          ,
          <source>Nature Biotechnology</source>
          <volume>33</volume>
          (
          <year>2015</year>
          )
          <fpage>51</fpage>
          -
          <lpage>57</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref3">
        <mixed-citation>
          [3]
          <string-name>
            <given-names>J. M.</given-names>
            <surname>Cedarbaum</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Stambler</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Malta</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C.</given-names>
            <surname>Fuller</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Hilt</surname>
          </string-name>
          ,
          <string-name>
            <given-names>B.</given-names>
            <surname>Thurmond</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Nakanishi</surname>
          </string-name>
          ,
          <article-title>The ALSFRS-R: a revised ALS functional rating scale that incorporates assessments of respiratory function</article-title>
          ,
          <source>Journal of the Neurological Sciences</source>
          <volume>169</volume>
          (
          <year>1999</year>
          )
          <fpage>13</fpage>
          -
          <lpage>21</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref4">
        <mixed-citation>
          [4]
          <string-name>
            <given-names>A.</given-names>
            <surname>Guazzo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>I.</given-names>
            <surname>Trescato</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Longato</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Hazizaj</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Dosso</surname>
          </string-name>
          , G. Faggioli,
          <string-name>
            <given-names>G. M.</given-names>
            <surname>Di Nunzio</surname>
          </string-name>
          , G. Silvello,
          <string-name>
            <given-names>M.</given-names>
            <surname>Vettoretti</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Tavazzi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C.</given-names>
            <surname>Roversi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Fariselli</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S. C.</given-names>
            <surname>Madeira</surname>
          </string-name>
          , M. de Carvalho,
          <string-name>
            <given-names>M.</given-names>
            <surname>Gromicho</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Chiò</surname>
          </string-name>
          ,
          <string-name>
            <given-names>U.</given-names>
            <surname>Manera</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Dagliati</surname>
          </string-name>
          , G. Birolo,
          <string-name>
            <given-names>H.</given-names>
            <surname>Aidos</surname>
          </string-name>
          ,
          <string-name>
            <given-names>B. Di</given-names>
            <surname>Camillo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Ferro</surname>
          </string-name>
          ,
          <article-title>Intelligent Disease Progression Prediction: Overview of iDPP@CLEF 2022</article-title>
          , in:
          <string-name>
            <given-names>A.</given-names>
            <surname>Barrón-Cedeño</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Da San Martino</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Degli Esposti</surname>
          </string-name>
          ,
          <string-name>
            <given-names>F.</given-names>
            <surname>Sebastiani</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C.</given-names>
            <surname>Macdonald</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Pasi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Hanbury</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Potthast</surname>
          </string-name>
          , G. Faggioli, N. Ferro (Eds.),
          <source>Experimental IR Meets Multilinguality, Multimodality, and Interaction. Proceedings of the Thirteenth International Conference of the CLEF Association (CLEF 2022), Lecture Notes in Computer Science (LNCS) 13390</source>
          , Springer, Heidelberg, Germany,
          <year>2022</year>
          , pp.
          <fpage>395</fpage>
          -
          <lpage>422</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref5">
        <mixed-citation>
          [5]
          <string-name>
            <given-names>A.</given-names>
            <surname>Guazzo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>I.</given-names>
            <surname>Trescato</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Longato</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Hazizaj</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Dosso</surname>
          </string-name>
          , G. Faggioli,
          <string-name>
            <given-names>G. M.</given-names>
            <surname>Di Nunzio</surname>
          </string-name>
          , G. Silvello,
          <string-name>
            <given-names>M.</given-names>
            <surname>Vettoretti</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Tavazzi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C.</given-names>
            <surname>Roversi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Fariselli</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S. C.</given-names>
            <surname>Madeira</surname>
          </string-name>
          , M. de Carvalho,
          <string-name>
            <given-names>M.</given-names>
            <surname>Gromicho</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Chiò</surname>
          </string-name>
          ,
          <string-name>
            <given-names>U.</given-names>
            <surname>Manera</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Dagliati</surname>
          </string-name>
          , G. Birolo,
          <string-name>
            <given-names>H.</given-names>
            <surname>Aidos</surname>
          </string-name>
          ,
          <string-name>
            <given-names>B. Di</given-names>
            <surname>Camillo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Ferro</surname>
          </string-name>
          ,
          <article-title>Overview of iDPP@CLEF 2022: The Intelligent Disease Progression Prediction Challenge</article-title>
          , in: G. Faggioli,
          <string-name>
            <given-names>N.</given-names>
            <surname>Ferro</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Hanbury</surname>
          </string-name>
          , M. Potthast (Eds.),
          <source>CLEF 2022 Working Notes, CEUR Workshop Proceedings (CEUR-WS.org)</source>
          ,
          <source>ISSN 1613-0073</source>
          ,
          <year>2022</year>
          , pp.
          <fpage>1130</fpage>
          -
          <lpage>1210</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref6">
        <mixed-citation>
          [6]
          <string-name>
            <given-names>J. F.</given-names>
            <surname>Kurtzke</surname>
          </string-name>
          ,
          <article-title>Rating Neurologic Impairment in Multiple Sclerosis</article-title>
          ,
          <source>Neurology</source>
          <volume>33</volume>
          (
          <year>1983</year>
          )
          <fpage>1444</fpage>
          -
          <lpage>1444</lpage>
          . URL: https://n.neurology.org/content/33/11/1444. doi:
          <pub-id pub-id-type="doi">10.1212/WNL.33.11.1444</pub-id>
          . arXiv:https://n.neurology.org/content/33/11/1444.full.pdf.
        </mixed-citation>
      </ref>
    </ref-list>
  </back>
</article>