<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">Predicting the Functional Rating Scale and Self-Assessment Status of ALS Patients with Sensor Data: Notebook for the iDPP@CLEF Lab at CLEF 2024</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Andreia</forename><forename type="middle">S</forename><surname>Martins</surname></persName>
							<email>asmartins@ciencias.ulisboa.pt</email>
							<affiliation key="aff0">
								<orgName type="department">Faculdade de Ciências</orgName>
								<orgName type="laboratory">LASIGE</orgName>
								<orgName type="institution">Universidade de Lisboa</orgName>
								<address>
									<country key="PT">Portugal</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Daniela</forename><forename type="middle">M</forename><surname>Amaral</surname></persName>
							<email>daniela.amaral@tecnico.ulisboa.pt</email>
							<affiliation key="aff0">
								<orgName type="department">Faculdade de Ciências</orgName>
								<orgName type="laboratory">LASIGE</orgName>
								<orgName type="institution">Universidade de Lisboa</orgName>
								<address>
									<country key="PT">Portugal</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Eduardo</forename><forename type="middle">N</forename><surname>Castanho</surname></persName>
							<email>ejcastanho@ciencias.ulisboa.pt</email>
							<affiliation key="aff0">
								<orgName type="department">Faculdade de Ciências</orgName>
								<orgName type="laboratory">LASIGE</orgName>
								<orgName type="institution">Universidade de Lisboa</orgName>
								<address>
									<country key="PT">Portugal</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Diogo</forename><forename type="middle">F</forename><surname>Soares</surname></persName>
							<email>dfsoares@ciencias.ulisboa.pt</email>
							<affiliation key="aff0">
								<orgName type="department">Faculdade de Ciências</orgName>
								<orgName type="laboratory">LASIGE</orgName>
								<orgName type="institution">Universidade de Lisboa</orgName>
								<address>
									<country key="PT">Portugal</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Ruben</forename><surname>Branco</surname></persName>
							<email>rmbranco@ciencias.ulisboa.pt</email>
							<affiliation key="aff0">
								<orgName type="department">Faculdade de Ciências</orgName>
								<orgName type="laboratory">LASIGE</orgName>
								<orgName type="institution">Universidade de Lisboa</orgName>
								<address>
									<country key="PT">Portugal</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Sara</forename><forename type="middle">C</forename><surname>Madeira</surname></persName>
							<email>sacmadeira@ciencias.ulisboa.pt</email>
							<affiliation key="aff0">
								<orgName type="department">Faculdade de Ciências</orgName>
								<orgName type="laboratory">LASIGE</orgName>
								<orgName type="institution">Universidade de Lisboa</orgName>
								<address>
									<country key="PT">Portugal</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Helena</forename><surname>Aidos</surname></persName>
							<email>haidos@ciencias.ulisboa.pt</email>
							<affiliation key="aff0">
								<orgName type="department">Faculdade de Ciências</orgName>
								<orgName type="laboratory">LASIGE</orgName>
								<orgName type="institution">Universidade de Lisboa</orgName>
								<address>
									<country key="PT">Portugal</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">Predicting the Functional Rating Scale and Self-Assessment Status of ALS Patients with Sensor Data: Notebook for the iDPP@CLEF Lab at CLEF 2024</title>
					</analytic>
					<monogr>
						<idno type="ISSN">1613-0073</idno>
					</monogr>
					<idno type="MD5">D5A30ADD81C578D9575F3D67B258A268</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2025-04-23T17:54+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>Amyotrophic Lateral Sclerosis</term>
					<term>Prognostic Prediction</term>
					<term>Time Series Data</term>
					<term>Biclustering</term>
					<term>Multi-Class Classification</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>Amyotrophic Lateral Sclerosis (ALS) is a neurodegenerative disease causing progressive loss of cognitive and motor functions. Due to limited understanding of its mechanisms, there is no cure. Prognosis is still crucial for the effective planning of symptom treatment; however, the heterogeneity in patient progression drives the need for precision medicine research. iDPP CLEF 2024 aims to develop novel methodologies for predicting ALS disease progression, enabling the community to combine efforts and improve current prognostic methods. This report discusses our participation in Tasks 1 and 2, evaluating the impact of sensor data on improving the prediction of ALSFRS-R scores. The proposed methodology combines temporal summarization techniques (extracting relevant statistics from the sensors), feature selection and extraction methods, and state-of-the-art classifiers for each ALSFRS-R question independently. Results show that random forest models yield the best overall performance, and selecting the k-best features and biclustering were the best overall feature selection and extraction strategies for Tasks 1 and 2, respectively.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1.">Introduction</head><p>Amyotrophic Lateral Sclerosis (ALS) is a devastating neurodegenerative disease characterized by the progressive degeneration of motor neurons, leading to muscle weakness, atrophy, and eventual paralysis <ref type="bibr" target="#b0">[1]</ref>. The progression of ALS varies significantly among patients, with some experiencing rapid deterioration while others decline more slowly <ref type="bibr" target="#b1">[2]</ref>. This variability complicates the ability to predict disease trajectory, making it challenging for clinicians to offer accurate prognoses and for patients to make informed decisions about their future care <ref type="bibr" target="#b2">[3]</ref>.</p><p>Traditionally, clinical assessments of ALS progression rely on periodic evaluations using scales like the ALS Functional Rating Scale-Revised (ALSFRS-R) <ref type="bibr" target="#b3">[4]</ref>. Although essential, these assessments provide only snapshots of a patient's condition at discrete time points and can miss subtle but critical changes between visits. This intermittent data collection limits the ability to detect early signs of disease worsening and delays the implementation of necessary interventions.</p><p>Recent advancements in sensor technology present a promising solution to these limitations. Sensors can generate a rich, real-time dataset by continuously monitoring physiological parameters such as muscle activity, respiratory function, and movement patterns <ref type="bibr" target="#b4">[5]</ref>. 
This continuous data capture offers a detailed and dynamic view of a patient's condition, potentially revealing early indicators of disease progression that would otherwise go unnoticed between clinical visits <ref type="bibr" target="#b5">[6]</ref>.</p><p>However, to fully understand and predict ALS progression, it is essential to complement sensor data with patients' self-assessment data <ref type="bibr" target="#b6">[7]</ref>. Self-assessments provide critical insights into subjective symptoms such as pain, fatigue, and emotional well-being, which are not easily quantifiable through sensors alone. Integrating objective sensor data with subjective self-assessment data creates a comprehensive, multidimensional dataset encompassing measurable physical changes and the patient's lived disease experience <ref type="bibr" target="#b7">[8]</ref>.</p><p>In this context, within the iDPP CLEF 2024 challenge<ref type="foot" target="#foot_0">1</ref> framework, we tackled Tasks 1 and 2, which target predicting the twelve scores of the ALSFRS-R from sensor data. Task 1 aims to predict the score assigned by the clinician at the second visit, while Task 2 targets the patient's second self-assessment score. This paper reports the work done to overcome this challenge. We frame this challenge as a multi-label, multi-class classification problem with high-dimensional data. To handle the longitudinal datasets, we consider a two-step approach that transforms the time series sensor data using statistics computed from a time window period. Additionally, we test two feature selection strategies (K-Best features in all sensors and K-Best features in each sensor) and one feature extraction strategy (Biclustering-based features). To classify the ALSFRS-R scores, we train several state-of-the-art classifiers for each question independently.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.">Related Work</head><p>Sensor technology has gained significant traction in recent years for monitoring ALS patients. Wearable sensors, such as accelerometers and gyroscopes, have been used to continuously monitor motor function, gait, and other physical activities <ref type="bibr" target="#b4">[5,</ref><ref type="bibr" target="#b8">9,</ref><ref type="bibr" target="#b9">10,</ref><ref type="bibr" target="#b10">11]</ref>. Accelerometer studies demonstrated their effectiveness in capturing detailed movement patterns, providing valuable data for assessing motor decline in ALS patients <ref type="bibr" target="#b5">[6,</ref><ref type="bibr" target="#b9">10,</ref><ref type="bibr" target="#b8">9]</ref>. Vieira et al. <ref type="bibr" target="#b11">[12]</ref> developed a model targeting ALS progression prediction based on voice samples and accelerometer measurements from a four-year longitudinal dataset. This model was used to predict bulbar-related and limb-related ALSFRS-R scores. Straczkiewicz et al. <ref type="bibr" target="#b10">[11]</ref> used wrist wearables and ALSFRS-R self-entries data to propose new measures to quantify the count and duration of upper limb movements.</p><p>In addition to sensor data, integrating patients' self-assessment data has proven beneficial in understanding ALS progression. Studies have shown that self-reported pain, fatigue, and quality of life measures can provide critical insights that complement objective sensor data <ref type="bibr" target="#b12">[13,</ref><ref type="bibr" target="#b6">7]</ref>.</p><p>Machine learning techniques have been increasingly applied to predict disease progression in ALS <ref type="bibr" target="#b13">[14]</ref>. Predicting the progression of the functional domains (twelve questions) assessed by the well-known functional scale, the ALSFRS-R, was also investigated by Gordon and Lerner <ref type="bibr" target="#b14">[15]</ref>. 
They modeled a multiclass classifier using demographic, respiratory assessments, genetic data, and other dynamic data to predict the values of each ALSFRS-R question at the time of the last patient visit.</p><p>Subspace techniques, such as pattern mining, biclustering, and triclustering, discover local patterns with non-constant coherencies with potential for predictive tasks. Martins et al. <ref type="bibr" target="#b15">[16]</ref> recently proposed combining itemset mining with sequential pattern mining to uncover disease presentation and progression patterns in ALS patients and utilize these patterns to forecast the need for NIV. In a similar approach with the same prognostic target, Matos et al. <ref type="bibr" target="#b16">[17]</ref> suggested a classifier based on biclustering. Biclustering <ref type="bibr" target="#b17">[18,</ref><ref type="bibr" target="#b18">19]</ref> was used to locate groups of patients with similar values in subsets of clinical features (biclusters), which were then combined with static data as features. Although promising, none of these methods considered the temporal relationship of features. Soares et al. <ref type="bibr" target="#b19">[20]</ref> proposed BicTric, a classifier capable of learning predictive models from both static and temporal data using discriminative patterns obtained through biclustering and triclustering <ref type="bibr" target="#b20">[21,</ref><ref type="bibr" target="#b21">22,</ref><ref type="bibr" target="#b22">23]</ref>. Recently, Soares et al. <ref type="bibr" target="#b23">[24]</ref> enhanced BicTric with TCtriCluster, a triclustering algorithm incorporating temporal contiguity constraints. These approaches utilized temporal preprocessing with snapshots and the time windows method proposed by Carreiro et al. 
<ref type="bibr" target="#b24">[25]</ref> to learn predictive models for various clinically relevant ALS endpoints.</p><p>Integrating multi-modal data sources, including sensor data, self-assessments, and traditional clinical metrics, has shown potential in providing a more comprehensive understanding of ALS progression. Johnson et al. <ref type="bibr" target="#b7">[8]</ref> conducted a study combining wearable sensor data with patient-reported outcomes and clinical assessments, demonstrating that multi-modal data fusion could enhance predictive accuracy and offer deeper insights into disease dynamics.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.">Methodology</head><p>The objective of Tasks 1 and 2 of the iDPP CLEF 2024 challenge is to predict the values of the ALSFRS-R sub-scores of a second evaluation, given the values of the first evaluation. This would imply a reduced set of training instances (52 patients, in total), so we decided to generalize the challenge to predict the ALSFRS-R sub-scores of any evaluation given a previous evaluation, resulting in 121 training instances for Task 1 and 220 instances for Task 2.</p><p>The dataset made available <ref type="bibr" target="#b25">[26,</ref><ref type="bibr" target="#b26">27]</ref> with this challenge contains information on ALS patients comprising the following data: static (including demographic and clinical information), all the ALSFRS-R evaluations (comprising the scores of the 12 questions for each patient), and sensor data (collected from the sensors of a fitness smartwatch). Figure <ref type="figure" target="#fig_0">1</ref> illustrates the processing of the dataset. Tasks 1 and 2 face a significant hurdle due to the sensor dataset's high dimensionality, stemming from a large number of sensor features (90 in total) and the numerous time points (approximately 268 sensor records per patient). To address this issue, we used a two-step processing of the dataset: first, we extracted temporal statistics from the longitudinal datasets. Second, we used feature selection or extraction techniques to obtain a representation of the sensor statistics with smaller dimensionality.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.1.">Time Series Statistics</head><p>We derived new features from the longitudinal sensor data for each evaluation using summarization techniques, consisting of statistical metrics such as mean, standard deviation, minimum and maximum values, and the first and last values of each feature (as in Branco et al. <ref type="bibr" target="#b27">[28]</ref>). To avoid the bias introduced by considering the entire sensor data history, these metrics were computed within fixed time intervals, specifically considering the interval [𝑡 − 𝛿, 𝑡], where 𝑡 represents the day of the target appointment and 𝛿 is the number of days within the interval (set to 15 days for Task 1 and 7 days for Task 2). This computation resulted in 540 new sensor features (90 original sensor features × 6 statistical metrics).</p><p>Another issue encountered with the dataset was missing values, even after the aforementioned computations. To address this, various interpolation and imputation techniques were explored, with polynomial interpolation of degree 5 proving to be the most effective in minimizing variance decrease across the feature sets.</p><p>After the interpolation step, sensor features exhibiting zero or near-zero variance (less than 10⁻⁵) were deemed uninformative and consequently removed. Furthermore, highly correlated sensor features within the same category (calories, steps, beat_to_beat, heart_rate, respiration, and SpO₂) were also eliminated to mitigate redundancy. The selection of features for removal was based on Pearson correlation, with a correlation threshold set at 0.95 (see Table <ref type="table" target="#tab_0">1</ref>).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2.">Feature Selection and Extraction Techniques</head><p>The sensor statistics obtained from the previously discussed step are still high dimensional, as there are 340 features for Task 1 and 352 features for Task 2. Subsequently, we applied three techniques (two feature selection (i) and (ii), and one feature extraction (iii)) to reduce the dataset dimensionality:</p><formula xml:id="formula_0">(i) K-Best features in all sensors; (ii) K-Best features in each target; (iii) Biclustering-based features.</formula><p>The first two feature selection techniques are based on a k-best selection strategy. First, we selected the top 5 features for predicting each target question based on ANOVA F-value between labels and features. Predictions were then made using the set of highest-ranked sensor statistical features across all questions (All Sensors). Alternatively, a specialized prediction approach was also adopted wherein the top 5 features were selected independently for each ALSFRS-R question based on mutual information (Each Target) (see Table <ref type="table">2</ref>). These selections were made using the SelectKBest class of the sklearn.feature_selection Python module.</p><p>As an alternative to these aforementioned feature selection strategies, we used a feature extraction strategy based on biclustering to reduce the dataset dimensionality. Biclustering, the simultaneous clustering of rows and columns of a data matrix, has shown its ability to discover local patterns with non-constant coherencies in both descriptive and predictive learning tasks <ref type="bibr" target="#b20">[21,</ref><ref type="bibr" target="#b17">18]</ref>. Our approach, illustrated in Figure <ref type="figure">2</ref>, applies biclustering to the Patient×Sensor Feature training matrix to obtain the</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 2</head><p>Number of selected top-ranked features, by category. Predictions were made using the pairs of strategy-models of highest-ranked computed sensor features, based on the ANOVA F-value, across all questions (All Sensors). Additionally, a specialized prediction method was employed, wherein the top 5 features were independently selected for each ALSFRS-R question based on mutual information (Each Target). </p><formula xml:id="formula_1">Each Target: Q1 3 0 1 1 0 0 1 0 0 0 1 3; Q2 0 0 0 3 0 2 4 0 1 0 0 0; Q3 5 0 0 0 0 0 5 0 0 0 0 0; Q4 0 0 4 0 1 0 0 0 3 0 0 2; Q5 0 0 5 0 0 0 0 0 5 0 0 0; Q6 0 3 2 0 0 0 0 0 5 0 0 0; Q7 0 4 1 0 0 0 0 0 5 0 0 0; Q8 0 5 0 0 0 0 0 4 1 0 0 0; Q9 0 5 0 0 0 0 0 4 1 0 0 0; Q10 0 0 2 1 2 0 1 0 3 0 1 0; Q11 1 0 3 0 0 1 0 0 4 1 0 0; Q12 0 0 5 0 0 0 0 0 0 0 5 0</formula><p>biclusters, with the row pattern of each bicluster being computed as the mean value of each column.</p><p>Then, the Euclidean distance between each training (and test) sample and the row pattern of each bicluster is computed to obtain a reduced representation of the training (and test) set. We used an approach based on biclustering-computed features. First, we apply a biclustering algorithm to obtain a set of biclusters (sub-matrices) from the dataset. Second, we compute the row pattern of each bicluster. Finally, we compute the distance between each row of the dataset and each bicluster to obtain the new reduced dataset. To simplify the representation of this methodology, we illustrate the pattern of a bicluster by the mode of each column (instead of the mean value) and use the Manhattan distance between each row and bicluster instead of the Euclidean distance.</p><p>We considered Spectral Biclustering to mine the biclusters as implemented in scikit-learn <ref type="bibr" target="#b28">[29,</ref><ref type="bibr" target="#b29">30]</ref>. The number of biclusters influences the number of features in the reduced dataset. 
In our approach, we tested values for the number of biclusters and selected the value that maximizes the number of non-trivial biclusters (biclusters with more than 2 rows and columns).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.3.">Modeling and Hyperparameter Optimization</head><p>In this section, we discuss our classification methodology, as illustrated in Figure <ref type="figure" target="#fig_3">3</ref>. The tasks at hand are multi-label, multi-class tasks, which add complexity to the standard modeling techniques. Adding to the difficulty, the labels, which are the ALSFRS-R questions, are not completely independent, as the sub-scores are correlated within the different domains (bulbar, fine motor / upper limb, gross motor / lower limb, and respiratory).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Train Split</head><note type="other">Target</note><p>Despite this intricacy, we decided to simplify the task by separating it into independent multi-class problems, where a given patient ALSFRS-R evaluation and their sensor data are used to predict each sub-score individually. Despite not modeling the correlation between questions, we assume that the models could still connect a patient's condition in time with their ability to perform a single function. We train 12 models and combine their predictions to predict the full set of sub-scores.</p><p>We consider a set of well-known classifiers covering a diverse range of model types, using scikit-learn <ref type="bibr" target="#b28">[29]</ref>: Logistic Regression (LR), Random Forest (RF), XGBoost, and Support Vector Machines (SVC). Each model undergoes a model-appropriate pre-processing if required, and the optimal hyperparameters are searched for, as will be described later on.</p><p>For questions that have a sufficient representation across each of the scale values (0 to 4), <ref type="foot" target="#foot_1">2</ref> we employ imblearn <ref type="bibr" target="#b30">[31]</ref>'s implementation of SMOTE <ref type="bibr" target="#b31">[32]</ref>, to alleviate the issue of small training sample size.</p><p>It is common to scale the input data for linear models to avoid widely different magnitudes across features that can hurt learning and performance. We use a standard scaler for Logistic Regression and Support Vector Machines to scale the input data.</p><p>We optimize the models using the Mean Absolute Error metric, both as a loss function for the model optimization and as a hyperparameter optimization objective, which searches for the hyperparameter configuration that yields the best performance on the validation set. 
We use Optuna <ref type="bibr" target="#b32">[33]</ref> for hyperparameter optimization, with the Tree-Structured Parzen Estimator algorithm (as a sampler), avoiding a grid search brute-force approach to more efficiently sweep the hyperparameter space (see Table <ref type="table" target="#tab_2">3</ref> for hyperparameter range of each model). The best-performing model is then used for the submissions in the challenge.</p><p>To assess the generalization of trained models and to optimize hyperparameters, we split the provided dataset into two sets: a train set and a validation set. As the dataset is multi-label multi-class, regular stratified train test splits do not guarantee a representative proportion of each scale value for each question for both splits. We resort to a variant termed iterative stratified train test splitting <ref type="bibr" target="#b33">[34,</ref><ref type="bibr" target="#b34">35]</ref>, implemented in the scikit-multilearn package <ref type="bibr" target="#b35">[36]</ref>. This method works by iteratively populating both splits and assigning data points at each step to the split that requires them the most to maintain balance. Ultimately, we ensure each split is as similar to the overall dataset as possible. We split the provided training set following a 70/30 ratio, with 70% becoming the training set and 30% the validation set.</p><note type="other">kernel CategoricalDistribution( ["linear", "rbf", "poly", "sigmoid"])</note><p>All the experiments were run on a Desktop Computer with an AMD Ryzen 9 7950X 16-Core with 64GB of RAM and Ubuntu 22.04.2. The code was run using Python 3.10.11.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.">Results &amp; Discussion</head><p>In this section, we cover the results obtained in Tasks 1 and 2 in the challenge, as reported and computed with the private test set made available by the lab organizers.</p><p>To examine the impact of our design choices on feature selection or extraction, we define an experimental space beyond the basic analysis of the challenge results. First, for each question, we select the best pair feature selection or extraction strategy and classification with the top-k (we consider 𝑘 = {1, 2, 3}) highest validation metric values for both Mean Absolute Error (MAE) and Root Mean Squared Error (RMSE) (see section 4.1). Next, to determine which feature selection or extraction performs best, we consider the mean RMSE and MAE across the four classifiers for each question (see section 4.2). Lastly, we will assess whether there is a significant advantage in using one classifier over another. Given that the classifiers are all different types, identifying specific model properties suited for this particular task could lead to improvements for each question (see section 4.3).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.">Selecting the best combination of feature strategy and classification model</head><p>We conducted experiments to predict the ALSFRS-R questions of a subsequent assessment by combining the best models for each target question based on their validation set performance. Specifically, we submitted the three best-performing pairs for both Tasks (see Table <ref type="table" target="#tab_3">4</ref>).</p><p>Table <ref type="table">5</ref> presents the results of the models trained in each feature selection or extraction strategy for predicting each target question, along with the global results (average RMSE and MAE values across all questions). For both Tasks 1 and 2, the best-performing combination of feature selection or extraction strategy and classification model (strategy-model pair) in the test set was the second-best strategy-model pair in predicting the ALSFRS-R questions in the validation set. This suggests that the training and validation sets used for optimizing and validating the classifiers were unsuitable for predicting the ALSFRS-R questions in the second evaluation. These sets included all evaluations made available for the challenge, leading the models to be trained for predicting the next evaluation rather than specifically the second evaluation.</p><p>In Task 1, two questions related to the bulbar domain, Q1 and Q2, and one respiratory question, Q11, were the easiest to predict (RMSE 0.309, MAE 0.095). Specifically, Q1 and Q11 were best predicted using the XGBoost classifier with the All Sensors (Best strategy-model pair) and Each Target (2nd best strategy-model) feature selection strategies, respectively. Question Q2 was best predicted using the RF classifier with the All Sensors strategy (3rd best strategy-model pair). 
In contrast, motor-related questions, Q7 (trunk domain) and Q9 (lower limb domain) had the highest prediction errors (RMSE 0.873, MAE 0.476).</p><p>For Task 2, questions Q11 and Q12 were correctly classified for all the evaluations (RMSE 0.000 and MAE 0.000). Both the questions used the RF classifier and the Biclustering strategy (2nd best strategy-model and Best strategy-model pair, respectively). Question Q11 was also correctly classified for all evaluations using the Each Target strategy (3rd best strategy-model pair). Conversely, Q4 had the most misclassified evaluations (RMSE 1.044, MAE 0.545).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 5</head><p>Results of the submitted strategy-model pairs. Root Mean Squared Error (RMSE) and Mean Absolute Error (MAE) metrics for the three best strategy-model pairs presented in Table <ref type="table" target="#tab_3">4</ref>. The performance metrics are provided for each target question and averaged across all the 12 questions (Global). </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Model</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.2.">Feature Selection and Extraction Comparison</head><p>As previously mentioned, one feature extraction and two feature selection strategies were assessed: biclustering and K-Best selection, both globally for all questions (All Sensors) and individually for each question (Each Target). Table <ref type="table" target="#tab_5">6</ref> presents the average model performance in the test set for each ALSFRS-R question and feature selection or extraction method. Overall, no strategy clearly outperformed the others, with the metrics typically not differing much between models with the same target question. However, the preferred strategy does change with the target.</p><p>In Task 1, the best overall method was individual k-best selection, Each Target (RMSE 0.780, MAE 0.474). It gathered the best average metrics in 6 out of the 12 questions, followed by the biclustering approach (RMSE 0.815, MAE 0.515) with 4 questions. Notably, there may be a preferred strategy by domain: the All Sensors approach performed best in the trunk domain questions (Q6 and Q7), and Each Target yielded the best metrics in the lower limb domain (Q8 and Q9). However, this behavior does not seem to occur for the upper limb domain (Q4 and Q5). For the bulbar (Q1-Q3) and respiratory (Q10-Q12) areas, the Biclustering and Each Target approaches achieved the best performance in two of the three targets. The best average performance was obtained for Q11 (RMSE 0.361, MAE 0.131) and the worst for Q6 (RMSE 0.909, MAE 0.667) and Q9 (RMSE 0.934, MAE 0.560).</p><p>For Task 2, the best overall strategy was feature transformation through Biclustering (RMSE 0.805, MAE 0.483), with the best average metrics in 8 out of 12 targets. Compared to Task 1, there is more overlap in the outcome of the three strategies, and as such, the second best method (Each Target; RMSE 0.836, MAE 0.507) had the best average metrics in 5 questions. 
Also, unlike Task 1, there is no preferred strategy by domain, save for the respiratory questions (Q10-Q12) that are most easily predicted by biclustering-based models. The best average performance was attained in Q12 (RMSE 0.419, MAE 0.318) and the worst in Q10 (RMSE 1.191, MAE 0.818).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.3.">Model Comparison</head><p>We conducted experiments to predict the ALSFRS-R questions in the second evaluation using four machine-learning classifiers: Logistic Regression (LR), Random Forest (RF), Support Vector Machine (SVC), and XGBoost (XGB). We optimized their hyperparameters and validated their performance on a validation set derived from the provided training set as described in Section 3.3. In addition to these classifiers, we also submitted two naïve approaches: Last Observation Carried Forward (LOCF) and Majority Class. Table <ref type="table" target="#tab_6">7</ref> presents the performance results for each model in predicting each target question, along with the overall results (average RMSE and MAE values across all questions). Notably, the LOCF approach performed the best for both tasks, indicating minimal variability between the ALSFRS-R scores of the first and second evaluations. On the other hand, the Majority Class approach was the worst performer, with RMSE values of 1.092 for Task 1 and 1.471 for Task 2. A potential reason for the classifiers' overall poor performance is that they were trained to predict the next score rather than specifically the second score, making the models too general for this particular task.</p><p>Regarding Task 1, questions Q3 (bulbar domain) and Q10 (respiratory domain) had the lowest prediction error using the LOCF approach (RMSE 0.218, MAE 0.048). Conversely, question Q9 (lower limb domain) predictions were the poorest, with the best classifier being RF (RMSE 0.873, MAE 0.381).</p><p>For Task 2, the conclusions are similar to those of Section 4.1. Questions regarding the respiratory domain, Q11 and Q12, were correctly predicted for all the evaluations. Particularly, the LOCF approach correctly predicted all the scores of question Q11, and the LR and RF classifiers accurately predicted all scores for question Q12 (RMSE 0.000, MAE 0.000). 
The most misclassified question was Q4 (upper limb domain), with an RMSE of 1.044 and MAE of 0.545 using the best-performing model (LOCF).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.">Conclusion</head><p>In a fast-acting and debilitating disease like ALS, the ability to predict how it evolves can be critical for clinical decision-making and life-prolonging therapy administration. Thus, the collection of sensor data can be a valuable resource for improving prognosis prediction, as it provides continuous monitoring of the patient's physiological status. This information can complement the periodic clinical assessments and possibly hint at the imminent occurrence of critical events, such as needing ventilation support. Machine learning techniques allow for meaningful insight to be extracted from these large datasets, which can potentially improve the performance of current prognosis prediction approaches or lead to the development of new ones. In the iDPP@CLEF 2024 challenge, the main goal was to predict the ALSFRS-R scores (both clinical and self-assessed) of a patient's second assessment, given the first assessment and the sensor records between evaluations.</p><p>Our methodology consisted of independent multi-class models, each predicting an ALSFRS-R question. Four classification models were tested: Logistic Regression, Random Forest, XGBoost, and Support Vector Machine. The sensor data was handled first by deriving static features from the longitudinal ones using summarization techniques, i.e., by calculating summary statistics within an observation window before the target date. Then, the feature set was reduced using three methods: K-Best selection across all questions, K-Best selection by question, and biclustering. These models were also compared to two baseline approaches: Last Observation Carried Forward (LOCF) and Majority Class.</p><p>In both tasks, Random Forest yielded the best overall results but did not outperform LOCF, save for a few individual questions. Additionally, there was no consensus regarding the best feature selection or extraction approach. 
Independent K-Best selection and Biclustering were the best overall methods in Tasks 1 and 2, respectively. However, further research is needed to capture the temporal patterns of sensors to fully understand their potential in tracking disease progression as measured by ALSFRS-R scores.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head>Figure 1 :</head><label>1</label><figDesc>Figure 1: Data processing pipeline. Addressing the challenge implies handling data from three sources: static clinical variables, ALSFRS-R scores, and sensor data. To handle the highly dimensional sensor time series data, we computed statistics for each sensor and then applied feature selection or extraction strategies to reduce the dimensionality of the sensor dataset. The final dataset (that feeds the classifiers) aggregates these data sources.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_1"><head>Figure 2 :</head><label>2</label><figDesc>Figure 2: We used an approach based on biclustering-computed features. First, we apply a biclustering algorithm to obtain a set of biclusters (sub-matrices) from the dataset. Second, we compute the row pattern of each bicluster. Finally, we compute the distance between each row of the dataset and each bicluster to obtain the new reduced dataset. To simplify the representation of this methodology, we illustrate the pattern of a bicluster by the mode of each column (instead of the mean value) and use the Manhattan distance between each row and bicluster instead of the Euclidean distance.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_3"><head>Figure 3 :</head><label>3</label><figDesc>Figure 3: The challenge implies a multi-label, multi-class task. To simplify the training, we train classifiers for each question independently. We use SMOTE to compensate for a lack of sufficient representation across each scale value when possible. We train several traditional classifiers for each question, optimized considering the mean absolute error.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1</head><label>1</label><figDesc>Number of excluded sensor features, by category. The sensor data can be grouped into 6 distinct categories (Category). For each original sensor feature within these categories, 6 statistical metrics — mean, standard deviation, minimum value, maximum value, first value, and last value — were computed (#Computed Features). Features exhibiting zero or near-zero variance (#Low Variance) and those highly correlated with other features within the same category (#High Correlation) were removed from the dataset.</figDesc><table><row><cell>Task 1</cell><cell>Task 2</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_2"><head>Table 3</head><label>3</label><figDesc>The hyperparameter space for each model. Int and Float Distributions describe a search space between two integers or floating values, whereas CategoricalDistribution specifies a set of discrete values.</figDesc><table><row><cell>Model</cell><cell>Hyperparameter</cell><cell>Distribution Space</cell></row><row><cell></cell><cell>n_estimators</cell><cell>IntDistribution(100, 1000)</cell></row><row><cell>XGBoost Classifier</cell><cell>max_depth</cell><cell>IntDistribution(1, 20)</cell></row><row><cell></cell><cell>learning_rate</cell><cell>FloatDistribution(0.01, 1)</cell></row><row><cell>Random Forest</cell><cell>n_estimators max_depth</cell><cell>IntDistribution(10, 1000) IntDistribution(1, 20)</cell></row><row><cell>Logistic Regression(max_iter=100000)</cell><cell>C</cell><cell>FloatDistribution(0.01, 10)</cell></row><row><cell></cell><cell>C</cell><cell>FloatDistribution(0.01, 10)</cell></row><row><cell>SVC(max_iter=100000, cache_size=1000)</cell><cell>gamma</cell><cell>FloatDistribution(0.01, 10)</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_3"><head>Table 4</head><label>4</label><figDesc>Results on the validation set for each combination of feature selection or extraction strategy and classification model. RF stands for Random Forest, SVC for Support Vector Machine Classifier, and LR for Logistic Regression.</figDesc><table><row><cell></cell><cell></cell><cell cols="2">Best pair</cell><cell cols="2">2nd best pair</cell><cell cols="2">3rd best pair</cell></row><row><cell></cell><cell>Question</cell><cell>Strategy</cell><cell>Model</cell><cell>Strategy</cell><cell>Model</cell><cell>Strategy</cell><cell>Model</cell></row><row><cell></cell><cell>Q1</cell><cell cols="2">All Sensors XGBoost</cell><cell cols="2">Each Target XGBoost</cell><cell>Each Target</cell><cell>RF</cell></row><row><cell></cell><cell>Q2</cell><cell>Each Target</cell><cell>RF</cell><cell>Biclustering</cell><cell>RF</cell><cell>All Sensors</cell><cell>RF</cell></row><row><cell></cell><cell>Q3</cell><cell>Biclustering</cell><cell>RF</cell><cell cols="2">Biclustering XGBoost</cell><cell>All Sensors</cell><cell>RF</cell></row><row><cell></cell><cell>Q4</cell><cell>Each Target</cell><cell>SVC</cell><cell>All Sensors</cell><cell>RF</cell><cell>All Sensors</cell><cell>SVC</cell></row><row><cell></cell><cell>Q5</cell><cell>Each Target</cell><cell>RF</cell><cell cols="2">All Sensors XGBoost</cell><cell>Each Target</cell><cell>SVC</cell></row><row><cell>Task 1</cell><cell>Q6 Q7</cell><cell cols="2">Biclustering XGBoost Each Target SVC</cell><cell>Biclustering Each Target</cell><cell>RF RF</cell><cell cols="2">All Sensors XGBoost All Sensors XGBoost</cell></row><row><cell></cell><cell>Q8</cell><cell>All Sensors</cell><cell>SVC</cell><cell>All Sensors</cell><cell>LR</cell><cell cols="2">All Sensors XGBoost</cell></row><row><cell></cell><cell>Q9</cell><cell cols="2">Biclustering XGBoost</cell><cell>Biclustering</cell><cell>RF</cell><cell>All 
Sensors</cell><cell>RF</cell></row><row><cell></cell><cell>Q10</cell><cell>All Sensors</cell><cell>SVC</cell><cell>Biclustering</cell><cell>SVC</cell><cell>Each Target</cell><cell>LR</cell></row><row><cell></cell><cell>Q11</cell><cell cols="2">Biclustering XGBoost</cell><cell cols="2">Each Target XGBoost</cell><cell>Biclustering</cell><cell>RF</cell></row><row><cell></cell><cell>Q12</cell><cell>All Sensors</cell><cell>RF</cell><cell>Biclustering</cell><cell>RF</cell><cell>All Sensors</cell><cell>LR</cell></row><row><cell></cell><cell>Q1</cell><cell>All Sensors</cell><cell>SVC</cell><cell>Biclustering</cell><cell>RF</cell><cell>Each Target</cell><cell>RF</cell></row><row><cell></cell><cell>Q2</cell><cell cols="2">Each Target XGBoost</cell><cell>Biclustering</cell><cell>SVC</cell><cell>Each Target</cell><cell>LR</cell></row><row><cell></cell><cell>Q3</cell><cell cols="2">Biclustering XGBoost</cell><cell>Biclustering</cell><cell>SVC</cell><cell cols="2">All Sensors XGBoost</cell></row><row><cell></cell><cell>Q4</cell><cell cols="2">Each Target XGBoost</cell><cell cols="2">All Sensors XGBoost</cell><cell>All Sensors</cell><cell>RF</cell></row><row><cell></cell><cell>Q5</cell><cell>All Sensors</cell><cell>RF</cell><cell>Biclustering</cell><cell>RF</cell><cell>Each Target</cell><cell>RF</cell></row><row><cell>Task 2</cell><cell>Q6 Q7</cell><cell cols="2">All Sensors XGBoost All Sensors RF</cell><cell>Each Target Biclustering</cell><cell>RF RF</cell><cell cols="2">All Sensors Each Target XGBoost RF</cell></row><row><cell></cell><cell>Q8</cell><cell>Biclustering</cell><cell>RF</cell><cell cols="2">Each Target XGBoost</cell><cell>Each Target</cell><cell>RF</cell></row><row><cell></cell><cell>Q9</cell><cell cols="2">Biclustering XGBoost</cell><cell>Biclustering</cell><cell>RF</cell><cell>Biclustering</cell><cell>SVC</cell></row><row><cell></cell><cell>Q10</cell><cell>All Sensors</cell><cell>SVC</cell><cell>Each Target</cell><cell>RF</cell><cell cols="2">Each Target 
XGBoost</cell></row><row><cell></cell><cell>Q11</cell><cell cols="2">All Sensors XGBoost</cell><cell>Biclustering</cell><cell>RF</cell><cell>Each Target</cell><cell>RF</cell></row><row><cell></cell><cell>Q12</cell><cell>Biclustering</cell><cell>RF</cell><cell>All Sensors</cell><cell>SVC</cell><cell>Each Target</cell><cell>RF</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_5"><head>Table 6</head><label>6</label><figDesc>Model results' summary, by feature selection and extraction strategy. Presented Root Mean Squared Error (RMSE) and Mean Absolute Error (MAE) report to the average performance of the 4 tested classifiers (LR, RF, SVC, XGBoost), in the test set. The performance metrics are provided for each target question and averaged across all of the strategy's models (Global), with the best outcome in bold.</figDesc><table><row><cell></cell><cell>Strategy</cell><cell></cell><cell>Q1</cell><cell>Q2</cell><cell>Q3</cell><cell>Q4</cell><cell>Q5</cell><cell>Q6</cell><cell>Q7</cell><cell>Q8</cell><cell>Q9</cell><cell>Q10</cell><cell>Q11</cell><cell>Q12 Global</cell></row><row><cell></cell><cell cols="13">Biclustering RMSE 0.730 0.775 0.616 0.820 0.909 1.008 1.015 0.843 1.095 0.825 0.378 0.761</cell><cell>0.815</cell></row><row><cell></cell><cell></cell><cell cols="12">MAE 0.393 0.488 0.298 0.440 0.643 0.702 0.810 0.571 0.667 0.548 0.143 0.476</cell><cell>0.515</cell></row><row><cell>Task 1</cell><cell>All Sensors</cell><cell cols="12">RMSE 0.744 0.836 0.883 0.975 0.804 0.909 0.884 0.733 1.205 0.842 0.477 0.849 MAE 0.417 0.571 0.488 0.560 0.536 0.667 0.571 0.429 0.821 0.548 0.190 0.536</cell><cell>0.845 0.528</cell></row><row><cell></cell><cell cols="13">Each Target RMSE 0.826 0.813 0.548 0.906 0.765 0.959 0.948 0.595 0.934 1.004 0.361 0.703</cell><cell>0.780</cell></row><row><cell></cell><cell></cell><cell>MAE</cell><cell cols="11">0.464 0.536 0.262 0.488 0.488 0.714 0.679 0.310 0.560 0.631 0.131 0.429</cell><cell>0.474</cell></row><row><cell></cell><cell cols="13">Biclustering RMSE 0.738 0.910 0.726 1.115 1.028 0.892 0.574 0.698 0.452 1.191 0.914 0.419</cell><cell>0.805</cell></row><row><cell></cell><cell></cell><cell cols="12">MAE 0.432 0.523 0.364 0.659 0.614 0.705 0.341 0.500 0.227 0.818 0.295 0.318</cell><cell>0.483</cell></row><row><cell>Task 2</cell><cell>All 
Sensors</cell><cell cols="12">RMSE 0.820 0.799 0.749 1.217 1.310 1.034 0.811 0.797 0.689 1.388 1.383 0.603 MAE 0.545 0.477 0.432 0.727 0.977 0.864 0.554 0.500 0.386 0.977 0.568 0.364</cell><cell>0.967 0.614</cell></row><row><cell></cell><cell cols="13">Each Target RMSE 0.808 0.860 0.686 1.147 0.993 0.875 0.696 0.665 0.518 1.232 1.128 0.433</cell><cell>0.836</cell></row><row><cell></cell><cell></cell><cell>MAE</cell><cell cols="11">0.455 0.477 0.295 0.727 0.636 0.636 0.455 0.455 0.273 0.864 0.500 0.318</cell><cell>0.507</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_6"><head>Table 7</head><label>7</label><figDesc>Results of the models. Root Mean Squared Error (RMSE) and Mean Absolute Error (MAE) metrics of four ML classifiers and two naïve approaches across the 12 target questions. The classifiers include Logistic Regression (LR), Random Forest (RF), Support Vector Classifier (SVC), and XGBoost. The naïve approaches are the Last Observation Carried Forward (LOCF) and Majority Class. The performance metrics are provided for each target question and averaged across all the questions (Global).</figDesc><table><row><cell></cell><cell>Model</cell><cell></cell><cell>Q1</cell><cell>Q2</cell><cell>Q3</cell><cell>Q4</cell><cell>Q5</cell><cell>Q6</cell><cell>Q7</cell><cell>Q8</cell><cell>Q9</cell><cell>Q10</cell><cell>Q11</cell><cell>Q12 Global</cell></row><row><cell></cell><cell>LOCF</cell><cell cols="12">RMSE 0.488 0.309 0.218 0.690 0.535 0.577 0.488 0.535 0.951 0.218 0.309 0.577</cell><cell>0.491</cell></row><row><cell></cell><cell></cell><cell>MAE</cell><cell cols="11">0.143 0.095 0.048 0.286 0.286 0.333 0.238 0.190 0.429 0.048 0.095 0.238</cell><cell>0.202</cell></row><row><cell></cell><cell cols="7">Majority Class RMSE 1.512 0.976 1.512 1.254 1.113</cell><cell>1.34</cell><cell cols="3">1.327 1.175 1.690</cell><cell>0.309</cell><cell>0.378 0.724</cell><cell>1.092</cell></row><row><cell>Task 1</cell><cell>LR</cell><cell cols="12">MAE RMSE 1.000 0.756 0.756 0.900 0.787 1.000 1.024 0.873 0.926 0.857 0.476 0.762 0.814 0.762 0.905 0.810 0.810 1.238 0.0952 0.143 0.333 0.816 0.378 0.845</cell><cell>0.659 0.838</cell></row><row><cell></cell><cell></cell><cell>MAE</cell><cell cols="9">0.619 0.381 0.381 0.524 0.524 0.714 0.762 0.571 0.571</cell><cell>0.476</cell><cell>0.143 0.429</cell><cell>0.508</cell></row><row><cell></cell><cell>RF</cell><cell cols="11">RMSE 0.690 0.578 0.436 0.926 0.655 0.900 0.900 0.617 0.873 0.577</cell><cell>0.378 
0.577</cell><cell>0.676</cell></row><row><cell></cell><cell></cell><cell>MAE</cell><cell cols="10">0.381 0.238 0.190 0.476 0.429 0.619 0.619 0.286 0.381 0.238</cell><cell>0.143 0.238</cell><cell>0.353</cell></row><row><cell></cell><cell>SVC</cell><cell cols="10">RMSE 0.976 0.787 0.617 0.900 1.000 1.234 1.113 0.655 1.291</cell><cell>0.756</cell><cell>0.378 0.951</cell><cell>0.888</cell></row><row><cell></cell><cell></cell><cell>MAE</cell><cell cols="9">0.571 0.429 0.286 0.524 0.619 0.857 0.857 0.429 0.905</cell><cell>0.381</cell><cell>0.143 0.524</cell><cell>0.544</cell></row><row><cell></cell><cell>XGBoost</cell><cell cols="10">RMSE 0.309 1.134 0.655 0.900 0.617 0.900 0.756 0.787 1.291</cell><cell>1.215</cell><cell>0.378 1.024</cell><cell>0.830</cell></row><row><cell></cell><cell></cell><cell cols="10">MAE 0.095 1.095 0.333 0.429 0.381 0.619 0.476 0.429 0.810</cell><cell>1.095</cell><cell>0.143 0.952</cell><cell>0.571</cell></row><row><cell></cell><cell>LOCF</cell><cell cols="12">RMSE 0.674 0.674 0.426 1.044 0.739 0.603 0.739 0.603 0.302 0.522 0.000 0.603</cell><cell>0.577</cell></row><row><cell></cell><cell></cell><cell cols="12">MAE 0.455 0.273 0.182 0.545 0.364 0.364 0.364 0.364 0.091 0.273 0.000 0.182</cell><cell>0.288</cell></row><row><cell></cell><cell cols="11">Majority Class RMSE 1.348 0.905 1.168 1.314 1.477 1.809 1.651 1.044 1.883</cell><cell>1.758</cell><cell>2.089 1.206</cell><cell>1.471</cell></row><row><cell>Task 2</cell><cell>LR</cell><cell cols="10">MAE RMSE 0.798 0.790 0.905 1.168 1.168 0.953 0.674 0.798 0.603 0.909 0.455 0.636 0.818 1.091 1.636 1.273 0.909 1.545</cell><cell>1.273 1.279</cell><cell>1.091 0.727 1.537 0.000</cell><cell>1.030 0.890</cell></row><row><cell></cell><cell></cell><cell>MAE</cell><cell cols="9">0.455 0.455 0.455 0.818 0.818 0.727 0.455 0.636 0.364</cell><cell>0.909</cell><cell>0.727 0.000</cell><cell>0.568</cell></row><row><cell></cell><cell>RF</cell><cell cols="11">RMSE 0.905 0.905 0.739 1.087 1.279 0.905 0.674 0.798 0.302 
1.128</cell><cell>1.508 0.000</cell><cell>0.852</cell></row><row><cell></cell><cell></cell><cell>MAE</cell><cell cols="10">0.636 0.455 0.364 0.636 0.909 0.818 0.455 0.636 0.091 0.727</cell><cell>0.636 0.000</cell><cell>0.530</cell></row><row><cell></cell><cell>SVC</cell><cell cols="10">RMSE 0.905 1.000 0.739 1.128 1.624 1.279 0.853 0.674 0.603</cell><cell>1.414</cell><cell>1.279 0.674</cell><cell>1.014</cell></row><row><cell></cell><cell></cell><cell>MAE</cell><cell cols="9">0.636 0.636 0.364 0.727 1.364 1.091 0.545 0.455 0.364</cell><cell>1.091</cell><cell>0.545 0.273</cell><cell>0.674</cell></row><row><cell></cell><cell>XGBoost</cell><cell cols="11">RMSE 0.674 0.739 0.522 1.206 1.168 1.000 1.044 0.522 0.302 1.732</cell><cell>1.206 1.000</cell><cell>0.926</cell></row><row><cell></cell><cell></cell><cell cols="11">MAE 0.455 0.364 0.273 0.727 0.818 0.818 0.727 0.273 0.091 1.182</cell><cell>0.364 1.000</cell><cell>0.591</cell></row></table></figure>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="1" xml:id="foot_0">http://brainteaser.dei.unipd.it/challenges/idpp2024/</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="2" xml:id="foot_1">Two questions in each task did not qualify, which were questions 11 and 12 for Task 1, and 3 and 11 for Task 2.</note>
		</body>
		<back>

			<div type="acknowledgement">
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Acknowledgments</head><p>This work was partially supported by Fundação para a Ciência e a Tecnologia (FCT) through project AIpALS ref. PTDC/CCI-CIF/4613/2020 (https://doi.org/10.54499/PTDC/CCI-CIF/4613/2020), LASIGE Research Unit, ref. UIDB/00408/2020 (https://doi.org/10.54499/UIDB/00408/2020) and ref. UIDP/00408/2020 (https://doi.org/10.54499/UIDP/00408/2020), and PhD Research Scholarships to RB (2022.10727.BD), DFS ref. 2020.05100.BD (https://doi.org/10.54499/2020.05100.BD) and ENC ref. 2021.07810.BD (https://doi.org/10.54499/2021.07810.BD); and by BRAINTEASER project, which has received funding from the European Union's Horizon 2020 research and innovation program under grant agreement No. 101017598.</p></div>
			</div>

			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">Amyotrophic lateral sclerosis</title>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">C</forename><surname>Wijesekera</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><forename type="middle">Nigel</forename><surname>Leigh</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Orphanet journal of rare diseases</title>
		<imprint>
			<biblScope unit="volume">4</biblScope>
			<biblScope unit="page" from="1" to="22" />
			<date type="published" when="2009">2009</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">Amyotrophic lateral sclerosis (ALS) and related motor neuron diseases: an overview</title>
		<author>
			<persName><forename type="first">J</forename><surname>Morris</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">The Neurodiagnostic Journal</title>
		<imprint>
			<biblScope unit="volume">55</biblScope>
			<biblScope unit="page" from="180" to="194" />
			<date type="published" when="2015">2015</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">Unraveling the complexity of amyotrophic lateral sclerosis survival prediction</title>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">R</forename><surname>Pfohl</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">B</forename><surname>Kim</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><forename type="middle">S</forename><surname>Coan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">S</forename><surname>Mitchell</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Frontiers in neuroinformatics</title>
		<imprint>
			<biblScope unit="volume">12</biblScope>
			<biblScope unit="page">36</biblScope>
			<date type="published" when="2018">2018</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">The ALSFRS-R: a revised ALS functional rating scale that incorporates assessments of respiratory function</title>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">M</forename><surname>Cedarbaum</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Stambler</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Malta</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Fuller</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Hilt</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Thurmond</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Nakanishi</surname></persName>
		</author>
		<author>
			<orgName>BDNF ALS Study Group (Phase III)</orgName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of the neurological sciences</title>
		<imprint>
			<biblScope unit="volume">169</biblScope>
			<biblScope unit="page" from="13" to="21" />
			<date type="published" when="1999">1999</date>
		</imprint>
	</monogr>
	<note>A complete listing of the BDNF Study Group</note>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">A systematic review of digital technology to evaluate motor function and disease progression in motor neuron disease</title>
		<author>
			<persName><forename type="first">E</forename><surname>Beswick</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Fawcett</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Hassan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Forbes</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Dakin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Newton</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Abrahams</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Carson</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Chandran</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Perry</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Neurology</title>
		<imprint>
			<biblScope unit="volume">269</biblScope>
			<biblScope unit="page" from="6254" to="6268" />
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">Accelerometry for remote monitoring of physical activity in amyotrophic lateral sclerosis: a longitudinal cohort study</title>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">P</forename><surname>Van Eijk</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">N</forename><surname>Bakers</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><forename type="middle">M</forename><surname>Bunte</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">J</forename><surname>De Fockert</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">J</forename><surname>Eijkemans</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">H</forename><surname>Van Den Berg</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of neurology</title>
		<imprint>
			<biblScope unit="volume">266</biblScope>
			<biblScope unit="page" from="2387" to="2395" />
			<date type="published" when="2019">2019</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">Online assessment of ALS functional rating scale compares well to in-clinic evaluation: a prospective trial</title>
		<author>
			<persName><forename type="first">A</forename><surname>Maier</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Holm</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Wicks</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Steinfurth</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Linke</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Münch</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Meyer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Meyer</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Amyotrophic Lateral Sclerosis</title>
		<imprint>
			<biblScope unit="volume">13</biblScope>
			<biblScope unit="page" from="210" to="216" />
			<date type="published" when="2012">2012</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">Wearable device and smartphone data quantify ALS progression and may provide novel outcome measures</title>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">A</forename><surname>Johnson</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Karas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><forename type="middle">M</forename><surname>Burke</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Straczkiewicz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><forename type="middle">A</forename><surname>Scheier</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">P</forename><surname>Clark</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Iwasaki</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Lahav</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">S</forename><surname>Iyer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J.-P</forename><surname>Onnela</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">NPJ Digital Medicine</title>
		<imprint>
			<biblScope unit="volume">6</biblScope>
			<biblScope unit="page">34</biblScope>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">Remote monitoring of amyotrophic lateral sclerosis using wearable sensors detects differences in disease progression and survival: a prospective cohort study</title>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">W</forename><surname>Van Unnik</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Meyjes</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">R J</forename><surname>Van Mantgem</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">H</forename><surname>Van Den Berg</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">P</forename><surname>Van Eijk</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Ebiomedicine</title>
		<imprint>
			<biblScope unit="volume">103</biblScope>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">At-home wearables and machine learning sensitively capture disease progression in amyotrophic lateral sclerosis</title>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">S</forename><surname>Gupta</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Patel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Premasiri</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Vieira</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Nature Communications</title>
		<imprint>
			<biblScope unit="volume">14</biblScope>
			<biblScope unit="page">5080</biblScope>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">Upper limb movements as digital biomarkers in people with ALS</title>
		<author>
			<persName><forename type="first">M</forename><surname>Straczkiewicz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Karas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">A</forename><surname>Johnson</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><forename type="middle">M</forename><surname>Burke</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Scheier</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><forename type="middle">B</forename><surname>Royse</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Calcagno</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Clark</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Iyer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">D</forename><surname>Berry</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">EBioMedicine</title>
		<imprint>
			<biblScope unit="volume">101</biblScope>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title level="a" type="main">A machine-learning based objective measure for ALS disease severity</title>
		<author>
			<persName><forename type="first">F</forename><forename type="middle">G</forename><surname>Vieira</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Venugopalan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">S</forename><surname>Premasiri</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>McNally</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Jansen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>McCloskey</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">P</forename><surname>Brenner</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Perrin</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">NPJ Digital Medicine</title>
		<imprint>
			<biblScope unit="volume">5</biblScope>
			<biblScope unit="page">45</biblScope>
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<analytic>
		<title level="a" type="main">Improved ALS clinical trials through frequent at-home self-assessment: a proof of concept study</title>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">B</forename><surname>Rutkove</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Narayanaswami</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Berisha</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Liss</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Hahn</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Shelton</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Qi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Pandeya</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">M</forename><surname>Shefner</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Annals of Clinical and Translational Neurology</title>
		<imprint>
			<biblScope unit="volume">7</biblScope>
			<biblScope unit="page" from="1148" to="1157" />
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b13">
	<analytic>
		<title level="a" type="main">Artificial intelligence and statistical methods for stratification and prediction of progression in amyotrophic lateral sclerosis: A systematic review</title>
		<author>
			<persName><forename type="first">E</forename><surname>Tavazzi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Longato</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Vettoretti</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Aidos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Trescato</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Roversi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">S</forename><surname>Martins</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><forename type="middle">N</forename><surname>Castanho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Branco</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">F</forename><surname>Soares</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Artificial Intelligence in Medicine</title>
		<imprint>
			<biblScope unit="page">102588</biblScope>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b14">
	<analytic>
		<title level="a" type="main">Insights into amyotrophic lateral sclerosis from a machine learning perspective</title>
		<author>
			<persName><forename type="first">J</forename><surname>Gordon</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Lerner</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Clinical Medicine</title>
		<imprint>
			<biblScope unit="volume">8</biblScope>
			<biblScope unit="page">1578</biblScope>
			<date type="published" when="2019">2019</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b15">
	<analytic>
		<title level="a" type="main">Learning prognostic models using disease progression patterns: Predicting the need for non-invasive ventilation in amyotrophic lateral sclerosis</title>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">S</forename><surname>Martins</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Gromicho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Pinto</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Carvalho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">C</forename><surname>Madeira</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">IEEE/ACM Transactions on Computational Biology and Bioinformatics</title>
		<imprint>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b16">
	<analytic>
		<title level="a" type="main">Unravelling disease presentation patterns in ALS using biclustering for discriminative meta-features discovery</title>
		<author>
			<persName><forename type="first">J</forename><surname>Matos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Pires</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Aidos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Gromicho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Pinto</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Carvalho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">C</forename><surname>Madeira</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">International Work-Conference on Bioinformatics and Biomedical Engineering</title>
				<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2020">2020</date>
			<biblScope unit="page" from="517" to="528" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b17">
	<analytic>
		<title level="a" type="main">Biclustering algorithms for biological data analysis: a survey</title>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">C</forename><surname>Madeira</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">L</forename><surname>Oliveira</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">IEEE/ACM Transactions on Computational Biology and Bioinformatics</title>
		<imprint>
			<biblScope unit="volume">1</biblScope>
			<biblScope unit="page" from="24" to="45" />
			<date type="published" when="2004">2004</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b18">
	<analytic>
		<title level="a" type="main">Biclustering fMRI time series: a comparative study</title>
		<author>
			<persName><forename type="first">E</forename><forename type="middle">N</forename><surname>Castanho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Aidos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">C</forename><surname>Madeira</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">BMC Bioinformatics</title>
		<imprint>
			<biblScope unit="volume">23</biblScope>
			<biblScope unit="page">192</biblScope>
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b19">
	<analytic>
		<title level="a" type="main">Learning prognostic models using a mixture of biclustering and triclustering: Predicting the need for non-invasive ventilation in amyotrophic lateral sclerosis</title>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">F</forename><surname>Soares</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Henriques</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Gromicho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Carvalho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">C</forename><surname>Madeira</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Biomedical Informatics</title>
		<imprint>
			<biblScope unit="volume">134</biblScope>
			<biblScope unit="page">104172</biblScope>
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b20">
	<analytic>
		<title level="a" type="main">FleBiC: Learning classifiers from high-dimensional biomedical data using discriminative biclusters with non-constant patterns</title>
		<author>
			<persName><forename type="first">R</forename><surname>Henriques</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">C</forename><surname>Madeira</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Pattern Recognition</title>
		<imprint>
			<biblScope unit="volume">115</biblScope>
			<biblScope unit="page">107900</biblScope>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b21">
	<analytic>
		<title level="a" type="main">Triclustering algorithms for three-dimensional data analysis: a comprehensive survey</title>
		<author>
			<persName><forename type="first">R</forename><surname>Henriques</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">C</forename><surname>Madeira</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">ACM Computing Surveys (CSUR)</title>
		<imprint>
			<biblScope unit="volume">51</biblScope>
			<biblScope unit="page" from="1" to="43" />
			<date type="published" when="2018">2018</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b22">
	<analytic>
		<title level="a" type="main">Comprehensive assessment of triclustering algorithms for three-way temporal data analysis</title>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">F</forename><surname>Soares</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Henriques</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">C</forename><surname>Madeira</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Pattern Recognition</title>
		<imprint>
			<biblScope unit="page">110303</biblScope>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b23">
	<analytic>
		<title level="a" type="main">Triclustering-based classification of longitudinal data for prognostic prediction: targeting relevant clinical endpoints in amyotrophic lateral sclerosis</title>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">F</forename><surname>Soares</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Henriques</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Gromicho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Carvalho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">C</forename><surname>Madeira</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Scientific Reports</title>
		<imprint>
			<biblScope unit="volume">13</biblScope>
			<biblScope unit="page">6182</biblScope>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b24">
	<analytic>
		<title level="a" type="main">Prognostic models based on patient snapshots and time windows: Predicting disease progression to assisted ventilation in amyotrophic lateral sclerosis</title>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">V</forename><surname>Carreiro</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><forename type="middle">M</forename><surname>Amaral</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Pinto</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Tomás</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Carvalho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">C</forename><surname>Madeira</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Biomedical Informatics</title>
		<imprint>
			<biblScope unit="volume">58</biblScope>
			<biblScope unit="page" from="133" to="144" />
			<date type="published" when="2015">2015</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b25">
	<analytic>
		<title level="a" type="main">Overview of iDPP@CLEF 2024: The Intelligent Disease Progression Prediction Challenge</title>
		<author>
			<persName><forename type="first">G</forename><surname>Birolo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Bosoni</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Aidos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Bergamaschi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Cavalla</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Chiò</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Dagliati</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Carvalho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Di Nunzio</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Fariselli</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>García</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">G</forename><surname>Dominguez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Marta Gromicho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Longato</surname></persName>
		</author>
		<author>
			<persName><forename type="first">U</forename><surname>Madeira</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Manera</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Marchesin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Menotti</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Silvello</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Tavazzi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Tavazzi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Trescato</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><forename type="middle">D</forename><surname>Vettoretti</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Camillo</surname></persName>
		</author>
		<author>
			<persName><surname>Ferro</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Working Notes of the Conference and Labs of the Evaluation Forum (CLEF 2024)</title>
				<meeting><address><addrLine>Grenoble, France</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2024">September 9th to 12th, 2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b26">
	<analytic>
		<title level="a" type="main">Intelligent Disease Progression Prediction: Overview of iDPP@CLEF</title>
		<author>
			<persName><forename type="first">G</forename><surname>Birolo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Bosoni</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Aidos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Bergamaschi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Cavalla</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Chiò</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Dagliati</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Carvalho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Di Nunzio</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Fariselli</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>García</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">G</forename><surname>Dominguez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Marta Gromicho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Longato</surname></persName>
		</author>
		<author>
			<persName><forename type="first">U</forename><surname>Madeira</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Manera</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Marchesin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Menotti</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Silvello</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Tavazzi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Tavazzi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Trescato</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><forename type="middle">D</forename><surname>Vettoretti</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Camillo</surname></persName>
		</author>
		<author>
			<persName><surname>Ferro</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Experimental IR Meets Multilinguality, Multimodality, and Interaction -15th International Conference of the CLEF Association, CLEF 2024</title>
				<meeting><address><addrLine>Grenoble, France</addrLine></address></meeting>
		<imprint>
			<publisher>Proceedings</publisher>
			<date type="published" when="2024-09-09">September 9-12, 2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b27">
	<monogr>
		<title level="m" type="main">Survival analysis for multiple sclerosis: predicting risk of disease worsening</title>
		<author>
			<persName><forename type="first">R</forename><surname>Branco</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Valente</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Martins</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Soares</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Castanho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Madeira</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Aidos</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2023">2023</date>
			<publisher>CLEF</publisher>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b28">
	<analytic>
		<title level="a" type="main">Scikit-learn: Machine learning in Python</title>
		<author>
			<persName><forename type="first">F</forename><surname>Pedregosa</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Varoquaux</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Gramfort</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Michel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Thirion</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Grisel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Blondel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Prettenhofer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Weiss</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Dubourg</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Vanderplas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Passos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Cournapeau</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Brucher</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Perrot</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Duchesnay</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Machine Learning Research</title>
		<imprint>
			<biblScope unit="volume">12</biblScope>
			<biblScope unit="page" from="2825" to="2830" />
			<date type="published" when="2011">2011</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b29">
	<analytic>
		<title level="a" type="main">Spectral biclustering of microarray data: Coclustering genes and conditions</title>
		<author>
			<persName><forename type="first">Y</forename><surname>Kluger</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Basri</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">T</forename><surname>Chang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Gerstein</surname></persName>
		</author>
		<idno type="DOI">10.1101/gr.648603</idno>
	</analytic>
	<monogr>
		<title level="j">Genome Research</title>
		<imprint>
			<biblScope unit="volume">13</biblScope>
			<biblScope unit="page" from="703" to="716" />
			<date type="published" when="2003">2003</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b30">
	<analytic>
		<title level="a" type="main">Imbalanced-learn: A python toolbox to tackle the curse of imbalanced datasets in machine learning</title>
		<author>
			<persName><forename type="first">G</forename><surname>Lemaître</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Nogueira</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">K</forename><surname>Aridas</surname></persName>
		</author>
		<ptr target="http://jmlr.org/papers/v18/16-365.html" />
	</analytic>
	<monogr>
		<title level="j">Journal of Machine Learning Research</title>
		<imprint>
			<biblScope unit="volume">18</biblScope>
			<biblScope unit="page" from="1" to="5" />
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b31">
	<analytic>
		<title level="a" type="main">SMOTE: synthetic minority over-sampling technique</title>
		<author>
			<persName><forename type="first">N</forename><forename type="middle">V</forename><surname>Chawla</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><forename type="middle">W</forename><surname>Bowyer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">O</forename><surname>Hall</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><forename type="middle">P</forename><surname>Kegelmeyer</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Artificial Intelligence Research</title>
		<imprint>
			<biblScope unit="volume">16</biblScope>
			<biblScope unit="page" from="321" to="357" />
			<date type="published" when="2002">2002</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b32">
	<analytic>
		<title level="a" type="main">Optuna: A next-generation hyperparameter optimization framework</title>
		<author>
			<persName><forename type="first">T</forename><surname>Akiba</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Sano</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Yanase</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Ohta</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Koyama</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</title>
				<meeting>the 25th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</meeting>
		<imprint>
			<date type="published" when="2019">2019</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b33">
	<analytic>
		<title level="a" type="main">On the stratification of multi-label data</title>
		<author>
			<persName><forename type="first">K</forename><surname>Sechidis</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Tsoumakas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Vlahavas</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Machine Learning and Knowledge Discovery in Databases</title>
				<imprint>
			<date type="published" when="2011">2011</date>
			<biblScope unit="page" from="145" to="158" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b34">
	<analytic>
		<title level="a" type="main">A network perspective on stratification of multi-label data</title>
		<author>
			<persName><forename type="first">P</forename><surname>Szymański</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Kajdanowicz</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the First International Workshop on Learning with Imbalanced Domains: Theory and Applications</title>
				<editor>
			<persName><forename type="first">L</forename><surname>Torgo</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">B</forename><surname>Krawczyk</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">P</forename><surname>Branco</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Moniz</surname></persName>
		</editor>
		<meeting>the First International Workshop on Learning with Imbalanced Domains: Theory and Applications<address><addrLine>Skopje, Macedonia</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2017">2017</date>
			<biblScope unit="volume">74</biblScope>
			<biblScope unit="page" from="22" to="35" />
		</imprint>
	</monogr>
	<note>Proceedings of Machine Learning Research, PMLR, ECML-PKDD</note>
</biblStruct>

<biblStruct xml:id="b35">
	<monogr>
		<author>
			<persName><forename type="first">P</forename><surname>Szymański</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Kajdanowicz</surname></persName>
		</author>
		<idno type="arXiv">arXiv:1702.01460</idno>
		<title level="m">A scikit-based Python environment for performing multi-label classification</title>
				<imprint>
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
	<note type="report_type">ArXiv e-prints</note>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
