<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">Evaluating Multi-task Curriculum Learning for Forecasting Energy Consumption in Electric Heavy-duty Vehicles</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
				<date type="published" when="2024-10">October 2024</date>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Yuantao</forename><surname>Fan</surname></persName>
							<email>fan@hh.se</email>
							<affiliation key="aff0">
								<orgName type="department">Center for Applied Intelligent Systems Research (CAISR)</orgName>
								<address>
									<addrLine>Kristian IV:s väg 3</addrLine>
									<postCode>301 18</postCode>
									<settlement>Halmstad</settlement>
									<country key="SE">Sweden</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="institution">Halmstad University</orgName>
								<address>
									<addrLine>2 ; Gropegårdsgatan 2</addrLine>
									<postCode>417 15</postCode>
									<settlement>Göteborg</settlement>
									<country>Sweden, Volvo Group</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Sławomir</forename><surname>Nowaczyk</surname></persName>
							<email>slawomir.nowaczyk@hh.se</email>
							<affiliation key="aff0">
								<orgName type="department">Center for Applied Intelligent Systems Research (CAISR)</orgName>
								<address>
									<addrLine>Kristian IV:s väg 3</addrLine>
									<postCode>301 18</postCode>
									<settlement>Halmstad</settlement>
									<country key="SE">Sweden</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="institution">Halmstad University</orgName>
								<address>
									<addrLine>2 ; Gropegårdsgatan 2</addrLine>
									<postCode>417 15</postCode>
									<settlement>Göteborg</settlement>
									<country>Sweden, Volvo Group</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Zhenkan</forename><surname>Wang</surname></persName>
							<email>zhenkan.wang@volvo.com</email>
						</author>
						<author>
							<persName><forename type="first">Sepideh</forename><surname>Pashami</surname></persName>
							<email>sepideh.pashami@hh.se</email>
							<affiliation key="aff0">
								<orgName type="department">Center for Applied Intelligent Systems Research (CAISR)</orgName>
								<address>
									<addrLine>Kristian IV:s väg 3</addrLine>
									<postCode>301 18</postCode>
									<settlement>Halmstad</settlement>
									<country key="SE">Sweden</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="institution">Halmstad University</orgName>
								<address>
									<addrLine>2 ; Gropegårdsgatan 2</addrLine>
									<postCode>417 15</postCode>
									<settlement>Göteborg</settlement>
									<country>Sweden, Volvo Group</country>
								</address>
							</affiliation>
							<affiliation key="aff2">
								<orgName type="institution">Research Institutes of Sweden (RISE)</orgName>
								<address>
									<addrLine>Isafjordsgatan 28 A</addrLine>
									<postCode>164 40</postCode>
									<settlement>Kista</settlement>
									<country key="SE">Sweden</country>
								</address>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff3">
								<address>
									<settlement>Santiago de Compostela</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">Evaluating Multi-task Curriculum Learning for Forecasting Energy Consumption in Electric Heavy-duty Vehicles</title>
					</analytic>
					<monogr>
						<idno type="ISSN">1613-0073</idno>
						<imprint>
							<date type="published" when="2024-10">October 2024</date>
						</imprint>
					</monogr>
					<idno type="MD5">1301E33BD38EE4ABC48E5A21515EE4DD</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2025-04-23T19:29+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>Energy Consumption Forecasting</term>
					<term>Curriculum Learning</term>
					<term>Multi-task Learning</term>
					<term>Electric Vehicles</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>Accurate energy consumption prediction is crucial for optimising the operation of electric commercial heavy-duty vehicles, particularly for efficient route planning, refining charging strategies, and ensuring optimal truck configuration for specific tasks. This study investigates the application of multi-task curriculum learning to enhance machine learning models for forecasting the energy consumption of various onboard systems in electric vehicles. Multi-task learning, unlike traditional training approaches, leverages auxiliary tasks to provide additional training signals, which has been shown to enhance predictive performance in many domains. By further incorporating curriculum learning, where simpler tasks are learned before progressing to more complex ones, neural network training becomes more efficient and effective.</p><p>We evaluate the suitability of these methodologies in the context of electric vehicle energy forecasting, examining whether the combination of multi-task learning and curriculum learning enhances algorithm generalisation, even with limited training data. We primarily focus on understanding the efficacy of different curriculum learning strategies, including sequential learning and progressive continual learning, using complex, real-world industrial data.</p><p>Our research further explores a set of auxiliary tasks designed to facilitate the learning process by targeting key consumption characteristics projected into future time frames. The findings illustrate the potential of multi-task curriculum learning to advance energy consumption forecasting, significantly contributing to the optimisation of electric heavy-duty vehicle operations. This work offers a novel perspective on integrating advanced machine learning techniques to enhance energy efficiency in the exciting field of electromobility.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1.">Introduction</head><p>Predicting energy consumption for electric vehicles (EVs), especially those used in commercial heavy-duty contexts, is paramount for improving their operational efficiency and promoting sustainability. Effective energy consumption forecasts are indispensable for strategic route planning, optimising charging protocols, and ensuring that vehicle configurations align well with specific operational demands. As electric vehicles gain traction as a viable and ecofriendly alternative to internal combustion engine vehicles, the importance of precise energy consumption predictions becomes increasingly pronounced. The challenges in this domain are multifaceted, stemming from the inherent variability in driving conditions, vehicle load, and diverse environmental factors, which collectively complicate the development of accurate predictive models. Overcoming these obstacles is essential not only for enhancing the reliability and performance of EVs but also for minimising operational costs and boosting the overall efficiency of electric transport systems.</p><p>The transition to electric vehicles is a significant step towards reducing greenhouse gas emissions and achieving sustainable transportation goals. However, since limited energy storage puts unique constraints on which operations are feasible, the benefits of EVs can only be fully realised through the development of specified forecasting methods that accurately anticipate energy needs. In this context, AI and ML emerge as transformative tools. AI-driven models can analyse vast amounts of data to uncover patterns and relationships that are not immediately apparent, providing more accurate and reliable energy consumption forecasts. 
These models can adapt to new data, continuously improving their predictions over time.</p><p>Nevertheless, energy consumption forecasting for EVs faces critical challenges, such as dynamic driving conditions and fluctuating loads, which make even state-of-the-art methods struggle to handle complex real-world data effectively. While the potential to learn from historical data and identify trends that influence energy consumption is the biggest strength of ML-based approaches, it is crucial to develop robust models that can generalise well across different scenarios and vehicle types.</p><p>The complexity and variability inherent in forecasting energy consumption for electric vehicles make it a relevant testing ground for cutting-edge modelling techniques that promise to handle diverse and dynamic data inputs. In particular, Multi-Task Learning (MTL) presents a compelling solution by enabling simultaneous training across multiple related tasks, thereby leveraging shared information to improve the predictive performance of each task. In contrast, training machine learning models in a traditional setting only utilises the target task. MTL is particularly beneficial in scenarios with limited training data, as it enhances generalisation by incorporating auxiliary tasks that provide additional training signals. Moreover, the efficacy of MTL can be further amplified by integrating curriculum learning (CL), which structures the learning process in a progressive manner. Curriculum learning organises tasks from simple to complex, allowing the model to build a robust foundation before tackling more challenging problems. By combining these methodologies into multi-task curriculum learning (MCL), we can efficiently train neural networks that not only perform better on individual tasks but also generalise more effectively across different contexts. 
MCL optimises the learning trajectory, ensuring that simpler tasks enhance the model's capability to learn more complex ones, ultimately leading to more accurate and reliable energy consumption forecasts for electric heavy-duty vehicles. This integrative approach has been shown to be a potent strategy to address the multifaceted challenges in several domains but has not been applied to EV auxiliary energy forecasting before. Thus, this paper aims to evaluate the suitability of MCL in this real-world, complex scenario. Generating a set of auxiliary tasks is a critical step in the implementation of MCL -and how to do it for forecasting energy consumption in EVs requires experimental evaluation. To create auxiliary tasks, one must first obtain an understanding of the primary task, identifying key factors and variables that influence energy consumption and the types of patterns that are indicative of future behaviour. These factors often include vehicle load, driving speed, route characteristics, weather conditions, and driver behaviour. Each of these variables can serve as the basis for an auxiliary task. For instance, an auxiliary task might involve predicting the impact of vehicle load on energy consumption under different traffic conditions or estimating the effect of varying driving speeds on battery usage. Historical data from real-world vehicle operations can be mined to extract relevant patterns and correlations, which can then be used to define these auxiliary tasks. In this paper, we have decided to focus on the patterns within the forecasted value itself instead of exploiting multivariate vehicle signals. 
In particular, we define several types of energy consumption characteristics as targets for the auxiliary tasks, such as questioning whether the consumption in the next time frame exceeds the global mean, whether the consumption will be higher in the next time frame compared to the current consumption, or predicting the consumption difference between the start and the end of the next time frame. These tasks are general enough to be suitable for any forecasting task, while at the same time being sufficiently closely related to the actual primary task to, hopefully, provide useful information to boost the training process.</p><p>The core contribution of this paper is the evaluation of applying several multi-task curriculum learning techniques for forecasting the energy consumption of heavy-duty electric vehicles, including the proposition of utilising key consumption characteristics as targets for generating auxiliary tasks for MCL. Comparison of MCL variations, with combinations of curriculum learning strategy (sequential learning and progressive continual learning) and auxiliary tasks, illustrates the improvements in the performance on a real-world data collected from normal operations of commercial transportation electric vehicles. The experimental results show progressive continual learning, with a logistic growth weighting function governing the learning balance between the primary and the auxiliary task, achieves the best performance; the result also shows that the first auxiliary task is the most helpful task for subsystems 1 and 4; the third auxiliary task is the most helpful task for subsystems 2 and 3. Furthermore, it is observed that MCL with the proposed auxiliary tasks can improve the learning efficiency of the model, achieving faster convergence to a point beyond which the gain from further training is limited.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.">Related Work</head><p>Curriculum learning enables the training of machine learning models in a meaningful order, from easy samples to sets of difficult and complex samples <ref type="bibr" target="#b3">[1]</ref>. A common approach for CL introduces easy-to-hard ordering of samples for the training process, e.g., vanilla CL, self-paced CL, balanced CL, etc. When multiple tasks are available, the easy-to-hard ordering of the tasks to be learned can be applied as well. Multi-task learning can be applied, by sharing information across a set of related tasks in the training process, and the performance can be further improved <ref type="bibr" target="#b4">[2]</ref> via, e.g. Gradnorm <ref type="bibr" target="#b5">[3]</ref> balancing the losses between multiple tasks. While most multi-task learning approaches aim at learning multiple tasks simultaneously, progressive curriculum learning allows determining the best order to learn multiple tasks to maximise the final result. Study <ref type="bibr" target="#b6">[4]</ref> presented by Pentina et al. finds the best order of tasks to be learned in a sequence based on a generalisation bound criterion to optimise the average expected classification performance over all the tasks. Work <ref type="bibr" target="#b7">[5]</ref> by Siahpour et al. introduced a penalty coefficient, as a function of the epoch step, to govern the training process by suppressing the loss, and noise respectively, from the domain discrimination task in the early stage, to ensure the efficient training of neural networks. Shi et al. proposed progressive contrastive learning <ref type="bibr">[6]</ref> based on multi-prototypes in the dataset; the training process is ordered to learn the centroid prototype first, followed by the hard prototype, and finally the dynamic prototype. 
In this work, we explore sequential learning and progressive continual learning with a set of auxiliary tasks generated based on key characteristics of the target signal.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.">Problem Formulation</head><p>For a given primary learning task 𝒯 𝑖 , we create a set of auxiliary tasks 𝒯 𝑗 𝑖 , where 𝒯 𝑖 corresponds to the primary task (in our case, the forecasting of energy consumption for the 𝑖-th auxiliary subsystem in an electric truck), and 𝒯 𝑗 𝑖 corresponds to the 𝑗-th type of auxiliary task. The majority of the multi-task learning studies aim to learn all relevant tasks together to improve the performance for each task 𝒯 𝑖 . In our study, we are only interested in improving the energy forecasting tasks 𝒯 𝑖 , not the generated auxiliary tasks 𝒯 𝑗 𝑖 . All energy forecasting tasks and the auxiliary tasks are learned from the same dataset, multi-variate time series sensor readings were collected from the normal operations of several heavy-duty electric vehicles.</p><p>Let us denote data of the multivariate time series x of each vehicle 𝑣 by 𝑋 = { 𝑥 𝑘 𝑣,𝑡 | 𝑡 = 1, 2, ..., 𝑇 𝑒 (𝑣), 𝑘 = 1, 2, ..., 𝐾}, where 𝑥 𝑘 𝑣,𝑡 is the value of the 𝑘-th feature x given a vehicle/trajectory 𝑣 at time 𝑡, and 𝑇 𝑒 (𝑣) corresponds to the end of the recording. A subset of the features 𝑢 𝑖 𝑣,𝑡 reflects the energy consumption of subsystem 𝑖 at time 𝑡. The target energy consumption 𝑦 𝑖 𝑣,𝑡 0 in a future time frame 𝜏 𝑝ℎ can be approximated by summing up the energy consumed over this time frame 𝑦 𝑖 𝑣,𝑡 0 = ∑︀ 𝑡∈[𝑡 0 ,𝑡 0 +𝜏 𝑝ℎ ] 𝑝 𝑖 (𝑡) • ∆𝑡, where 𝑝 𝑖 (𝑡) is the power consumption at time 𝑡, and ∆𝑡 is the time interval between two samples.</p><p>In this study, we set 𝜏 𝑝ℎ equal to 10 minutes. For a given forecasting task 𝒯 𝑖 , a regression model 𝑓 𝑗 𝑖 (•) is trained together with one of the auxiliary tasks 𝒯 𝑗 𝑖 to estimate consumption 𝑦 𝑖 𝑣,𝑡 . In this study, neural networks, a shared feature extractor, with multiple heads, each corresponding to one task, were trained under different settings and evaluated for their performance after 200 training epochs. 
We explore different multi-task curriculum learning settings and auxiliary tasks for forecasting energy consumption. The MCL methods were compared to the traditional approach</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.">Method</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.">Auxiliary Tasks</head><p>For a given regression task 𝒯 𝑖 (forecasting energy consumption for one of the subsystems), a set of auxiliary tasks was generated to assist the learning progress. We explore the use of five types of consumption characteristics as targets for creating the auxiliary tasks: i) 𝒯 1 𝑖 : classifying whether the consumption in the next time frame exceeds the global mean for that subsystem 𝑖; ii) 𝒯 2 𝑖 : classifying whether the consumption will increase in the next time frame, compared with the current consumption; iii) 𝒯 3 𝑖 : classifying whether the consumption at the end of the next time frame exceeds the starting point; iv) 𝒯 4  𝑖 : predicting the consumption difference between the start and the end of the next time frame; v) 𝒯 5  𝑖 : predicting the difference between the peak consumption and the lowest consumption in the next time frame. The first three auxiliary tasks are classification tasks, while the other two are regression tasks. Learning to predict these key consumption characteristics in these auxiliary tasks 𝒯 𝑗 𝑖 , along with the primary tasks 𝒯 𝑖 , under MCL, is evaluated for its usefulness.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.2.">Network Architecture</head><p>The regression model evaluated for MCL in this study builds on a multi-layer perceptron. The model is comprised of a shared feature extractor and two heads, one head carries out the main task 𝒯 𝑖 , and the other corresponds to one of the five auxiliary tasks 𝒯 𝑗 𝑖 . The network architecture is illustrated in Figure <ref type="figure" target="#fig_0">1</ref>. For auxiliary tasks that are classification tasks, a sigmoid function was applied to the output of the corresponding head.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.3.">Curriculum Learning Strategy</head><p>The two curriculum learning strategies evaluated in this work are sequential learning (SeqL) and progressive continual learning (PCL). The overall optimisation loss ℒ can be defined as:</p><formula xml:id="formula_0">ℒ = 𝜆ℒ 𝒯 𝑖 + (1 − 𝜆)ℒ 𝒯 𝑗 𝑖 (1)</formula><p>where ℒ 𝒯 𝑖 denotes the loss for the primary tasks, while ℒ 𝒯 𝑗 𝑖 denotes the loss for the auxiliary task 𝑗. The SeqL employed imposes a fixed ordering of the tasks, e.g. learning the auxiliary task first, before a predetermined epoch, and the primary task afterwards:</p><formula xml:id="formula_1">𝜆 𝑆𝑒𝑞𝐿 = {︃ 0 if 𝜂 &lt; 𝒩 𝑒𝑝 1 if 𝜂 ≥ 𝒩 𝑒𝑝 (<label>2</label></formula><formula xml:id="formula_2">)</formula><p>where 𝜂 is the current training epoch, and 𝒩 𝑒𝑝 is the number of epochs predetermined to switch to another task. The PCL employs a weighting mechanism, a function of training epochs, to govern the learning process and gradually increases the weights on the loss corresponding to the primary task:</p><formula xml:id="formula_3">𝜆 𝑃 𝐶𝐿 = 2 1 + 𝑒𝑥𝑝(−10𝛼𝜂/𝒩 𝑡𝑜𝑡 ) − 1 (<label>3</label></formula><formula xml:id="formula_4">)</formula><p>where 𝛼 is a coefficient governing the change rate (see Figure <ref type="figure" target="#fig_1">2</ref> for an illustration), 𝜂 is the current training epoch, and 𝒩 𝑡𝑜𝑡 is the total amount of training epochs. The two curriculum learning strategies were compared with MTL without any special curriculum learning and learning only on the primary task 𝒯 𝑖 . The two evaluation criteria in this study are (i) the test loss (Mean Absolute Error, MAE) after training converged, i.e., 𝑁 𝑡𝑜𝑡 epochs, and (ii) whether the proposed learning strategy achieves a faster convergence time, i.e., the epoch at which the test loss has reached a saturation point (no further significant decrease in the loss afterwards). 
In each case, different variants of MCL are compared against a learning process without any multi-task curriculum learning. The saturation point is detected using a knee point detection algorithm <ref type="bibr" target="#b9">[7]</ref>, proposed by Satopaa et al. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.">Experiment Result</head><p>The energy consumption dataset was collected from several electric trucks operating in different countries for a couple of months, including sensor readings of mileage, speed, ambient temperature, and energy consumed for auxiliary subsystems, etc., from sessions of driving.</p><p>The four subsystems we forecast energy consumption for are the air compressor 𝒯 1 , the air conditioner 𝒯 2 , the cabin heater 𝒯 3 , and the heater of the energy storage system 𝒯 4 .</p><p>For the experiment conducted in this paper, the neural networks were implemented via the PyTorch library <ref type="bibr" target="#b10">[8]</ref>, using an ADAM optimiser with a learning rate of 0.001. The loss function for the regression tasks is mean absolute error (MAE), and binary cross-entropy (BCE) was employed as the loss function for the classification tasks. The total number of training epochs 𝒩 𝑡𝑜𝑡 is set to 200. For the sequential learning strategy, 𝒩 𝑒𝑝 is set to 100, and for the progressive continual learning, 𝛼 values of 0.1 (i.e. a linear function) and 0.3 are tested. The experiments were conducted using 4-fold cross-validation driving session-wise, i.e. data from the same driving session would never appear in the training and the testing population together.</p><p>Table <ref type="table" target="#tab_0">1</ref> and Table <ref type="table" target="#tab_1">2</ref> show the training and testing losses after 200 epochs of training of the neural networks using multi-task learning without any curriculum learning (MTL), sequential learning (SeqL), progressive continual learning with an 𝛼 of 0.1 (PCL-lin), and an 𝛼 of 0.3 (PCL-exp). The baseline performance, single task learning (STL), is produced with learning only on the primary task 𝒯 𝑖 for each subsystem, shown in parentheses. It is shown in both tables that the lowest averaged MAE is achieved using PCL-exp. 
As a sanity check, Table <ref type="table" target="#tab_0">1</ref> demonstrates that the training losses, after 200 epochs of training, of most MCL methods did converge to a level comparable to STL. For the testing losses shown in Table <ref type="table" target="#tab_1">2</ref>, applying PCL-exp on task sets {𝒯 1 , 𝒯 1  1 } and {𝒯 4 , 𝒯 1 4 } achieved the lowest averaged MAE for forecasting energy consumption of subsystems 1 and 4 (i.e., the first auxiliary task appears to be the most helpful auxiliary task for subsystems 1 and 4); similarly, applying PCL-exp on task sets {𝒯 2 , 𝒯 3 2 } and {𝒯 3 , 𝒯 3  3 } achieved the lowest averaged MAE for forecasting energy consumption of subsystems 2 and 3.</p><p>Figure <ref type="figure" target="#fig_2">3</ref> illustrates the differences between several multi-task curriculum learning strategies, focusing on the convergence speed. Specifically, we identify a reference point (epoch) beyond which the gain from further training is limited. This reference point is computed using a knee point detection method (algorithm <ref type="bibr" target="#b9">[7]</ref> by Satopaa et al.) on the mean STL test losses (shown as grey dots and the corresponding dash line). The four plots in Figure <ref type="figure" target="#fig_2">3</ref> illustrate the testing loss for learning the four primary tasks, along with their 5-th auxiliary task, i.e. 𝒯 5 𝑖 . It is observed in Figure <ref type="figure" target="#fig_2">3</ref>: i) there is no significant difference between the four approaches for 𝒯 1 ; ii) MTL and PCL-lin drop slightly slower compared to STL and PCL-exp for 𝒯 2 ; iii) both PCL approaches drop slower compared with STL and MTL for 𝒯 3 ; iv) MTL, PCL-lin, and PCL-exp drop faster compared to STL. Table <ref type="table" target="#tab_2">3</ref> shows a comparison between MCL methods on the convergence time to the reference point (computed based on STL mean testing losses over the four folds). 
It is observed that: i) MTL outperforms STL in all four primary tasks, and converges to the reference point faster than other approaches in three out of four primary tasks; ii) PCL-lin achieved fast convergence for two of the tasks; iii) PCL-exp achieved better performance compared to PCL-lin, with an overall short convergence time. The result corresponding to SeqL is particularly interesting. Although a 𝒩 𝑒𝑝 of 100 epochs is adopted for SeqL (i.e. trained on one of the auxiliary tasks for the first 100 epochs before learning the primary task), the testing loss converges to the reference point within 10 epochs in the majority of the cases. From an empirical perspective, the proposed auxiliary tasks assisted the learning (of the models) for the primary task, resulting in a faster convergence time to the reference point. </p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head>Figure 1 :</head><label>1</label><figDesc>Figure 1: MLP network architecture</figDesc><graphic coords="6,218.93,84.20,157.41,445.17" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_1"><head>Figure 2 :</head><label>2</label><figDesc>Figure 2: PCL weighting function</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_2"><head>Figure 3 :</head><label>3</label><figDesc>Figure 3: Comparison of convergence speed for different MCL approaches and auxiliary tasks.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1</head><label>1</label><figDesc>Comparison of training loss after the 200 epochs for different MCL approaches using different auxiliary tasks. The reference performances (using STL) are placed in parentheses. MCL results outperforming the baseline are highlighted in bold, and the best performance for each subsystem is underlined.</figDesc><table><row><cell>Task1 (0.6202 ± 0.0163)</cell><cell>MTL</cell><cell>SeqL</cell><cell>PCL-lin</cell><cell>PCL-exp</cell></row><row><cell>AuxTask1</cell><cell cols="4">0.6125 ± 0.0141 0.6054 ± 0.0167 0.609 ± 0.0185 0.6013 ± 0.0156</cell></row><row><cell>AuxTask2</cell><cell>0.6374 ± 0.0147</cell><cell>0.6502 ± 0.0138</cell><cell>0.6802 ± 0.0096</cell><cell>0.6435 ± 0.0156</cell></row><row><cell>AuxTask3</cell><cell cols="4">0.6165 ± 0.0166 0.6131 ± 0.0118 0.6076 ± 0.0154 0.6033 ± 0.0155</cell></row><row><cell>AuxTask4</cell><cell>0.6239 ± 0.0132</cell><cell>0.626 ± 0.0119</cell><cell cols="2">0.6567 ± 0.0161 0.6182 ± 0.0121</cell></row><row><cell>AuxTask5</cell><cell cols="2">0.6256 ± 0.0113 0.6152 ± 0.0121</cell><cell>0.625 ± 0.0147</cell><cell>0.614 ± 0.0117</cell></row><row><cell>Task2 (0.2617 ± 0.0245)</cell><cell>MTL</cell><cell>SeqL</cell><cell>PCL-lin</cell><cell>PCL-exp</cell></row><row><cell>AuxTask1</cell><cell>0.2681 ± 0.023</cell><cell>0.276 ± 0.0128</cell><cell cols="2">0.2959 ± 0.0157 0.2541 ± 0.0171</cell></row><row><cell>AuxTask2</cell><cell>0.2619 ± 0.0158</cell><cell>0.3016 ± 0.0308</cell><cell>0.2939 ± 0.0204</cell><cell>0.2475 ± 0.037</cell></row><row><cell>AuxTask3</cell><cell>0.2662 ± 0.0395</cell><cell cols="3">0.2862 ± 0.0324 0.2379 ± 0.0245 0.2158 ± 0.0186</cell></row><row><cell>AuxTask4</cell><cell>0.2534 ± 0.011</cell><cell>0.2866 ± 0.0255</cell><cell cols="2">0.2795 ± 0.0202 0.2366 ± 0.0432</cell></row><row><cell>AuxTask5</cell><cell>0.2638 ± 0.0168</cell><cell>0.2691 ± 0.0361</cell><cell cols="2">0.2971 ± 0.0167 0.2436 
± 0.0285</cell></row><row><cell>Task3 (0.3173 ± 0.0115)</cell><cell>MTL</cell><cell>SeqL</cell><cell>PCL-lin</cell><cell>PCL-exp</cell></row><row><cell>AuxTask1</cell><cell cols="2">0.3223 ± 0.0132 0.3138 ± 0.0084</cell><cell>0.3248 ± 0.012</cell><cell>0.3116 ± 0.0111</cell></row><row><cell>AuxTask2</cell><cell>0.3217 ± 0.0109</cell><cell>0.3222 ± 0.0116</cell><cell>0.3423 ± 0.0113</cell><cell>0.317 ± 0.0096</cell></row><row><cell>AuxTask3</cell><cell cols="4">0.3148 ± 0.0074 0.3117 ± 0.0151 0.3229 ± 0.0121 0.3018 ± 0.0116</cell></row><row><cell>AuxTask4</cell><cell>0.333 ± 0.0126</cell><cell>0.3272 ± 0.0137</cell><cell>0.356 ± 0.0214</cell><cell>0.3188 ± 0.0152</cell></row><row><cell>AuxTask5</cell><cell>0.3213 ± 0.0103</cell><cell>0.3188 ± 0.0143</cell><cell cols="2">0.3412 ± 0.0091 0.3171 ± 0.0122</cell></row><row><cell>Task4 (0.2936 ± 0.0129)</cell><cell>MTL</cell><cell>SeqL</cell><cell>PCL-lin</cell><cell>PCL-exp</cell></row><row><cell>AuxTask1</cell><cell cols="4">0.2903 ± 0.0186 0.3248 ± 0.0159 0.2684 ± 0.0208 0.2646 ± 0.0124</cell></row><row><cell>AuxTask2</cell><cell>0.2941 ± 0.0123</cell><cell>0.3511 ± 0.0171</cell><cell cols="2">0.3565 ± 0.0866 0.2583 ± 0.0156</cell></row><row><cell>AuxTask3</cell><cell>0.2979 ± 0.0136</cell><cell cols="2">0.3064 ± 0.0165 0.2624 ± 0.0081</cell><cell>0.344 ± 0.152</cell></row><row><cell>AuxTask4</cell><cell>0.3269 ± 0.0127</cell><cell>0.3712 ± 0.0117</cell><cell>0.425 ± 0.035</cell><cell>0.329 ± 0.0275</cell></row><row><cell>AuxTask5</cell><cell>0.3145 ± 0.0142</cell><cell>0.3142 ± 0.0311</cell><cell>0.3334 ± 0.0076</cell><cell>0.3036 ± 0.0182</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_1"><head>Table 2</head><label>2</label><figDesc>Comparison of test loss after the 200 epochs for different MCL approaches using different auxiliary tasks. The reference performances (using STL) are placed in parentheses. MCL results outperforming the baseline are highlighted in bold, and the best performance for each subsystem is underlined.</figDesc><table><row><cell>Task1 (0.6861 ± 0.0713)</cell><cell>MTL</cell><cell>SeqL</cell><cell>PCL-lin</cell><cell>PCL-exp</cell></row><row><cell>AuxTask1</cell><cell>0.6829 ± 0.0707</cell><cell>0.692 ± 0.072</cell><cell cols="2">0.6827 ± 0.0727 0.6784 ± 0.0672</cell></row><row><cell>AuxTask2</cell><cell>0.6948 ± 0.0736</cell><cell>0.7037 ± 0.0749</cell><cell>0.7248 ± 0.0702</cell><cell>0.7076 ± 0.0709</cell></row><row><cell>AuxTask3</cell><cell cols="2">0.6812 ± 0.0744 0.6969 ± 0.0634</cell><cell>0.698 ± 0.0762</cell><cell>0.6943 ± 0.0774</cell></row><row><cell>AuxTask4</cell><cell>0.6917 ± 0.0775</cell><cell>0.6934 ± 0.069</cell><cell>0.7058 ± 0.0634</cell><cell>0.6881 ± 0.0684</cell></row><row><cell>AuxTask5</cell><cell>0.6968 ± 0.0712</cell><cell>0.6894 ± 0.0735</cell><cell>0.6913 ± 0.074</cell><cell>0.6862 ± 0.073</cell></row><row><cell>Task2 (0.4374 ± 0.1821)</cell><cell>MTL</cell><cell>SeqL</cell><cell>PCL-lin</cell><cell>PCL-exp</cell></row><row><cell>AuxTask1</cell><cell cols="4">0.4553 ± 0.2035 0.4199 ± 0.1608 0.4277 ± 0.1788 0.4285 ± 0.177</cell></row><row><cell>AuxTask2</cell><cell cols="4">0.4322 ± 0.1671 0.4448 ± 0.1766 0.4319 ± 0.1676 0.4329 ± 0.1678</cell></row><row><cell>AuxTask3</cell><cell cols="4">0.4109 ± 0.1556 0.4427 ± 0.1782 0.4105 ± 0.1398 0.3929 ± 0.1602</cell></row><row><cell>AuxTask4</cell><cell cols="2">0.4105 ± 0.1632 0.4436 ± 0.1673</cell><cell cols="2">0.4699 ± 0.1928 0.4362 ± 0.1752</cell></row><row><cell>AuxTask5</cell><cell>0.4702 ± 0.2093</cell><cell>0.4734 ± 0.2158</cell><cell>0.4874 ± 0.2344</cell><cell>0.4632 ± 
0.2307</cell></row><row><cell>Task3 (0.3827 ± 0.0551)</cell><cell>MTL</cell><cell>SeqL</cell><cell>PCL-lin</cell><cell>PCL-exp</cell></row><row><cell>AuxTask1</cell><cell cols="2">0.3777 ± 0.0593 0.377 ± 0.0627</cell><cell cols="2">0.3829 ± 0.0609 0.3774 ± 0.0577</cell></row><row><cell>AuxTask2</cell><cell>0.387 ± 0.0503</cell><cell>0.3901 ± 0.0618</cell><cell>0.401 ± 0.0534</cell><cell>0.3892 ± 0.0659</cell></row><row><cell>AuxTask3</cell><cell>0.3859 ± 0.0534</cell><cell>0.3857 ± 0.0578</cell><cell cols="2">0.3868 ± 0.0588 0.3766 ± 0.0684</cell></row><row><cell>AuxTask4</cell><cell>0.3847 ± 0.0683</cell><cell>0.3828 ± 0.0563</cell><cell>0.4021 ± 0.0724</cell><cell>0.3865 ± 0.0602</cell></row><row><cell>AuxTask5</cell><cell>0.3874 ± 0.0587</cell><cell>0.3868 ± 0.0605</cell><cell>0.4016 ± 0.0563</cell><cell>0.38 ± 0.0549</cell></row><row><cell>Task4 (0.4166 ± 0.0679)</cell><cell>MTL</cell><cell>SeqL</cell><cell>PCL-lin</cell><cell>PCL-exp</cell></row><row><cell>AuxTask1</cell><cell>0.4155 ± 0.065</cell><cell>0.4783 ± 0.0698</cell><cell cols="2">0.4332 ± 0.0801 0.3986 ± 0.0567</cell></row><row><cell>AuxTask2</cell><cell cols="2">0.4085 ± 0.0745 0.4786 ± 0.0853</cell><cell>0.5225 ± 0.1217</cell><cell>0.4339 ± 0.0813</cell></row><row><cell>AuxTask3</cell><cell>0.424 ± 0.0492</cell><cell>0.5029 ± 0.0554</cell><cell>0.4253 ± 0.054</cell><cell>0.4874 ± 0.1068</cell></row><row><cell>AuxTask4</cell><cell>0.4251 ± 0.0774</cell><cell>0.4548 ± 0.0767</cell><cell>0.4936 ± 0.0718</cell><cell>0.4375 ± 0.0664</cell></row><row><cell>AuxTask5</cell><cell>0.4291 ± 0.0613</cell><cell>0.4442 ± 0.0662</cell><cell>0.4535 ± 0.0651</cell><cell>0.4347 ± 0.0716</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_2"><head>Table 3</head><label>3</label><figDesc>Comparison of convergence speed to reach a point beyond which the gain from further training is limited. The reference point is given by STL loss sequences averaged over 4 folds. MCL results outperforming the baseline are highlighted in bold, and the best performance for each subsystem is underlined.</figDesc><table><row><cell>Task1 (20 Ep.)</cell><cell>MTL</cell><cell>SeqL</cell><cell>PCL-lin</cell><cell>PCL-exp</cell></row><row><cell>AuxTask1</cell><cell>13.0 ± 1.4142</cell><cell>102.25 ± 0.433</cell><cell>16.75 ± 2.2776</cell><cell>16.25 ± 8.9268</cell></row><row><cell>AuxTask2</cell><cell>32.5 ± 4.0311</cell><cell>125.75 ± 2.2776</cell><cell>103.5 ± 28.8141</cell><cell>45.75 ± 26.6962</cell></row><row><cell>AuxTask3</cell><cell>15.75 ± 3.8971</cell><cell>106.5 ± 1.118</cell><cell>29.0 ± 3.3912</cell><cell>22.25 ± 8.1968</cell></row><row><cell>AuxTask4</cell><cell>10.5 ± 1.5</cell><cell>106.5 ± 2.2913</cell><cell>70.5 ± 32.7605</cell><cell>29.0 ± 15.1493</cell></row><row><cell>AuxTask5</cell><cell>15.25 ± 6.7961</cell><cell>102.5 ± 0.5</cell><cell>18.0 ± 4.3012</cell><cell>19.0 ± 9.083</cell></row><row><cell>Task2 (19 Ep.)</cell><cell>MTL</cell><cell>SeqL</cell><cell>PCL-lin</cell><cell>PCL-exp</cell></row><row><cell>AuxTask1</cell><cell cols="2">15.625 ± 7.9047 58.875 ± 50.5827</cell><cell>34.0 ± 22.7211</cell><cell>20.125 ± 14.4606</cell></row><row><cell>AuxTask2</cell><cell>18.0 ± 9.2736</cell><cell>108.25 ± 9.8075</cell><cell>44.5 ± 14.239</cell><cell>23.5 ± 11.4127</cell></row><row><cell>AuxTask3</cell><cell>26.0 ± 16.4773</cell><cell>106.75 ± 7.9804</cell><cell>42.5 ± 12.0312</cell><cell>22.75 ± 7.1545</cell></row><row><cell>AuxTask4</cell><cell>32.0 ± 33.2039</cell><cell cols="2">111.25 ± 13.3112 60.3333 ± 21.6384</cell><cell>32.0 ± 18.8149</cell></row><row><cell>AuxTask5</cell><cell>17.0 ± 11.2916</cell><cell>108.5 ± 
8.6168</cell><cell>100.0 ± 86.0145</cell><cell>28.0 ± 24.8697</cell></row><row><cell>Task3 (14 Ep.)</cell><cell>MTL</cell><cell>SeqL</cell><cell>PCL-lin</cell><cell>PCL-exp</cell></row></table></figure>
		</body>
		<back>

			<div type="acknowledgement">
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Acknowledgments</head><p>The work was carried out with support from the Knowledge Foundation and Vinnova (Sweden's innovation agency) through the Vehicle Strategic Research and Innovation Programme FFI.</p></div>
			</div>

			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">MTL SeqL PCL-lin PCL</title>
		<author>
			<persName><surname>Task4</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">-exp AuxTask1</title>
		<imprint>
			<biblScope unit="volume">14</biblScope>
			<biblScope unit="page">1651</biblScope>
			<date>19</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title/>
		<idno>7178 18.875 ± 8.1 17.1429 ± 4.4538</idno>
	</analytic>
	<monogr>
		<title level="j">AuxTask3</title>
		<imprint>
			<biblScope unit="volume">14</biblScope>
			<biblScope unit="issue">75 ± 2</biblScope>
			<biblScope unit="page" from="375" to="422" />
			<date>9896 61</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<monogr>
		<title level="m">for forecasting the energy consumption of auxiliary subsystems in heavy-duty electric vehicles. The preliminary results show that progressive continual learning has achieved the best performance (lowest averaged MSE) compared to multi-task learning with any CL, Sequential CL, and the traditional approach</title>
				<imprint/>
	</monogr>
	<note>) enabling CL across primary tasks, based on task relevancy</note>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">Curriculum learning: A survey</title>
		<author>
			<persName><forename type="first">P</forename><surname>Soviany</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">T</forename><surname>Ionescu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Rota</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Sebe</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">International Journal of Computer Vision</title>
		<imprint>
			<biblScope unit="volume">130</biblScope>
			<biblScope unit="page" from="1526" to="1565" />
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">A survey on multi-task learning</title>
		<author>
			<persName><forename type="first">Y</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Q</forename><surname>Yang</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">IEEE Transactions on Knowledge and Data Engineering</title>
		<imprint>
			<biblScope unit="volume">34</biblScope>
			<biblScope unit="page" from="5586" to="5609" />
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">Gradnorm: Gradient normalization for adaptive loss balancing in deep multitask networks</title>
		<author>
			<persName><forename type="first">Z</forename><surname>Chen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Badrinarayanan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C.-Y</forename><surname>Lee</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Rabinovich</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">International conference on machine learning</title>
				<imprint>
			<publisher>PMLR</publisher>
			<date type="published" when="2018">2018</date>
			<biblScope unit="page" from="794" to="803" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">Curriculum learning of multiple tasks</title>
		<author>
			<persName><forename type="first">A</forename><surname>Pentina</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Sharmanska</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">H</forename><surname>Lampert</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the IEEE conference on computer vision and pattern recognition</title>
				<meeting>the IEEE conference on computer vision and pattern recognition</meeting>
		<imprint>
			<date type="published" when="2015">2015</date>
			<biblScope unit="page" from="5492" to="5500" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">A novel transfer learning approach in remaining useful life prediction for incomplete dataset</title>
		<author>
			<persName><forename type="first">S</forename><surname>Siahpour</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Li</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Lee</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">IEEE Transactions on Instrumentation and Measurement</title>
		<imprint>
			<biblScope unit="volume">71</biblScope>
			<biblScope unit="page" from="1" to="11" />
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<monogr>
		<author>
			<persName><forename type="first">J</forename><surname>Shi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Yin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Wang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Liu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Xie</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Qu</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2402.19026</idno>
		<title level="m">Progressive contrastive learning with multi-prototype for unsupervised visible-infrared person re-identification</title>
				<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
	<note type="report_type">arXiv preprint</note>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">Finding a &quot;kneedle&quot; in a haystack: Detecting knee points in system behavior</title>
		<author>
			<persName><forename type="first">V</forename><surname>Satopaa</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Albrecht</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Irwin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Raghavan</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">2011 31st international conference on distributed computing systems workshops</title>
				<imprint>
			<publisher>IEEE</publisher>
			<date type="published" when="2011">2011</date>
			<biblScope unit="page" from="166" to="171" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<monogr>
		<author>
			<persName><forename type="first">A</forename><surname>Paszke</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Gross</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Chintala</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Chanan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Yang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Devito</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Lin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Desmaison</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Antiga</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Lerer</surname></persName>
		</author>
		<title level="m">Automatic differentiation in pytorch</title>
				<imprint>
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
