<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">Comparative Evaluation of Computational Models Predicting Eye Fixation Patterns During Reading: Insights from Transformers and Simpler Architectures</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Alessandro</forename><surname>Lento</surname></persName>
							<email>alessandro.lento@ilc.cnr.it</email>
							<affiliation key="aff0">
								<orgName type="department">Istituto di Linguistica Computazionale</orgName>
								<orgName type="institution">Consiglio Nazionale delle Ricerche &quot;A. Zampolli&quot;</orgName>
								<address>
									<settlement>Pisa</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="institution">Università Campus Bio-Medico</orgName>
								<address>
									<settlement>Roma</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Andrea</forename><surname>Nadalini</surname></persName>
							<email>andrea.nadalini@ilc.cnr.it</email>
							<affiliation key="aff0">
								<orgName type="department">Istituto di Linguistica Computazionale</orgName>
								<orgName type="institution">Consiglio Nazionale delle Ricerche &quot;A. Zampolli&quot;</orgName>
								<address>
									<settlement>Pisa</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Nadia</forename><surname>Khlif</surname></persName>
							<email>nadia.khlif@ilc.cnr.it</email>
							<affiliation key="aff0">
								<orgName type="department">Istituto di Linguistica Computazionale</orgName>
								<orgName type="institution">Consiglio Nazionale delle Ricerche &quot;A. Zampolli&quot;</orgName>
								<address>
									<settlement>Pisa</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
							<affiliation key="aff2">
								<orgName type="institution">University Mohammed First</orgName>
								<address>
									<settlement>Oujda</settlement>
									<country key="MA">Morocco</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Vito</forename><surname>Pirrelli</surname></persName>
							<email>vito.pirrelli@ilc.cnr.it</email>
							<affiliation key="aff0">
								<orgName type="department">Istituto di Linguistica Computazionale</orgName>
								<orgName type="institution">Consiglio Nazionale delle Ricerche &quot;A. Zampolli&quot;</orgName>
								<address>
									<settlement>Pisa</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Claudia</forename><surname>Marzi</surname></persName>
							<email>claudia.marzi@ilc.cnr.it</email>
							<affiliation key="aff0">
								<orgName type="department">Istituto di Linguistica Computazionale</orgName>
								<orgName type="institution">Consiglio Nazionale delle Ricerche &quot;A. Zampolli&quot;</orgName>
								<address>
									<settlement>Pisa</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Marcello</forename><surname>Ferro</surname></persName>
							<email>marcello.ferro@ilc.cnr.it</email>
							<affiliation key="aff0">
								<orgName type="department">Istituto di Linguistica Computazionale</orgName>
								<orgName type="institution">Consiglio Nazionale delle Ricerche &quot;A. Zampolli&quot;</orgName>
								<address>
									<settlement>Pisa</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">Comparative Evaluation of Computational Models Predicting Eye Fixation Patterns During Reading: Insights from Transformers and Simpler Architectures</title>
					</analytic>
					<monogr>
						<idno type="ISSN">1613-0073</idno>
					</monogr>
					<idno type="MD5">26D5216A28FC04ABF350BA590C609204</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2025-04-23T17:36+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>eye-tracking</term>
					<term>eye fixation time prediction</term>
					<term>neural network</term>
					<term>contextual word embeddings</term>
					<term>lexical features</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>Eye tracking records of natural text reading are known to provide significant insights into the cognitive processes underlying word processing and text comprehension, with gaze patterns, such as fixation duration and saccadic movements, being modulated by morphological, lexical, and higher-level structural properties of the text being read. Although some of these effects have been simulated with computational models, it is still not clear how accurately computational modelling can predict complex fixation patterns in connected text reading. State-of-the-art neural architectures have shown promising results, with pre-trained transformer-based classifiers having recently been claimed to outperform other competitors, achieving beyond 95% accuracy. However, transformer-based models have neither been compared with alternative architectures nor adequately evaluated for their sensitivity to the linguistic factors affecting human reading. Here we address these issues by evaluating the performance of a pool of neural networks in classifying eye-fixation English data as a function of both lexical and contextual factors. We show that i) accuracy of transformer-based models has largely been overestimated, ii) other simpler models make comparable or even better predictions, iii) most models are sensitive to some of the major lexical factors accounting for at least 50% of human fixation variance, iv) most models fail to capture some significant context-sensitive interactions, such as those accounting for spillover effects in reading. The work shows the benefits of combining accuracy-based evaluation metrics with non-linear regression modelling of fixed and random effects on both real and simulated eye-tracking data.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1.">Introduction</head><p>Eye-tracking records of natural text reading are a valuable window on the cognitive processes underlying word processing and text comprehension. By looking at fixation patterns it is possible to estimate the effects that lexical properties (e.g. length, frequencies, orthographic similarity <ref type="bibr" target="#b0">[1]</ref>  <ref type="bibr" target="#b1">[2]</ref>), contextual constraints (e.g. predictability <ref type="bibr" target="#b2">[3]</ref>) and higher-level structures (e.g. syntactic structure or prosodic contour <ref type="bibr" target="#b3">[4]</ref>) can have on human word identification and processing. While psycholinguistic experiments have reliably assessed how such effects modulate reading times, it is not clear to what extent computational models of reading can simulate actual behavioural data such as gaze patterns and fixation durations.</p><p>Over the past 30 years, research in this field has made considerable progress, leading to the development of sophisticated computational models accounting for finegrained aspects of eye movement behaviour during word and sentence reading (e.g. EZ-Reader <ref type="bibr" target="#b4">[5]</ref>, Swift <ref type="bibr" target="#b5">[6]</ref>). A significant boost in this area came from large eye-tracking corpora of natural reading (e.g. GECO <ref type="bibr" target="#b6">[7]</ref>, ZUCO <ref type="bibr" target="#b7">[8]</ref>, MECO <ref type="bibr" target="#b8">[9]</ref>), which allow for (deep) learning models to be tested in prediction tasks of eye tracking metrics. Of late, Hollenstein and colleagues <ref type="bibr" target="#b9">[10]</ref> reported that fine-tuned, pre-trained transformer language models can make reliable predictions on a wide range of eye-tracking measurements, covering both early and late stages of lexical processing. 
The evidence suggests that transformers can inherently encode the relative prominence of language units in a text, in ways that accurately replicate human reading skills and their underlying cognitive mechanisms.</p><p>Although the accuracy of multilingual transformers is validated across eye-tracking evidence from different languages, the paper neither compares the performance of transformers with the performance of other neural network classifiers trained on the same task, nor does it show what specific knowledge is encoded and put to use by transformers, by looking at the factors affecting their behaviour. In the present paper, we address both issues by assessing the performance of a pool of neural network classifiers on the English batch of Hollenstein et al.'s <ref type="bibr" target="#b9">[10]</ref> data.</p><p>In what follows, we first describe the English data set and the pool of tested classifiers. Classifiers were selected to include and test either simpler neural architectures than transformers (as is the case with multi-layer perceptrons), or cognitively more plausible processing models (i.e. sequential long short-term memories). Hybrid models, resulting from the combination of different architectures, were also tested. We then move on to discussing the metrics used in <ref type="bibr" target="#b9">[10]</ref> for evaluation, to suggest alternative ways to measure accuracy in a fixation prediction task. Finally, we investigate how sensitive each tested architecture is to a few linguistic factors that are known to account for a sizeable amount of variance in human reading gaze patterns. Although some neural networks turn out to be reasonably good at predicting fixation patterns and replicating some robust psycholinguistic effects that are found in human data, it is still unclear whether this ability is due to specific aspects of their architecture, to the type of information they are provided in input, or to their space of trainable parameters. 
We conclude that, contrary to recent over-enthusiastic reports, predicting eye-fixation patterns of human natural reading is still a big challenge for currently available neural architectures, including transformer-based ones. For this very reason, we contend that the task is key to understanding the inductive bias of these models, as well as assessing their cognitive plausibility as models of language behaviour.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.">Data and Experiments</head><p>All models described in the following paragraphs were trained, validated, and tested on data from the GECO corpus <ref type="bibr" target="#b6">[7]</ref>. We used a 5-fold cross-validation with 95% training, 5% validation and 5% test. Experiments were conducted using the PyTorch library <ref type="bibr" target="#b10">[11]</ref> in Python or MATLAB <ref type="bibr" target="#b11">[12]</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.1.">Dataset</head><p>The GECO corpus <ref type="bibr" target="#b6">[7]</ref> contains data from 14 English native speakers whose eye movements were recorded while reading Agatha Christie's novel "The Mysterious Affair at Styles" (56410 tokens). Out of the eight word-level eye tracking measurements used in <ref type="bibr" target="#b9">[10]</ref>, we focused on i) first-pass duration (FPD) (the time spent fixating a word the first time it is encountered, averaged over subjects, see Fig. <ref type="figure" target="#fig_1">2</ref>) and ii) fixation proportion (FPROP) or probability (number of subjects that fixated a word, divided by the total number of subjects).</p><p>Word tokens in the original dataset were encoded with linguistic information including: i) character length (removing punctuation) ii) log frequency (source: BNC <ref type="bibr" target="#b12">[13]</ref>)</p><p>iii) part-of-Speech tag (source: Stanza <ref type="bibr" target="#b13">[14]</ref>) iv) context surprisal/predictability (source: GPT-2 <ref type="bibr" target="#b14">[15,</ref><ref type="bibr" target="#b15">16,</ref><ref type="bibr" target="#b2">3]</ref>) v) distance from the beginning of the sentence (number of intervening tokens) vi) distance from the end of the sentence (number of intervening tokens) vii) presence of heavy punctuation after the token viii) presence of light punctuation after the token.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.2.">BERT ++</head><p>To replicate results from <ref type="bibr" target="#b9">[10]</ref>, we used BERT <ref type="bibr" target="#b16">[17]</ref> with a linear layer on top of it. The linear layer gets BERT contextual word embeddings as input, to predict FPD and FPROP.</p><p>After sentence padding and tokenization, irrelevant and special subtokens were masked to enforce a correspondence between each vector in the target sequence and each vector in the output sequence, and train the loss only on relevant tokens. Mean Square Error (MSE) loss was used along with the AdamW optimizer (with no weight decay for the biases). The initial learning rate was set to 5 • 10 −5 , and a linear scheduler was used. We used a 16 sentences batch size and 100 training epochs, with an early stopping criterion (best model on the validation set). The model was trained both with fine-tuning (i.e. by also training BERT internal weights: bert FT + layer) and without fine-tuning (by only training final layer weights: bert + layer).</p><p>Finally, we used BERT also in combination with a sequential LSTM network. This model (bert + LSTM) takes the pre-trained BERT contextual word embeddings (i.e. without fine-tuning) in input, along with the lexical features (i), (ii) and (iv), to predict FPD and FPROP.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.3.">LSTM</head><p>Reading is inherently sequential. Thus, recurrent neural networks appear to offer a promising approach to modelling a fixation prediction task, and a good alternative to transformers. Using the GECO dataset split into pages rather than sentences, we trained an LSTM with 96 hidden units and a single layer, with a feed-forward network using tanh activation functions on top of it. The model (lstm) takes as input the lexical features (i)-(iv) for the target token and 4 tokens to its left and 3 to its right, to predict FPD and FPROP of the target token. MSE loss was used along with the AdamW optimizer. The initial learning rate was set to 5 • 10 −3 , with a linear scheduler and a batch containing the entire training dataset. The model was trained for 3000 epochs with an early stopping criterion (best model on the validation set).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.4.">MLP</head><p>A Multi-Layer-Perceptron (mlp) was trained using the entire set of lexical features (i)-(viii) as input, with an input context consisting of the two words immediately preceding and ensuing the target word. Several instances of this architecture were tested, but only the results of the best performing instance (with a single hidden layer of 10 units, sigmoidal activation functions, the Adam optimiser, the MSE loss, a constant learning rate of 0.1, and 1000 training epochs) are reported here.</p><p>An identical MLP model (mlp UDT) was eventually trained on a subset of GECO training data, obtained by sampling target features uniformly. This was done to train the network with an equal number of tokens for each bin of fixation times, and assess the impact of different distributions of input data on the network's performance on test data.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.5.">Evaluation</head><p>We evaluated the performance of all our models using three accuracy metrics based on the absolute error between the predicted value 𝑜𝑖 and the target value 𝑡𝑖 on the i-th token of the GECO dataset:</p><formula xml:id="formula_0">𝑒𝑖 = |𝑜𝑖 − 𝑡𝑖|</formula><p>Loss accuracy (accL) is a measure of the overall similarity between predicted and target values, calculated as the complement to 1 of the Mean Absolute Error (MAE) after fitting the target data 𝑡𝑖 in the training set into the [0; 1] range with the min-max scaling:</p><formula xml:id="formula_1">𝑎𝑐𝑐𝐿(𝑠𝑒𝑡) = 1 − 1 𝑁𝑠𝑒𝑡 𝑁 𝑠𝑒𝑡 ∑︁ 𝑖∈𝑠𝑒𝑡 𝑒 ˆ𝑖 where 𝑒 ˆ𝑖 = |𝑜 ˆ𝑖 − 𝑡 ˆ𝑖|, 𝑡 ˆ𝑖 = 𝑡𝑖/ max 𝑗=𝑡𝑟𝑎𝑖𝑛𝑖𝑛𝑔𝑠𝑒𝑡 {𝑡𝑗}, and</formula><p>𝑜 ˆ𝑖 is the model prediction for 𝑡 ˆ𝑖. Loss accuracy is the metric used in <ref type="bibr" target="#b9">[10]</ref>.</p><p>Threshold accuracy (accT) measures how many times the predicted value is close to the target value within a fixed threshold, and is calculated as follows:</p><formula xml:id="formula_2">𝑎𝑐𝑐𝑇 (𝑠𝑒𝑡) = 1 − 1 𝑁𝑠𝑒𝑡 𝑁 𝑠𝑒𝑡 ∑︁ 𝑖∈𝑠𝑒𝑡 𝜃[𝑒𝑖 − 𝜖]</formula><p>Sensitivity accuracy (accS) counts how many times the predicted value is close to the target value within a threshold dynamically calculated on the basis of the target value: the higher the target value, the higher the threshold. An offset value is needed to obtain a positive threshold also for zero target values. This is calculated as follows:</p><formula xml:id="formula_3">𝑎𝑐𝑐𝑆(𝑠𝑒𝑡) = 1 − 1 𝑁𝑠𝑒𝑡 𝑁 𝑠𝑒𝑡 ∑︁ 𝑖∈𝑠𝑒𝑡 𝜃 [𝑒𝑖 − (𝛼 • 𝑡𝑖 + 𝜖)]</formula><p>where 𝑁𝑠𝑒𝑡 is the number of examples in the training/test set, 𝜃 is the Heaviside step function, 𝜖 is a threshold and 𝛼 is a sensitivity coefficient.</p><p>As for FPD, which is a duration expressed in seconds, we used 𝜖 = 25𝑚𝑠 and 𝛼 = 10% for accS, and 𝜖 = 50𝑚𝑠 for accT. 
As for FPROP, which is a probability, we used 𝜖 = 0.01 and 𝛼 = 10% for accS, and 𝜖 = 0.1 for accT.</p><p>Finally, the performance of our models was compared against a baseline model (const) that always outputs the overall mean fixation duration (across both subjects and items) in the training data.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.">Results</head><p>Models' results for FPD prediction are summarised in Table <ref type="table" target="#tab_0">1</ref> and plotted in Fig. <ref type="figure" target="#fig_0">1</ref>. The accL results reported in <ref type="bibr" target="#b9">[10]</ref> for bert FT + layer are essentially replicated. However, being a simple average over all test instances, accL is blind to error magnitude, as well as the possible presence of prediction biases for specific ranges of fixation values. Note that the const model, which predicts the same average FPD for every token in the test set, scores a flattering 95.68% on accL, vs. 36.97% on accS, and 48.10% on accT. Table <ref type="table">2</ref> summarises accS values of all models, by binning them into three FPD ranges.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.">Data analysis</head><p>To what extent are neural network models sensitive to some of the factors accounting for gaze patterns in human natural reading? Are language models able to adapt themselves to both lexical properties and in-context features of a reading text, thus exhibiting a human-like performance?</p><p>Human reading behaviour is shown to be affected by lexical features -e.g. word length and frequency, and morphological complexity -as well as by contextual factors, with a facilitatory effect of contextual redundancy and predictability <ref type="bibr" target="#b17">(18,</ref><ref type="bibr" target="#b18">19)</ref>  </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 2</head><p>Sensitivity accuracy (accS) values for three bins from the FPD distribution: low (FPD below the 5 𝑡ℎ percentile = 36ms), medium (FPD ranging from the 5 𝑡ℎ to the 95 𝑡ℎ percentile), and high (FPD above the 95 𝑡ℎ percentile = 280ms).</p><p>measure of how unexpected or unpredictable the word is, and the probability of the word immediately preceding the target word in context (to account for so-called spill-over effects). Additionally, we used a Generalised Additive Model (GAM), with token log-frequency as a smooth term, to model for possibly non-linear effects of predictors. Models' coefficients and effect plots are shown in Appendix C (Figure <ref type="figure" target="#fig_3">3</ref> and Table <ref type="table" target="#tab_3">4</ref>). GAMs with identical independent variables have been run to model the FPDs predicted by all our neural networks, on both training and test data. Inspection of effect plots and model coefficients -as reported in Appendix C -shows a behavioural alignment of all models with human data for what concerns the modulation of fixation times by lexical features, in both train and test data. In contrast, all models fail to capture some contextual effects on test data, such as those observed in a context window of -at least -two adjacent words. To illustrate, efficient syntactic chunking (e.g. of noun, verb and prepositional phrases) has been shown to lead to faster and more accurate human reading (see, for example, <ref type="bibr" target="#b19">[20]</ref>). Conversely, most neural networks show no statistically significant effect on fixation duration of the probability of the immediately preceding word in context. 
This is observed either in isolation (probMinus1) in LSTMs and transformer-based models with BERT representations (either fine-tuned or not), or in interaction with the unpredictability of the target word (surprisal:probMinus1).</p><p>The evidence shows that most neural models cannot replicate, among other things, so-called spillover effects of the left-context on the reading time of ensuing words <ref type="bibr" target="#b20">[21]</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.">General Discussion</head><p>Transformer-based neural networks appear to reasonably predict fixation probability and first-pass duration of words in human reading of English connected texts.</p><p>Our present investigation basically supports this conclusion, while providing new evidence on two related questions. Two questions naturally arise in this context. How accurate are transformer-based predictions compared with the best predictions of other neural network classifiers trained on the same task? How cognitively plausible are the mechanisms underpinning this performance? Here, we addressed both questions by testing various models on the task of predicting human reading measurements from the GECO corpus, using different evaluation metrics and regressing network predictions on a few linguistic factors that are known to account for human reading behaviour. Our first observation is that assessing a network's performance by looking at its MAE loss function provides a rather gross evaluation of the effective power of a neural network simulating human reading behaviour. A baseline model assigning each token a constant gaze duration that equals the average of all FPD values attested in GECO achieves a 95.7% loss-based accuracy on both test and training data. That a transformer-based classification scores 97.2% on the same metric and the same test data cannot be held, as such, as a sign of outstanding performance. In fact, it turns out that the MAE loss function is blind to both the magnitude of a network error, and possible biases in the prediction of very low/high target values. Thus, it provides an inflated estimate of a model's accuracy. We suggest that binary evaluation metrics, based on a fixed threshold, partially overcome these limitations. 
Yet, as single word fixation times typically range from tens to hundreds of milliseconds, application of a fixed threshold will differently affect tokens with different fixation times. We conclude that a relative threshold based on each word's fixation time is a fairer way to measure prediction accuracy. Clearly, this comes at a cost. When assessed with a relative threshold, the accuracy of a transformer-based architecture on test data drops from 70% down to 57.8%.</p><p>It turned out that all other network models tested for the present purposes showed accuracy levels that are comparable to the accuracy of a transformer-based architecture. Since the former are trained on a more restricted set of lexical and contextual input features than the latter, this seems to suggest that word embeddings are of limited use in the task at hand. Although fine-tuned word embeddings actually appear to score much higher on training data (even using accT and accS), we observe that this is due to data overfitting, as clearly shown by the considerably poorer performance of the fine-tuned model on test data.</p><p>An analysis of the psychometric plausibility of the gaze patterns simulated with our neural models reveals that a relatively small set of linguistic factors that are known to account for a sizeable amount of variance in human fixation times can also account for the bulk of variance in models' behaviour. This is relatively unsurprising, as most of these models were trained on input features that encode at least some of these factors. Nonetheless, we believe that the result is interesting for at least two reasons. First, it shows a promising convergence between computational metrics of model accuracy and quantitative models of psychometric assessment. Secondly, it suggests that one can gain non-trivial insights into a model's behaviour by analysing to what extent the behaviour is sensitive to the same linguistic factors human readers are known to be sensitive to. 
On the one hand, this is a step towards understanding what information a neural model is actually learning and putting to use for the task. On the other hand, this is instrumental in developing better models, as it shows what type of input information is more needed to successfully carry out a task, at least if one is trying to simulate the way the same task is carried out by speakers.</p><p>In the end, it may well be the case that a 70% fixedthreshold accuracy in simulating average gaze patterns in human reading is not as disappointing as it might seem. Given the wide variability in human reading behaviour (and even in a single reader when confronted with different texts), a considerable amount of variance in our data may simply be accounted for by by-subject (or by-token) random effects. In some experiments not reported here we trained our models to predict single-reader behaviour. All architectures fared rather poorly on the task, a result which is in line with similar disappointing results on other output features reported in <ref type="bibr" target="#b9">[10]</ref>. Looking back at Figure <ref type="figure" target="#fig_0">1</ref>, it can be noted that all models' predictions fall into a 𝜇𝑖 ± 𝜎𝑖 range, where 𝜇𝑖 and 𝜎𝑖 are, respectively, the by-reader mean and standard deviation of FPD values for token 𝑖 (see also Table <ref type="table">2</ref>). This pattern may suggest that models' predictions are in fact bounded by the standard deviation we observe in human behaviour and cannot reach out of these bounds. Conversely, this evidence may be interpreted as suggesting that more input features are needed to build more accurate classifiers. Further experiments are needed to test the merits of either conjecture.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="6.">Limitations and outlook</head><p>In the present paper, we replicated recent experimental data of transformer-based architectures simulating word fixation duration in reading a connected text <ref type="bibr" target="#b9">[10]</ref>, with a view to assessing their relative performance compared with reading times by humans and other neural architectures. This justifies our exclusive focus on fixation duration, which is, admittedly, only one behavioural correlate of a complex, inherently multimodal task such as reading. In fact, reading requires the fine coordination of eye movements and articulatory movements for text decoding and comprehension. The eye provides access to the visual stimuli needed for voice articulation to unfold at a relatively constant rate. In turn, articulation can feedback oculomotor control for eye movements to be directed when and where processing difficulties arise. Incidentally, this is also true of silent reading as shown by evidence supporting the Implicit Prosody Hypothesis <ref type="bibr" target="#b21">[22]</ref>, i.e. the idea that, in silent reading, readers activate prosodic representations that are similar to those they would produce when reading the text aloud. Hence, a reader must always rely on a tight control strategy to ensure that fixation and articulation are optimally coordinated.</p><p>A clear limitation of our current work and all experiments reported here is that we are only focusing on one dimension of a complex, multimodal behaviour like reading. Recently, we showed that there is a lot about gaze patterns that we can understand by correlating eye movements with voice articulation <ref type="bibr" target="#b22">[23]</ref>. This information, which cannot be represented in a dataset structured at the word level, may be critical for a model to accurately learn and mimic the cognitive mechanisms underlying natural reading. 
Likewise, as correctly pointed out by one of our reviewers, focusing on fixation times while ignoring saccadic movements may seriously detract from the explanatory power of any computational model of human reading. In fact, this could be tantamount to timing a bike rider's speed, while ignoring if she is climbing up a hill or approaching a sharp turn. More realistic models of reading are bound to include more aspects of reading behaviour in more ecologically valid tasks. In the end, it may well be the case that the task of predicting gaze patterns of human reading should be conceptualized differently, by anchoring these patterns not only to the syntagmatic dimension of a written text, but also to the time-line of the different movements and multimodal processes that unfold during reading. The rightmost box plot shows the average distribution across all 14 participants. Bottom panel: plot of all 56410 tokens in the dataset, in ascending order of mean FPD (dashed black line). For each token, the standard deviation calculated on the distribution of the FPDs of the 14 participants is shown both above and below the mean value (gray dots).  </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>A. GeCO FPD data</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>B. FPROP accuracy</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>C. Data analysis</head><p>In this section, coefficients of Generalised Additive Models (GAMs) are detailed for each neural model. Statistical non-significant p-values on GAM predicting terms are given in bold-face. GAMs are fitted using the package gamm4 version 0.2-6 of the R statistical software <ref type="bibr" target="#b23">[24]</ref>, as they do not assume a linear relation between the fitted variable and its predictors. All plots were created via the ggplot2 package, version 3.5.               </p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head>Figure 1 :</head><label>1</label><figDesc>Figure 1: Models predictions (red dots) plotted with target FPD values (black dots), after ordering tokens for increasing FPDs. Grey dots represent averaged FPD values plus\minus their standard deviation across participants. Left: training data. Right: test data. From top to bottom: MLP, LSTM, BERT fine-tuned. For each plot, the Spearman-𝜌 correlation coefficient between predicted and target values is shown along with the significance value.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_1"><head>Figure 2 :</head><label>2</label><figDesc>Figure 2: A view of FPD data in the GECO dataset, consisting of eye-tracking patterns of 14 adult participants reading the novel "The Mysterious Affair at Styles" by Agatha Christie. Top panel: distributions of FPD data, with chapters grouped into 4 parts, for participant #1 (with 3 more participants showing a similar distribution), participant #2 (with 8 more participants showing a similar distribution) and participant #10. The rightmost box plot shows the average distribution across all 14 participants. Bottom panel: plot of all 56410 tokens in the dataset, in ascending order of mean FPD (dashed black line). For each token, the standard deviation calculated on the distribution of the FPDs of the 14 participants is shown both above and below the mean value (gray dots).</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_2"><head>&lt;</head><label></label><figDesc>Human FPD parametric coeff. estimate std. error t value pr(&gt;|t|) Intercept (content) 6.960e-02 7.858e-04 88.568 &lt; 2𝑒 − 16 surprisal 1.928e-03 5.002e-05 38.539 &lt; 2𝑒 − 16 probMinus1 -1.395e-02 1.363e-03 -10.233 &lt; 2𝑒 − 16 Intercept (function) -2.599e-02 1.143e-03 -22.746 &lt; 2𝑒 − 16 length (content) 1.562e-02 1.423e-04 109.767 &lt; 2𝑒 − 16 length (function) 5.499e-03 2.791e-04 19.704 &lt; 2𝑒 − 16 surprisal:probMinus1 4.692e-04 1.776e-04 2.642</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_3"><head>Figure 3 :</head><label>3</label><figDesc>Figure 3: Effects of surprisal, probability of the preceding token (probMinus1), word length (len) as predictors, and word log-frequency (logFreq) as a smooth term, on human fixation first-pass duration (fixFPD) as a response variable.</figDesc><graphic coords="8,303.64,112.54,201.32,135.74" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_4"><head>Figure 4 :</head><label>4</label><figDesc>Figure 4: MLP effects in training (top panel) and test (bottom panel) data, with surprisal, probability of the preceding token (probMinus1), word length (len) as predictors, word log-frequency as a smooth term (logFreq), and fixation first-pass duration as response variable.</figDesc><graphic coords="8,303.64,468.83,201.32,135.74" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_5"><head>Figure 5 :</head><label>5</label><figDesc>Figure 5: LSTM effects in training (top panel) and test (bottom panel) data, with surprisal, probability of the preceding token (probMinus1), word length (len) as predictors, word log-frequency as a smooth term (logFreq), and fixation first-pass duration as response variable.</figDesc><graphic coords="9,90.31,467.60,201.32,135.74" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_6"><head>Figure 6 :</head><label>6</label><figDesc>Figure 6: fine-tuned BERT effects in training (top panel) and test (bottom panel) data, with surprisal, probability of the preceding token (probMinus1), word length (len) as predictors, word log-frequency as a smooth term (logFreq), and fixation first-pass duration as response variable.</figDesc><graphic coords="9,303.64,477.05,201.32,135.74" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_7"><head>Figure 7 :</head><label>7</label><figDesc>Figure 7: untuned BERT effects in training (top panel) and test (bottom panel) data, with surprisal, probability of the preceding token (probMinus1), word length (len) as predictors, word log-frequency as a smooth term (logFreq), and fixation first-pass duration as response variable.</figDesc><graphic coords="10,90.31,477.05,201.32,135.74" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1</head><label>1</label><figDesc>Overall FPD prediction accuracy in the GECO dataset. For each model, three different accuracy scores are given as described in the text; const is used as a baseline; highest accuracies in bold; lowest accuracies in italics.</figDesc><table><row><cell>on reading duration and eye</cell></row><row><cell>fixations. Accordingly, we modelled human FPDs as a</cell></row><row><cell>response variable resulting from the interaction of both</cell></row><row><cell>lexical and contextual predictors: namely, word length,</cell></row><row><cell>a dichotomous classification of token POS into content</cell></row><row><cell>versus function words, surprisal of the target word as a</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_2"><head>Table 3</head><label>3</label><figDesc>Accuracy values of neural models predicting the fixation probabilities of the GECO dataset. For each model three different accuracy metrics are used, as described in the paper. The "const" model was used as a baseline; highest accuracy scores are highlighted in bold; lowest scores are shown in italic.</figDesc><table /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_3"><head>Table 4</head><label>4</label><figDesc>GAM coefficients fitting human fixation FPD: FPD ∼ surprisal × probMinus1 + POSgroup × wordlength + s(logFreq).</figDesc><table /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_5"><head>Table 5 GAM</head><label>5</label><figDesc></figDesc><table /><note>coefficients fitting MLP fixation FPD in training (top) and test (bottom) data: FPD ∼ surprisal × probMinus1 + POSgroup × wordlength + s(logFreq).</note></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_7"><head>Table 6 GAM</head><label>6</label><figDesc></figDesc><table /><note>coefficients fitting LSTM fixation FPD in training (top) and test (bottom) data: FPD ∼ surprisal × probMinus1 + POSgroup × wordlength + s(logFreq).</note></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_8"><head></head><label></label><figDesc>Intercept (content) 6.950e-02 8.572e-04 81.075 &lt; 2𝑒 − 16 surprisal 2.013e-03 5.446e-05 36.9562 &lt; 2𝑒 − 16 probMinus1 -1.475e-02 1.483e-03 -9.9416 &lt; 2𝑒 − 16 Intercept (function) -2.631e-02 1.248e-03 -21.0852 &lt; 2𝑒 − 16</figDesc><table><row><cell></cell><cell>BERT+fine-tuning FPD</cell><cell></cell></row><row><cell>parametric coeff.</cell><cell>estimate std. error t value</cell><cell>pr(&gt;|t|)</cell></row><row><cell>length (content)</cell><cell cols="2">1.570e-02 1.550e-04 101.307 &lt; 2𝑒 − 16</cell></row><row><cell>length (function)</cell><cell cols="2">5.528e-03 3.046e-04 18.148 &lt; 2𝑒 − 16</cell></row><row><cell cols="2">surprisal:probMinus1 5.024e-04 1.937e-04 2.594</cell><cell>&lt; 0.01</cell></row><row><cell>s(logFreq)</cell><cell></cell><cell>&lt; 2𝑒 − 16</cell></row><row><cell>R 2</cell><cell>57.5%</cell><cell></cell></row><row><cell cols="3">Intercept (content) 0.0714503 0.0022332 31.99 &lt; 2𝑒 − 16</cell></row><row><cell>surprisal</cell><cell cols="2">0.0014206 0.0001441 9.859 &lt; 2.3𝑒 − 13</cell></row><row><cell>probMinus1</cell><cell>-0.0017461 0.0038742 -0.451</cell><cell>0.65</cell></row><row><cell cols="3">Intercept (function) -0.0239773 0.0031336 -7.652 &lt; 2.7𝑒 − 14</cell></row><row><cell>length (content)</cell><cell cols="2">1.707e-02 2.499e-04 68.321 &lt; 2𝑒 − 16</cell></row><row><cell>length (function)</cell><cell>1.579e-03 4.627e-04 3.411</cell><cell>&lt; 0.001</cell></row><row><cell cols="2">surprisal:probMinus1 -5.244e-04 3.561e-04 -1.473</cell><cell>0.14</cell></row><row><cell>s(logFreq)</cell><cell></cell><cell>&lt; 2𝑒 − 16</cell></row><row><cell>R 2</cell><cell>78.4%</cell><cell></cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_9"><head>Table 7</head><label>7</label><figDesc>GAM coefficients fitting BERT+fine-tuning fixation FPD in training (top) and test (bottom) data: FPD ∼ surprisal × probMinus1 + POSgroup × wordlength + s(logFreq).</figDesc><table /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_10"><head></head><label></label><figDesc>Intercept (content) 9.626e-02 4.765e-04 202.020 &lt; 2𝑒 − 16 surprisal 1.319e-03 3.027e-05 43.586 &lt; 2𝑒 − 16 probMinus1 -4.998e-03 8.245e-04 -6.0616 &lt; 1.3𝑒 − 09 Intercept (function) -2.293e-02 6.937e-04 -33.053 &lt; 2𝑒 − 16</figDesc><table><row><cell></cell><cell>BERT FPD</cell><cell></cell></row><row><cell>parametric coeff.</cell><cell>estimate std. error t value</cell><cell>pr(&gt;|t|)</cell></row><row><cell>length (content)</cell><cell cols="2">1.019e-02 8.616e-05 118.232 &lt; 2𝑒 − 16</cell></row><row><cell>length (function)</cell><cell cols="2">2.892e-03 1.693e-04 17.0848 &lt; 2𝑒 − 16</cell></row><row><cell cols="2">surprisal:probMinus1 -3.874e-04 1.077e-04 -3.599</cell><cell>&lt; 0.001</cell></row><row><cell>s(logFreq)</cell><cell></cell><cell>&lt; 2𝑒 − 16</cell></row><row><cell>R 2</cell><cell>75.6%</cell><cell></cell></row><row><cell cols="3">Intercept (content) 0.0960782 0.0021829 44.014 &lt; 2𝑒 − 16</cell></row><row><cell>surprisal</cell><cell cols="2">0.0012786 0.0001409 9.073 &lt; 2.3𝑒 − 13</cell></row><row><cell>probMinus1</cell><cell>-0.0013508 0.0037907 -0.356</cell><cell>0.72</cell></row><row><cell cols="3">Intercept (function) -0.0192904 0.0030629 -6.298 &lt; 3.4𝑒 − 10</cell></row><row><cell>length (content)</cell><cell cols="2">0.0102735 0.0003941 26.069 &lt; 2𝑒 − 16</cell></row><row><cell>length (function)</cell><cell>0.0027876 0.0007299 3.819</cell><cell>&lt; 0.001</cell></row><row><cell cols="2">surprisal:probMinus1 -0.0008111 0.0004600 -1.763</cell><cell>0.08</cell></row><row><cell>s(logFreq)</cell><cell></cell><cell>&lt; 2𝑒 − 16</cell></row><row><cell>R 2</cell><cell>73.5%</cell><cell></cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_11"><head>Table 8</head><label>8</label><figDesc>GAM coefficients fitting BERT fixation FPD for the training (top) and test (bottom) settings: FPD ∼ surprisal × probMi-nus1 + POSgroup × wordlength + s(logFreq).</figDesc><table /></figure>
		</body>
		<back>

			<div type="acknowledgement">
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Acknowledgments</head><p>The present study has partly been funded by the Read-Ground research grant from the National Research Council (CNR), and the ReMind and Braillet PRIN grants, from the Ministry of University and Research (MUR). Alessandro Lento is a PhD student enrolled in the National PhD in Artificial Intelligence, XXXVII cycle, course on Health and Life sciences, organized by Università Campus Bio-Medico in Rome. Nadia Khlif is a PhD student in the Computer Science Research Laboratory, Faculty of Sciences, at the University Mohammed First of Oujda, Morocco. Andrea Nadalini's work is kindly covered by the "RAISE -Robotics and AI for Socio-economic Empowerment" grant (ECS00000035), funded by the European Union -NextGenerationEU and by the Ministry of University and Research (MUR), National Recovery and Resilience Plan (NRRP), Mission 4, Component 2, Investment 1.5.</p></div>
			</div>

			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">Reading development, word length and frequency effects: An eye-tracking study with slow and fast readers</title>
		<author>
			<persName><forename type="first">S</forename><surname>Gerth</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Festman</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Frontiers in Communication</title>
		<imprint>
			<biblScope unit="volume">6</biblScope>
			<biblScope unit="page">743113</biblScope>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">Eye movements of children and adults reading in three different orthographies</title>
		<author>
			<persName><forename type="first">S</forename><surname>Schroeder</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Häikiö</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Pagán</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">H</forename><surname>Dickins</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Hyönä</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">P</forename><surname>Liversedge</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Experimental Psychology: Learning, Memory, and Cognition</title>
		<imprint>
			<biblScope unit="volume">48</biblScope>
			<biblScope unit="page">1518</biblScope>
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">A study on surprisal and semantic relatedness for eye-tracking data prediction</title>
		<author>
			<persName><forename type="first">L</forename><surname>Salicchi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Chersoni</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Lenci</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Frontiers in Psychology</title>
		<imprint>
			<biblScope unit="volume">14</biblScope>
			<biblScope unit="page">1112365</biblScope>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">Punctuation and intonation effects on clause and sentence wrap-up: Evidence from eye movements</title>
		<author>
			<persName><forename type="first">M</forename><surname>Hirotani</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Frazier</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Rayner</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Memory and Language</title>
		<imprint>
			<biblScope unit="volume">54</biblScope>
			<biblScope unit="page" from="425" to="443" />
			<date type="published" when="2006">2006</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">The E-Z Reader model of eye-movement control in reading: Comparisons to other models</title>
		<author>
			<persName><forename type="first">E</forename><forename type="middle">D</forename><surname>Reichle</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Rayner</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Pollatsek</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Behavioral and Brain Sciences</title>
		<imprint>
			<biblScope unit="volume">26</biblScope>
			<biblScope unit="page" from="445" to="476" />
			<date type="published" when="2003">2003</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">SWIFT: A Dynamical Model of Saccade Generation During Reading</title>
		<author>
			<persName><forename type="first">R</forename><surname>Engbert</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Nuthmann</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Richter</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Kliegl</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Psychological review</title>
		<imprint>
			<biblScope unit="volume">112</biblScope>
			<biblScope unit="page" from="777" to="813" />
			<date type="published" when="2005">2005</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">Presenting GECO: An eyetracking corpus of monolingual and bilingual sentence reading</title>
		<author>
			<persName><forename type="first">U</forename><surname>Cop</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Dirix</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Drieghe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><surname>Duyck</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Behavior Research Methods</title>
		<imprint>
			<biblScope unit="volume">49</biblScope>
			<biblScope unit="page" from="602" to="615" />
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">ZuCo, a simultaneous EEG and eye-tracking resource for natural sentence reading</title>
		<author>
			<persName><forename type="first">N</forename><surname>Hollenstein</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Rotsztejn</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Troendle</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Pedroni</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Langer</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Scientific Data</title>
		<imprint>
			<biblScope unit="volume">5</biblScope>
			<biblScope unit="page">180291</biblScope>
			<date type="published" when="2018">2018</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">Expanding horizons of cross-linguistic research on reading: The Multilingual Eye-movement Corpus (MECO)</title>
		<author>
			<persName><forename type="first">N</forename><surname>Siegelman</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Schroeder</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Acartürk</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H.-D</forename><surname>Ahn</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Alexeeva</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Amenta</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Bertram</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Bonandrini</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Brysbaert</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Chernova</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">M</forename><surname>Da Fonseca</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Dirix</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><surname>Duyck</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Fella</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Frost</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">A</forename><surname>Gattei</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Kalaitzi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Kwon</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Lõo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Marelli</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><forename type="middle">C</forename><surname>Papadopoulos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Protopapas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Savo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">E</forename><surname>Shalom</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Slioussar</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Stein</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Sui</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Taboh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Tønnesen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><forename type="middle">A</forename><surname>Usal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Kuperman</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Behavior Research Methods</title>
		<imprint>
			<biblScope unit="volume">54</biblScope>
			<biblScope unit="page" from="2843" to="2863" />
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">Multilingual language models predict human reading behavior</title>
		<author>
			<persName><forename type="first">N</forename><surname>Hollenstein</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Pirovano</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Jäger</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Beinborn</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</title>
				<meeting>the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</meeting>
		<imprint>
			<date type="published" when="2021">2021</date>
			<biblScope unit="page" from="106" to="123" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">PyTorch 2: Faster Machine Learning Through Dynamic Python Bytecode Transformation and Graph Compilation</title>
		<author>
			<persName><forename type="first">J</forename><surname>Ansel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Yang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>He</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Gimelshein</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Jain</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Voznesensky</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Bao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Bell</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Berard</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Burovski</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Chauhan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Chourdia</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><surname>Constable</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Desmaison</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Devito</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Ellison</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><surname>Feng</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Gong</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Gschwind</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Hirsh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Huang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Kalambarkar</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Kirsch</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Lazos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Lezcano</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Liang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Liang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Lu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">K</forename><surname>Luk</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Maher</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Pan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Puhrsch</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Reso</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Saroufim</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">Y</forename><surname>Siraichi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Suk</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Suo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Tillet</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Zhao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Wang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Zhou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Zou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Wang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Mathews</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><surname>Wen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Chanan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Wu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Chintala</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems</title>
				<meeting>the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems</meeting>
		<imprint>
			<publisher>Association for Computing Machinery</publisher>
			<date type="published" when="2024">2024</date>
			<biblScope unit="volume">2</biblScope>
			<biblScope unit="page" from="929" to="947" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title/>
		<author>
			<persName><forename type="first">T</forename><forename type="middle">M</forename><surname>Inc</surname></persName>
		</author>
		<idno>.0.1190202</idno>
	</analytic>
	<monogr>
		<title level="j">Matlab version</title>
		<imprint>
			<biblScope unit="volume">9</biblScope>
			<biblScope unit="issue">7</biblScope>
			<date type="published" when="2019">r2019b. 2019</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<monogr>
		<title level="m" type="main">The british national corpus</title>
		<author>
			<persName><forename type="first">B</forename><surname>Consortium</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2007">2007</date>
		</imprint>
	</monogr>
	<note>xml edition</note>
</biblStruct>

<biblStruct xml:id="b13">
	<analytic>
		<title level="a" type="main">Stanza: A Python natural language processing toolkit for many human languages</title>
		<author>
			<persName><forename type="first">P</forename><surname>Qi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Bolton</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">D</forename><surname>Manning</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations</title>
				<meeting>the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations</meeting>
		<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b14">
	<monogr>
		<title level="m" type="main">Language models are unsupervised multitask learners</title>
		<author>
			<persName><forename type="first">A</forename><surname>Radford</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Wu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Child</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Luan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Amodei</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Sutskever</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2019">2019</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b15">
	<monogr>
		<title level="m" type="main">Do language models make human-like predictions about the coreferents of Italian anaphoric zero pronouns?</title>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">A</forename><surname>Michaelov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><forename type="middle">K</forename><surname>Bergen</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2208.14554</idno>
		<imprint>
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
	<note type="report_type">arXiv preprint</note>
</biblStruct>

<biblStruct xml:id="b16">
	<monogr>
		<title level="m" type="main">BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding</title>
		<author>
			<persName><forename type="first">J</forename><surname>Devlin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M.-W</forename><surname>Chang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Lee</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Toutanova</surname></persName>
		</author>
		<idno type="arXiv">arXiv:1810.04805</idno>
		<imprint>
			<date type="published" when="2019">2019</date>
		</imprint>
	</monogr>
	<note>cs. version: 2</note>
</biblStruct>

<biblStruct xml:id="b17">
	<analytic>
		<title level="a" type="main">Attentional and automatic context effects in reading</title>
		<author>
			<persName><forename type="first">K</forename><forename type="middle">E</forename><surname>Stanovich</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Interactive processes in reading</title>
				<imprint>
			<publisher>Routledge</publisher>
			<date type="published" when="2017">2017</date>
			<biblScope unit="page" from="241" to="267" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b18">
	<analytic>
		<title level="a" type="main">Lexical and sentence context effects in word recognition</title>
		<author>
			<persName><forename type="first">G</forename><forename type="middle">B</forename><surname>Simpson</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">R</forename><surname>Peterson</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">A</forename><surname>Casteel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Burgess</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Experimental Psychology: Learning, Memory, and Cognition</title>
		<imprint>
			<biblScope unit="volume">15</biblScope>
			<biblScope unit="page">88</biblScope>
			<date type="published" when="1989">1989</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b19">
	<analytic>
		<title level="a" type="main">Eye movements as reflections of comprehension processes in reading</title>
		<author>
			<persName><forename type="first">K</forename><surname>Rayner</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><forename type="middle">H</forename><surname>Chace</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><forename type="middle">J</forename><surname>Slattery</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Ashby</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Scientific Studies of Reading</title>
		<imprint>
			<biblScope unit="volume">10</biblScope>
			<biblScope unit="page" from="241" to="255" />
			<date type="published" when="2006">2006</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b20">
	<analytic>
		<title level="a" type="main">The effect of word predictability on reading time is logarithmic</title>
		<author>
			<persName><forename type="first">N</forename><forename type="middle">J</forename><surname>Smith</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Levy</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Cognition</title>
		<imprint>
			<biblScope unit="volume">128</biblScope>
			<biblScope unit="page" from="302" to="319" />
			<date type="published" when="2013">2013</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b21">
	<analytic>
		<title level="a" type="main">Empirical investigations of the role of implicit prosody in sentence processing</title>
		<author>
			<persName><forename type="first">M</forename><surname>Breen</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Language and Linguistics Compass</title>
		<imprint>
			<biblScope unit="volume">8</biblScope>
			<biblScope unit="page" from="37" to="50" />
			<date type="published" when="2014">2014</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b22">
	<analytic>
		<title level="a" type="main">Eye-voice and finger-voice spans in adults&apos; oral reading of connected texts. Implications for reading research and assessment</title>
		<author>
			<persName><forename type="first">A</forename><surname>Nadalini</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Marzi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Ferro</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Taxitari</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Lento</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Crepaldi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Pirrelli</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">The Mental Lexicon</title>
		<ptr target="https://benjamins.com/catalog/ml.00025.nad" />
		<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b23">
	<monogr>
		<author>
			<persName><surname>R Core Team</surname></persName>
		</author>
		<ptr target="https://www.R-project.org/" />
		<title level="m">R: A Language and Environment for Statistical Computing</title>
		<meeting><address><addrLine>Vienna, Austria</addrLine></address></meeting>
		<imprint>
			<publisher>R Foundation for Statistical Computing</publisher>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
