<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">The Self-Contained Italian Negation Test (SCIN)</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Viola</forename><surname>Gullace</surname></persName>
							<email>viola.gullace@sns.it</email>
							<affiliation key="aff0">
								<orgName type="laboratory">Lattice</orgName>
								<orgName type="institution">CNRS &amp; ENS-PSL &amp; U. Sorbonne-Nouvelle</orgName>
								<address>
									<addrLine>1 rue Maurice Arnoux</addrLine>
									<postCode>F-92120</postCode>
									<settlement>Montrouge</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="department" key="dep1">Dipartimento di Filologia</orgName>
								<orgName type="department" key="dep2">Letteratura e Linguistica</orgName>
								<orgName type="laboratory">CoLing Lab</orgName>
								<orgName type="institution">Università di Pisa</orgName>
								<address>
									<addrLine>Via Santa Maria</addrLine>
									<postCode>56126</postCode>
									<settlement>Pisa</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
							<affiliation key="aff2">
								<orgName type="institution">Scuola Normale Superiore</orgName>
								<address>
									<addrLine>Piazza dei Cavalieri 7</addrLine>
									<postCode>56126</postCode>
									<settlement>Pisa</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">David</forename><surname>Kletz</surname></persName>
							<email>david.kletz@sorbonne-nouvelle.fr</email>
							<affiliation key="aff0">
								<orgName type="laboratory">Lattice</orgName>
								<orgName type="institution">CNRS &amp; ENS-PSL &amp; U. Sorbonne-Nouvelle</orgName>
								<address>
									<addrLine>1 rue Maurice Arnoux</addrLine>
									<postCode>F-92120</postCode>
									<settlement>Montrouge</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
							<affiliation key="aff3">
								<orgName type="laboratory">LLF</orgName>
								<orgName type="institution" key="instit1">CNRS</orgName>
								<orgName type="institution" key="instit2">Université Paris Cité</orgName>
								<address>
									<addrLine>8 Rue Albert Einstein</addrLine>
									<postCode>75013</postCode>
									<settlement>Paris</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Thierry</forename><surname>Poibeau</surname></persName>
							<email>thierry.poibeau@ens.psl.eu</email>
							<affiliation key="aff0">
								<orgName type="laboratory">Lattice</orgName>
								<orgName type="institution">CNRS &amp; ENS-PSL &amp; U. Sorbonne-Nouvelle</orgName>
								<address>
									<addrLine>1 rue Maurice Arnoux</addrLine>
									<postCode>F-92120</postCode>
									<settlement>Montrouge</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Alessandro</forename><surname>Lenci</surname></persName>
							<email>alessandro.lenci@unipi.it</email>
							<affiliation key="aff1">
								<orgName type="department" key="dep1">Dipartimento di Filologia</orgName>
								<orgName type="department" key="dep2">Letteratura e Linguistica</orgName>
								<orgName type="laboratory">CoLing Lab</orgName>
								<orgName type="institution">Università di Pisa</orgName>
								<address>
									<addrLine>Via Santa Maria</addrLine>
									<postCode>56126</postCode>
									<settlement>Pisa</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Pascal</forename><surname>Amsili</surname></persName>
							<email>pascal.amsili@ens.fr</email>
							<affiliation key="aff0">
								<orgName type="laboratory">Lattice</orgName>
								<orgName type="institution">CNRS &amp; ENS-PSL &amp; U. Sorbonne-Nouvelle</orgName>
								<address>
									<addrLine>1 rue Maurice Arnoux</addrLine>
									<postCode>F-92120</postCode>
									<settlement>Montrouge</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">The Self-Contained Italian Negation Test (SCIN)</title>
					</analytic>
					<monogr>
						<imprint>
							<date/>
						</imprint>
					</monogr>
					<idno type="MD5">361F4F390231AE5E0DF789F24CB35950</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2025-04-23T17:34+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>negation</term>
					<term>Italian PLMs</term>
					<term>testing</term>
					<term>self-contained</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>Recent research has focused extensively on state-of-the-art pretrained language models, particularly those based on Transformer architectures, and how well they account for negation and other linguistic phenomena in various tasks. This study aims to evaluate the understanding of negation in Italian bert- and roberta-based models, contrasting the predominant English-focused prior research. We develop the SCIN Set, an Italian dataset designed to model the influence of polarity constraints on models in a masked predictions task. Applying the SCIN Set reveals that these models do not adjust their behaviour based on sentences' polarity, even when the resulting sentence is contradictory. We conclude that the tested models lack a clear understanding of how negation alters sentence meaning.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1.">Introduction</head><p>Compositionality is a fundamental feature of human language, based on the principle that the meaning of a complex expression derives from its parts and their respective arrangements.</p><p>One notable compositional phenomenon is negation, formally defined as a semantic operator (or function) that reverses the truth-value of a sentence <ref type="bibr" target="#b0">[1]</ref>.</p><p>Given its importance, understanding how well pretrained language models (PLMs) can grasp and apply this principle is crucial.</p><p>These models achieve impressive performance across a wide array of language modeling tasks. Nonetheless, they often turn out to rely on shallow heuristics or exhibit other issues in handling specific aspects of language.</p><p>A prominent bias in the body of research is that the vast majority of research on language models has predominantly concentrated on English. This focus raises concerns about the generalizability of findings to other languages which may be structurally different from English. Conducting similar experiments in other languages could provide valuable context and material for comparison, potentially highlighting language-specific effects or revealing new generalizations. Therefore, we decide to undertake a new experiment focusing on Italian negation.</p><p>Thus, in this article, we aim to explore whether the behavior of PLMs accurately models the polarity of sentences. We will investigate how the addition of negation to a sentence can alter its overall meaning (demonstrating the models' capability to handle shifts in meaning due to structural changes).</p><p>Given the limitations explained above, our work has deliberately chosen to concentrate on Italian. This choice not only addresses the need to explore how these models perform with languages other than English but also serves as a critical test for PLMs dedicated to Italian. 
We suspect that these models may not be as advanced or effective as their English counterparts, highlighting the need for further developments outside English.</p><p>We adapt the test set developed for English by Kletz et al. <ref type="bibr" target="#b1">[2]</ref> to Italian, creating the Self-Contained Italian Neg Set (SCIN Set). Using the dataset to evaluate bert- and roberta-based models for Italian, we find that these models are unable to adjust their prediction in response to constraints posed by negation, often generating contradictory text.</p><p>The article will be structured as follows. The rest of Section 1 will introduce compositional phenomena and Italian negation in particular. Section 2 will briefly review related work. Section 3 will detail the composition of the SCIN Set. Section 4 will present the tests conducted on several bert-based Italian models using the SCIN Set; in particular, we tested the following bert-base-cased models:</p><p>• bert-base for Italian, both in its basic and its XXL versions (bert-base-italian-cased,</p><formula xml:id="formula_0">CEUR Workshop Proceedings ceur-ws.org ISSN 1613-0073 bert-base-italian-xxl-cased) 1 [3], • m-bert (multilingual bert) 2 [4],</formula><p>• alb3rt0<ref type="foot" target="#foot_2">3</ref>  <ref type="bibr" target="#b4">[5]</ref>, and • UmBERTo<ref type="foot" target="#foot_3">4</ref>  <ref type="bibr" target="#b5">[6]</ref>. Section 5 will discuss the results, followed by a final section containing our general conclusions and ideas for further research.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.">Related work</head><p>Although negation plays an essential role in human communication, it appears to present challenges for PLMs. In recent years, much research has focused on this topic.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.1.">Effect of negation on the model's prediction</head><p>Kassner and Schütze <ref type="bibr" target="#b6">[7]</ref> and Ettinger <ref type="bibr" target="#b7">[8]</ref> analyzed to what extent Transformer-based language models' predictions are sensitive to the presence or absence of negation in sentences involving factual knowledge, such as (1-a-b):</p><p>( They found that in such pairs the top-1 predictions are unchanged most of the time: models do not seem to take into account the polarity of the environment (presence or absence of a negation in the surrounding sentence) to adapt their predictions. They concluded that models do not deal correctly with negation. Gubelmann and Handschuh <ref type="bibr" target="#b8">[9]</ref> criticized such studies, noting in particular that the pragmatic component was overlooked in Ettinger's experiments. They noted that a statement containing a negation stating a false fact (for example, Birds cannot fly) can be more plausible than a formally true but unusual statement (say, Birds cannot breastfeed). In fact, a vast number of words could potentially fit the negative statement, making it true, many of them with little association with the rest of the sentence. This makes it challenging for any single word to become the top prediction in the negative case.</p><p>Gubelmann and Handschuh <ref type="bibr" target="#b8">[9]</ref> developed a more pragmatically informed test set, in which each instance is (in <ref type="bibr" target="#b1">[2]</ref>'s terms) self-contained. This means that each item in the set includes some context information, allowing direct evaluation of the model's completion. 
Building on this work, <ref type="bibr" target="#b1">[2]</ref> developed the Self-Contained Neg Test, which aimed to address some issues in the test set from <ref type="bibr" target="#b8">[9]</ref> and more accurately determine the model's handling of negation without interference of world knowledge.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.2.">The Self-Contained Neg Test</head><p>The Self-Contained Neg Test, developed by Kletz et al. <ref type="bibr" target="#b1">[2]</ref>, is a set of pairs of sentences consisting of a context (C) and a target (T) sentence, either positive (p) or negative (n). The target sentence contains a masked position, syntactically constrained to be filled by a verb (2).</p><p>(2)</p><p>Jessica is an architect who likes to dance. She isn't happy to <ref type="bibr">[MASK]</ref>.</p><p>The instances are designed in such a way that a model that predicts (in the masked position of T) the last verb of C will produce a semantically well-formed paragraph only if C and T have the same polarity. For instance, in (2), the context is positive (Cp), the target is negative (Tn), and as a consequence a model predicting dance in the masked position produces an ill-formed paragraph:</p><p>(3) #Jessica is an architect who likes to dance. She isn't happy to dance.</p><p>In contrast, a CnTn version of (3) would accept the verb dance in the same position:</p><p>(4) Jessica is an architect who doesn't like to dance. She isn't happy to dance.</p><p>To produce the sentences of the set, the pattern ( <ref type="formula">5</ref>) is taken as a starting point, where NAME and PRON are substituted with a proper noun and a compatible third person pronoun, PROF is substituted with a profession name, and ACT is substituted with an action verb.</p><p>(5) NAME is a PROF who likes/doesn't like to ACT. PRON is/isn't happy to <ref type="bibr">[MASK]</ref>.</p><p>A large number of triplets (NAME, PROF, ACT) are tested with each model, and the ones that are retained are the ones such that the model's top one prediction is the ACT verb itself when C and T are both positive (CpTp). Here for instance, assuming that (6) are a model's predictions, the triplet (Jessica, architect, dance) would be retained while the triplet (Luke, janitor, swim) would not.</p><p>(6) a. 
Jessica is an architect who likes to dance. She is happy to dance. b. Luke is a janitor who likes to swim. He is happy to ski.</p><p>Once triplets have been selected (the set of all triplets such that the ACT verb is repeated in CpTp instances), CpTn and CnTp instances can be formed, and the expectation is that a model that "understands" negation should not predict the ACT verb in those cases since it would lead to contradictory instances. As a control, two additional configurations are considered: CnTn where it is expected that the repetition of ACT is possible (though not required), and CpTv in which an adverb (very) is inserted in the positive target, which should not change the preferred prediction of ACT since both sentences are positive. The different configurations are illustrated below. <ref type="bibr" target="#b6">(7)</ref> CpTp Jessica is an architect who likes to dance.</p><p>She </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.">SCIN construction</head><p>In Italian, negation is most commonly expressed by the negative invariable proclitic non (not) <ref type="bibr" target="#b9">[10]</ref>.</p><p>It is this expression of negation that we use for the Italian adaptation of the Self-Contained Neg Test that we present in this section: the SCIN set.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.1.">Italian patterns</head><p>Following the preparation of the Self-Contained Neg Test, we collect a list of Italian verbs, professions and names that will be used to create the triplets to be tested. The verbs are taken from the Dizionario Italiano Sabatini Coletti 2022 (online version); only the intransitive (3138 verbs) are retained; among these, for each of the tested models we further exclude the verbs that are not tokenized as a single token. The selected names are the 100 most popular in Italy in 2024 <ref type="foot" target="#foot_4">5</ref> . Lastly, the professions are taken from a site specializing in job searches in Italy <ref type="foot" target="#foot_5">6</ref> ; of those present on the site, only those consisting of a single word have been selected.</p><p>The patterns cannot simply be a direct translation of English patterns into Italian. In fact, for the test to be adequate for evaluating models, we need the masked position to be syntactically constrained to be a verb. This would not be the case if we used a direct translation of the original sentences: for example, the sequence (8) can be completed with the token "questo" ( = PRON is happy to do this). <ref type="bibr" target="#b7">(8)</ref> NAME è un PROF che ama ACT. È felice di MASK.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>NAME is a PROF who loves to ACT. (PRON) is happy to MASK.</head><p>We choose instead to rely on the pair (9), involving a semantic inference relation. <ref type="bibr" target="#b8">(9)</ref> ha l'abitudine di / molto spesso is used to / very often</p><p>The final form of the SCIN set is available in table 1. The shape of the contexts is given in row 1, that of the targets in row 2, and the test target Tv is added in row 3.</p><p>Our assumption is that, if the model repeats the ACT token in the CpTp configuration, it is proof that the model has resolved the ha l'abitudine di / molto spesso inference. When confronted with the CpTn or CnTp configuration, the model should have the addition of the negation as the only element that can explain the modification of its predictions. Finally, the CpTv control allows us to check the extent to which the addition of a different, non-negative adverb in the sequence modifies the model's predictions; we can assume that any modification of greater magnitude than that associated to CpTv are due to the influence of negation.</p><p>The complete list of new patterns is available in Table <ref type="table" target="#tab_2">1</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2.">Pattern selection</head><p>The triplets (name, profession, verb) used for testing are selected by testing them on the CpTp configuration: only triplets leading to a repetition of the ACT token are retained (see Table <ref type="table">2</ref>). This ensures that only patterns for which the model is already biased towards repetition are tested, and the model has to understand the influence of negation on sentence semantics to reverse this tendency. All available triplets are tested, i.e. all configurations between verbs monotokenized by the model, first names and occupations selected in subsection 3.1. As tokenization is model-dependent, the number of verbs tested is not the same for each model: details are available in the first row of table <ref type="table" target="#tab_3">3</ref>.</p><p>The results of this test are available in table <ref type="table" target="#tab_3">3</ref>. The results are highly model-dependent: while the bert-base-italian-cased model predicts the ACT token in almost 25% of cases, this is the case in only 0.03% of cases for alb3rt0.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.">Testing</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.">Setup</head><p>Tests are performed as in Kletz et al. <ref type="bibr" target="#b10">[11]</ref>. Contexts (C) and targets (T) are combined to create two test patterns CpTn, CnTp; in addition to these two, the test includes two control patterns CnTn and CpTv where the repetition of the ACT verb is not contradictory.</p><p>All selected triplets are then used to saturate the patterns, and the resulting patterns are provided as inputs to the models.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>C(ontext)</head><p>T(arget) Retained?</p><formula xml:id="formula_1">b-b-italian-c fuma ✓ b-b-italian-xxl-c fuma ✓ m-bert balla no alb3rt0</formula><p>parla no</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 2</head><p>An example of selecting a triplet for testing. A NAME/PROF/VERB triplet is used to saturate the CpTp pattern of SCIN. The sequence contains a mask and is used as input to a PLM. If the model prediction is the ACT token, the triplet is retained (indicated by the ✓ symbol). In the name of the models given as examples, "b-b" means bert-base, "it" stands for italian and "c" for cased. the models. Predictions at masked positions are collected.</p><p>We use drop as a measure of the models' performance: for each pattern, given the rate 𝑡𝑟 of repetitions of the Act Token in the predictions, the drop is defined as 100 − 𝑡𝑟. The higher the drop for the CpTn and CnTp patterns and the lower for the CnTn and CpTv controls, the better the model has understood the negation.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.2.">Results and Discussion</head><p>Results are shown in table <ref type="table" target="#tab_4">4</ref>.</p><p>In contrast with the observations made by <ref type="bibr" target="#b7">[8]</ref> and <ref type="bibr" target="#b6">[7]</ref>, the models are not insensitive to the presence of negation in a sentence: all the models show a drop in both configurations CpTn and CnTp, showing an adaptation of their predictions to the presence of a negation cue. This observation is confirmed by the fact that the drops in the CpTv control are always lower than those observed in CpTn or CnTp.</p><p>This shows that simply adding an adverb is not sufficient to change the model's predictions. While we cannot definitively attribute this to its logical function, the negation marker does exert a distinct influence.</p><p>Nevertheless, it is important to emphasize the very clear limitations of these results. Firstly, the drops never exceed 25%, meaning that 75% of the time the model predicts a semantically prohibited token. On the other hand, with the exception of m-bert, all the models have a higher drop for the CnTn control than for the CnTp configuration, thus indicating that even though the models have acquired a certain understanding of negation, this remains superficial and does not, for example, clearly include an understanding of the positive value of a double negation.</p><p>A broader examination of the results reveals that while the drops in CpTn and CnTp configurations increase together, the CnTn controls also show a corresponding increase.</p><p>Finally, the training corpus of the models seems to have an influence on their performance. For example, note that the alb3rt0 model is the model obtaining the results least in line with our expectations, while bert-base-italian-xxl-cased and bert-base-italian-cased had better drop values, with the former performing better than the latter. 
However, these three models have identical numbers of layers, attention heads and hidden sizes, the difference between them only consisting in their training data. The alb3rt0 model was trained exclusively on tweets, which likely limits the diversity of its data, particularly with respect to negation. In contrast, bert-base-italian-cased and bert-base-italian-xxl-cased models were trained on more varied corpora, with the latter featuring a larger dataset.</p><p>In the future, this should lead us to study the correlation between the performance of the models and the fine-grained distribution of negative and affirmative contexts in their training corpus.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.">Comparison with English</head><p>In this section we compare the results obtained with the SCIN Set with those observed by <ref type="bibr" target="#b1">[2]</ref> in English. Drops of Italian pretrained language models on the SCIN Set, for each pattern type. In the two first rows, a high number is expected -the higher number of each row in bold face; in the two last rows, a lower number is expected. In the column titles "b-b" means bert-base, "it" stands for italian and "c" for cased</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Model</head><formula xml:id="formula_2">b-b-it-c b-b-it-xxl m-</formula><p>The scale of the drops in the two articles is notably very different: the maximum drop observed in Italian is 23% (CpTn m-bert), while in English it's 82.8%. Similarly, the CpTv drops of Italian-speaking models hardly exceed 15%, while those of English-speaking models are never less than 25%.</p><p>On the other hand, model architecture and type of training do not seem to have a major influence: Umberto has the same architecture as roberta-base, but while the latter is the best performing model in <ref type="bibr" target="#b1">[2]</ref>, the former's drops are the lowest for all configurations of the SCIN Set. Conversely, the other Italian models are built with the same architecture as bert-base-cased, i.e. the worst performing model for English; however, even the worst performing Italian model, namely alb3rt0, features higher drops than bert-base-cased. This confirms the observation from the previous section, that while architecture is indeed a limiting criterion, training data probably plays a significant role.</p><p>In general, we note that none of these models, neither for Italian nor for English, shows definitive drops compatible with a full understanding of the semantic constraints of negation.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="6.">Conclusion</head><p>In this paper, we investigated the ability of several Italian PLMs to take negation into account in their predictions. To do this, we adapted to Italian the Self-Contained Neg Test proposed by Kletz et al. <ref type="bibr" target="#b1">[2]</ref>, which is based on minimal pairs of aligned sentences.</p><p>Applying this test to six models enabled us to show that negation modifies their predictions, but that this does not happen consistently or in a way that is always coherent with the semantic effect that we expect negation to have on sentences. These results suggest a strong need to adapt these models to make them more sensitive to negation and its semantic consequences. Nevertheless, we also noted a fairly marked difference in performance from one model to another, correlated with the different corpora used to train them. We thus suggest that a lexical and statistical study of these corpora could shed further light on the behavior of the models.</p><p>Lastly, it would be interesting to compare these results with the performance of generative models, in order to study the relative importance of the number of model parameters in relation to their architecture.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>A. Verb statistics by PLM</head><p>Details of the number of monotokenised intransitive verbs available for each PLM tested are available in table 5. Detail of the number of Italian intransitive verbs tokenised as a single token for each of the Italian models tested.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_2"><head>Table 1</head><label>1</label><figDesc>Complete list of contexts and targets used to build masked sequences in the SCIN dataset. Masks are always in the target. Contexts and targets can be either positive or negative, and the target can also have an adverb added which is not a negation cue. Patterns are made up of a context and a target, i.e. 5 possible patterns.</figDesc><table><row><cell>1</cell><cell>p</cell><cell>NAME è un(a) PROF che ha l'abitudine di ACT. NAME is a PROF who is used to ACT-ing.</cell><cell>PRON [MASK] molto spesso. PRON [MASK] often.</cell></row><row><cell>2</cell><cell>n</cell><cell>NAME è un(a) PROF che non ha l'abitudine di ACT. NAME is a PROF who is not used to ACT-ing.</cell><cell>PRON non [MASK] molto spesso. PRON doesn't [MASK] often.</cell></row><row><cell>3</cell><cell>v</cell><cell>-</cell><cell>PRON [MASK] davvero molto spesso. PRON [MASK] really often.</cell></row><row><cell></cell><cell cols="2">Instantiated NAME/PROF:</cell><cell></cell></row><row><cell></cell><cell></cell><cell>Jessica / Ballerina (Dancer)</cell><cell></cell></row><row><cell></cell><cell cols="2">Tested verb: Fumare (To smoke)</cell><cell></cell></row><row><cell cols="3">Tested example: Jessica è una ballerina che</cell><cell></cell></row><row><cell cols="3">ha l'abitudine di fumare. Lei [MASK] spesso.</cell><cell></cell></row><row><cell>Model</cell><cell></cell><cell>Top 1</cell><cell></cell></row><row><cell></cell><cell></cell><cell>pred.</cell><cell></cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_3"><head>Table 3</head><label>3</label><figDesc>Details of the verb sets created for each model. The first line shows the number of triples available per model, the second the number of these triples which, in a CpTp configuration, led to a repetition (prediction by the ACT token model), and line 3 the percentage of triples this represents.) The last line shows how many of the triplets leading to a repeat were retained, the maximum for one model being 20,000. In the column titles, "b-b" means bert-base, "it" stands for italian and "c" for cased</figDesc><table><row><cell>bert</cell><cell>alb3rt0 UmBERTo</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_4"><head>Table 4</head><label>4</label><figDesc></figDesc><table /></figure>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="1" xml:id="foot_0">https://huggingface.co/dbmdz/bert-base-italian-xxl-cased</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="2" xml:id="foot_1">https://huggingface.co/bert-base-multilingual-cased</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="3" xml:id="foot_2">https://github.com/marcopoli/AlBERTo-it</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="4" xml:id="foot_3">https://github.com/musixmatchresearch/umberto</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="5" xml:id="foot_4">https://www.nostrofiglio.it/gravidanza/nomi-per-bambini/ i-100-nomi-per-bambini-piu-amati-dai-genitori-di-nostrofiglio-it</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="6" xml:id="foot_5">https://www.wecanjob.it/pagina9_elenco-professioni.html</note>
		</body>
		<back>

			<div type="acknowledgement">
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Acknowledgments</head><p>We would like to express our gratitude to Marie Candito for her valuable assistance and guidance throughout the course of this study.</p><p>This work was funded in part by the French government under management of Agence Nationale de la Recherche as part of the "Investissements d'avenir" program, reference ANR-19-P3IA0001 (PRAIRIE 3IA Institute). This research was also partially funded by the Labex EFL (ANR-10-LABX-0083) and by PNRR-M4C2-Investimento 1.3, Partenariato Esteso PE00000013--"FAIR-Future Artificial Intelligence Research"-Spoke 1 "Human-centered AI, " funded by the European Commission under the NextGeneration EU programme.</p></div>
			</div>


			<div type="availability">
<div xmlns="http://www.tei-c.org/ns/1.0"><p>(P. Amsili) https://people.unipi.it/alessandro_lenci/ (A. Lenci); https://lattice.cnrs.fr/amsili/ (P.</p></div>
			</div>

			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">Negation</title>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">R</forename><surname>Horn</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Wansing</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">The Stanford Encyclopedia of Philosophy</title>
				<editor>
			<persName><forename type="first">E</forename><forename type="middle">N</forename><surname>Zalta</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">U</forename><surname>Nodelman</surname></persName>
		</editor>
		<meeting><address><addrLine>Winter</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2022">2022</date>
		</imprint>
		<respStmt>
			<orgName>Metaphysics Research Lab, Stanford University</orgName>
		</respStmt>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">The self-contained negation test set</title>
		<author>
			<persName><forename type="first">D</forename><surname>Kletz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Amsili</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Candito</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/2023.blackboxnlp-1.16</idno>
		<ptr target="https://aclanthology.org/2023.blackboxnlp-1.16" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 6th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP, Association for Computational Linguistics</title>
				<editor>
			<persName><forename type="first">Y</forename><surname>Belinkov</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">S</forename><surname>Hao</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">J</forename><surname>Jumelet</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Kim</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><surname>McCarthy</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">H</forename><surname>Mohebbi</surname></persName>
		</editor>
		<meeting>the 6th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP, Association for Computational Linguistics<address><addrLine>Singapore</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2023">2023</date>
			<biblScope unit="page" from="212" to="221" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<monogr>
		<title level="m" type="main">Italian bert and electra models</title>
		<author>
			<persName><forename type="first">S</forename><surname>Schweter</surname></persName>
		</author>
		<idno type="DOI">10.5281/zenodo.4263142</idno>
		<ptr target="https://doi.org/10.5281/zenodo.4263142" />
		<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<monogr>
		<title level="m" type="main">BERT: pre-training of deep bidirectional transformers for language understanding</title>
		<author>
			<persName><forename type="first">J</forename><surname>Devlin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M.-W</forename><surname>Chang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Lee</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Toutanova</surname></persName>
		</author>
		<idno>CoRR abs/1810.04805</idno>
		<ptr target="http://arxiv.org/abs/1810.04805" />
		<imprint>
			<date type="published" when="2018">2018</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">AlBERTo: Modeling italian social media language with bert</title>
		<author>
			<persName><forename type="first">M</forename><surname>Polignano</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Basile</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Basile</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>De Gemmis</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Semeraro</surname></persName>
		</author>
		<idno type="DOI">10.4000/ijcol.472</idno>
		<ptr target="https://doi.org/10.4000/ijcol.472" />
	</analytic>
	<monogr>
		<title level="j">IJCoL</title>
		<imprint>
			<biblScope unit="volume">25</biblScope>
			<biblScope unit="page" from="11" to="31" />
			<date type="published" when="2019">2019</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<monogr>
		<title level="m" type="main">Umberto: an italian language model trained with whole word masking</title>
		<author>
			<persName><forename type="first">L</forename><surname>Parisi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Francia</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Magnani</surname></persName>
		</author>
		<ptr target="https://github.com/musixmatchresearch/umberto" />
		<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">Negated and misprimed probes for pretrained language models: Birds can talk, but cannot fly</title>
		<author>
			<persName><forename type="first">N</forename><surname>Kassner</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Schütze</surname></persName>
		</author>
		<ptr target="https://aclanthology.org/2020.acl-main.698" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</title>
				<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">What BERT is not: Lessons from a new suite of psycholinguistic diagnostics for language models</title>
		<author>
			<persName><forename type="first">A</forename><surname>Ettinger</surname></persName>
		</author>
		<idno type="DOI">10.1162/tacl_a_00298</idno>
		<ptr target="https://doi.org/10.1162/tacl_a_00298" />
	</analytic>
	<monogr>
		<title level="j">Transactions of the Association for Computational Linguistics</title>
		<imprint>
			<biblScope unit="volume">8</biblScope>
			<biblScope unit="page" from="34" to="48" />
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">Context matters: A pragmatic study of PLMs&apos; negation understanding</title>
		<author>
			<persName><forename type="first">R</forename><surname>Gubelmann</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Handschuh</surname></persName>
		</author>
		<ptr target="https://aclanthology.org/2022.acl-long.315" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics</title>
				<meeting>the 60th Annual Meeting of the Association for Computational Linguistics<address><addrLine>Dublin, Ireland</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2022">2022</date>
			<biblScope unit="volume">1</biblScope>
			<biblScope unit="page" from="4602" to="4621" />
		</imprint>
	</monogr>
	<note>: Long Papers), Association for Computational Linguistics</note>
</biblStruct>

<biblStruct xml:id="b9">
	<monogr>
		<author>
			<persName><forename type="first">L</forename><surname>Renzi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Salvi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Cardinaletti</surname></persName>
		</author>
		<title level="m">Grande grammatica italiana di consultazione</title>
				<meeting><address><addrLine>Il Mulino, Bologna</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2001">2001</date>
			<biblScope unit="volume">2</biblScope>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">Probing structural constraints of negation in pretrained language models</title>
		<author>
			<persName><forename type="first">D</forename><surname>Kletz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Candito</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Amsili</surname></persName>
		</author>
		<ptr target="https://openreview.net/forum?id=_7VPETQwnPX" />
	</analytic>
	<monogr>
		<title level="m">The 24th Nordic Conference on Computational Linguistics</title>
				<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
