<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">To Click it or not to Click it: An Italian Dataset for Neutralising Clickbait Headlines</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Daniel</forename><surname>Russo</surname></persName>
							<email>drusso@fbk.eu</email>
							<affiliation key="aff0">
								<orgName type="institution">University of Trento</orgName>
								<address>
									<settlement>Trento</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="institution">Fondazione Bruno Kessler</orgName>
								<address>
									<settlement>Trento</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
							<affiliation key="aff3">
								<orgName type="department" key="dep1">Essere Informati</orgName>
								<orgName type="department" key="dep2">Voxnews</orgName>
								<orgName type="department" key="dep3">DirettaNews</orgName>
								<orgName type="department" key="dep4">Informati</orgName>
								<orgName type="institution">TGNewsItalia</orgName>
								<address>
									<country key="IT">Italia</country>
								</address>
							</affiliation>
							<affiliation key="aff4">
								<orgName type="department">TG5Stelle</orgName>
								<orgName type="institution" key="instit1">Jeda News</orgName>
								<orgName type="institution" key="instit2">News Cronaca</orgName>
								<address>
									<addrLine>TG24-ore</addrLine>
								</address>
							</affiliation>
							<affiliation key="aff5">
								<orgName type="institution" key="instit1">ByoBlu</orgName>
								<orgName type="institution" key="instit2">WorldNotix</orgName>
								<address>
									<postCode>Mag24</postCode>
									<settlement>Fortementein</settlement>
									<region>lo sapevi che</region>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Oscar</forename><surname>Araque</surname></persName>
							<email>o.araque@upm.es</email>
							<affiliation key="aff2">
								<orgName type="institution">Universidad Politécnica de Madrid</orgName>
								<address>
									<settlement>Madrid</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Marco</forename><surname>Guerini</surname></persName>
							<email>guerini@fbk.eu</email>
							<affiliation key="aff1">
								<orgName type="institution">Fondazione Bruno Kessler</orgName>
								<address>
									<settlement>Trento</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
							<affiliation key="aff3">
								<orgName type="department" key="dep1">Essere Informati</orgName>
								<orgName type="department" key="dep2">Voxnews</orgName>
								<orgName type="department" key="dep3">DirettaNews</orgName>
								<orgName type="department" key="dep4">Informati</orgName>
								<orgName type="institution">TGNewsItalia</orgName>
								<address>
									<country key="IT">Italia</country>
								</address>
							</affiliation>
							<affiliation key="aff4">
								<orgName type="department">TG5Stelle</orgName>
								<orgName type="institution" key="instit1">Jeda News</orgName>
								<orgName type="institution" key="instit2">News Cronaca</orgName>
								<address>
									<addrLine>TG24-ore</addrLine>
								</address>
							</affiliation>
							<affiliation key="aff5">
								<orgName type="institution" key="instit1">ByoBlu</orgName>
								<orgName type="institution" key="instit2">WorldNotix</orgName>
								<address>
									<postCode>Mag24</postCode>
									<settlement>Fortementein</settlement>
									<region>lo sapevi che</region>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">To Click it or not to Click it: An Italian Dataset for Neutralising Clickbait Headlines</title>
					</analytic>
					<monogr>
						<idno type="ISSN">1613-0073</idno>
					</monogr>
					<idno type="MD5">A216FD1C1218C87D96BEC368207D645D</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2025-04-23T17:36+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>clickbait</term>
					<term>natural language processing</term>
					<term>natural language generation</term>
					<term>large language model</term>
					<term>language resource</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>Clickbait is a common technique aimed at attracting a reader's attention, although it can result in inaccuracies and lead to misinformation. This work explores the role of current Natural Language Processing methods to reduce its negative impact. To do so, a novel Italian dataset is generated, containing manual annotations for classification, spoiling, and neutralisation of clickbait. Besides, several experimental evaluations are performed, assessing the performance of current language models. On the one hand, we evaluate the performance in the task of clickbait detection in a multilingual setting, showing that augmenting the data with English instances largely improves overall performance. On the other hand, the generation tasks of clickbait spoiling and neutralisation are explored. The latter is a novel task, designed to increase the informativeness of a headline, thus removing the information gap. This work opens a new research avenue that has been largely uncharted in the Italian language.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1.">Introduction</head><p>Accuracy and truthfulness are essential characteristics of journalism. Nevertheless, in an effort to improve revenue, a large number of newspapers and magazines publish clickbait articles, a viral journalism strategy that seeks to attract users to click on a link to a page through tactics such as sensationalist stories and catchy headlines that act as bait. The use of these tactics harms the quality of news pieces and thus hinders the ability of citizens to obtain reliable and objective information. The literature distinguishes between two main types of clickbait. (i) Classical clickbait <ref type="bibr" target="#b0">[1]</ref> embeds within the headlines information gaps, also known as curiosity gaps <ref type="bibr" target="#b1">[2,</ref><ref type="bibr" target="#b3">3]</ref>, in order to arouse curiosity in the reader that is forced to access the article's content which is ultimately disappointing. Classical clickbait usually makes use of hyperbolic language, caps lock, demonstrative pronouns and superlative to grasp the user's attention <ref type="bibr" target="#b0">[1,</ref><ref type="bibr" target="#b4">4,</ref><ref type="bibr" target="#b5">5]</ref>. (ii) Deceptive clickbait <ref type="bibr" target="#b5">[5]</ref> refers to headlines that resemble traditional media headlines by offering a summary of the article, still leading to content that differs from the reader's expectations. These headlines promise high news value but deliver content with low news value, resulting in reader disappointment.</p><p>Although clickbait headlines are considered one of the less harmful forms of fake news, as their main goal is to increase profit by driving traffic to their website <ref type="bibr" target="#b6">[6,</ref><ref type="bibr" target="#b7">7]</ref>, they can sometimes pose a danger, especially when they deal with potentially harmful topics such as health and science. 
To address this problem, Natural Language Processing techniques have been widely employed to detect clickbait headlines, with a particular focus on the English language <ref type="bibr" target="#b8">[8,</ref><ref type="bibr" target="#b9">9]</ref>. Hagen et al. <ref type="bibr" target="#b10">[10]</ref> proposed the clickbait spoiling task, i.e., the generation of a short text that satisfies the curiosity induced by a clickbait post.</p><p>In light of this, this work addresses the issue of clickbait in the Italian language, studying its characteristics and the possibilities of current technology to reduce its negative impact. In doing so, we have generated a novel Italian dataset that gathers a large collection of clickbait articles, which is made public for the community to use 1 . We named the dataset ClickBaIT. This dataset contains manually annotated instances as clickbait/non-clickbait, as well as manually generated spoilers and neutralised headlines. We have also performed a thorough multilingual evaluation, exploiting the availability of English data to complement our dataset in the task of clickbait detection. Finally, this work also explores the use of our annotated dataset and large language models to automatically generate both spoilers and, as a novel task, a neutralised version of clickbait headlines. A graphical illustration of the experimental design is presented in Figure <ref type="figure" target="#fig_1">1</ref>.  The experimental design is depicted, encompassing three tasks: clickbait detection, spoiler generation, and clickbait neutralisation. The robot icon represents the language model used for either classification or generation. We utilized DistilBERT and Llama3-8B for task 1, and LLaMAntino-3-8B for tasks 2 and 3. The models were tested for generative tasks using zero-shot, few-shot, and fine-tuning configurations, except for question rewriting, for which we employed a few-shot approach.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.">Related Work</head><p>The use of clickbait is common in many news outlets, and thus it has been extensively studied.</p><p>There are several works that address clickbait detection: Potthast et al. <ref type="bibr" target="#b8">[8]</ref> collected a corpus of clickbait articles, posted by well-known English-speaking newspapers on Twitter, and proposed a set of lexical and semantic features to be used with a Random Forest classifier. Following the general trend in the Natural Language Processing (NLP) field, clickbait detection has also been explored using deep learning methods, such as convolutional <ref type="bibr" target="#b11">[11]</ref> and recurrent <ref type="bibr" target="#b12">[12]</ref> neural networks, as well as more recent Transformer-based approaches <ref type="bibr" target="#b9">[9]</ref>.</p><p>Other works leveraged Natural Language Generation (NLG) strategies to create a piece of text, the spoiler, comprising the information needed to fulfil the curiosity gap present in clickbait headlines. This task was proposed by Fröbe et al. <ref type="bibr" target="#b13">[13]</ref> with the name of spoiling generation. The authors created the Webis Clickbait Spoiling Corpus 2022, and cast spoiler generation as a Question Answering task.</p><p>Eventually, they opened the challenge to the community through a SemEval-2023 shared task <ref type="bibr" target="#b13">[13,</ref><ref type="bibr" target="#b14">14]</ref>. The optimal spoiler generator operates with five independent sequence-to-sequence generative models. It selects the best spoiler through a majority vote, determined by comparing edit distances among the outputs <ref type="bibr" target="#b15">[15]</ref>.</p><p>Regarding the languages studied, the majority of works are based on English. 
Other works were performed in Chinese <ref type="bibr" target="#b16">[16]</ref>, Turkish <ref type="bibr" target="#b17">[17,</ref><ref type="bibr" target="#b18">18]</ref> and Spanish <ref type="bibr" target="#b19">[19,</ref><ref type="bibr" target="#b20">20]</ref>. To the best of our knowledge, this is the first work that fully addresses the study of clickbait detection and spoiling in the Italian language. Moreover, we propose a novel task, i.e., clickbait neutralisation, which aims at filling the curiosity gap by rewriting the headline leveraging the information of the spoiler.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.">Dataset</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.1.">Dataset Creation</head><p>Data were collected from fourteen news websites 2 , notorious for acting as news aggregators, engaging in plagiarism, lacking fact-checking, and using sensational headlines to draw in readers. In all the websites, articles are labelled according to specific categories; we decided to focus on four macro-categories: health, science, economy, and environment. These categories have been selected to cover some of the most frequent - and potentially hazardous - domains where clickbait is usually found. Since the categories varied a lot from website to website, we manually mapped each category into one of the four macro-categories under analysis. Two annotators, knowledgeable in the area, were then provided with the headlines and the related articles and were asked to label whether a headline was clickbait. For aiding in this task, we have used as reference the clickbait measure as computed by Arthur et al. <ref type="bibr" target="#b21">[21]</ref>. Eventually, given the clickbait dataset, the two annotators were required to extract the gold spoilers from the article's text and to produce the neutralised forms for each headline. To this end, we employed an author-reviewer strategy <ref type="bibr" target="#b22">[22]</ref>: an LLM (ChatGPT gpt-3.5-turbo-0125 3 ) was used to generate both the spoilers and the neutralised forms (author component) 4 , and the native Italian-speaking annotators were asked to manually post-edit the generations (reviewer component). 5 This procedure was proven to be more effective and less time-consuming than writing the data  <ref type="bibr" target="#b25">[25]</ref>.</p><p>The obtained HTER results for the spoiler generation (0.4) are higher than those computed upon the neutralisation (0.3), on par or slightly lower than the 0.4 threshold. 
The high HTER values, especially for the spoiler annotation, can be attributed to the model's tendency to generate spoilers comprising more details than those necessary to fill the curiosity gap. While in some cases a simple deletion was sufficient, in others the annotator had to rewrite the spoiler almost completely. Regarding the annotation of the neutralisation texts, the higher results are a consequence of the spoiler generation, as the model was required to generate them simultaneously.</p><p>With this, we have generated the golden set of the dataset, in which all the instances were manually annotated. Further details regarding the dataset creation can be found in Appendix A. To expand this set, we have used a clickbait classifier (see Sect. 4.1) to automatically detect clickbait headlines. This new set of data, automatically annotated, constitutes the silver set of our dataset. Several examples of dataset entries are provided in Table <ref type="table" target="#tab_0">1</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2.">Dataset Analysis</head><p>The complete ClickBaIT dataset consists of 4,144 entries. Each entry includes the following fields: (i) source website, that specifies the source of the article; (ii) publication date, which is captured from the original source; Table <ref type="table" target="#tab_1">2</ref> shows the main statistics of the final version of the dataset. The golden set is manually annotated and thus contains high-quality information. Additionally, the silver set has been annotated automatically as described and therefore contains a larger number of instances.</p><p>To gain a deeper understanding of the content of the dataset we have used Variationist <ref type="bibr" target="#b26">[26]</ref>, a tool that allows users to inspect useful statistics and patterns in textual data. Upon inspection of the data, we have detected several patterns frequently used for generating the curiosity gap.</p><p>Of course, one of the most common strategies used in</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Set Clickbait (%) Non-clickbait (%) Total</head><p>Golden 698 (53%) 629 (47%) clickbait headlines is the formulation of a question that is later answered in the article, even though sometimes it is not. In the instance "Quanto è green il gas? " (How green is gas? ) the article explains that gas is not considered green. Another frequent strategy we have detected is the introduction to the content of the article, which invites the reader to click it: Beve un cucchiaio di aceto di mele nell'acqua tutti i giorni, ecco cosa succede (Drinks a tablespoon of apple cider vinegar in water every day, this is what happens).</p><p>Another usual pattern is the reference to enumerations, frequently using round and manageable numbers such as 10, 8, and 5. This can be done for introducing numbered content, as in "Le 10 fantasie femminili più segrete" (The 10 most secret female fantasies), or even to generate a reaction in the reader: "Hai solo 10 secondi per salvarti. Ecco cosa devi fare:" (You only have 10 seconds to save yourself.</p><p>Here's what you have to do:). Other means can be used to make headlines noticeable, such as introducing text in all caps, using striking vocabulary or even punctuation marks, as in "[ALLARME] Truffa AUTO USATE, fate attenzione!" ([ALERT] USED CAR scam, beware!).</p><p>See Table <ref type="table">8</ref> (Appendix A.2) for a collection of patterns that have been considered during the manual annotation of the dataset. Besides, Appendix B includes a graphical summary of the dataset, while its interactive version can be accessed online. 6 Details are provided in Appendix C.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.">Experimental Design</head><p>The experimental design comprises three steps: clickbait detection, spoiler generation and clickbait neutralisation.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.">Clickbait Detection</head><p>This is the first and most basic task aimed at addressing the clickbait phenomenon. To explore the effect of using additional data in the training process, we use the Webis-Clickbait-17 <ref type="bibr" target="#b27">[27]</ref>, an English dataset containing clickbait that is also annotated in a binary fashion.</p><p>Following the insights by Araque et al. <ref type="bibr" target="#b28">[28]</ref>, we use the training on English data to improve the classification of Italian data. The main idea is to harness the availability of large amounts of English data, generating a compound dataset with a lower amount of Italian instances. To do so, a multilingual mixture dataset is created so that 35% of the final dataset comprises Italian instances, while the rest are in English.</p><p>We model the detection challenge as a binary classification task: clickbait/non-clickbait. To study the complexity of the task, we explore two different models for classification: (i) a DistilBERT <ref type="bibr" target="#b29">[29]</ref> (distil-base-multilingual-cased 7 ) model<note place="foot" n="6">https://oaraque.github.io/ClickBaIT/clickbait.html</note> trained in a multilingual setting, and (ii) the Llama3-8B language model (meta-llama/Meta-Llama-3-8B 8 ). The composed dataset has been split into train and test splits, which have been used to fine-tune and evaluate these models, respectively.</p><p>To assess the effect of using a mixture of both English and Italian instances in the dataset, we evaluate the performance of the two models in a monolingual setting (e.g., fine-tuning in Italian and predicting in the same language) as well as the multilingual variant (e.g., fine-tuning in English and Italian text, and predicting on Italian instances).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.2.">Spoiler Generation</head><p>The spoiler generation task consists in generating a short message that fulfils the curiosity gap present in a given clickbait title, by extracting the information from the linked article. To this end, we tested LLaMAntino-3-ANITA-8B-Inst-DPO-ITA (LLaMAntino-3-8B hereafter) <ref type="bibr" target="#b30">[30]</ref> on our clickbait dataset. The model was tested both in in-context learning (zero- and few-shot) and fine-tuning settings.</p><p>Building on prior research that frames spoiler generation as a Question Answering task <ref type="bibr" target="#b31">[31]</ref>, we prompt the model to rewrite clickbait headlines as questions and extract the corresponding answers, i.e., the spoilers, from the linked articles.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.3.">Clickbait Neutralisation</head><p>The best-performing configuration was employed for the neutralisation of the clickbait headlines. To this end, we instructed the LLM to perform a style transfer task, from a clickbait headline style to a more journalistic one, while integrating the spoiler information into the original headline.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.">Results and Discussion</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.1.">Evaluation Metrics</head><p>Firstly, for the evaluation of the clickbait detection task we use the macro-averaged precision, recall and f-score. This allows us to assess the performance even in an unbalanced scenario. For the generation tasks, we assessed lexical similarity through ROUGE score <ref type="bibr" target="#b32">[32]</ref> and semantic similarity. For the latter, text embeddings, computed using sentence-bert-base-italian-xxl-uncased 9 , were compared using cosine similarity. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 3</head><p>LLaMAntino-3-8B results for the spoiler generation task. We report ROUGE 1 and L (R1, RL) and semantic similarity (SemSim).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.2.">Clickbait detection</head><p>Table <ref type="table" target="#tab_3">4</ref> shows the results of the evaluation in the task of clickbait classification. As expected, introducing data instances in English improves the performance in Italian.</p><p>In the case of classification in Italian, we see a staggering improvement for the Llama3 model of 8.43 points. This further supports previous results <ref type="bibr" target="#b28">[28]</ref>. We argue that augmenting the training set with instances in a diverse language is an effective strategy that can be generalised to other tasks. We also see that the best model for the classification of clickbait is the one obtained with Llama3, trained with both English and Italian data. Hence, we use this model to predict on the silver set of our dataset. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.3.">Spoiler Generation Results</head><p>Results for the spoiler generation task are reported in Table <ref type="table">3</ref>. We evaluated the capabilities of LLaMAntino-3-8B in both in-context learning scenarios (zero- and few-shot) and through fine-tuning. As inputs, we used clickbait headlines and questions generated by ChatGPT, instructing the model to execute a Question Answering task for the latter. When using headlines as input, few-shot and fine-tuning approaches outperform zero-shot methods. Few-shot approaches demonstrate higher performance in terms of semantic similarity, while fine-tuning exhibits stronger lexical adherence to the source document, as reflected in ROUGE scores. This can be attributed to the few examples provided in the few-shot approach, which make the model aware of the task while allowing more creative outputs (resulting in lower ROUGE scores). Conversely, the fine-tuned model learned from the training data to adhere more closely to the source article, which comes at the expense of producing semantically richer responses (evidenced by lower SemSim scores). Interestingly, casting spoiler generation as a question-answering task yields higher results in the zero-shot setting compared to using headlines as input. However, the results for few-shot and fine-tuning scenarios tend to be on par. This can be explained by the fact that headlines may contain multiple gaps that the human-annotated dataset accounted for, but the non-supervised "question generation" module could not fully capture. Generally, this approach leads to sufficiently good results; however, we believe that more attention should be given to the quality of the questions, either through more efficient prompts or with human-generated/curated data.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.4.">Clickbait Neutralisation Results</head><p>In Table <ref type="table">5</ref>, we report the results for clickbait neutralisation. For this task, we prompted LLaMAntino-3-8B with a few-shot approach, employing the spoilers generated with the three configurations of the previous experiments (headlines as input). Using spoilers generated with the fine-tuned models leads to higher results both for lexical and semantic metrics. Interestingly, scores tend to increase when the training complexity of the input data increases. In Table <ref type="table">6</ref> we report examples of headlines along with their generated spoilers (through the fine-tuned model) and their neutralisation. Neutralisation generation results. Automatically generated spoilers from the previous experiments were used as input for the few-shot generation of the data. We report ROUGE 1 and L (R1 and RL) and the semantic similarity scores.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Headline</head><p>Spoiler Neutralisation "Juventus in Serie B": perché c'è panico tra i tifosi, la scoperta delle ultime ore 15 punti di penalizzazione Juventus in grave difficoltà: 15 punti di penalizzazione e il rischio di cadere in Serie B Lutto tremendo nello sport italiano, morto giovanissimo dopo un malore "Samuel Dilas era un giocatore di pallacanestro che militava nel Virtus Lumezzane a Brescia, in Serie B" e "aveva 24 anni" e "era alto 206 centimetri" e "nato a Novellara (Reggio Emilia)" e "aveva un padre di nome Torsen, una madre di nome Chiara e una sorella minore di nome Maia" e "era in convalescenza dopo una polmonite" e "era arrivato alla Virtus Lumezzane nella scorsa stagione".</p><p>Tragico decesso del pallacanestrista Samuel Dilas, 24 anni, ex convalescente da polmonite e giocatore della Virtus Lumezzane Un papà si rifiuta di mangiare accanto a un bambino Down di 5 anni, il cameriere decide di fare questo Il cameriere ha fuori il maleducato padre che voleva essere spostato a causa della presenza di un bambino con sindrome di Down.</p><p>Un cameriere espelle un cliente maleducato che chiede di essere spostato per non sedersi accanto a un bambino con sindrome di Down. E' doloroso e si forma tra le dita dei piedi, ecco come rimuoverlo "L'occhio di pernice è causato principalmente dalla pressione della scarpa che favorisce la formazione di un'ispessimento di pelle che provoca dolore, in quanto è soggetto all'attrito tra le dita. Per rimuovere l'occhio di pernice è fondamentale ammorbidire prima la zona interessata per poi provare a rimuovere l'ispessimento utilizzando rimedi naturali senza dolore e in modo semplice. "</p><p>Come rimuovere l'occhio di pernice, un problema di pressione e attrito causato dalle scarpe La chiamano "LA BOMBA" la miscela che in sole 24-48 ore elimina influenza, raffreddore e tosse Lo zenzero è un rimedio naturale per il trattamento di tosse, raffreddore e influenza. 
La miscela limone, zenzero e miele è ideale per alleviare i sintomi delle comuni malattie. Basta prendere 2 o 3 cucchiai della miscela naturale, riempire una tazza con acqua calda e lasciare in infusione per 3 o 4 minuti.</p><p>Miscela naturale di limone, zenzero e miele allevia i sintomi di tosse, raffreddore e influenza in pochi giorni.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 6</head><p>Examples of clickbait headlines, along with the automatically generated spoiler and neutralised version.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="6.">Conclusion</head><p>This work presents ClickBaIT, a novel Italian dataset for clickbait modelling, as well as a diverse set of experiments to assess the effectiveness of current models for clickbait detection, spoiling and neutralisation. The dataset includes news articles that have been manually annotated to indicate the presence of clickbait, spoilers associated with clickbait headlines, and their respective neutral headlines.</p><p>The experiments explore the effectiveness of current NLP methods for the modelling of clickbait headlines in Italian through ClickBaIT. The evaluation for clickbait detection shows how training data can be augmented in a multilingual setting, which leads to classification improvements that are in line with previous research <ref type="bibr" target="#b28">[28]</ref>. The generation experiments, for both spoiling and neutralisation, evidence that the evaluated model does benefit from in-domain knowledge extracted from the proposed dataset. As seen, these informed generations are more accurate and align better with the golden text.</p><p>Considering the effect of clickbait, we argue that while there are initially harmless articles, lack of accuracy can have a detrimental effect on readers. This is clear when considering certain sensitive domains such as health. Thus, we hope that this work facilitates future research on the topic, for example by addressing the link between clickbait and misinformation, considering both in a unified framework. 
scienza insetti, animali, AI, scienza, smartphone, Spazio, tecnologia, TECNOLOGIE, SCIENZA, ufo, biochimica, eclissi, bomba atomica, terra piatta, idroelettrico, temperatura, coltivazione, robot, fisica quantistica, macchie solari, ricerca, vulcano, titanio, universo, fotovoltaico, intelligenza, iPhone, hacker, microonde, motori di ricerca, onde elettromagnetiche, tecnologia, sole, scienza, radioterapia, pesticidi, armi chimiche, comete, case farmaceutiche, psichiatria, smartphone, formiche, elettrodomestici, solare, macrobiologi, mondo, lampadine a basso consumo, tecnologia, scienze-e-tech, scienza, scienza, innovazione, scienza, tecnologia-2, animali intelligenti, funzione cognitiva, microchip, cani, samsung, wi fi, tecnologia-e-tv, SCIENZE, TECNOLOGIA, bioetica, biologia, fisica, covid, coronavirus salute Salute, CORONAVIRUS, VAIOLO SCIMMIE, TUBERCOLOSI, SALUTE, SCABBIA, AIDS, salute, hiv, cocaina, antidepressivi, veleni, infezioni, carne, tabacco, infibulazione, fluoro, alcool, alimentari, aids, antibatterico, dieta, insetticida, cibo, benessere, farmaci, digitopressione, caffè, sigarette, ministero della salute, autismo, limoni, cure naturali, paracetamolo, cancro, antiossidante, droga, olio, medicina alternativa, fragole, vegetariano, eroina, dislessia, veleno, zenzero, virus, psicologia, biologico, magnesio, frutta, psicofarmaci, pollo al cloro, fiori di bach, medico, sonno, birra, vitamina e, ulivi, proteine, stress, banana, pensieri negativi, tumori, benzodiazepine, latte, miele, cuore, epilessia, longevità, marijuana, diabete, sale, ibernazione, vecchiaia, fegato, vegan, prevenzione, dentifricio, cervello, sistema immunitario, sodio, suicidio, rimedi naturali, maltempo, canapa, pillola, mal di gola, depressione, psiche, alimentazione, ebola, aspartame, dentifricio senza fluoro, tiroide, mangiare, cure proibite, Alzheimer, smog, gas, malattie, calamità, mammografia, verdura, aloe, masticazione, farmaco, igiene, batteri, medicina, vitamina c, epatite c, 
forfora, energia, vaccini, ormoni, flora batterica, sorbitolo, antibiotici, piedi, obesità, arsenico, cortisolo, chemioterapia, contraccezione, Neurotrasmettitori, semi, melograno, celiachia, Coca cola, salute-benessere, salute, salute-e-benessere, bellezza, dimagrante, benessere, salute-benessere, rimedi-naturali, pianeta-mamma, grano antico, acqua ossigenata, alimetnazione, ansia, dentisti, curcuma, casa-e-cucina, hobby-e-sport, SPORT, crescita-consapevolezza, la-salute-che-viene, sport, stile-di-vita, consigli, lifestyle, pomodori ambiente Cambiamenti climatici, energia, energia elettrica, Natura, AMBIENTE, ECOLOGIA, global warming, geoingegneria, alberi, pianeta terra, natura, inquinamento, mare, terra, manipolazione climatica, clima, rinnovabili, Dissesto idrogeologico, ecologia, ambiente, green, ambiente-attuale, ecologia, salute-benessere, natura, ambiente, METEO, tempesta solare, astronomia, acido economia affari-online, economia, ECONOMIA, consumi-risparmi, microchip r-fid, bollo auto, tasso d'interesse, finanza, bollette, banche, profitto, spese, economia-finanza, economia, economia, economia-dellanima, fisco-e-tasse, economia, economia, economia, economia-e-finanza</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 7</head><p>Split of the categories into the four macro-categories.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>A. Dataset Creation Details</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>A.1. Category Assessment</head><p>In Table <ref type="table">7</ref> we report how the heterogeneous categories scraped directly from the misleading websites were divided into the four macro-categories of scienza (science), salute (well-being), ambiente (environment), economia (economy).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>A.2. Annotation Guidelines</head><p>Three components of our datasets were subject to human intervention to: (i) determine if the headline was clickbait, (ii) identify the related article's spoiler, that is, the information required to satisfy the curiosity gap within the headline, and (iii) revise the headline to include the spoiler information, thereby neutralizing it. During all three annotation stages, we employed a machine-human collaboration to expedite the work of annotators. The annotators received both a score indicating how much the headline was clickbait and automatic ChatGPT gpt-3.5-turbo-0125 generated suggestions for the spoilers and the neutralized versions of the headlines. Below, we have outlined the annotation guidelines that the annotators were to follow.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Clickbait labelling</head><p>In order to select the clickbait headlines present in the scraped data, the annotators were provided with specific guidelines. Table <ref type="table">8</ref> provides the main key points taken into consideration in order to label the data.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Spoiler post-editing</head><p>For the post-editing of the spoiler, the annotator was required to spot the information gap in the headline and to check, by consulting the related article, whether the generated spoiler provided that information. If the model failed to find the proper spoiler, the annotator had to rewrite it sticking as much as possible to </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Zanzare, ecco come eliminarle senza insetticidi</head><p>Mosquitoes, this is how to eliminate them without insecticide</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Use of quotations that do not give information</head><p>Omicron, Ilaria Capua: "Ecco perché i vaccinati si infettano di più rispetto a prima" Omicron, Ilaria Capua: "This is why the vaccinated get more infected than before"</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 8</head><p>Key points used for the annotation of the dataset. Please note that some instances can exemplify more than one point. the document's text. If the spoiler was correct but added extra info, the annotator had to keep that extra information only if it was essential for having a complete headline. If the spoiler was correct, then the annotator could leave it as it was.</p><p>Neutralised Clickbait Post-Editing The annotator was required to check whether the neutralised form comprised both the headline and the spoiler information. If the spoiler was very long (e.g., long listing), then the annotator had to summarise the spoiler as much as possible aiming to embed in the final novel headline enough information to reduce or remove the information gap. If the model failed at addressing the spoiler information in the neutralised version of the headline, then the annotator had to manually add it. Moreover, the annotator was required to remove sensationalist tones as much as possible, if this tone was still creating useless curiosity in the reader.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>A.3. Author Component Instruction</head><p>Hereafter, we provide the instruction employed to automatically generate spoilers and the neutralised versions of the clickbait headlines through ChatGPT gpt-3.5-turbo-0125.</p><p>I have a clickbait headline and its corresponding article, both written in Italian.</p><p>The clickbait headline typically omits key information to create a curiosity gap for the reader. Your task is to extract this missing information, known as a "spoiler," from the article's text. The spoiler can be a single keyword, a short text passage, or a list of keywords. Once you have identified the spoiler, rewrite the clickbait headline by incorporating this information to eliminate the curiosity gap. The output must be in JSON format and written in Italian.</p><p>The JSON should include two entries: one called "spoiler" that contains the extracted spoiler(s), and another called "new_headline" that has the revised headline.</p><p>Example Input:</p><p>Clickbait headline: "Questo attore ha fatto qualcosa di incredibile sul set di un famoso film!" Article: "Durante le riprese del film 'Il Gladiatore', l'attore Russell Crowe ha deciso di fare un gesto di grande generosità donando una parte significativa del suo stipendio al fondo per i membri della troupe."</p><p>Example Output:</p><p>{"spoiler": "Russell Crowe ha donato una parte significativa del suo stipen-    </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>B.2. Dataset Excerpt Translation</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>C. Experimental Design Details</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>C.1. Question Generation</head><p>Questions were generated with ChatGPT gpt-3.5-turbo-0125 using the following prompt:</p><p>You will be provided with a clickbait headline written in Italian. Your task is to generate a question that addresses any missing or vague information in the headline. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>C.2. Spoiler Generation</head><p>For the zero-shot spoiler generation task we employed the following prompt: The same instruction was employed with the fine-tuned model. For few-shot generation of the spoiler, we enriched the instruction with two examples.</p><p>When casting spoiler generation as a Question Answering task, the following instruction was employed: Ti verrà fornita una domanda e un documento. Trova nel documento le informazioni per rispondere alla domanda. La risposta può essere un messaggio conciso oppure un elenco. Formatta la risposta nel seguente modo. "Risposta: &lt;output&gt;"</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>C.3. Fine-Tuning Details</head><p>The LLaMAntino-3-8B <ref type="bibr" target="#b30">[30]</ref> model underwent training on a single Ampere A40 GPU with 48GB of memory, employing the QLoRA strategy with a low-rank approximation of 64, a low-rank adaptation of 16, and a dropout rate of 0.1. It was set to evaluate every 50 steps, with a batch size of 4, across 3 epochs, using a learning rate of $10^{-4}$.</p><p>In the clickbait detection experiments, the DistilBERT and Llama3-8b models have been fine-tuned on the same GPU. The DistilBERT model has been trained on 10 epochs with a learning rate of $2 \cdot 10^{-4}$. For the Llama3 model, we have used QLoRA with the same characteristics as described above, trained on two epochs, with a learning rate of $2 \cdot 10^{-4}$.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>C.4. Neutralised Clickbait Generation</head><p>The following system prompt (enriched with three examples) has been utilised with LLaMAntino-3-8B: Ti verrano forniti due testi: un titolo clickbait e un testo, chiamato spoiler, che contiene le informazioni mancanti nel titolo. Il tuo compito è di riscrivere il titolo clickbait integrando le informazioni dello spoiler. Il nuovo titolo deve essere informativo, privo di toni sensazionalistici, e breve. Se Lo spoiler contine tante informazioni, puoi riassumerle in concetti più generali.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Titolo: {headline}</head><p>Spoiler: {spoiler}</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>D. Ethical Statement</head><p>No specific ethical conflicts have been reported during the development of this work. The dataset was compiled from publicly available sources. It is important to acknowledge that the examples in this document are not indicative of the authors' opinions or beliefs. Additionally, the ideas or assertions contained within these texts may be misleading or harmful; therefore, the dataset should be utilized strictly for research purposes.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_1"><head>Figure 1 :</head><label>1</label><figDesc>Figure 1:The experimental design is depicted, encompassing three tasks: clickbait detection, spoiler generation, and clickbait neutralisation. The robot icon represents the language model used for either classification or generation. We utilized DistilBERT and Llama3-8B for task 1, and LLaMAntino-3-8B for tasks 2 and 3. The models were tested for generative tasks using zero-shot, few-shot, and fine-tuning configurations, except for question rewriting, for which we employed a few-shot approach.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_2"><head></head><label></label><figDesc>(iii) headline text; (iv) article text; (v) original URL; (vi) macro category inferred from the original category extracted from the source; (vii) image URL associated with the article as specified in the source; (viii) clickbait annotation; (ix) the associated spoiler; and (x) the neutralised version of the title.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_4"><head></head><label></label><figDesc>figli</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1</head><label>1</label><figDesc>An excerpt of the presented dataset showing the most relevant fields. Article bodies are shortened for space reasons. Translated text can be found in Table9(Appendix B).</figDesc><table><row><cell>Category</cell><cell>Headline</cell><cell>Article</cell><cell>Clickbait</cell><cell>Spoiler</cell><cell>Neutralised title</cell></row><row><cell>Health</cell><cell>Frutto o fiore? gusto-</cell><cell>Tutti la conosciamo, im-</cell><cell>True</cell><cell>La fragola</cell><cell>Fragola: gustosissima e</cell></row><row><cell></cell><cell>sissima e attraente, una</cell><cell>mancabile sulle nostre</cell><cell></cell><cell></cell><cell>attraente, una celebrità</cell></row><row><cell></cell><cell>celebrità sulle nostre tav-</cell><cell>tavole, celebre in tutto</cell><cell></cell><cell></cell><cell>sulle nostre tavole</cell></row><row><cell></cell><cell>ole, sveliamo chi è</cell><cell>il mondo ma misteriosa</cell><cell></cell><cell></cell><cell></cell></row><row><cell></cell><cell></cell><cell>la sua natura, frutto da</cell><cell></cell><cell></cell><cell></cell></row><row><cell></cell><cell></cell><cell>gustare o fiore...</cell><cell></cell><cell></cell><cell></cell></row><row><cell>Science</cell><cell>Scoperto un metallo che</cell><cell>Il recente esperimento</cell><cell>True</cell><cell>Il platino</cell><cell>Il metallo che si auto-</cell></row><row><cell></cell><cell>si auto-ripara. 
Scienziati</cell><cell>ha rivelato un fenomeno</cell><cell></cell><cell></cell><cell>ripara: il platino</cell></row><row><cell></cell><cell>sbalorditi</cell><cell>straordinario...</cell><cell></cell><cell></cell><cell></cell></row><row><cell>Health</cell><cell>Una malattia che colpisce</cell><cell>Parliamo di una malat-</cell><cell>True</cell><cell>La psoriasi colpisce circa</cell><cell>La psoriasi: una malattia</cell></row><row><cell></cell><cell>500mila persone</cell><cell>tia sistemica cronica me-</cell><cell></cell><cell>500 mila persone</cell><cell>che colpisce circa 500mila</cell></row><row><cell></cell><cell></cell><cell>diata dal sistema immu-</cell><cell></cell><cell></cell><cell>persone in Italia</cell></row><row><cell></cell><cell></cell><cell>nitario che interessa...</cell><cell></cell><cell></cell><cell></cell></row><row><cell>Environment</cell><cell>Zanzare, ecco come elim-</cell><cell>Con l'arrivo del caldo, an-</cell><cell>True</cell><cell>Per eliminare una volta</cell><cell>Zanzare, ecco come elim-</cell></row><row><cell></cell><cell>inarle senza insetticidi</cell><cell>che le zanzare si fanno</cell><cell></cell><cell>per tutte le zanzare dalla</cell><cell>inarle senza insetticidi:</cell></row><row><cell></cell><cell></cell><cell>largo nelle nostre case o</cell><cell></cell><cell>vostra casa, dovreste ac-</cell><cell>basta acquistare un pip-</cell></row><row><cell></cell><cell></cell><cell>nei nostri giardini...</cell><cell></cell><cell>quistare un pipistrello</cell><cell>istrello</cell></row><row><cell cols="3">from scratch [23]. To assess the amount of post-editing</cell><cell></cell><cell></cell><cell></cell></row><row><cell cols="3">required, we employed Human-targeted Translation Edit</cell><cell></cell><cell></cell><cell></cell></row><row><cell cols="3">Rate [HTER; 24]. 
HTER quantifies the minimum edit</cell><cell></cell><cell></cell><cell></cell></row><row><cell cols="3">distance, which is the least number of editing operations</cell><cell></cell><cell></cell><cell></cell></row><row><cell cols="3">needed, between a machine-generated text and its post-</cell><cell></cell><cell></cell><cell></cell></row><row><cell cols="3">edited counterpart. HTER values exceeding 0.4 indicate</cell><cell></cell><cell></cell><cell></cell></row><row><cell cols="3">low-quality outputs; under such circumstances, rewrit-</cell><cell></cell><cell></cell><cell></cell></row><row><cell cols="3">ing the text from scratch or extensive post-editing would</cell><cell></cell><cell></cell><cell></cell></row><row><cell cols="2">necessitate comparable effort</cell><cell></cell><cell></cell><cell></cell><cell></cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_1"><head>Table 2</head><label>2</label><figDesc>Size of the presented dataset, considering both golden and silver sets.</figDesc><table><row><cell>1,327</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_3"><head>Table 4</head><label>4</label><figDesc>Results for Clickbait detection. The 'Test' and 'Train' columns indicate the languages of the test and train sets, respectively.</figDesc><table><row><cell cols="2">Test Train</cell><cell>Model</cell><cell>Prec.</cell><cell cols="2">Rec. M-F1</cell></row><row><cell>EN</cell><cell>EN</cell><cell>DistilBERT Llama3</cell><cell>67.15 68.42</cell><cell>70.34 66.46</cell><cell>66.94 67.18</cell></row><row><cell></cell><cell>EN+IT</cell><cell>DistilBERT Llama3</cell><cell cols="3">70.28 71.20 71.15 71.15 70.14 70.12</cell></row><row><cell>IT</cell><cell>IT</cell><cell>DistilBERT Llama3</cell><cell>68.85 66.96</cell><cell>70.47 67.19</cell><cell>68.65 67.07</cell></row><row><cell></cell><cell>EN+IT</cell><cell>DistilBERT Llama3</cell><cell cols="3">72.87 76.32 75.51 75.50 74.85 71.77</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_6"><head>Additional Dataset Details B.1. Dataset Visualisation</head><label></label><figDesc>Frequency of words for both clickbait and non-clickbait categories. On the right, most frequent words for each class, and both (Characteristic). An interactive version of the graph can be accessed at the following link https://oaraque.github.io/clickIT/clickbait.html</figDesc><table><row><cell>Search the chart</cell></row><row><cell>Non-Clickbait document count: 481; word count: 5,642</cell></row><row><cell>Clickbait document count: 846; word count: 10,647</cell></row><row><cell>Figure 2: dio al fondo per i membri della troupe",</cell></row><row><cell>"new_headline": "Russell Crowe ha fatto</cell></row><row><cell>qualcosa di incredibile sul set di 'Il Gladi-</cell></row><row><cell>atore': ha donato una parte significativa</cell></row><row><cell>del suo stipendio al fondo per i membri</cell></row><row><cell>della troupe"}</cell></row><row><cell>Please ensure the output is formatted in</cell></row><row><cell>JSON as specified and that all content is</cell></row><row><cell>in Italian.</cell></row><row><cell>Now do it for the following headline.</cell></row><row><cell>Clickbait headline: "{headline}"</cell></row><row><cell>Article:"{article}"</cell></row><row><cell>Figure 2 shows a frequency-based visualization of the</cell></row><row><cell>dataset. It considers the frequency of appearance of rel-</cell></row><row><cell>evant uni and bi-grams for both the clickbait and non-</cell></row><row><cell>clickbait categories. The figure shows common strategies</cell></row><row><cell>that are frequent in clickbait content, such as the use of</cell></row><row><cell>"ecco cosa" (this is what) or "quali sono" (what are) that</cell></row><row><cell>can be seen in the lower right part.</cell></row></table><note>B.</note></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_7"><head>Table 9</head><label>9</label><figDesc></figDesc><table /><note>includes the English translations for the Italian examples presented in Table 1.</note></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_8"><head>Table 9</head><label>9</label><figDesc>Translated from the original Italian. An excerpt of the presented dataset showing the most relevant fields. Article bodies are shortened for space reasons.</figDesc><table><row><cell>Here are some examples:</cell></row><row><cell>Headline: Si chiama la benedizione di Dio:</cell></row><row><cell>rimuove l'alta pressione, il diabete e il</cell></row><row><cell>grasso nel sangue Question: Che cosa</cell></row><row><cell>viene chiamato 'benedizione di Dio'?</cell></row><row><cell>Headline: "Emorragia cerebrale". Italia in</cell></row><row><cell>apprensione per il suo campione: ricover-</cell></row><row><cell>ato in condizioni gravissime</cell></row><row><cell>Question: Chi è il campione?</cell></row><row><cell>Please generate the question in Italian, en-</cell></row><row><cell>suring it seeks to clarify the ambiguous or</cell></row><row><cell>incomplete details present in the headline.</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_9"><head></head><label></label><figDesc>Ti verranno forniti un titolo clickbait e il suo articolo corrispondente. Il titolo clickbait di solito omette, o non esplicita, informazioni chiave per creare curiosità nel lettore. Estrai dall'articolo le informazioni mancanti o vaghe nel titolo che servono per colmare questa curiosità.</figDesc><table><row><cell>La risposta</cell></row><row><cell>può essere un messaggio estremamente</cell></row><row><cell>coinciso oppure un elenco. Formatta la</cell></row><row><cell>risposta nel seguente modo. "Risposta:</cell></row><row><cell>&lt;output&gt;"</cell></row><row><cell>Titolo: {headline}</cell></row><row><cell>Articolo: {article}</cell></row></table></figure>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="7" xml:id="foot_0">https://huggingface.co/distilbert-base-multilingual-cased</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="8" xml:id="foot_1">https://huggingface.co/meta-llama/Meta-Llama-3-8B</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="9" xml:id="foot_2">https://huggingface.co/nickprock/sentence-bert-base-italian-xxl-uncased</note>
		</body>
		<back>

			<div type="acknowledgement">
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Acknowledgments</head><p>This work was partly supported by: the AI4TRUST project -AI-based-technologies for trustworthy solutions against disinformation (ID: 101070190), the European Union's CERV fund under grant agreement No. 101143249 (HATEDEMICS), the European Union's Horizon Europe research and innovation programme under grant agreement No. 101135437 (AI-CODE). Oscar Araque acknowledges the support of the project UNICO I+D Cloud -AMOR, financed by the Ministry of Economic Affairs and Digital Transformation, and the European Union through Next Generation EU; as well as the support of the project CPP2023-010437 financed by the MCIN / AEI / 10.13039/501100011033 / FEDER, UE.</p></div>
			</div>

			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">You won&apos;t believe what&apos;s in this paper! clickbait, relevance and the curiosity gap</title>
		<author>
			<persName><forename type="first">K</forename><surname>Scott</surname></persName>
		</author>
		<idno type="DOI">10.1016/j.pragma.2020.12.023</idno>
		<ptr target="https://doi.org/10.1016/j.pragma.2020.12.023" />
	</analytic>
	<monogr>
		<title level="j">Journal of Pragmatics</title>
		<imprint>
			<biblScope unit="volume">175</biblScope>
			<biblScope unit="page" from="53" to="66" />
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">Click bait: Forward-reference as lure in online news headlines</title>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">N</forename><surname>Blom</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><forename type="middle">R</forename><surname>Hansen</surname></persName>
		</author>
		<idno type="DOI">10.1016/j.pragma.2014.11.010</idno>
		<ptr target="https://doi.org/10.1016/j.pragma.2014.11.010" />
	</analytic>
	<monogr>
		<title level="j">Journal of Pragmatics</title>
		<imprint>
			<biblScope unit="volume">76</biblScope>
			<biblScope unit="page" from="87" to="100" />
			<date type="published" when="2015">2015</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<monogr>
		<title/>
		<idno type="DOI">10.1016/j.pragma.2014.11.010</idno>
		<ptr target="https://doi.org/10.1016/j.pragma.2014.11.010" />
		<imprint/>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">The psychology of curiosity: A review and reinterpretation</title>
		<author>
			<persName><forename type="first">G</forename><surname>Loewenstein</surname></persName>
		</author>
		<idno type="DOI">10.1037/0033-2909.116.1.75</idno>
	</analytic>
	<monogr>
		<title level="j">Psychological Bulletin</title>
		<imprint>
			<biblScope unit="volume">116</biblScope>
			<biblScope unit="page" from="75" to="98" />
			<date type="published" when="1994">1994</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">When everything stands out, nothing does</title>
		<author>
			<persName><forename type="first">K</forename><surname>Scott</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Jackson</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Relevance theory, figuration, and continuity in pragmatics</title>
				<imprint>
			<date type="published" when="2020">2020</date>
			<biblScope unit="volume">8</biblScope>
			<biblScope unit="page" from="167" to="192" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">&quot;Deceptive&quot; clickbait headlines: Relevance, intentions, and lies</title>
		<author>
			<persName><forename type="first">K</forename><surname>Scott</surname></persName>
		</author>
		<idno type="DOI">10.1016/j.pragma.2023.10.004</idno>
		<ptr target="https://doi.org/10.1016/j.pragma.2023.10.004" />
	</analytic>
	<monogr>
		<title level="j">Journal of Pragmatics</title>
		<imprint>
			<biblScope unit="volume">218</biblScope>
			<biblScope unit="page" from="71" to="82" />
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">The web of false information: Rumors, fake news, hoaxes, clickbait, and various other shenanigans</title>
		<author>
			<persName><forename type="first">S</forename><surname>Zannettou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Sirivianos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Blackburn</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Kourtellis</surname></persName>
		</author>
		<idno type="DOI">10.1145/3309699</idno>
		<ptr target="https://doi.org/10.1145/3309699" />
	</analytic>
	<monogr>
		<title level="j">J. Data and Information Quality</title>
		<imprint>
			<biblScope unit="volume">11</biblScope>
			<date type="published" when="2019">2019</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">Fake news, disinformation and misinformation in social media: a review</title>
		<author>
			<persName><forename type="first">E</forename><surname>Aïmeur</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Amri</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Brassard</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Social Network Analysis and Mining</title>
		<imprint>
			<biblScope unit="volume">13</biblScope>
			<biblScope unit="page">30</biblScope>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">Clickbait detection</title>
		<author>
			<persName><forename type="first">M</forename><surname>Potthast</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Köpsel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Stein</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Hagen</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Advances in Information Retrieval: 38th European Conference on IR Research, ECIR 2016</title>
				<meeting><address><addrLine>Padua, Italy</addrLine></address></meeting>
		<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2016">March 20-23, 2016. 2016</date>
			<biblScope unit="page" from="810" to="817" />
		</imprint>
	</monogr>
	<note>Proceedings 38</note>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">BERT, XLNet or RoBERTa: The best transfer learning model to detect clickbaits</title>
		<author>
			<persName><forename type="first">P</forename><surname>Rajapaksha</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Farahbakhsh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Crespi</surname></persName>
		</author>
		<idno type="DOI">10.1109/ACCESS.2021.3128742</idno>
	</analytic>
	<monogr>
		<title level="j">IEEE Access</title>
		<imprint>
			<biblScope unit="volume">9</biblScope>
			<biblScope unit="page" from="154704" to="154716" />
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">Clickbait spoiling via question answering and passage retrieval</title>
		<author>
			<persName><forename type="first">M</forename><surname>Hagen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Fröbe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Jurk</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Potthast</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/2022.acl-long.484</idno>
		<ptr target="https://aclanthology.org/2022.acl-long.484" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
				<editor>
			<persName><forename type="first">S</forename><surname>Muresan</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">P</forename><surname>Nakov</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><surname>Villavicencio</surname></persName>
		</editor>
		<meeting>the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)<address><addrLine>Dublin, Ireland</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2022">2022</date>
			<biblScope unit="page" from="7025" to="7036" />
		</imprint>
	</monogr>
	<note>Association for Computational Linguistics</note>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title level="a" type="main">Clickbait detection using deep learning</title>
		<author>
			<persName><forename type="first">A</forename><surname>Agrawal</surname></persName>
		</author>
		<idno type="DOI">10.1109/NGCT.2016.7877426</idno>
	</analytic>
	<monogr>
		<title level="m">2016 2nd International Conference on Next Generation Computing Technologies (NGCT)</title>
				<imprint>
			<date type="published" when="2016">2016</date>
			<biblScope unit="page" from="268" to="272" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<analytic>
		<title level="a" type="main">Detecting clickbaits using two-phase hybrid cnn-lstm biterm model</title>
		<author>
			<persName><forename type="first">S</forename><surname>Kaur</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Kumar</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Kumaraguru</surname></persName>
		</author>
		<idno type="DOI">10.1016/j.eswa.2020.113350</idno>
		<ptr target="https://doi.org/10.1016/j.eswa.2020.113350" />
	</analytic>
	<monogr>
		<title level="j">Expert Systems with Applications</title>
		<imprint>
			<biblScope unit="volume">151</biblScope>
			<biblScope unit="page">113350</biblScope>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b13">
	<analytic>
		<title level="a" type="main">SemEval-2023 task 5: Clickbait spoiling</title>
		<author>
			<persName><forename type="first">M</forename><surname>Fröbe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Stein</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Gollub</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Hagen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Potthast</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/2023.semeval-1.312</idno>
		<ptr target="https://aclanthology.org/2023.semeval-1.312" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023), Association for Computational Linguistics</title>
				<editor>
			<persName><forename type="first">A</forename><forename type="middle">K</forename><surname>Ojha</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><forename type="middle">S</forename><surname>Doğruöz</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">G</forename><surname>Da San Martino</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">H</forename><surname>Tayyar Madabushi</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">R</forename><surname>Kumar</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">E</forename><surname>Sartori</surname></persName>
		</editor>
		<meeting>the 17th International Workshop on Semantic Evaluation (SemEval-2023), Association for Computational Linguistics<address><addrLine>Toronto, Canada</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2023">2023</date>
			<biblScope unit="page" from="2275" to="2286" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b14">
	<monogr>
		<ptr target="https://aclanthology.org/2023.semeval-1.0" />
		<title level="m">Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023), Association for Computational Linguistics</title>
				<editor>
			<persName><forename type="first">A</forename><forename type="middle">K</forename><surname>Ojha</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><forename type="middle">S</forename><surname>Doğruöz</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">G</forename><surname>Da San Martino</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">H</forename><surname>Tayyar Madabushi</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">R</forename><surname>Kumar</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">E</forename><surname>Sartori</surname></persName>
		</editor>
		<meeting>the 17th International Workshop on Semantic Evaluation (SemEval-2023), Association for Computational Linguistics<address><addrLine>Toronto, Canada</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b15">
	<analytic>
		<title level="a" type="main">TohokuNLP at SemEval-2023 task 5: Clickbait spoiling via simple Seq2Seq generation and ensembling</title>
		<author>
			<persName><forename type="first">H</forename><surname>Kurita</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Ito</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Funayama</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Sasaki</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Moriya</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Mengyu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Kokuta</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Hatakeyama</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Sone</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Inui</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/2023.semeval-1.243</idno>
		<ptr target="https://aclanthology.org/2023.semeval-1.243" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023), Association for Computational Linguistics</title>
				<editor>
			<persName><forename type="first">A</forename><forename type="middle">K</forename><surname>Ojha</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><forename type="middle">S</forename><surname>Doğruöz</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">G</forename><surname>Da San Martino</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">H</forename><surname>Tayyar Madabushi</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">R</forename><surname>Kumar</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">E</forename><surname>Sartori</surname></persName>
		</editor>
		<meeting>the 17th International Workshop on Semantic Evaluation (SemEval-2023), Association for Computational Linguistics<address><addrLine>Toronto, Canada</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2023">2023</date>
			<biblScope unit="page" from="1756" to="1762" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b16">
	<analytic>
		<title level="a" type="main">Clickbait detection on wechat: A deep model integrating semantic and syntactic information</title>
		<author>
			<persName><forename type="first">T</forename><surname>Liu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Yu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Wang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Zhou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Wu</surname></persName>
		</author>
		<idno type="DOI">10.1016/j.knosys.2022.108605</idno>
		<ptr target="https://doi.org/10.1016/j.knosys.2022.108605" />
	</analytic>
	<monogr>
		<title level="j">Knowledge-Based Systems</title>
		<imprint>
			<biblScope unit="volume">245</biblScope>
			<biblScope unit="page">108605</biblScope>
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b17">
	<analytic>
		<title level="a" type="main">Clickbaittr: Dataset for clickbait detection from turkish news sites and social media with a comparative analysis via machine learning algorithms</title>
		<author>
			<persName><forename type="first">Şura</forename><surname>Genç</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Surer</surname></persName>
		</author>
		<idno type="DOI">10.1177/01655515211007746</idno>
	</analytic>
	<monogr>
		<title level="j">Journal of Information Science</title>
		<imprint>
			<biblScope unit="volume">49</biblScope>
			<biblScope unit="page" from="480" to="499" />
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b18">
	<analytic>
		<title level="a" type="main">A clickbait detection method on news sites</title>
		<author>
			<persName><forename type="first">A</forename><surname>Geçkil</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">A</forename><surname>Müngen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Gündogan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Kaya</surname></persName>
		</author>
		<idno type="DOI">10.1109/ASONAM.2018.8508452</idno>
	</analytic>
	<monogr>
		<title level="m">IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining (ASONAM)</title>
				<imprint>
			<date type="published" when="2018">2018</date>
			<biblScope unit="page" from="932" to="937" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b19">
	<analytic>
		<title level="a" type="main">Rumor and clickbait detection by combining information divergence measures and deep learning techniques</title>
		<author>
			<persName><forename type="first">C</forename><surname>Oliva</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Palacio-Marín</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">F</forename><surname>Lago-Fernández</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Arroyo</surname></persName>
		</author>
		<idno type="DOI">10.1145/3538969.3543791</idno>
		<ptr target="https://doi.org/10.1145/3538969.3543791" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 17th International Conference on Availability, Reliability and Security, ARES &apos;22</title>
				<meeting>the 17th International Conference on Availability, Reliability and Security, ARES &apos;22<address><addrLine>New York, NY, USA</addrLine></address></meeting>
		<imprint>
			<publisher>Association for Computing Machinery</publisher>
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b20">
	<monogr>
		<author>
			<persName><forename type="first">I</forename><surname>García-Ferrero</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Altuna</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2404.07611</idno>
		<title level="m">Noticia: A clickbait article summarization dataset in spanish</title>
				<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
	<note type="report_type">arXiv preprint</note>
</biblStruct>

<biblStruct xml:id="b21">
	<analytic>
		<title level="a" type="main">Debunker assistant: a support for detecting online misinformation</title>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">T E</forename><surname>Capozzi Lupi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">T</forename><surname>Cignarella</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Frenda</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Lai</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">A</forename><surname>Stranisci</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Urbinati</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Ninth Italian Conference on Computational Linguistics (CLiC-it 2023)</title>
				<editor>
			<persName><forename type="first">Federico</forename><surname>Boschetti</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">Gianluca</forename><forename type="middle">E</forename><surname>Lebani</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">Bernardo</forename><surname>Magnini</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">Nicole</forename><surname>Novielli</surname></persName>
		</editor>
		<meeting>the Ninth Italian Conference on Computational Linguistics (CLiC-it 2023)</meeting>
		<imprint>
			<date type="published" when="2023">2023</date>
			<biblScope unit="page" from="1" to="5" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b22">
	<analytic>
		<title level="a" type="main">Generating counter narratives against online hate speech: Data and strategies</title>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">S</forename><surname>Tekiroğlu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y.-L</forename><surname>Chung</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Guerini</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/2020.acl-main.110</idno>
		<ptr target="https://aclanthology.org/2020.acl-main.110" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, Association for Computational Linguistics</title>
				<editor>
			<persName><forename type="first">D</forename><surname>Jurafsky</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">J</forename><surname>Chai</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Schluter</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">J</forename><surname>Tetreault</surname></persName>
		</editor>
		<meeting>the 58th Annual Meeting of the Association for Computational Linguistics, Association for Computational Linguistics</meeting>
		<imprint>
			<date type="published" when="2020">2020</date>
			<biblScope unit="page" from="1177" to="1190" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b23">
	<analytic>
		<title level="a" type="main">Countering misinformation via emotional response generation</title>
		<author>
			<persName><forename type="first">D</forename><surname>Russo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Kaszefski-Yaschuk</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Staiano</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Guerini</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/2023.emnlp-main.703</idno>
		<ptr target="https://aclanthology.org/2023.emnlp-main.703" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, Association for Computational Linguistics</title>
				<editor>
			<persName><forename type="first">H</forename><surname>Bouamor</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">J</forename><surname>Pino</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">K</forename><surname>Bali</surname></persName>
		</editor>
		<meeting>the 2023 Conference on Empirical Methods in Natural Language Processing, Association for Computational Linguistics<address><addrLine>Singapore</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2023">2023</date>
			<biblScope unit="page" from="11476" to="11492" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b24">
	<analytic>
		<title level="a" type="main">A study of translation edit rate with targeted human annotation</title>
		<author>
			<persName><forename type="first">M</forename><surname>Snover</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Dorr</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Schwartz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Micciulla</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Makhoul</surname></persName>
		</author>
		<ptr target="https://aclanthology.org/2006.amta-papers.25" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 7th Conference of the Association for Machine Translation in the Americas: Technical Papers, Association for Machine Translation in the Americas</title>
				<meeting>the 7th Conference of the Association for Machine Translation in the Americas: Technical Papers, Association for Machine Translation in the Americas<address><addrLine>Cambridge, Massachusetts, USA</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2006">2006</date>
			<biblScope unit="page" from="223" to="231" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b25">
	<analytic>
		<title level="a" type="main">Coping with the subjectivity of human judgements in MT quality estimation</title>
		<author>
			<persName><forename type="first">M</forename><surname>Turchi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Negri</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Federico</surname></persName>
		</author>
		<ptr target="https://aclanthology.org/W13-2231" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Eighth Workshop on Statistical Machine Translation, Association for Computational Linguistics</title>
				<meeting>the Eighth Workshop on Statistical Machine Translation, Association for Computational Linguistics<address><addrLine>Sofia, Bulgaria</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2013">2013</date>
			<biblScope unit="page" from="240" to="251" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b26">
	<monogr>
		<author>
			<persName><forename type="first">A</forename><surname>Ramponi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Casula</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Menini</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2406.17647</idno>
		<ptr target="https://arxiv.org/abs/2406.17647" />
		<title level="m">Variationist: Exploring multifaceted variation and bias in written language data</title>
				<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
	<note type="report_type">arXiv preprint</note>
</biblStruct>

<biblStruct xml:id="b27">
	<analytic>
		<title level="a" type="main">Crowdsourcing a Large Corpus of Clickbait on Twitter</title>
		<author>
			<persName><forename type="first">M</forename><surname>Potthast</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Gollub</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Komlossy</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Schuster</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Wiegmann</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Garces Fernandez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Hagen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Stein</surname></persName>
		</author>
		<ptr target="https://aclanthology.org/C18-1127/" />
	</analytic>
	<monogr>
		<title level="m">27th International Conference on Computational Linguistics (COLING 2018), Association for Computational Linguistics</title>
				<editor>
			<persName><forename type="first">E</forename><surname>Bender</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">L</forename><surname>Derczynski</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">P</forename><surname>Isabelle</surname></persName>
		</editor>
		<imprint>
			<date type="published" when="2018">2018</date>
			<biblScope unit="page" from="1498" to="1507" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b28">
	<analytic>
		<title level="a" type="main">Towards a multilingual system for vaccine hesitancy using a data mixture approach</title>
		<author>
			<persName><forename type="first">O</forename><surname>Araque</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">F L</forename><surname>Corniel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Kalimeri</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 9th Italian Conference on Computational Linguistics</title>
				<meeting>the 9th Italian Conference on Computational Linguistics</meeting>
		<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b29">
	<monogr>
		<author>
			<persName><forename type="first">V</forename><surname>Sanh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Debut</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Chaumond</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Wolf</surname></persName>
		</author>
		<idno type="arXiv">arXiv:1910.01108</idno>
		<title level="m">Distilbert, a distilled version of bert: smaller, faster, cheaper and lighter</title>
				<imprint>
			<date type="published" when="2019">2019</date>
		</imprint>
	</monogr>
	<note type="report_type">arXiv preprint</note>
</biblStruct>

<biblStruct xml:id="b30">
	<monogr>
		<author>
			<persName><forename type="first">M</forename><surname>Polignano</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Basile</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Semeraro</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2405.07101</idno>
		<title level="m">Advanced natural-based interaction for the italian language: Llamantino-3-anita</title>
				<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
	<note type="report_type">arXiv preprint</note>
</biblStruct>

<biblStruct xml:id="b31">
	<monogr>
		<author>
			<persName><forename type="first">M</forename><surname>Woźny</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Lango</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2405.16284</idno>
		<title level="m">Generating clickbait spoilers with an ensemble of large language models</title>
				<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
	<note type="report_type">arXiv preprint</note>
</biblStruct>

<biblStruct xml:id="b32">
	<analytic>
		<title level="a" type="main">Rouge: A package for automatic evaluation of summaries</title>
		<author>
			<persName><forename type="first">C.-Y</forename><surname>Lin</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Text summarization branches out</title>
				<imprint>
			<date type="published" when="2004">2004</date>
			<biblScope unit="page" from="74" to="81" />
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
