<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">Overview of MultiCardioNER Task at BioASQ 2024 on Medical Specialty and Language Adaptation of Clinical NER Systems for Spanish, English and Italian</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Salvador</forename><surname>Lima-López</surname></persName>
							<email>salvador.limalopez@bsc.es</email>
							<affiliation key="aff0">
								<orgName type="department">Barcelona Supercomputing Center</orgName>
								<address>
									<addrLine>Plaça Eusebi Güell, 1-3</addrLine>
									<postCode>08034</postCode>
									<settlement>Barcelona</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Eulàlia</forename><surname>Farré-Maduell</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">Barcelona Supercomputing Center</orgName>
								<address>
									<addrLine>Plaça Eusebi Güell, 1-3</addrLine>
									<postCode>08034</postCode>
									<settlement>Barcelona</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Jan</forename><surname>Rodríguez-Miret</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">Barcelona Supercomputing Center</orgName>
								<address>
									<addrLine>Plaça Eusebi Güell, 1-3</addrLine>
									<postCode>08034</postCode>
									<settlement>Barcelona</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Miguel</forename><surname>Rodríguez-Ortega</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">Barcelona Supercomputing Center</orgName>
								<address>
									<addrLine>Plaça Eusebi Güell, 1-3</addrLine>
									<postCode>08034</postCode>
									<settlement>Barcelona</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Livia</forename><surname>Lilli</surname></persName>
							<email>livia.lilli@policlinicogemelli.it</email>
							<affiliation key="aff1">
								<orgName type="department" key="dep1">Real World Data Facility</orgName>
								<orgName type="department" key="dep2">Gemelli Generator</orgName>
								<orgName type="institution">Fondazione Policlinico Universitario Agostino Gemelli IRCCS</orgName>
								<address>
									<postCode>00168</postCode>
									<settlement>Rome</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
							<affiliation key="aff2">
								<orgName type="institution">Catholic University of the Sacred Heart</orgName>
								<address>
									<postCode>00168</postCode>
									<settlement>Rome</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Jacopo</forename><surname>Lenkowicz</surname></persName>
							<email>jacopo.lenkowicz@policlinicogemelli.it</email>
							<affiliation key="aff1">
								<orgName type="department" key="dep1">Real World Data Facility</orgName>
								<orgName type="department" key="dep2">Gemelli Generator</orgName>
								<orgName type="institution">Fondazione Policlinico Universitario Agostino Gemelli IRCCS</orgName>
								<address>
									<postCode>00168</postCode>
									<settlement>Rome</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Giovanna</forename><surname>Ceroni</surname></persName>
							<email>g.ceroni@ucl.ac.uk</email>
							<affiliation key="aff3">
								<orgName type="institution">University College London</orgName>
								<address>
									<country key="GB">UK</country>
								</address>
							</affiliation>
							<affiliation key="aff4">
								<orgName type="institution">University College London Hospitals NHS Foundation Trust</orgName>
								<address>
									<country key="GB">UK</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Jonathan</forename><surname>Kossoff</surname></persName>
							<email>j.kossoff@nhs.net</email>
							<affiliation key="aff4">
								<orgName type="institution">University College London Hospitals NHS Foundation Trust</orgName>
								<address>
									<country key="GB">UK</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Anoop</forename><surname>Shah</surname></persName>
							<email>a.shah@ucl.ac.uk</email>
							<affiliation key="aff3">
								<orgName type="institution">University College London</orgName>
								<address>
									<country key="GB">UK</country>
								</address>
							</affiliation>
							<affiliation key="aff4">
								<orgName type="institution">University College London Hospitals NHS Foundation Trust</orgName>
								<address>
									<country key="GB">UK</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Anastasios</forename><surname>Nentidis</surname></persName>
							<affiliation key="aff5">
								<orgName type="institution">National Center for Scientific Research &quot;Demokritos&quot;</orgName>
								<address>
									<settlement>Athens</settlement>
									<country key="GR">Greece</country>
								</address>
							</affiliation>
							<affiliation key="aff6">
								<orgName type="institution">Aristotle University of Thessaloniki</orgName>
								<address>
									<settlement>Thessaloniki</settlement>
									<country key="GR">Greece</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Anastasia</forename><surname>Krithara</surname></persName>
							<email>akrithara@iit.demokritos.gr</email>
							<affiliation key="aff5">
								<orgName type="institution">National Center for Scientific Research &quot;Demokritos&quot;</orgName>
								<address>
									<settlement>Athens</settlement>
									<country key="GR">Greece</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Georgios</forename><surname>Katsimpras</surname></persName>
							<email>gkatsibras@iit.demokritos.gr</email>
							<affiliation key="aff5">
								<orgName type="institution">National Center for Scientific Research &quot;Demokritos&quot;</orgName>
								<address>
									<settlement>Athens</settlement>
									<country key="GR">Greece</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Georgios</forename><surname>Paliouras</surname></persName>
							<email>paliourg@iit.demokritos.gr</email>
							<affiliation key="aff5">
								<orgName type="institution">National Center for Scientific Research &quot;Demokritos&quot;</orgName>
								<address>
									<settlement>Athens</settlement>
									<country key="GR">Greece</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Martin</forename><surname>Krallinger</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">Barcelona Supercomputing Center</orgName>
								<address>
									<addrLine>Plaça Eusebi Güell, 1-3</addrLine>
									<postCode>08034</postCode>
									<settlement>Barcelona</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">Overview of MultiCardioNER Task at BioASQ 2024 on Medical Specialty and Language Adaptation of Clinical NER Systems for Spanish, English and Italian</title>
					</analytic>
					<monogr>
						<idno type="ISSN">1613-0073</idno>
					</monogr>
					<idno type="MD5">70399BB2F6D82B17DFFFAB69C4DF29EC</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2025-04-23T17:52+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>named entity recognition</term>
					<term>cardiology</term>
					<term>subdomain adaptation</term>
					<term>multilingual</term>
					<term>clinical NLP</term>
					<term>0000-0002-7384-1877 (S. Lima-López)</term>
					<term>0009-0000-0793-981X (J. Rodríguez-Miret)</term>
					<term>0009-0000-0188-079X (M. Rodríguez-Ortega)</term>
					<term>0009-0005-3319-7211 (L. Lilli)</term>
					<term>0000-0002-8366-1474 (J. Lenkowicz)</term>
					<term>0000-0002-8907-5724 (A. Shah)</term>
					<term>0000-0002-3782-4412 (A. Nentidis)</term>
					<term>0000-0003-0491-4507 (A. Krithara)</term>
					<term>0000-0003-3697-941X (G. Katsimpras)</term>
					<term>0000-0001-9629-2367 (G. Paliouras)</term>
					<term>0000-0002-2646-8782 (M. Krallinger)</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>Transformers and large language models (LLMs) are increasingly used for clinical data analysis, mostly in English, but also in many other languages used within medical care systems. To comply with clinical standards, it is critical to evaluate the generated results by means of benchmarking efforts based on high-quality manually annotated corpora. To foster the adaptation of general clinical natural language processing (NLP) components to the characteristics of medical specialties, as well as exploring cross-language adaptation techniques, we propose the MultiCardioNER task at BioASQ 2024. MultiCardioNER focuses on the adaptation of named entity recognition (NER) systems trained on multispecialty clinical case reports to cardiology, since cardiovascular diseases are the leading cause of death globally. The MultiCardioNER task covered two entity types (diseases and medications) in case reports written in three languages (Spanish, English and Italian). To generate a comparable Gold Standard clinical NER corpus, we used neural translation, annotation projection and manual annotation correction by domain experts. Top scoring teams reached very competitive results for disease (F1-score 0.8199) and medication mentions (0.9277) in Spanish and also obtained very competitive scores for English (F1-score 0.9223) and Italian (F1-score 0.8842). These results suggest that adaptation of general clinical NLP components to a specific clinical specialty can improve the overall results and that cross-language adaptation of clinical NLP components using neural translation and expert-in-the-loop annotation might speed up the implementation of clinical entity extraction systems. The MultiCardioNER corpora, as well as a silver standard made up of predictions of participating systems over the background set, are available at: https://zenodo.org/records/11368861.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1.">Introduction</head><p>Cardiovascular diseases (CVDs) represent a leading cause of death and morbidity worldwide and, therefore, are responsible for considerable disability costs every year. Cardiology is a medical field with its own concepts and expressions of great relevance, as cardiovascular diseases are highly prevalent worldwide. Analysis of unstructured medical data, such as clinical notes or medical publications, may provide an opportunity to improve the characterization of cardiac pathologies. The extraction of clinical variables from medical content is key to enabling healthcare data analytics, improving patient care and advancing precision medicine. Information contained only as free text within Electronic Medical Records is currently mostly unused due to the difficulty of extracting relevant data from very diversely written data sources. Additionally, the distinctive language of each medical specialty calls for more specialized automatic semantic annotation resources in English and any language used in clinical services.</p><p>Due to its importance and the need to improve the extraction, use, and ultimately exploitation of patient data suffering from cardiovascular conditions, efforts have been made to implement natural language processing (NLP) solutions to classify or extract key variables from cardiology clinical content. Using results generated by NLP technologies might contribute to improving outcomes and understanding disease in cardiology. In order to account for the diversity and heterogeneity of NLP research applied to cardiology, several review articles have tried to systematically characterize the various NLP application scenarios adapted to handle cardiovascular disease medical documents <ref type="bibr" target="#b0">[1]</ref>. 
These included applications related to heart failure <ref type="bibr" target="#b1">[2]</ref>, coronary artery disease, general cardiology or valvular heart disease <ref type="bibr" target="#b2">[3,</ref><ref type="bibr" target="#b3">4]</ref>. Efforts were also made to extract, by means of NLP tools, symptoms <ref type="bibr" target="#b4">[5,</ref><ref type="bibr" target="#b5">6]</ref>, vital signs <ref type="bibr" target="#b6">[7]</ref>, heart function measurements <ref type="bibr" target="#b7">[8]</ref>, risk factors <ref type="bibr" target="#b3">[4]</ref>, or cardiovascular comorbidities <ref type="bibr" target="#b8">[9]</ref> as well as social risk factors <ref type="bibr" target="#b9">[10]</ref> or diagnostic codes of common cardiovascular diseases <ref type="bibr" target="#b10">[11]</ref>. Some other attempts were also made to explore the use of NLP approaches to extract Framingham criteria <ref type="bibr" target="#b11">[12]</ref> or New York Heart Association classifications from unstructured clinical notes <ref type="bibr" target="#b12">[13,</ref><ref type="bibr" target="#b13">14,</ref><ref type="bibr" target="#b14">15]</ref>.</p><p>General clinical domain pre-training does not necessarily transfer well to all medical sub-specialties or disciplines because of the use of highly specialized medical language, as encountered in cardiology clinical case reports or cardiology clinical notes. Domain adaptation strategies may have a great potential to improve NLP solutions for practical settings, real-world scenarios and industrial applications <ref type="bibr" target="#b15">[16]</ref>. 
Also, adaptation of clinical NLP solutions across languages other than English is necessary and requires collaboration between researchers to accelerate progress in non-English clinical NLP <ref type="bibr" target="#b16">[17]</ref>.</p><p>In the case of Spanish, general clinical NLP datasets and resources, such as the DisTEMIST, SympTEMIST, PharmaCoNER, and MedProcNER corpora and systems, have been released. However, (a) the interplay and complementarity of multi-label entity extraction approaches were neither targeted nor evaluated, and (b) how such approaches could be adapted to handle multiple languages was not tested.</p><p>To address these issues and promote the development of comparable clinical NLP components adapted to a specific clinical domain across several languages, we have organized the MultiCardioNER shared task. This paper presents an overview of the data, methodologies and results of MultiCardioNER. It is structured as follows: Section 2 introduces the shared task, including its sub-tasks and evaluation methods. Next, Section 3 describes the different corpora used as part of MultiCardioNER, namely DisTEMIST, DrugTEMIST and CardioCCC, as well as other associated resources, while Section 4 presents the participation results and proposed methodologies. Finally, Section 5 concludes the paper with a discussion of some of the most interesting aspects, learned lessons, future work and more.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.">Task description</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.1.">Shared task description</head><p>The MultiCardioNER task participants were asked to implement named entity recognition (NER) systems using a general clinical corpus annotated with disease and medication mentions. They were then required to adapt these NER systems to a particular medical specialty, namely cardiology. In addition, the MultiCardioNER task also explored the creation of clinical multilingual NER components or cross-language adaptation of these systems for three languages: Spanish, English and Italian.</p><p>The MultiCardioNER task relied on a previous resource exploited in a past shared task, called DisTEMIST <ref type="bibr" target="#b17">[18]</ref>. The DisTEMIST corpus is a collection of 1,000 clinical case reports covering a wide range of specialties annotated for diseases by clinical experts. Furthermore, a previously unreleased corpus called DrugTEMIST was published as part of the task. The DrugTEMIST corpus provides drug or medication mention annotations for the same collection of clinical case reports as used for the DisTEMIST dataset.</p><p>For the adaptation to cardiology, we have constructed the CardioCCC corpus, a new dataset that consists of manually selected cardiology clinical case reports showing similar characteristics as cardiology discharge summaries. This resource was provided to participants to enable the exploration of different clinical subdomain/specialty adaptation strategies and to benchmark the resulting systems. To foster the generation of multilingual clinical NER corpora, DrugTEMIST and CardioCCC were automatically translated from Spanish into both English and Italian. The Gold Standard drug mention annotations were then mapped into both target languages and validated manually by clinical experts (native speakers of English and Italian). 
The three corpora and the underlying annotation projection process are described in more detail in Section 3.</p><p>The evaluation process relied on the comparison of participating team predictions against the manual annotations previously done by the clinical experts. Each team was allowed to submit up to 5 runs for each subtrack and language. The evaluation process and metrics are reported in Section 2.3.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.2.">Subtracks</head><p>MultiCardioNER was structured into two different subtracks:</p><p>• Subtrack 1 (CardioDis). This track focuses on the adaptation of disease recognition systems to the cardiology specialty in Spanish. Participants could use the DisTEMIST corpus <ref type="bibr" target="#b17">[18]</ref> as a base training set, together with a new collection of cardiology-specific clinical case reports annotated with diseases (CardioCCC) that could be used to fine-tune or adapt their systems to cardiology case reports. • Subtrack 2 (MultiDrug). This subtrack focuses on the multilingual or cross-language adaptation (Spanish, English and Italian) of medication recognition systems, specifically for cardiology clinical case reports. For this track, participants could use the DrugTEMIST dataset as NER training resource. This corpus can be seen as a complementary dataset to the previously-released DisTEMIST, ProcTEMIST and SympTEMIST corpora, as it incorporates annotations of medications for the same document collection. To enable adaptation to cardiology, the CardioCCC corpus was annotated with medication mentions and divided into development and test subsets. While the original versions of both datasets were created using Spanish texts, a machine-translated version in English and Italian was revised by hand and annotated by clinical experts.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.3.">Evaluation</head><p>The task was divided into distinct phases: training and test set prediction (evaluation). During the training phase, participants were provided with the DisTEMIST and DrugTEMIST datasets, as well as a subset of the CardioCCC corpus made up of 258 documents. The second batch of the CardioCCC collection was used as test set and released together with a larger background set to make sure that no manual post-editing was carried out by the teams and that the submitted systems could scale up to process larger data collections. These collections (test and background set) were released approximately one month after the start of the training phase. Participants were given two weeks to generate predictions for all documents. They were then evaluated using the Gold Standard annotations of the CardioCCC test set, reserving the predictions for the background set to create a participants' Silver Standard (discussed in Section 3.4). It was not mandatory to submit results for all three languages. Both MultiCardioNER subtracks were evaluated using micro-averaged precision, recall and F1-score. These metrics are calculated as follows: <formula notation="TeX">P = \frac{TP}{TP + FP}</formula>, <formula notation="TeX">R = \frac{TP}{TP + FN}</formula> and <formula notation="TeX">F_1 = \frac{2 \cdot P \cdot R}{P + R}</formula>, where TP, FP and FN denote the number of true positives, false positives and false negatives, respectively. As part of the task, an official MultiCardioNER evaluation library was released and is available on GitHub<ref type="foot" target="#foot_0">1</ref> . After the task results were released, the test set Gold Standard annotations were shared with participating teams to enable them to perform extra experiments and facilitate error analysis of their systems.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.4.">Baseline</head><p>To provide a baseline system for comparison, we used a simple vocabulary transfer approach that relied on generating a gazetteer of entities from the training sets (DisTEMIST/DrugTEMIST corpora), and carrying out dictionary look-up of these terms in the test set. Specifically, the system is a lexical lookup approach that tries to find the annotated strings in both corpora within the cardiology test set. The baseline results are shown in Table <ref type="table" target="#tab_0">1</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.">Corpus and resources</head><p>The MultiCardioNER task leverages an already-existing corpus, DisTEMIST, as well as two new releases, DrugTEMIST and CardioCCC. DisTEMIST and DrugTEMIST share the same document collection, which consists of clinical case reports from various clinical specialties such as oncology, infectious diseases, urology and psychiatry. This collection of texts has also been used for the procedures corpus MedProcNER/ProcTEMIST <ref type="bibr" target="#b18">[19]</ref> and the signs and symptoms corpus SympTEMIST <ref type="bibr" target="#b19">[20]</ref>. These corpora could be considered complementary since they have been annotated by the same clinical experts using the same methodology, which includes the creation of dedicated annotation guidelines. They were released as part of previous shared tasks in an effort to promote the development and accessibility of annotated resources for clinical information extraction in Spanish validated by clinical experts. Other resources resulting from this initiative include PharmaCoNER <ref type="bibr" target="#b20">[21]</ref>, LivingNER <ref type="bibr" target="#b21">[22]</ref>, MEDDOPROF <ref type="bibr" target="#b22">[23]</ref> or MEDDOPLACE <ref type="bibr" target="#b23">[24]</ref>.</p><p>The CardioCCC corpus consists of cardiology-specific clinical case reports. It includes annotations for diseases and drugs created using the same guidelines as DisTEMIST and DrugTEMIST. Although all three corpora were created originally in Spanish, the texts and annotations related to drugs were translated into English and Italian and released for this task. Table <ref type="table">2</ref> provides some statistics for the different datasets that make up MultiCardioNER, which are explained in detail in this section. All datasets described in this section are openly available on Zenodo<ref type="foot" target="#foot_1">2</ref> .</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 2</head><p>Statistics for the datasets provided for MultiCardioNER. "Annot." stands for "annotations", while "Chars" stands for "characters". Unique annotations refer to the number of distinct annotated strings after converting all annotations to lowercase. The number of tokens has been calculated using the following spaCy models: "es_core_news_sm", "en_core_web_sm" and "it_core_news_sm".</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Dataset</head><p>Lang  </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.1.">DisTEMIST</head><p>DisTEMIST is a Gold Standard manually annotated corpus of disease mentions in Spanish clinical case documents normalized or mapped to SNOMED CT concept identifiers. It consists of 1,000 clinical case reports written in Spanish from miscellaneous medical specialties. Figure <ref type="figure" target="#fig_0">1</ref> shows an example of an annotated document.</p><p>The texts in the corpus were derived from SciELO (Scientific Electronic Library Online) <ref type="foot" target="#foot_2">3</ref> , an electronic library that contains publications from scientific journals. The texts were manually selected by clinical experts so that their structure and content were clinically relevant and representative. The texts were then pre-processed to extract the appropriate sections of the clinical cases and to remove embedded figure references and citations to be as close as possible to real medical records. These texts were originally released under the name SpaCCC (Spanish Clinical Case Corpus). As shown in Table <ref type="table">2</ref>, the text collection includes a total of 406,137 tokens and 2,335,968 characters. In terms of annotations, the corpus includes a total of 10,664 entities, out of which 6,739 are unique after converting them into lowercase.</p><p>The DisTEMIST corpus was annotated and standardized by two clinical experts from a Spanish tertiary hospital. The annotated mentions and their normalization were post-processed and revised afterwards by a third physician. The annotations were created using the brat tool <ref type="bibr" target="#b24">[25]</ref>. Annotation and normalization guidelines were created specifically for this task. The annotation involved discussions between physicians, particularly regarding complex mentions. 
This, together with multiple rounds of inter-annotator agreement (IAA) through parallel annotation of a section of the corpus (around 20%), resulted in an iterative refinement of the guidelines. After several rounds, a total IAA score of 82.3 (computed as the pairwise agreement between two independent annotators) for the disease mentions was achieved. The result of this process is the DisTEMIST guidelines, openly available on Zenodo <ref type="foot" target="#foot_3">4</ref> . The document contains a total of 28 pages describing how to annotate diseases in clinical texts. There are a total of 52 rules divided into various types, such as general, positive or negative. There is also a set of rules specific to oncology mentions that was added as the language used in clinical cases related to this specialty proved to be more specific and harder to annotate. These are partially based on the CANTEMIST corpus <ref type="bibr" target="#b25">[26]</ref>. The guidelines also include a discussion of the task's importance, a corpus characterization, basic information about the task and the annotation process, as well as indications and resources for the annotators. It is noteworthy that the DisTEMIST guidelines have been adapted to other domains, such as social media <ref type="bibr" target="#b26">[27]</ref>.</p><p>The DisTEMIST text documents are in plain text format with UTF-8 encoding. The annotations are presented in two different stand-off versions. The first version includes the original annotation files as outputted by brat <ref type="bibr" target="#b24">[25]</ref>. These are .ann files, one for each text file, where each line represents an annotation, including its label, its start and end position and its associated text. The second version is a single tab-separated file (.tsv) which includes all annotations in the corpus. 
Similarly to the .ann files, this version includes one annotation per row with an additional field for the corresponding filename.</p><p>For MultiCardioNER, all 1,000 documents in the corpus are presented together. For anyone who wishes to use the original train/test split of the corpus (consisting of 750 and 250 documents, respectively), we advise downloading the original DisTEMIST Gold Standard<ref type="foot" target="#foot_4">5</ref> to retrieve the list of filenames belonging to each split. The original repository also includes the SNOMED CT mappings for the annotated mentions, as well as some additional data, such as a background set of related clinical documents and a Silver Standard of the corpus in 6 languages (English, Portuguese, Catalan, Italian, French and Romanian), created using annotation projection. The annotation projection methodology is described in Section 3.2, as well as in the original paper <ref type="bibr" target="#b17">[18]</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2.">DrugTEMIST</head><p>DrugTEMIST is a collection of 1,000 clinical case reports from various clinical specialties annotated with mentions of medications. Figure <ref type="figure" target="#fig_1">2</ref> shows an excerpt of an annotated document from the corpus.</p><p>The corpus uses the same collection of texts as DisTEMIST, which is also shared by MedProcNER/ProcTEMIST <ref type="bibr" target="#b18">[19]</ref> and SympTEMIST <ref type="bibr" target="#b19">[20]</ref>. Unlike those corpora, DrugTEMIST hadn't been previously released and is one of the novelties of the MultiCardioNER task. Again, the corpus includes a total of 406,137 tokens and 2,335,968 characters, as well as 2,778 annotated entities (925 unique after converting them to lowercase).</p><p>Similarly to the DisTEMIST corpus, dedicated annotation guidelines were written to define what should be considered a medication and how to perform the annotations. These guidelines were created and refined using the same methodology used for DisTEMIST, including thorough discussions between physicians and the annotation of a sample of the corpus (around 20%). The final IAA of the corpus is 0.955. The DrugTEMIST annotation guidelines are also available in Zenodo <ref type="foot" target="#foot_5">6</ref> . They contain 17 pages and are quite similar to the DisTEMIST guidelines, with a total of 29 rules. The release format of the corpus is the same as that of DisTEMIST. The original Gold Standard of the corpus was created in Spanish. For the multilingual part of the task, we created versions of the corpus in English and Italian using annotation projection techniques. These two languages were chosen due to their relevance for other related projects and the availability of clinical experts fluent in each language, who performed a manual revision of all documents to validate the annotation and the quality of the translation. 
Specifically, the annotation projection methodology consisted of the following steps:</p><p>1. An automatic translation of the Spanish documents was carried out (for the previous DisTEMIST task) using high-quality commercial machine translation systems. In a separate step, the Gold Standard annotations were translated without context (i.e. as a plain list of strings). 2. The translated annotations were next transferred into each document using a look-up system.</p><p>For each document, only the annotations that existed in the original Gold Standard were looked up to prevent introducing false positives. The result of this step is an automatically annotated version of the corpus in each language, which could be considered a Silver Standard. 3. In order for the corpus to be used as a Gold Standard, a manual revision was performed. Experts compared the original Spanish version of the documents with the version in English and Italian using brat's side-by-side comparison mode. They were tasked with correcting existing and adding new mentions if necessary to make the annotation as close as possible to the original. Additionally, these experts were asked to provide alternative translations to annotated entities that were incorrectly translated. 4. A post-processing step incorporated the alternative translations suggested by the annotators.</p><p>These translations replaced the original annotated entity both in the text and in the annotation files.</p><p>Statistics about the English and Italian versions of DrugTEMIST are also provided in Table <ref type="table">2</ref>. We should underscore that the different versions of the corpus do not contain the exact same number of annotations. This is mostly due to translation differences and errors introduced by the machine translation system.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.3.">CardioCCC</head><p>CardioCCC (which stands for Cardiology Clinical Case Corpus) is a collection of 508 cardiology clinical case reports. The documents were retrieved from open-access cardiology journals in Spanish. Within these journals, we tried to manually locate clinical case reports that would have a similar structure to real clinical health records. The candidates were then extracted and, in a similar fashion to the other two corpora presented so far, pre-processed to keep only the relevant article sections and to remove references to figures and tables. The cases were then revised by a clinical expert to confirm their validity. Figure <ref type="figure" target="#fig_3">3</ref> shows a parallel example of the drug annotations in Spanish, English and Italian.  </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 3</head><p>Statistics for the two splits of the CardioCCC corpus. CardioCCC_dev refers to the first batch of the corpus, which participants were allowed to use freely during the training phase. CardioCCC_test refers to the held-out test set used for evaluation. "Annot." stands for "annotations", while "Chars" stands for "characters". Unique annotations refer to the number of distinct annotated strings after converting all annotations to lowercase. The number of tokens has been calculated using the following spaCy models: "es_core_news_sm", "en_core_web_sm" and "it_core_news_sm".</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Dataset</head><p>Lang. Entity Docs Tokens Chars Annot. Unique Annot. The corpus contains annotations for diseases and drugs, which were created following the same guidelines used for DisTEMIST and DrugTEMIST. The main annotator for CardioCCC was the same clinical expert who did the final annotation and revision step for the other two corpora, which was a big asset in accelerating the corpus annotation process. As with DrugTEMIST, the corpus's texts were translated from Spanish into English and Italian using machine translation. The Gold Standard drug annotations were also transferred into English and Italian via annotation projection and revised by clinical experts who are native speakers of each language.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Mean</head><p>As explained in Section 2.3, CardioCCC was released in two batches: one for training/development and another for evaluation. The statistics for these two parts are presented in Table <ref type="table">3</ref>, while Table <ref type="table">2</ref> presents the statistics of the complete corpus. In terms of content, as shown by Table <ref type="table">2</ref>, the corpus contains 568,297 tokens and 3,215,774 characters. Despite having about half as many documents as the SpaCCC corpus (i.e. the texts in DisTEMIST/DrugTEMIST), CardioCCC contains over 150,000 more tokens and one million more characters, meaning the documents are considerably longer. This is also reflected in the number of annotations, with CardioCCC having around 8,000 more annotated diseases and 1,500 more drugs. Notably, despite the higher total number of annotations, CardioCCC contains fewer unique drug mentions (which is calculated by converting all annotations to lowercase). This might be due to the fact that in CardioCCC, drug mentions are usually more limited to cardiology-specific medications, while in DrugTEMIST, there is a wider variety of medications mentioned due to the varied clinical specialties it contains. As for the length of annotations themselves, all corpora seem to have a similar distribution in terms of character and token length. The high standard deviation with respect to the mean, especially for diseases, indicates that there are a number of long annotations in the datasets.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.4.">Background set</head><p>In addition to the three annotated corpora, an additional dataset was released as a background set. This dataset contains 7,625 text documents, both from the cardiology subdomain and other clinical specialties. While most documents were originally written in Spanish, some of them were also originally in English and Italian. All documents were translated to the other languages to have a comparable background set in all three languages. Together with the background set, we release a tab-separated values (.tsv) file that specifies the original language of each document and whether they belong to the cardiology domain or not.</p><p>As part of the task's evaluation period, participants were asked to create predictions for diseases and drugs using their systems. Their predictions were then used to create a Silver Standard, which we release in three different versions:</p><p>1. All mentions are kept, with the label name reflecting the team and run the prediction belongs to.</p><p>This version inevitably includes many incorrect and redundant annotations. 2. Only predictions that have some overlap with the predictions of a different run are used. The overlapping annotations are then merged under a single annotation and a new label name. This version should have a reduced number of incorrect annotations, although some of the "correct" annotations might have extension problems, such as being too short or too long. 3. Only predictions that have a complete overlap with another prediction of a different run are used. This should, in theory, contain the highest number of correct annotations.</p><p>Table <ref type="table" target="#tab_3">4</ref> shows some basic statistics about the text documents included within the Silver Standard. 
This new dataset can have multiple uses, such as bootstrapping manual annotations, system training using semi-supervised learning techniques or error and data analysis, amongst others.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.">Results</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.">Participation overview</head><p>A total of 31 teams registered for the MultiCardioNER task, out of which 7 teams submitted at least one run of their predictions. The participating teams originate from 8 different countries (some include collaborations between teams from different countries), and except for one group from the industry, the rest belong to academia. Table <ref type="table" target="#tab_4">5</ref> shows the complete list of participating teams, along with their affiliation and the reference to their task paper.</p><p>As for the participation in each subtrack, 6 teams participated in the CardioDis subtrack, while 5 teams participated in the MultiDrug subtrack (with one of those teams participating only in the Spanish part). Overall, a total of 70 runs were submitted, with each team allowed up to 5 runs per subtrack and language: 20 for the CardioDis subtrack, 18 for the Spanish MultiDrug, 16 for the English MultiDrug and 16 for the Italian MultiDrug.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.2.">System results</head><p>All in all, the top scores for each subtrack were:</p><p>• Subtrack CardioDis. The team BIT.UA attained the top position with an ensemble of RoBERTa-based models (roberta-es-clinical-trials-ner) that also uses a multi-head-CRF approach <ref type="bibr" target="#b34">[35]</ref>. Their runs integrated the provided datasets in different ways, with the highest scores achieved by the models that use both the DisTEMIST and CardioCCC data. Their best run achieved an F1-score of 0.8199 and a recall of 0.8243. The team with the next best F1-score (0.8049) is Enigma, which uses a CLIN-X-ES model also fine-tuned on the DisTEMIST and CardioCCC data. Interestingly, the team PICUSLab achieves the best precision (0.8886) by a wide margin by combining the predictions of multiple models trained on different parts of the data (including an augmented version of the CardioCCC corpus) and then using string matching techniques to enhance the final predictions. • Subtrack MultiDrug. In Spanish, the best F1-score is achieved by the ICUE team (0.9277), who also achieved the best recall (0.9412). Meanwhile, in English and Italian, the winning team is Enigma, with an F1-score of 0.9223 and 0.8842, respectively.</p><p>The results for the CardioDis subtrack are shown in Table <ref type="table" target="#tab_5">6</ref>, while the results for the MultiDrug subtrack are presented in Table <ref type="table" target="#tab_6">7</ref> for Spanish, Table <ref type="table" target="#tab_7">8</ref> for English and Table <ref type="table" target="#tab_8">9</ref> for Italian.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.3.">Methodologies</head><p>This section describes the methodologies used by each team, which are also summarized in Table <ref type="table" target="#fig_4">10</ref>. • Team BIT.UA.</p><p>For subtrack CardioDis, this team builds on some of their previous work, namely the Multi-Head-CRF approach <ref type="bibr" target="#b34">[35]</ref>, which introduces a Multi-Head Conditional Random Field (CRF) classifier on top of a multi-class NER system. Starting from the "roberta-es-clinical-trials-ner" pre-trained model<ref type="foot" target="#foot_6">7</ref> , they present 5 runs of ensembled models, with some runs consisting of models fine-tuned only with the DisTEMIST dataset and others with DisTEMIST plus CardioCCC. Their best run is an ensemble of 17 systems trained on both corpora, which achieves the highest F1-score of the subtrack (0.8199). • Team Data Science TUW.</p><p>This team uses four main strategies throughout their experiments for both subtracks: pre-training via MLM (Masked Language Modelling), data augmentation, sliding windows with overlap and additional pre-training on general diseases and drugs using other corpora. The pre-trained models they use include the multilingual mDeBERTa <ref type="bibr" target="#b35">[36,</ref><ref type="bibr" target="#b36">37]</ref>, the Spanish "roberta-es-clinical-trials-ner",</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 10</head><p>General overview of the approaches presented by participants for the MultiCardioNER task. "*TEMIST corpora" refers to the joint version of the DisTEMIST, SympTEMIST, ProcTEMIST and DrugTEMIST corpora. the English "biobert_chemical_ner"<ref type="foot" target="#foot_7">8</ref> and the Italian BioBIT <ref type="bibr" target="#b37">[38]</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Team</head><p>An important note about this team's results is that they had some problems with their submission that caused the overall low results. This is addressed in their system description, in which they re-evaluate their models with much better results, comparable to some of the task's best. • Team Enigma.</p><p>For subtrack CardioDis, team Enigma fine-tuned a CLIN-X-ES model <ref type="bibr" target="#b38">[39]</ref> on the DisTEMIST and CardioCCC corpora for a different number of epochs. One of their runs further pre-trains the model using Spanish Wikipedia pages and datasets from different challenges, earning them a spot in the subtrack's top three F1-scores (0.8049). For subtrack MultiDrug, the team uses a combination of different models, including a multilingual XLM-RoBERTa <ref type="bibr" target="#b39">[40]</ref> and language-specific models such as a Spanish RoBERTa <ref type="bibr" target="#b40">[41]</ref> (which they also use for Italian) and BioLinkBERT for English <ref type="bibr" target="#b41">[42]</ref>. Their first run, which uses the multilingual XLM-RoBERTa, pre-trains the model on a custom multi-lingual dataset (including biomedical challenge data, European drug description data, Wikipedia) and then fine-tunes it for token classification on all data for all languages. For Italian, this approach earns them the highest F1-score of the Italian part of the subtrack (0.8842).</p><p>Their second run uses the same system but adds a classifier before it, which determines if there are any drugs in the sentence. For Spanish and English, their best run is the third one, which uses a language-specific model. This is not the case, however, for their third Italian run, which uses a Spanish model pre-trained on Italian data. Another interesting contribution by this team is the combination of neural systems and drug dictionaries obtained from resources such as DrugBank, ATC, DrugCentral or the NIHS. 
The two runs that use this approach achieve very good results, although not as good as their other ones.</p><p>• Team ICUE.</p><p>For the MultiDrug subtrack, this team compares the effectiveness of multilingual and monolingual BERT models. They also experiment with the inclusion of post-processing rules (specifically for composite drug mentions in Spanish), as well as with using Large Language Models (LLMs) such as GPT-3.5 <ref type="bibr" target="#b42">[43]</ref> to translate predictions in Spanish to the other two languages. Their methodology achieves very good results, especially when they use monolingual models. In Spanish, they achieve the best F1-score (0.9277). It is noteworthy that some of their runs in the results table are repeated since they presented the same system with changes only for some languages.</p><p>Team ICUE also includes some additional experiments in their system description paper, such as using GPT-3.5 and LLaMA <ref type="bibr" target="#b43">[44]</ref> for entity recognition with competitive results. • Team NOVALINCS.</p><p>For CardioDis, this team fine-tunes the "bsc-bio-ehr-es" pre-trained RoBERTa<ref type="foot" target="#foot_8">9</ref> using the DisTEMIST corpus. They prepared two runs: one in which they only use the DisTEMIST annotations and another in which they also incorporate the other 3 entities from the complementary corpora (that is, procedures from MedProcNER/ProcTEMIST, symptoms from SympTEMIST and medications from DrugTEMIST). For MultiDrug, they only participated in the Spanish part using the same methodology, exchanging DisTEMIST with DrugTEMIST. Their overall results for both tasks are remarkable for their high precision and low recall, which may indicate the difficulty of the systems to adapt to the cardiology subdomain using only the general clinical domain data. • Team PICUSLab.</p><p>For the CardioDis subtrack, this team employs an ensemble transfer learning strategy. 
They train different models on DisTEMIST, CardioCCC and an augmented version of CardioCCC (created with the help of sentence similarity techniques and a gazetteer), and then fuse the predictions of the different models. To further improve their predictions, they use string matching to postprocess them. Their best run earns them a spot in the subtrack's top 5 with an F1-score of 0.791. • Team Siemens.</p><p>This team participated in both CardioDis and MultiDrug with the same methodology. They use general domain BERT models ("bert-spanish-cased-finetuned-ner" <ref type="foot" target="#foot_9">10</ref> , "bert-base-NER" <ref type="foot" target="#foot_10">11</ref> and "bert-italian-finetuned-ner" <ref type="foot" target="#foot_11">12</ref> ) and fine-tune them for multi-label token classification using the different MultiCardioNER datasets. Despite not using clinical models, their results are quite good, especially for the MultiDrug subtrack (e.g. 0.8789 F1-score in the Italian part). In their overview paper, they also perform additional experiments that were not evaluated during the task's evaluation phase.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.">Discussion</head><p>Comparison with previous tasks.</p><p>MultiCardioNER is a novel task built upon the foundation of previous tasks and resources. In recent years, tasks such as DisTEMIST <ref type="bibr" target="#b17">[18]</ref>, PharmaCoNER <ref type="bibr" target="#b20">[21]</ref> or MedProcNER <ref type="bibr" target="#b18">[19]</ref> have provided the Spanish NLP community with a variety of corpora for the recognition (and normalization) of named entities in clinical texts. These corpora have progressively become reference corpora used to benchmark and model pre-training efforts <ref type="bibr" target="#b38">[39,</ref><ref type="bibr" target="#b44">45,</ref><ref type="bibr" target="#b45">46,</ref><ref type="bibr" target="#b46">47,</ref><ref type="bibr" target="#b47">48,</ref><ref type="bibr" target="#b48">49]</ref>. MultiCardioNER is different from these previous tasks in that it uses data from a single clinical specialty, rather than a general medical dataset. The CardioCCC corpus could become a reference for cardiology and subdomain adaptation in clinical NLP in Spanish. The corpus is expected to expand with the addition of case reports, more entity types (such as procedures and symptoms), and more languages. Subdomain adaptation is a major goal of MultiCardioNER. The task's results indicate the importance of using subdomain data to build systems with specific application fields. All top-performing systems incorporate the released 258 documents from the CardioCCC corpus. In contrast, participants that only use the DisTEMIST and DrugTEMIST corpora (consisting of clinical case reports from various specialties) achieve high precision but fail to recall, thus obtaining a comparatively lower F1-score. This suggests that, while these systems are able to retrieve many clinical entities correctly (i.e. high precision), they fail to recover concepts specific to the cardiology subdomain (i.e. low recall). 
Furthermore, comparing the results of the DisTEMIST shared task <ref type="bibr" target="#b17">[18]</ref> with the CardioDis subtrack, the overall results are somewhat better in the latter task: DisTEMIST's winning team obtained an F1-score of 0.77, while the winning team of MultiCardioNER obtained an F1 of 0.81. This might point to the importance of using specialty-specific data, even within very similar clinical domains.</p><p>We should underline that compared with DisTEMIST, this task offers a higher volume of training data. While there seems to be a positive correlation with the use of subdomain-specific data, it remains a question whether these improvements can actually be attributed to subdomain adaptation, to differences in each of the tasks' test sets, or to simply having more data.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Similarity between the general domain and the cardiology corpora.</head><p>Given the task's focus on subdomain adaptation, and in order to further characterise the cardiology and SpaCCC datasets (i.e. the DisTEMIST and DrugTEMIST texts) of the shared task, a comparison analysis was conducted between these clinical case reports and documents belonging to other medical disciplines. These documents consist of a collection of clinical cases categorised into 22 different specialities with varying text structures and content, including oncology, COVID-specific reports, primary health care, neurology, etc. The data for the other specialties was extracted using the same methodology as for the CardioCCC (cardiology) corpus (explained in Section 3.3).</p><p>For the analysis, we tried to create a mathematical representation of the different document specialties and their subsequent visualisation in a two-dimensional space. To this purpose, the document embeddings were extracted using the pre-trained language model "roberta-base-biomedical-clinical-es" (RoBERTa-based and trained on a large Spanish biomedical corpus from different sources), resulting in tensors of 𝑛 × 𝑚 dimensions, where 𝑛 is the number of sentences in the document and 𝑚 is the size of the language model (768 for the RoBERTa model). Subsequently, a vector composition technique was employed to process the extracted document embeddings, as described in the work of Amigó et al. <ref type="bibr" target="#b49">[50]</ref>. This involved utilising the generalised composition function proposed in Amigó et al. <ref type="bibr" target="#b49">[50]</ref> and illustrated in Equation 1. In this expression, the first component determines the vector direction of the sum of two vectors ($\vec{v}_1$ and $\vec{v}_2$), while the second component represents its magnitude, which depends on the norm of single vectors and their inner product. 
By applying this function to pairs of successive sentences in a document and representing them as vectors, we are able to compute and represent each document as a single vector (embedding).</p><p>In this study we implemented two different composition functions derived from Equation 1, the summation ($F_{sum}$), obtained when the constants 𝜆 and 𝜇 are equal to 1 and −2 respectively, and $F_{ind}$, a particularization of Equation 1 when 𝜆 is equal to 1 and 𝜇 to 0.</p><formula xml:id="formula_0">$F_{\lambda,\mu}(\vec{v}_1, \vec{v}_2) = \frac{\vec{v}_1 + \vec{v}_2}{\lVert \vec{v}_1 + \vec{v}_2 \rVert} \cdot \sqrt{\lambda \left( \lVert \vec{v}_1 \rVert^2 + \lVert \vec{v}_2 \rVert^2 \right) - \mu \langle \vec{v}_1, \vec{v}_2 \rangle}$<label>(1)</label></formula><p>Following the document vector representation and the composition function technique, we implemented a t-Distributed Stochastic Neighbour Embedding (t-SNE) algorithm with a perplexity of 30 and a maximum number of iterations of 800 to reduce the dimensionality of the document embeddings. This statistical method enables the visualisation of high-dimensional document embeddings in lower-dimensional spaces, in this case, two dimensions.</p><p>Figures <ref type="figure" target="#fig_5">4</ref> and <ref type="figure" target="#fig_6">5</ref> illustrate the scatter plots generated by the applied methodology, utilising the two composition functions previously mentioned, $F_{sum}$ and $F_{ind}$ respectively. Both figures reveal distinct clustering patterns depending on the specialty. Documents belonging to specific specialties form a well-defined cluster (see cardiology i.e. CardioCCC in black), highlighting the fact that each of them possesses unique features in terms of content and structure. In contrast, documents from the SpaCCC corpus (red points) are scattered across the plot, reflecting their diverse nature. This is due to the fact that they cover a wide range of medical disciplines, such as cardiology (CardioCCC), oncology, urology, pneumology or infectious diseases, among many others.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Future work and conclusions.</head><p>There is a pressing need to promote the development of annotated datasets to generate automatic clinical concept detection tools, not only for a single language but for several languages, following comparable annotation criteria and consistent results across multiple languages. Due to the complexity and considerable workload associated with the manual corpus construction process of clinical content, the use of creative solutions such as neural translation and annotation projection strategies might provide an alternative solution to traditional corpus construction attempts. The results of the MultiCardioNER task indicate that it is feasible to create multilingual clinical corpora and use them to train and generate very competitive clinical NER systems with comparable results across several languages.</p><p>Moreover, an adaptation of clinical NLP components to specific medical specialties can improve the quality of the resulting systems for real-world scenarios. Typically clinical NLP application scenarios or use cases focus on content related to a particular medical discipline, disease or patient type. In this regard, the MultiCardioNER task also provides useful insights on how to adapt general-purpose clinical NLP systems to the characteristics of a medical specialty of interest.</p><p>We foresee that the results, resources, and strategies generated through the MultiCardioNER task (both by organizers and participants) might potentially promote also the creation of clinical NLP resources beyond the three chosen languages covered in this track. 
The MultiCardioNER silver standard corpus of predictions for Spanish, English and Italian could also constitute a valuable resource for data augmentation or corpus construction by manually validating the generated system predictions.</p><p>The presented annotation projection strategy obviously relies on the sufficient quality of the used medical translation systems. Therefore, systematic efforts to evaluate the quality of neural medical machine translation systems are critical. Initiatives like the Workshop on Machine Translation (WMT) Biomedical Translation shared task have provided insights on the quality and potential of neural translation technologies adapted to translate healthcare documents <ref type="bibr" target="#b50">[51,</ref><ref type="bibr" target="#b51">52]</ref>.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head>Figure 1 :</head><label>1</label><figDesc>Figure 1: Excerpt from the DisTEMIST corpus with various annotated diseases. Translation with annotated entities in italics: "A 37-year-old woman diagnosed with AML (acute myeloblastic leukemia) in 2003 following a spontaneous right hemopneumothorax that required surgery with evacuation of the hemothorax and resection of bullous dystrophy. She was followed up on an outpatient basis without incident until 2009 when she presented with chylous ascites and a large retroperitoneal cystic lymphangioma was detected on an abdominal computed tomography (CT) scan. In February 2011 she was admitted for exertional dyspnea and extensive right pleural effusion. Pleural fluid showed characteristics of chylothorax: [...]".</figDesc><graphic coords="5,72.00,294.94,451.27,121.20" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_1"><head>Figure 2 :</head><label>2</label><figDesc>Figure 2: Excerpt from the DrugTEMIST corpus with various annotated medications. Translation with annotated entities in italics: "An 82-year-old woman with a history of breast neoplasia treated with surgery and hormone therapy 20 years ago, hypertensive cardiomyopathy in sinus rhythm, hypercholesterolemia and moderate chronic hyponatremia around 133 mmol/L. She was treated with torasemide 5 mg/24h, isosorbide mononitrate 50 mg/24h, acetylsalicylic acid 100 mg/24h, pravastatin 20 mg/24h, candesartan 32 mg/24h, hydrochlorothiazide 12.5 mg/24h, atenolol 50 mg/24h and spironolactone 25 mg/24h".</figDesc><graphic coords="7,72.00,65.61,451.26,84.63" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_2"><head>( a )</head><label>a</label><figDesc>Example in English. (b) Example in Spanish. (c) Example in Italian.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_3"><head>Figure 3 :</head><label>3</label><figDesc>Figure 3: Excerpt from the CardioCCC drug annotations in all three languages taken from the same document.</figDesc><graphic coords="8,74.49,275.44,451.29,86.02" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_4"><head></head><label></label><figDesc>models with multi-head CRF and differences in the data used for training (only DisTEMIST or DisTEMIST + CardioCCC) Data Science TUW CardioDis Transformer-based models with different pretraining settings, data augmentation and window sliding MultiDrug Multilingual and language-specific Transformers with different pretraining settings, data augmentation and window sliding Enigma CardioDis CLIN-X-ES model fine-tuned on the entire task data + custom clinical dataset MultiDrug Multilingual and language-specific Transformers fine-tuned on the entire task data + custom drug dictionary ICUE MultiDrug Multilingual and language-specific BERT models with re-training, post-processing rules + GPT 3.5 NOVALINCS CardioDis RoBERTa model fine-tuned on the standalone DisTEMIST corpus vs. joint *TEMIST corpora MultiDrug RoBERTa model fine-tuned on the standalone DrugTEMIST corpus vs. joint *TEMIST corpora PICUSLab CardioDis Ensemble of Transformer-based models trained on different datasets, including an augmented version of CardioCCC + post-processing via string matching Siemens CardioDis Fine-tuned general domain BERT model MultiDrug Fine-tuned language-specific general domain BERT models</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_5"><head>Figure 4 :</head><label>4</label><figDesc>Figure 4: Document embeddings representation per each discipline after reduction of their dimensionality to 2-dimensions by applying the t-SNE algorithm and using 𝐹 𝑠𝑢𝑚 as the composition function.</figDesc><graphic coords="16,72.00,65.61,451.27,232.34" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_6"><head>Figure 5 :</head><label>5</label><figDesc>Figure 5: Document embeddings representation per each discipline after reduction of their dimensionality to 2-dimensions by applying the t-SNE algorithm and using 𝐹 𝑖𝑛𝑑 as the composition function.</figDesc><graphic coords="16,72.00,343.34,451.27,232.34" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1</head><label>1</label><figDesc>Results of the baseline system (vocabulary transfer) for the two MultiCardioNER subtracks</figDesc><table><row><cell cols="2">Subtrack Language</cell><cell cols="2">System Name</cell><cell cols="2">Precision Recall</cell><cell>F1</cell></row><row><cell>CardioDis</cell><cell>Spanish</cell><cell cols="2">DisTEMIST vocabulary transfer</cell><cell>0.5178</cell><cell>0.3681 0.4303</cell></row><row><cell>MultiDrug</cell><cell>Spanish</cell><cell cols="2">DrugTEMIST vocabulary transfer</cell><cell>0.6366</cell><cell>0.7148 0.6734</cell></row><row><cell>MultiDrug</cell><cell>English</cell><cell cols="2">DrugTEMIST vocabulary transfer</cell><cell>0.3317</cell><cell>0.7269 0.4556</cell></row><row><cell>MultiDrug</cell><cell>Italian</cell><cell cols="2">DrugTEMIST vocabulary transfer</cell><cell>0.3320</cell><cell>0.6844 0.4471</cell></row><row><cell></cell><cell cols="2">Precision (P) =</cell><cell cols="2">True Positives True Positives + False Positives</cell></row><row><cell></cell><cell cols="2">Recall (R) =</cell><cell cols="2">True Positives True Positives + False Negatives</cell></row><row><cell></cell><cell cols="2">F1 score (F1) =</cell><cell>2 * (𝑃 * 𝑅) (𝑃 + 𝑅)</cell><cell></cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_1"><head>. Entity Docs Tokens Chars Annot. Unique Annot.</head><label></label><figDesc></figDesc><table><row><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell>Mean</cell><cell>Mean</cell></row><row><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell>Annot.</cell><cell>Annot.</cell></row><row><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell>Tokens</cell><cell>Chars</cell></row><row><cell>DisTEMIST</cell><cell cols="3">ES Diseases 1,000 406,137 2,335,968 10,664</cell><cell>6,739</cell><cell>3.20 ± 2.98 24.76 ± 18.89</cell></row><row><cell cols="2">DrugTEMIST ES</cell><cell cols="2">Drugs 1,000 406,137 2,335,968 2,778</cell><cell>925</cell><cell>1.19 ± 0.56 11.34 ± 4.46</cell></row><row><cell></cell><cell>EN</cell><cell cols="2">Drugs 1,000 404,194 2,230,631 2,814</cell><cell>875</cell><cell>1.25 ± 0.66 11.26 ± 0.52</cell></row><row><cell></cell><cell>IT</cell><cell cols="2">Drugs 1,000 421,251 2,393,002 2,808</cell><cell>893</cell><cell>1.25 ± 0.69 11.49 ± 4.73</cell></row><row><cell>CardioCCC</cell><cell cols="3">ES Diseases 508 568,297 3,215,774 18,232</cell><cell>7,692</cell><cell>3.32 ± 2.84 26.28 ± 19.06</cell></row><row><cell></cell><cell>ES</cell><cell>Drugs</cell><cell>508 568,297 3,215,774 4,227</cell><cell>755</cell><cell>1.19 ± 0.71 11.60 ± 5.25</cell></row><row><cell></cell><cell>EN</cell><cell>Drugs</cell><cell>508 576,772 3,114,833 4,231</cell><cell>734</cell><cell>1.21 ± 0.64 11.37 ± 4.74</cell></row><row><cell></cell><cell>IT</cell><cell>Drugs</cell><cell>508 595,332 3,345,466 4,385</cell><cell>752</cell><cell>1.23 ± 0.72 11.85 ± 5.25</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_3"><head>Table 4</head><label>4</label><figDesc>Statistics of the documents in the background set.</figDesc><table><row><cell cols="4">Language Documents Tokens Characters</cell></row><row><cell>Spanish</cell><cell>7,625</cell><cell>3,863,801</cell><cell>22,066,533</cell></row><row><cell>English</cell><cell>7,625</cell><cell>3,857,831</cell><cell>21,130,044</cell></row><row><cell>Italian</cell><cell>7,625</cell><cell>4,015,920</cell><cell>22,782,246</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_4"><head>Table 5</head><label>5</label><figDesc>Overview of the teams that participated in MultiCardioNER. In the Affiliation column, A/I stands for academic or industry institution. In the Tasks column, C stands for the CardioDis subtrack and M for the MultiDrug subtrack.</figDesc><table><row><cell>Team Name</cell><cell>Affiliation</cell><cell>Tasks</cell><cell>Ref.</cell></row><row><cell>BIT.UA</cell><cell>IEETA, University of Aveiro, Portugal [A]</cell><cell>C</cell><cell>[28]</cell></row><row><cell>DataScienceTUW</cell><cell>Technische Universität Wien, Austria &amp; Spanish National Research Council (CSIC), Spain [A]</cell><cell>C/M</cell><cell>[29]</cell></row><row><cell>Enigma</cell><cell>OntoText, Bulgaria &amp; Sofia University, Bulgaria [I/A]</cell><cell>C/M</cell><cell>[30]</cell></row><row><cell>ICUE</cell><cell>University of Edinburgh, UK &amp; Imperial College London, UK [A]</cell><cell>M</cell><cell>[31]</cell></row><row><cell>NOVALINCS</cell><cell>NOVA School of Science And Technology, Portugal [A]</cell><cell>C/M</cell><cell>[32]</cell></row><row><cell>PICUSLab</cell><cell>Università degli Studi di Napoli Federico II, Italy [A]</cell><cell>C</cell><cell>[33]</cell></row><row><cell>Siemens</cell><cell>Siemens Advanta, Romania &amp; Transilvania University of Brasov, Romania [I/A]</cell><cell>C/M</cell><cell>[34]</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_5"><head>Table 6</head><label>6</label><figDesc>Results of the MultiCardioNER CardioDis subtrack, sorted by F1-score. The best result is bolded, and the second-best is underlined.</figDesc><table><row><cell>Team Name</cell><cell>Run name</cell><cell>Precision</cell><cell>Recall</cell><cell>F1</cell></row><row><cell>BIT.UA</cell><cell>run1-all-full</cell><cell>0.8155</cell><cell>0.8243</cell><cell>0.8199</cell></row><row><cell>BIT.UA</cell><cell>run0-top5-full</cell><cell>0.811</cell><cell>0.8181</cell><cell>0.8145</cell></row><row><cell>Enigma</cell><cell>3-system-CLIN-X-ES-pretrained</cell><cell>0.8016</cell><cell>0.8082</cell><cell>0.8049</cell></row><row><cell>Enigma</cell><cell>2-system-CLIN-X-ES-14</cell><cell>0.8052</cell><cell>0.8007</cell><cell>0.803</cell></row><row><cell>PICUSLab</cell><cell>aug_fus_sub2</cell><cell>0.7794</cell><cell>0.803</cell><cell>0.791</cell></row><row><cell>BIT.UA</cell><cell>run4-all</cell><cell>0.7981</cell><cell>0.7827</cell><cell>0.7903</cell></row><row><cell>Enigma</cell><cell>1-system-CLIN-X-ES-12</cell><cell>0.7827</cell><cell>0.7938</cell><cell>0.7882</cell></row><row><cell>PICUSLab</cell><cell>aug_fus_sub1</cell><cell>0.7346</cell><cell>0.7799</cell><cell>0.7566</cell></row><row><cell>BIT.UA</cell><cell>run3-all-val</cell><cell>0.7544</cell><cell>0.7588</cell><cell>0.7566</cell></row><row><cell>BIT.UA</cell><cell>run2-best-val</cell><cell>0.748</cell><cell>0.7542</cell><cell>0.7511</cell></row><row><cell>DataScienceTUW</cell><cell>run4-roberta-dg</cell><cell>0.6565</cell><cell>0.7376</cell><cell>0.6947</cell></row><row><cell>DataScienceTUW</cell><cell>run5-roberta-dg-windows</cell><cell>0.6546</cell><cell>0.7244</cell><cell>0.6877</cell></row><row><cell>Siemens</cell><cell>run1_SDR</cell><cell>0.6758</cell><cell>0.6437</cell><cell>0.6593</cell></row><row><cell>PICUSLab</cell><cell>aug_fus_sm_sub2</cell><cell>0.8919</cell><cell>0.4897</cell><cell>0.6323</cell></row><row><cell>DataScienceTUW</cell><cell>run1_mdeberta-ct-mlm-dg</cell><cell>0.5928</cell><cell>0.6715</cell><cell>0.6297</cell></row><row><cell>PICUSLab</cell><cell>aug_fus_sm_sub1</cell><cell>0.8886</cell><cell>0.4744</cell><cell>0.6185</cell></row><row><cell>DataScienceTUW</cell><cell>run2-mdeberta-ct</cell><cell>0.5027</cell><cell>0.6884</cell><cell>0.581</cell></row><row><cell>DataScienceTUW</cell><cell>run3_mdeberta-ct-dg</cell><cell>0.48</cell><cell>0.6773</cell><cell>0.5618</cell></row><row><cell>NOVALINCS</cell><cell>1_bsc-bio-ehr-es_distemist_4</cell><cell>0.8018</cell><cell>0.3525</cell><cell>0.4897</cell></row><row><cell>NOVALINCS</cell><cell>2_bsc-bio-ehr-es_distemist_1</cell><cell>0.8183</cell><cell>0.3398</cell><cell>0.4802</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_6"><head>Table 7</head><label>7</label><figDesc>Results of the MultiCardioNER MultiDrug subtrack in Spanish, sorted by F1-score. The best result is bolded, and the second-best is underlined.</figDesc><table><row><cell>Team Name</cell><cell>Run name</cell><cell>Precision</cell><cell>Recall</cell><cell>F1</cell></row><row><cell>ICUE</cell><cell>run2_single_pp</cell><cell>0.9146</cell><cell>0.9412</cell><cell>0.9277</cell></row><row><cell>ICUE</cell><cell>run4_GPT_translation</cell><cell>0.9146</cell><cell>0.9412</cell><cell>0.9277</cell></row><row><cell>ICUE</cell><cell>run5_GPT_translation_all</cell><cell>0.9146</cell><cell>0.9412</cell><cell>0.9277</cell></row><row><cell>Enigma</cell><cell>3-system-SpanishRoBERTa</cell><cell>0.913</cell><cell>0.9348</cell><cell>0.9238</cell></row><row><cell>Enigma</cell><cell>1-system-XLMR</cell><cell>0.904</cell><cell>0.9208</cell><cell>0.9123</cell></row><row><cell>Enigma</cell><cell>2-system-XLMR-filtering</cell><cell>0.9148</cell><cell>0.9005</cell><cell>0.9076</cell></row><row><cell>ICUE</cell><cell>run3_single</cell><cell>0.8777</cell><cell>0.9272</cell><cell>0.9018</cell></row><row><cell>Siemens</cell><cell>run1_SMR</cell><cell>0.8928</cell><cell>0.8778</cell><cell>0.8852</cell></row><row><cell>ICUE</cell><cell>run1_multilingual_pp</cell><cell>0.8287</cell><cell>0.9348</cell><cell>0.8786</cell></row><row><cell>Enigma</cell><cell>5-system-XLMR-filtering-dict2</cell><cell>0.7654</cell><cell>0.8871</cell><cell>0.8218</cell></row><row><cell>NOVALINCS</cell><cell>3_bsc-bio-ehr-es_drugtemist_4</cell><cell>0.9242</cell><cell>0.4965</cell><cell>0.646</cell></row><row><cell>NOVALINCS</cell><cell>4_bsc-bio-ehr-es_drugtemist_1</cell><cell>0.9076</cell><cell>0.4919</cell><cell>0.638</cell></row><row><cell>DataScienceTUW</cell><cell>run3_roberta-ct-multilingual</cell><cell>0.8705</cell><cell>0.4342</cell><cell>0.5794</cell></row><row><cell>Enigma</cell><cell>4-system-XLMR-filtering-dict1</cell><cell>0.4351</cell><cell>0.7899</cell><cell>0.5611</cell></row><row><cell>DataScienceTUW</cell><cell>run5_roberta-ct-mlm</cell><cell>0.8421</cell><cell>0.3912</cell><cell>0.5342</cell></row><row><cell>DataScienceTUW</cell><cell>run4_mdeberta_ct_mlm_dg</cell><cell>0.6815</cell><cell>0.3836</cell><cell>0.4909</cell></row><row><cell>DataScienceTUW</cell><cell>run2_mdeberta-ct-multilingual</cell><cell>0.7647</cell><cell>0.3556</cell><cell>0.4855</cell></row><row><cell>DataScienceTUW</cell><cell>run1_mdeberta-multilingual</cell><cell>0.3914</cell><cell>0.1531</cell><cell>0.2201</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_7"><head>Table 8</head><label>8</label><figDesc>Results of the MultiCardioNER MultiDrug subtrack in English, sorted by F1-score. The best result is bolded, and the second-best is underlined.</figDesc><table><row><cell>Team Name</cell><cell>Run name</cell><cell>Precision</cell><cell>Recall</cell><cell>F1</cell></row><row><cell>Enigma</cell><cell>3-system-BioLinkBERT</cell><cell>0.8981</cell><cell>0.9477</cell><cell>0.9223</cell></row><row><cell>ICUE</cell><cell>run2_single_pp</cell><cell>0.9086</cell><cell>0.9128</cell><cell>0.9107</cell></row><row><cell>ICUE</cell><cell>run4_GPT_translation</cell><cell>0.9086</cell><cell>0.9128</cell><cell>0.9107</cell></row><row><cell>Enigma</cell><cell>1-system-XLMR</cell><cell>0.8823</cell><cell>0.9233</cell><cell>0.9023</cell></row><row><cell>Enigma</cell><cell>2-system-XLMR-filtering</cell><cell>0.9031</cell><cell>0.8989</cell><cell>0.901</cell></row><row><cell>Enigma</cell><cell>5-system-XLMR-filtering-dict2</cell><cell>0.8698</cell><cell>0.9047</cell><cell>0.8869</cell></row><row><cell>ICUE</cell><cell>run3_single</cell><cell>0.8734</cell><cell>0.8977</cell><cell>0.8854</cell></row><row><cell>ICUE</cell><cell>run1_multilingual_pp</cell><cell>0.8314</cell><cell>0.9343</cell><cell>0.8799</cell></row><row><cell>Siemens</cell><cell>run1_EMR</cell><cell>0.8685</cell><cell>0.8791</cell><cell>0.8738</cell></row><row><cell>Enigma</cell><cell>4-system-XLMR-filtering-dict1</cell><cell>0.8298</cell><cell>0.921</cell><cell>0.873</cell></row><row><cell>ICUE</cell><cell>run5_GPT_translation_all</cell><cell>0.8767</cell><cell>0.8635</cell><cell>0.87</cell></row><row><cell>DataScienceTUW</cell><cell>run3_roberta-ct-multilingual</cell><cell>0.8632</cell><cell>0.4364</cell><cell>0.5797</cell></row><row><cell>DataScienceTUW</cell><cell>run4-mdeberta-windows</cell><cell>0.7955</cell><cell>0.4317</cell><cell>0.5597</cell></row><row><cell>DataScienceTUW</cell><cell>run5-biobert-mlm-windows</cell><cell>0.6771</cell><cell>0.441</cell><cell>0.5341</cell></row><row><cell>DataScienceTUW</cell><cell>run2_mdeberta-ct-multilingual</cell><cell>0.8453</cell><cell>0.3777</cell><cell>0.5221</cell></row><row><cell>DataScienceTUW</cell><cell>run1_mdeberta-multilingual</cell><cell>0.5648</cell><cell>0.2481</cell><cell>0.3448</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_8"><head>Table 9</head><label>9</label><figDesc>Results of the MultiCardioNER MultiDrug subtrack in Italian, sorted by F1-score. The best result is bolded, and the second-best is underlined.</figDesc><table><row><cell>Team Name</cell><cell>Run name</cell><cell>Precision</cell><cell>Recall</cell><cell>F1</cell></row><row><cell>Enigma</cell><cell>1-system-XLMR</cell><cell>0.884</cell><cell>0.8844</cell><cell>0.8842</cell></row><row><cell>Enigma</cell><cell>3-system-Italian-Spanish-RoBERTa</cell><cell>0.8723</cell><cell>0.8956</cell><cell>0.8838</cell></row><row><cell>Enigma</cell><cell>2-system-XLMR-filtering</cell><cell>0.9016</cell><cell>0.8606</cell><cell>0.8806</cell></row><row><cell>Siemens</cell><cell>run1_IMR</cell><cell>0.8891</cell><cell>0.8689</cell><cell>0.8789</cell></row><row><cell>ICUE</cell><cell>run4_GPT_translation</cell><cell>0.9114</cell><cell>0.8461</cell><cell>0.8776</cell></row><row><cell>ICUE</cell><cell>run5_GPT_translation_all</cell><cell>0.9114</cell><cell>0.8461</cell><cell>0.8776</cell></row><row><cell>ICUE</cell><cell>run2_single_pp</cell><cell>0.8186</cell><cell>0.9</cell><cell>0.8574</cell></row><row><cell>ICUE</cell><cell>run1_multilingual_pp</cell><cell>0.8139</cell><cell>0.8867</cell><cell>0.8487</cell></row><row><cell>ICUE</cell><cell>run3_single</cell><cell>0.7879</cell><cell>0.8894</cell><cell>0.8356</cell></row><row><cell>Enigma</cell><cell>4-system-XLMR-filtering-dict1</cell><cell>0.5693</cell><cell>0.8578</cell><cell>0.6844</cell></row><row><cell>Enigma</cell><cell>5-system-XLMR-filtering-dict2</cell><cell>0.5707</cell><cell>0.845</cell><cell>0.6813</cell></row><row><cell>DataScienceTUW</cell><cell>run3_roberta-ct-multilingual</cell><cell>0.8264</cell><cell>0.4206</cell><cell>0.5574</cell></row><row><cell>DataScienceTUW</cell><cell>run4-mdeberta</cell><cell>0.7481</cell><cell>0.3928</cell><cell>0.5151</cell></row><row><cell>DataScienceTUW</cell><cell>run5-biobit-mlm</cell><cell>0.7922</cell><cell>0.3517</cell><cell>0.4871</cell></row><row><cell>DataScienceTUW</cell><cell>run2_mdeberta-ct-multilingual</cell><cell>0.7433</cell><cell>0.3394</cell><cell>0.4661</cell></row><row><cell>DataScienceTUW</cell><cell>run1_mdeberta-multilingual</cell><cell>0.5074</cell><cell>0.2094</cell><cell>0.2965</cell></row></table></figure>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="1" xml:id="foot_0">https://github.com/nlp4bia-bsc/multicardioner_evaluation_library</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="2" xml:id="foot_1">https://zenodo.org/doi/10.5281/zenodo.10948354</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="3" xml:id="foot_2">http://www.scielo.org</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="4" xml:id="foot_3">https://zenodo.org/doi/10.5281/zenodo.6458078</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="5" xml:id="foot_4">https://zenodo.org/doi/10.5281/zenodo.6408476</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="6" xml:id="foot_5">https://zenodo.org/doi/10.5281/zenodo.11065432</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="7" xml:id="foot_6">https://huggingface.co/lcampillos/roberta-es-clinical-trials-ner</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="8" xml:id="foot_7">https://huggingface.co/alvaroalon2/biobert_chemical_ner</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="9" xml:id="foot_8">https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="10" xml:id="foot_9">https://huggingface.co/mrm8488/bert-spanish-cased-finetuned-ner</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="11" xml:id="foot_10">https://huggingface.co/dslim/bert-base-NER</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="12" xml:id="foot_11">https://huggingface.co/nickprock/bert-italian-finetuned-ner</note>
		</body>
		<back>

			<div type="acknowledgement">
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Acknowledgments</head><p>The MultiCardioNER track was funded by Spanish and European projects such as DataTools4Heart (Grant Agreement No. 101057849), AI4HF (Grant Agreement No. 101080430), BARITONE (Proyectos de Transición Ecológica y Transición Digital 2021. Expediente Nº TED2021-129974B-C21) and AI4ProfHealth (PID2020-119266RA-I00 MICIU/AEI/10.13039/501100011033).</p><p>Google was a proud sponsor of the BioASQ Challenge in 2023. Ovid is also sponsoring this edition of BioASQ. The twelfth edition of BioASQ is also sponsored by Elsevier. Atypon Systems Inc. is also sponsoring this edition of BioASQ.</p></div>
			</div>

			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">Natural language processing for cardiovascular applications</title>
		<author>
			<persName><forename type="first">A</forename><surname>Tariq</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Santos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Banerjee</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Artificial Intelligence in Cardiothoracic Imaging</title>
				<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2022">2022</date>
			<biblScope unit="page" from="231" to="243" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">Multiscale classification of heart failure phenotypes by unsupervised clustering of unstructured electronic medical record data</title>
		<author>
			<persName><forename type="first">T</forename><surname>Nagamine</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Gillette</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Pakhomov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Kahoun</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Mayer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Burghaus</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Lippert</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Saxena</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Scientific Reports</title>
		<imprint>
			<biblScope unit="volume">10</biblScope>
			<biblScope unit="page">21340</biblScope>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">Systematic review of current natural language processing methods and applications in cardiology</title>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">R</forename><surname>Turchioe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Volodarskiy</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Pathak</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">N</forename><surname>Wright</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">E</forename><surname>Tcheng</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Slotwiner</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Heart</title>
		<imprint>
			<biblScope unit="volume">108</biblScope>
			<biblScope unit="page" from="909" to="916" />
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">Artificial intelligence: revolutionizing cardiology with large language models</title>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">J</forename><surname>Boonstra</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Weissenbacher</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">H</forename><surname>Moore</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Gonzalez-Hernandez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><forename type="middle">W</forename><surname>Asselbergs</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">European Heart Journal</title>
		<imprint>
			<biblScope unit="volume">45</biblScope>
			<biblScope unit="page" from="332" to="345" />
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">Prevalence of heart failure signs and symptoms in a large primary care population identified through the use of text and data mining of the electronic health record</title>
		<author>
			<persName><forename type="first">R</forename><surname>Vijayakrishnan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">R</forename><surname>Steinhubl</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Ng</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Sun</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">J</forename><surname>Byrd</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Daar</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><forename type="middle">A</forename><surname>Williams</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Defilippi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Ebadollahi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><forename type="middle">F</forename><surname>Stewart</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Cardiac Failure</title>
		<imprint>
			<biblScope unit="volume">20</biblScope>
			<biblScope unit="page" from="459" to="464" />
			<date type="published" when="2014">2014</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">Identifying heart failure symptoms and poor self-management in home healthcare: a natural language processing study</title>
		<author>
			<persName><forename type="first">S</forename><surname>Chae</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Song</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Ojo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Topaz</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Nurses and Midwives in the Digital Age</title>
				<imprint>
			<publisher>IOS Press</publisher>
			<date type="published" when="2021">2021</date>
			<biblScope unit="page" from="15" to="19" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">Cohort design and natural language processing to reduce bias in electronic health records research</title>
		<author>
			<persName><forename type="first">S</forename><surname>Khurshid</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Reeder</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">X</forename><surname>Harrington</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Singh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Sarma</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">F</forename><surname>Friedman</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Di Achille</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Diamant</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">W</forename><surname>Cunningham</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">C</forename><surname>Turner</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">npj Digital Medicine</title>
		<imprint>
			<biblScope unit="volume">5</biblScope>
			<biblScope unit="page">47</biblScope>
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">Unlocking echocardiogram measurements for heart disease research through natural language processing</title>
		<author>
			<persName><forename type="first">O</forename><forename type="middle">V</forename><surname>Patterson</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">S</forename><surname>Freiberg</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Skanderson</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">J</forename><surname>Fodeh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">A</forename><surname>Brandt</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">L</forename><surname>Duvall</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">BMC Cardiovascular Disorders</title>
		<imprint>
			<biblScope unit="volume">17</biblScope>
			<biblScope unit="page" from="1" to="11" />
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">Natural language processing for the assessment of cardiovascular disease comorbidities: The cardio-canary comorbidity project</title>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">N</forename><surname>Berman</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">W</forename><surname>Biery</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Ginder</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><forename type="middle">L</forename><surname>Hulme</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Marcusa</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Leiva</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><forename type="middle">Y</forename><surname>Wu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Cardin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Hainer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">L</forename><surname>Bhatt</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Clinical Cardiology</title>
		<imprint>
			<biblScope unit="volume">44</biblScope>
			<biblScope unit="page" from="1296" to="1304" />
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">Information extraction from electronic health records to predict readmission following acute myocardial infarction: does natural language processing using clinical notes improve prediction of readmission?</title>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">R</forename><surname>Brown</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><forename type="middle">M</forename><surname>Ricket</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">M</forename><surname>Reeves</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">U</forename><surname>Shah</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">A</forename><surname>Goodrich</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Gobbel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">E</forename><surname>Stabler</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">M</forename><surname>Perkins</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Minter</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><forename type="middle">C</forename><surname>Cox</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of the American Heart Association</title>
		<imprint>
			<biblScope unit="volume">11</biblScope>
			<biblScope unit="page">e024198</biblScope>
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">Structuring clinical text with AI: Old versus new natural language processing techniques evaluated on eight common cardiovascular diseases</title>
		<author>
			<persName><forename type="first">X</forename><surname>Zhan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Humbert-Droz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Mukherjee</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Gevaert</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Patterns</title>
		<imprint>
			<biblScope unit="volume">2</biblScope>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title level="a" type="main">Applications of natural language processing in cardiology using text clinical data: A systematic review</title>
		<author>
			<persName><forename type="first">H</forename><forename type="middle">A</forename><surname>Alhakimi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><forename type="middle">E</forename><surname>Magzoub</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Advances in Clinical and Experimental Medicine</title>
		<imprint>
			<biblScope unit="volume">10</biblScope>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<analytic>
		<title level="a" type="main">Automatic methods to extract New York Heart Association classification from clinical notes</title>
		<author>
			<persName><forename type="first">R</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Ma</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Shanahan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Munroe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Horn</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Speedie</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">2017 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)</title>
				<imprint>
			<publisher>IEEE</publisher>
			<date type="published" when="2017">2017</date>
			<biblScope unit="page" from="1296" to="1299" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b13">
	<analytic>
		<title level="a" type="main">Discovering and identifying New York Heart Association classification from electronic health records</title>
		<author>
			<persName><forename type="first">R</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Ma</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Shanahan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Munroe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Horn</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Speedie</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">BMC Medical Informatics and Decision Making</title>
		<imprint>
			<biblScope unit="volume">18</biblScope>
			<biblScope unit="page" from="5" to="13" />
			<date type="published" when="2018">2018</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b14">
	<monogr>
		<author>
			<persName><forename type="first">P</forename><surname>Adejumo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Thangaraj</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">S</forename><surname>Dhingra</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Aminorroaya</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Zhou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Brandt</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Xu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><forename type="middle">M</forename><surname>Krumholz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Khera</surname></persName>
		</author>
		<title level="m">A deep learning approach for automated extraction of functional status and new york heart association class for heart failure patients during clinical encounters</title>
				<imprint>
			<publisher>medRxiv</publisher>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b15">
	<analytic>
		<title level="a" type="main">Domain adaptation: challenges, methods, datasets, and applications</title>
		<author>
			<persName><forename type="first">P</forename><surname>Singhal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Walambe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Ramanna</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Kotecha</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">IEEE access</title>
		<imprint>
			<biblScope unit="volume">11</biblScope>
			<biblScope unit="page" from="6973" to="7020" />
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b16">
	<analytic>
		<title level="a" type="main">A review of recent work in transfer learning and domain adaptation for natural language processing of electronic health records</title>
		<author>
			<persName><forename type="first">E</forename><surname>Laparra</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Mascio</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Velupillai</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Miller</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Yearbook of medical informatics</title>
		<imprint>
			<biblScope unit="volume">30</biblScope>
			<biblScope unit="page" from="239" to="244" />
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b17">
	<monogr>
		<author>
			<persName><forename type="first">A</forename><surname>Miranda-Escalada</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Gascó</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Lima-López</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Farré-Maduell</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Estrada</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Nentidis</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Krithara</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Katsimpras</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Paliouras</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Krallinger</surname></persName>
		</author>
		<title level="m">Overview of DisTEMIST at BioASQ: Automatic detection and normalization of diseases from clinical texts: results, methods, evaluation and multilingual resources</title>
				<imprint>
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b18">
	<analytic>
		<title level="a" type="main">Overview of medprocner task on medical procedure detection and entity linking at bioasq</title>
		<author>
			<persName><forename type="first">S</forename><surname>Lima-López</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Farré-Maduell</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Gascó</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Nentidis</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Krithara</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Katsimpras</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Paliouras</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Krallinger</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Working Notes of CLEF 2023</title>
				<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b19">
	<analytic>
		<title level="a" type="main">Overview of SympTEMIST at BioCreative VIII: Corpus, Guidelines and Evaluation of Systems for the Detection and Normalization of Symptoms, Signs and Findings from Text</title>
		<author>
			<persName><forename type="first">S</forename><surname>Lima-López</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Farré-Maduell</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Gasco-Sánchez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Rodríguez-Miret</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Krallinger</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the BioCreative VIII Challenge and Workshop: Curation and Evaluation in the era of Generative Models</title>
				<meeting>the BioCreative VIII Challenge and Workshop: Curation and Evaluation in the era of Generative Models</meeting>
		<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b20">
	<analytic>
		<title level="a" type="main">Pharmaconer: Pharmacological substances, compounds and proteins named entity recognition track</title>
		<author>
			<persName><forename type="first">A</forename><surname>Gonzalez-Agirre</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Marimon</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Intxaurrondo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Rabal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Villegas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Krallinger</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of The 5th Workshop on BioNLP Open Shared Tasks</title>
				<meeting>The 5th Workshop on BioNLP Open Shared Tasks</meeting>
		<imprint>
			<date type="published" when="2019">2019</date>
			<biblScope unit="page" from="1" to="10" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b21">
	<analytic>
		<title level="a" type="main">Mention detection, normalization &amp; classification of species, pathogens, humans and food in clinical documents: Overview of livingner shared task and resources</title>
		<author>
			<persName><forename type="first">A</forename><surname>Miranda-Escalada</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Farré-Maduell</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Lima-López</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Estrada</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Gascó</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Krallinger</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Procesamiento del Lenguaje Natural</title>
		<imprint>
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b22">
	<analytic>
		<title level="a" type="main">Nlp applied to occupational health: Meddoprof shared task at iberlef 2021 on automatic recognition, classification and normalization of professions and occupations from medical texts</title>
		<author>
			<persName><forename type="first">S</forename><surname>Lima-López</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Farré-Maduell</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Miranda-Escalada</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Brivá-Iglesias</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Krallinger</surname></persName>
		</author>
		<ptr target="http://journal.sepln.org/sepln/ojs/ojs/index.php/pln/article/view/6393" />
	</analytic>
	<monogr>
		<title level="j">Procesamiento del Lenguaje Natural</title>
		<imprint>
			<biblScope unit="volume">67</biblScope>
			<biblScope unit="page" from="243" to="256" />
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b23">
	<analytic>
		<title level="a" type="main">MEDDOPLACE Shared Task overview: recognition, normalization and classification of locations and patient movement in clinical texts</title>
		<author>
			<persName><forename type="first">S</forename><surname>Lima-López</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Farré-Maduell</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Brivá-Iglesias</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Gascó</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Krallinger</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Procesamiento del Lenguaje Natural</title>
		<imprint>
			<biblScope unit="volume">71</biblScope>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b24">
	<analytic>
		<title level="a" type="main">Brat: a web-based tool for nlp-assisted text annotation</title>
		<author>
			<persName><forename type="first">P</forename><surname>Stenetorp</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Pyysalo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Topić</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Ohta</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Ananiadou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Tsujii</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Demonstrations at the 13th Conference of the European Chapter of the Association for Computational Linguistics</title>
				<meeting>the Demonstrations at the 13th Conference of the European Chapter of the Association for Computational Linguistics</meeting>
		<imprint>
			<date type="published" when="2012">2012</date>
			<biblScope unit="page" from="102" to="107" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b25">
	<analytic>
		<title level="a" type="main">Named entity recognition, concept normalization and clinical coding: Overview of the cantemist track for cancer text mining in spanish, corpus, guidelines, methods and results</title>
		<author>
			<persName><forename type="first">A</forename><surname>Miranda-Escalada</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Farré-Maduell</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Krallinger</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Iberian Languages Evaluation Forum (IberLEF 2020)</title>
				<meeting>the Iberian Languages Evaluation Forum (IberLEF 2020)</meeting>
		<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
	<note>CEUR Workshop Proceedings</note>
</biblStruct>

<biblStruct xml:id="b26">
	<analytic>
		<title level="a" type="main">The SocialDisNER shared task on detection of disease mentions in health-relevant content from social media: methods, evaluation, guidelines and corpora</title>
		<author>
			<persName><forename type="first">L</forename><surname>Gasco Sánchez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Estrada Zavala</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Farré-Maduell</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Lima-López</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Miranda-Escalada</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Krallinger</surname></persName>
		</author>
		<ptr target="https://aclanthology.org/2022.smm4h-1.48" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of The Seventh Workshop on Social Media Mining for Health Applications, Workshop &amp; Shared Task, Association for Computational Linguistics</title>
				<meeting>The Seventh Workshop on Social Media Mining for Health Applications, Workshop &amp; Shared Task, Association for Computational Linguistics<address><addrLine>Gyeongju, Republic of Korea</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2022">2022</date>
			<biblScope unit="page" from="182" to="189" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b27">
	<analytic>
		<title level="a" type="main">BIT.UA at MultiCardioNER: Adapting a Multi-head CRF for Cardiology</title>
		<author>
			<persName><forename type="first">R</forename><surname>Jonker</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Almeida</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Matos</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">CLEF Working Notes</title>
				<editor>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">P</forename><surname>Galuščáková</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><surname>García Seco De Herrera</surname></persName>
		</editor>
		<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b28">
	<analytic>
		<title level="a" type="main">Cross-Linguistic Disease and Drug Detection in Cardiology Clinical Texts: Methods and Outcomes</title>
		<author>
			<persName><forename type="first">P</forename><surname>Styll</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Campillos-Llanos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><surname>Kusa</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Hanbury</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">CLEF Working Notes</title>
				<editor>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">P</forename><surname>Galuščáková</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><surname>García Seco De Herrera</surname></persName>
		</editor>
		<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b29">
	<analytic>
		<title level="a" type="main">Transformer-Based Disease and Drug Named Entity Recognition in Multilingual Clinical Texts: MultiCardioNER challenge</title>
		<author>
			<persName><forename type="first">A</forename><surname>Aksenova</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Datseris</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Vassileva</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Boytcheva</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">CLEF Working Notes</title>
				<editor>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">P</forename><surname>Galuščáková</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><surname>García Seco De Herrera</surname></persName>
		</editor>
		<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b30">
	<analytic>
		<title level="a" type="main">Comparative Analyses of Multilingual Drug Entity Recognition Systems for Clinical Case Reports In Cardiology</title>
		<author>
			<persName><forename type="first">C</forename><surname>Lee</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><forename type="middle">I</forename><surname>Simpson</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">M</forename><surname>Posma</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">D</forename><surname>Lain</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">CLEF Working Notes</title>
				<editor>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">P</forename><surname>Galuščáková</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><surname>García Seco De Herrera</surname></persName>
		</editor>
		<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b31">
	<analytic>
		<title level="a" type="main">Team NOVA LINCS @ BIOASQ12 MultiCardioNER Track: Entity Recognition with Additional Entity Types</title>
		<author>
			<persName><forename type="first">R</forename><surname>Gonçalves</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Lamúrias</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">CLEF Working Notes</title>
				<editor>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">P</forename><surname>Galuščáková</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><surname>García Seco De Herrera</surname></persName>
		</editor>
		<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b32">
	<analytic>
		<title level="a" type="main">Identifying Cardiological Disorders in Spanish via Data Augmentation and Fine-Tuned Language Models</title>
		<author>
			<persName><forename type="first">A</forename><surname>Romano</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Riccio</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Postiglione</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Moscato</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">CLEF Working Notes</title>
				<editor>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">P</forename><surname>Galuščáková</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><surname>García Seco De Herrera</surname></persName>
		</editor>
		<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b33">
	<analytic>
		<title level="a" type="main">Multilingual Clinical NER for Diseases and Medications Recognition in Cardiology Texts using BERT Embeddings</title>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">D</forename><surname>Danu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><forename type="middle">G</forename><surname>Marica</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Suciu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">M</forename><surname>Itu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Farri</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">CLEF Working Notes</title>
				<editor>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">P</forename><surname>Galuščáková</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><surname>García Seco De Herrera</surname></persName>
		</editor>
		<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b34">
	<analytic>
		<title level="a" type="main">Multi-head CRF classifier for biomedical multi-class named entity recognition on Spanish clinical notes</title>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">A A</forename><surname>Jonker</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Almeida</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Antunes</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">R</forename><surname>Almeida</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Matos</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Database</title>
		<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b35">
	<analytic>
		<title level="a" type="main">Deberta: Decoding-enhanced bert with disentangled attention</title>
		<author>
			<persName><forename type="first">P</forename><surname>He</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Liu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Gao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><surname>Chen</surname></persName>
		</author>
		<ptr target="https://openreview.net/forum?id=XPZIaotutsD" />
	</analytic>
	<monogr>
		<title level="m">International Conference on Learning Representations</title>
				<imprint>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b36">
	<monogr>
		<author>
			<persName><forename type="first">P</forename><surname>He</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Gao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><surname>Chen</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2111.09543</idno>
		<title level="m">DeBERTaV3: Improving DeBERTa using ELECTRA-Style Pre-Training with Gradient-Disentangled Embedding Sharing</title>
				<imprint>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b37">
	<analytic>
		<title level="a" type="main">Localizing in-domain adaptation of transformer-based biomedical language models</title>
		<author>
			<persName><forename type="first">T</forename><forename type="middle">M</forename><surname>Buonocore</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Crema</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Redolfi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Bellazzi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Parimbelli</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Biomedical Informatics</title>
		<imprint>
			<biblScope unit="volume">144</biblScope>
			<biblScope unit="page">104431</biblScope>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b38">
	<analytic>
		<title level="a" type="main">Clin-x: pre-trained language models and a study on cross-task transfer for concept extraction in the clinical domain</title>
		<author>
			<persName><forename type="first">L</forename><surname>Lange</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Adel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Strötgen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Klakow</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Bioinformatics</title>
		<imprint>
			<biblScope unit="volume">38</biblScope>
			<biblScope unit="page" from="3267" to="3274" />
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b39">
	<monogr>
		<title level="m" type="main">Unsupervised cross-lingual representation learning at scale</title>
		<author>
			<persName><forename type="first">A</forename><surname>Conneau</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Khandelwal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Goyal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Chaudhary</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Wenzek</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Guzmán</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Grave</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Ott</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Zettlemoyer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Stoyanov</surname></persName>
		</author>
		<idno>CoRR abs/1911.02116</idno>
		<idno type="arXiv">arXiv:1911.02116</idno>
		<ptr target="http://arxiv.org/abs/1911.02116" />
		<imprint>
			<date type="published" when="2019">2019</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b40">
	<analytic>
		<title level="a" type="main">Pretrained Biomedical Language Models for Clinical NLP in Spanish</title>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">P</forename><surname>Carrino</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Llop</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Pàmies</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Gutiérrez-Fandiño</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Armengol-Estapé</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Silveira-Ocampo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Valencia</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Gonzalez-Agirre</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Villegas</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/2022.bionlp-1.19</idno>
		<ptr target="https://aclanthology.org/2022.bionlp-1.19" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 21st Workshop on Biomedical Language Processing, Association for Computational Linguistics</title>
				<meeting>the 21st Workshop on Biomedical Language Processing, Association for Computational Linguistics<address><addrLine>Dublin, Ireland</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2022">2022</date>
			<biblScope unit="page" from="193" to="199" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b41">
	<analytic>
		<title level="a" type="main">LinkBERT: Pretraining Language Models with Document Links</title>
		<author>
			<persName><forename type="first">M</forename><surname>Yasunaga</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Leskovec</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Liang</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Association for Computational Linguistics (ACL)</title>
				<imprint>
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b42">
	<monogr>
		<author>
			<persName><surname>OpenAI</surname></persName>
		</author>
		<ptr target="https://www.openai.com" />
		<title level="m">GPT-3.5 model</title>
				<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b43">
	<monogr>
		<author>
			<persName><forename type="first">H</forename><surname>Touvron</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Lavril</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Izacard</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Martinet</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M.-A</forename><surname>Lachaux</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Lacroix</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Rozière</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Goyal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Hambro</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Azhar</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Rodriguez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Joulin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Grave</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Lample</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2302.13971</idno>
		<title level="m">LLaMA: Open and efficient foundation language models</title>
				<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b44">
	<analytic>
		<title level="a" type="main">A comparative analysis of Spanish clinical encoder-based models on NER and classification tasks</title>
		<author>
			<persName><forename type="first">G</forename><surname>García Subies</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Á</forename><surname>Barbero Jiménez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Martínez Fernández</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of the American Medical Informatics Association</title>
		<imprint>
			<biblScope unit="page">e054</biblScope>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b45">
	<analytic>
		<title level="a" type="main">Clinical text mining in Spanish enhanced by negation detection and named entity recognition</title>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">J</forename><surname>Tamayo Herrera</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">A</forename><surname>Burgos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Gelbukh</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Computación y Sistemas</title>
		<imprint>
			<biblScope unit="volume">27</biblScope>
			<biblScope unit="page" from="1169" to="1181" />
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b46">
	<monogr>
		<author>
			<persName><forename type="first">H</forename><surname>Verma</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Bergler</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Tahaei</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2305.19120</idno>
		<title level="m">Comparing and combining some popular NER approaches on biomedical tasks</title>
				<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
	<note type="report_type">arXiv preprint</note>
</biblStruct>

<biblStruct xml:id="b47">
	<monogr>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">V</forename><surname>Serrano</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><forename type="middle">G</forename><surname>Subies</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><forename type="middle">M</forename><surname>Zamorano</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><forename type="middle">A</forename><surname>Garcia</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Samy</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">B</forename><surname>Sanchez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">M</forename><surname>Sandoval</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">G</forename><surname>Nieto</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">B</forename><surname>Jimenez</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2205.10233</idno>
		<title level="m">RigoBERTa: a state-of-the-art language model for Spanish</title>
				<imprint>
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
	<note type="report_type">arXiv preprint</note>
</biblStruct>

<biblStruct xml:id="b48">
	<analytic>
		<title level="a" type="main">ClinLinker: Medical entity linking of clinical concept mentions in Spanish</title>
		<author>
			<persName><forename type="first">F</forename><surname>Gallego</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>López-García</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Gasco-Sánchez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Krallinger</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><forename type="middle">J</forename><surname>Veredas</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">International Conference on Computational Science</title>
				<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2024">2024</date>
			<biblScope unit="page" from="266" to="280" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b49">
	<analytic>
		<title level="a" type="main">Information Theory-based Compositional Distributional Semantics</title>
		<author>
			<persName><forename type="first">E</forename><surname>Amigó</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Ariza-Casabona</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Fresno</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">A</forename><surname>Martí</surname></persName>
		</author>
		<idno type="DOI">10.1162/coli_a_00454</idno>
		<ptr target="https://doi.org/10.1162/coli_a_00454" />
	</analytic>
	<monogr>
		<title level="j">Computational Linguistics</title>
		<imprint>
			<biblScope unit="volume">48</biblScope>
			<biblScope unit="page" from="907" to="948" />
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b50">
	<analytic>
		<title level="a" type="main">Findings of the WMT 2019 biomedical translation shared task: Evaluation for MEDLINE abstracts and biomedical terminologies</title>
		<author>
			<persName><forename type="first">R</forename><surname>Bawden</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><forename type="middle">B</forename><surname>Cohen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Grozea</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">J</forename><surname>Yepes</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Kittner</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Krallinger</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Mah</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Neveol</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Neves</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Soares</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">ACL 2019 Fourth Conference on Machine Translation</title>
				<imprint>
			<publisher>Association for Computational Linguistics</publisher>
			<date type="published" when="2019">2019</date>
			<biblScope unit="page" from="29" to="53" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b51">
	<analytic>
		<title level="a" type="main">Findings of the WMT 2022 biomedical translation shared task: Monolingual clinical case reports</title>
		<author>
			<persName><forename type="first">M</forename><surname>Neves</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">J</forename><surname>Yepes</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Siu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Roller</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Thomas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">V</forename><surname>Navarro</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Yeganova</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Wiemann</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><forename type="middle">M</forename><surname>Di Nunzio</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Vezzani</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">WMT22-Seventh Conference on Machine Translation</title>
				<imprint>
			<date type="published" when="2022">2022</date>
			<biblScope unit="page" from="694" to="723" />
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
