<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">Semantic Enrichment of Pretrained Embedding Output for Unsupervised IR</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Edmund</forename><surname>Dervakos</surname></persName>
							<email>eddiedervakos@islab.ntua.gr</email>
							<affiliation key="aff0">
								<orgName type="department">School of Electrical and Computer Engineering</orgName>
								<orgName type="laboratory">Artificial Intelligence and Learning Systems Laboratory</orgName>
								<orgName type="institution">National Technical University of Athens</orgName>
								<address>
									<settlement>Athens</settlement>
									<country key="GR">Greece</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Giorgos</forename><surname>Filandrianos</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">School of Electrical and Computer Engineering</orgName>
								<orgName type="laboratory">Artificial Intelligence and Learning Systems Laboratory</orgName>
								<orgName type="institution">National Technical University of Athens</orgName>
								<address>
									<settlement>Athens</settlement>
									<country key="GR">Greece</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Konstantinos</forename><surname>Thomas</surname></persName>
							<email>konstantinos.thomas@gmail.com</email>
							<affiliation key="aff0">
								<orgName type="department">School of Electrical and Computer Engineering</orgName>
								<orgName type="laboratory">Artificial Intelligence and Learning Systems Laboratory</orgName>
								<orgName type="institution">National Technical University of Athens</orgName>
								<address>
									<settlement>Athens</settlement>
									<country key="GR">Greece</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Alexios</forename><surname>Mandalios</surname></persName>
							<email>amandalios@islab.ntua.gr</email>
							<affiliation key="aff0">
								<orgName type="department">School of Electrical and Computer Engineering</orgName>
								<orgName type="laboratory">Artificial Intelligence and Learning Systems Laboratory</orgName>
								<orgName type="institution">National Technical University of Athens</orgName>
								<address>
									<settlement>Athens</settlement>
									<country key="GR">Greece</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Chrysoula</forename><surname>Zerva</surname></persName>
							<email>chrysoula.zerva@manchester.ac.uk</email>
							<affiliation key="aff0">
								<orgName type="department">School of Electrical and Computer Engineering</orgName>
								<orgName type="laboratory">Artificial Intelligence and Learning Systems Laboratory</orgName>
								<orgName type="institution">National Technical University of Athens</orgName>
								<address>
									<settlement>Athens</settlement>
									<country key="GR">Greece</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="department">School of Computer Science</orgName>
								<orgName type="laboratory">National Centre for Text Mining</orgName>
								<orgName type="institution">University of Manchester</orgName>
								<address>
									<settlement>Manchester</settlement>
									<country key="GB">UK</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Giorgos</forename><surname>Stamou</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">School of Electrical and Computer Engineering</orgName>
								<orgName type="laboratory">Artificial Intelligence and Learning Systems Laboratory</orgName>
								<orgName type="institution">National Technical University of Athens</orgName>
								<address>
									<settlement>Athens</settlement>
									<country key="GR">Greece</country>
								</address>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff2">
								<orgName type="institution">Stanford University</orgName>
								<address>
									<addrLine>March 22-24</addrLine>
									<postCode>2021</postCode>
									<settlement>Palo Alto</settlement>
									<region>California</region>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">Semantic Enrichment of Pretrained Embedding Output for Unsupervised IR</title>
					</analytic>
					<monogr>
						<idno type="ISSN">1613-0073</idno>
					</monogr>
					<idno type="MD5">325FB1349E10C9764F901A7A8C4D8C8E</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2023-03-24T08:17+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>BERT</term>
					<term>SNOMED-CT</term>
					<term>Semantic enrichment</term>
					<term>scientific IR</term>
					<term>NLP</term>
					<term>CORD-19</term>
					<term>Covid-19 pandemic</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>The rapid growth of scientific literature in the biomedical and clinical domain has significantly complicated the identification of information of interest by researchers as well as other practitioners. More importantly, the rapid emergence of new topics and findings often hinders the performance of supervised approaches, due to the lack of relevant annotated data. The global COVID-19 pandemic further highlighted the need to query and navigate uncharted ground in the scientific literature in a prompt and efficient way.</p><p>In this paper we investigate the potential of semantically enhancing deep transformer architectures using SNOMED-CT in order to answer user queries in an unsupervised manner. Our proposed system attempts to filter and re-rank documents related to a query that were initially retrieved using BERT models. To achieve that, we enhance queries and documents with SNOMED-CT concepts and then impose filters on concept co-occurrence between them. We evaluate this approach on the OHSUMED dataset and show competitive performance; we also present how the approach can be adapted to full papers, such as those in kaggle's CORD-19 full-text dataset challenge.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1.">Introduction</head><p>The first weeks of the COVID-19 crisis brought together several researchers from a wide range of domains, who combined their efforts in fighting the pandemic. At the same time, a significant issue in biomedical text mining was brought to the surface; while machine learning methods keep improving, boosting the performance of supervised models in the biomedical natural language processing field (biomedical NLP or BioNLP), the domain topics change rapidly and so do the related textual resources (scientific publications, reports, clinical trials). Thus, while gold standard, annotated datasets provide a solid basis for training, improving and evaluating new methods, they cannot account for emerging topics, new entities and terminology.</p><p>Indeed, navigating existing and upcoming literature, on a variety of COVID-related topics was identified as a critical task early on. The CORD-19 dataset and the kaggle CORD-19 challenge, reflected this need and indicated the path to addressing it. The CORD-19 dataset is an ongoing initiative (further described in Section 4.2) to collect resources that could be informative and helpful in coronavirus-related research. The kaggle challenge(s) built upon the early versions of the dataset, and invited research teams to submit systems that would address a set of key-questions spanning across domains and ranging from very information specific ones ('What do we know about vaccines and therapeutics?') to rather generic ones ('What has been published about information sharing and inter-sectoral collaboration?'). At the time no existing resources could account for COVID-specific annotations in text, calling for either unsupervised approaches or models trained on other domains. Deep neural architectures such as BERT-based models have shown great potential in information retrieval (IR) and question-answering (QA), rendering them strong vanilla models. 
Since COVID-19 related concepts were already incorporated in large knowledge bases such as SNOMED-CT, MeSH and UMLS, we wanted to explore the potential of using such knowledge sources in a post-processing manner in order to enhance such pre-trained models.</p><p>Since this is a preliminary study, we focused on different ways to enhance BERT-based embeddings with knowledge extracted from SNOMED-CT. BERT (Bidirectional Encoder Representations from Transformers) is a family of high performance pre-trained language models which produce state-of-the-art results in a wide variety of NLP tasks <ref type="bibr" target="#b0">[1]</ref>. BERT's key technical innovation is applying the bidirectional training of Transformers <ref type="bibr" target="#b1">[2]</ref> to language modelling. By using multiple attention mechanisms (multi-head attention), the model is able to capture a broader range of relationships between words than would be possible with a single attention mechanism. Moreover, BERT stacks multiple layers of attention, each of which operates on the output of the layer that came before. Through this repeated composition of word embeddings, BERT is able to form very rich representations as it gets to the deepest layers of the model. The choice of knowledge source was also dictated by performance and wide coverage of concepts; SNOMED-CT<ref type="foot" target="#foot_0">1</ref> is the most comprehensive clinical healthcare terminology, consisting of more than 350,000 concepts and covering clinical findings, symptoms, diagnoses, procedures, body structures, organisms and other etiologies, substances, pharmaceuticals, devices and specimens among others.</p><p>For evaluation we use a subset of the OHSUMED dataset (see Section 4.1.1) to assess the improvement our method can achieve for document retrieval compared to different BERT models, used as baselines. 
Specifically, we are interested in comparing the potential for improvement across BERT-based models trained on different domains (generic, clinical, biomedical and combinations). To this purpose, given a query we retrieve the initial set of relevant documents calculating similarity of the BERT-based representations between query and document. We then re-rank the document set using SNOMED-CT to define a new concept co-occurrence based metric. We show that SNOMED-based filtering and re-ranking can consistently boost performance across different BERT baseline models in the IR-OHSUMED task. We show that the performance improvement is consistent across models but higher for generic domain models. Additionally, we show that we can obtain results that compare against and even outperform other semantic enhancement approaches. We provide a detailed analysis of the results and discuss how this preliminary study can set the basis for the development of further unsupervised methods, incorporating semantic knowledge in pre-trained embeddings using semantic knowledge graphs. Additionally, we demonstrate how this paradigm can be applied to answer the kaggle CORD-19 challenge questions, and present our modification to maintain robustness on large texts via summarisation.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.">Related Work</head><p>While there is a range of work that inspired and relates to the work presented in this paper, the main line of research concerns the use of external knowledge sources (ontologies, knowledge graphs or knowledge-bases) in order to semantically enhance a natural language processing (NLP) model, in a pre-processing, feature-extraction, joint learning or a post-processing fashion. We position our work in the post-processing approaches based on that classification, but we present below an overview of the core approaches for each category, with a focus on the biomedical/clinical domain.</p><p>Early on, Zhang et al. <ref type="bibr" target="#b2">[3]</ref> proposed a method for semantic relatedness (SR) calculation between terms, showing how we can combine information from Wikipedia and WordNet in an enhanced graph that can then be traversed to obtain a relatedness score. They then showed that the SR extracted from these graphs can improve performance in named entity disambiguation tasks. More recent work has focused on the potential of using an external knowledge source (ontologies, graphs or knowledge-bases) to identify key concepts in text and then link textual information from different documents <ref type="bibr" target="#b3">[4]</ref>.</p><p>In many downstream tasks, and especially textual classification, significant performance boosts can be obtained by using external knowledge sources to complement the textual representations and provide more informative features <ref type="bibr" target="#b4">[5]</ref>. In this approach the extracted features are used with an SVM to obtain the classification output. While such feature engineering was the standard method to infuse external knowledge to supervised models in traditional ML, these are static features that are integrated in a uniform manner for all instances. 
Deep neural networks (DNN) can better address this limitation; there have been recent attempts to directly exploit external knowledge sources during the training of DNN models, either using "knowledge focused" attention mechanisms that use the external knowledge to obtain better representations of concepts in text <ref type="bibr" target="#b5">[6,</ref><ref type="bibr" target="#b6">7]</ref>, or by retrofitting information from the knowledge graph to pre-trained language models <ref type="bibr" target="#b7">[8]</ref> in a post-processing fashion.</p><p>Focusing on the IR task, several publications use external ontologies and knowledge sources such as MeSH in order to improve IR performance via semantic query expansion <ref type="bibr" target="#b8">[9,</ref><ref type="bibr" target="#b9">10,</ref><ref type="bibr" target="#b10">11,</ref><ref type="bibr" target="#b11">12]</ref>. Agosti et al. <ref type="bibr" target="#b12">[13]</ref> consider the relation between text and queries and aim to reduce the semantic gap between queries and documents, by incorporating polysemy and synonymy information during the training of neural networks.</p><p>Another strand of work related to our paper concerns the unsupervised or semi-supervised IR in the biomedical domain, as well as IR approaches on the main dataset we are experimenting with, namely OHSUMED. More specifically, Liu et al. <ref type="bibr" target="#b13">[14]</ref> use UMLS to identify word relations and use this information to retrofit pre-trained word embeddings, enforcing the representations of related words to be closer together. Rais et al. compare different strategies of enriching document representations, using concepts from external knowledge sources in combination with WSD approaches <ref type="bibr" target="#b14">[15]</ref>. 
They specifically employ UMLS for the concept extraction (using a similar MetaMap enrichment to the one we describe in Section 3.4.1) and show that the replacement of terms with their respective concepts can boost IR performance on OHSUMED when used in combination with WSD approaches. We show in this work that when used in combination with pretrained DNN models (which are more robust in terms of contextualisation of terms) the conceptualisation of terms can boost performance even without the use of WSD approaches and outperform the aforementioned approach. Oh et al. <ref type="bibr" target="#b15">[16]</ref> propose CBEEM, which exploits external dataset collections in building a feedback model to improve relevance ranking for biomedical IR. Note that instead of using a hierarchical resource in this case, Oh et al. use large external document collections to cluster documents and thus better contextualise query relevance by incorporating an additional term in the traditional feedback model related to the collection relevance estimation.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.">Research Methods</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.1.">Overall system architecture</head><p>Our system assumes a large document resource that is checked against a specific user query. We use the process described in Figure <ref type="figure" target="#fig_0">1</ref> to obtain relevant documents that could answer the user's query. Specifically, we apply the following:</p><p>1. Document formatting: Summarisation of full-text documents to obtain a representative set of sentences.</p><p>2. Semantic text enrichment based on SNOMED-CT: Identifying and aligning concepts mentioned in documents and/or queries with their respective descriptions and neighbour nodes in the knowledge-base.</p><p>3. Vectorised text representations: We use vectorised text representations to compare documents and queries. We employ BERT models for this purpose (see Section 3.3).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2.">Document formatting</head><p>We distinguish two main categories of papers when searching the scientific literature, for which we apply different pre-processing before obtaining the document vectors.</p><p>1. Abstract only: Refers to articles for which only the title and abstract (and perhaps some metadata) are available to be processed, typically due to licensing. For these papers the document to be vectorised is the concatenation of the title and abstract.</p><p>2. Full-text documents: Typically open publications where the title, the abstract, the main body and the full metadata of the paper are available to any reader and/or text mining system. If the abstract is invalid (e.g., &lt;= 3 sentences), using the full text representation can negatively influence the performance of similarity ranking against the query, due to the length discrepancy. To account for such cases (frequent in the early version of the CORD-19 dataset), we opt for an extractive summarisation method, described below. The method was applied on the CORD-19 dataset only, since OHSUMED consists exclusively of abstracts.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2.1.">Extractive summarisation for scientific publications</head><p>To reduce the gap between the abstract only and full-text documents, and be able to apply the same IR models, we opted to extract only the most representative sentences for documents without a clearly specified abstract; in other words, we used single-document extractive summarisation for scientific documents. We thus fine-tune BERTSUM <ref type="bibr" target="#b16">[17]</ref>, which has been demonstrated to achieve high performance in scientific summarisation tasks <ref type="bibr" target="#b17">[18,</ref><ref type="bibr" target="#b18">19,</ref><ref type="bibr" target="#b19">20]</ref>.</p><p>The BERTSUM model provided by the authors was initially trained on summaries for news articles, whose document characteristics differ significantly compared to those of scientific articles. One of the main underlying differences concerns the language and vocabulary used, as well as the sentence structure (longer, more complicated sentences). Additionally, the summary size in the newswire domain is significantly smaller compared to the typical scientific summary. We thus construct a new summarisation dataset based on the CORD-19 dataset and fine-tune BERTSUM to this specific task, to obtain a model that can better distinguish the important sentences in a long scientific article.</p><p>The training dataset was constructed exclusively from papers which have valid and clearly distinguished abstract and main body text as they are defined above. Sentence splitting was applied on both the abstract and the body text of the papers using StanfordNLP <ref type="bibr" target="#b20">[21]</ref>. Additional denoising was applied to remove highly frequent abbreviations. 
After the tokenisation and preprocessing of the texts, each paper consists of 2 parts: $abstract = [asent_1, asent_2, \ldots, asent_m]$ and $body = [bsent_1, bsent_2, \ldots, bsent_m]$, where $asent_i$ and $bsent_i$ are the $i^{th}$ sentences of the abstract and body text respectively.</p><p>The training dataset was constructed considering that the abstract contains the most important information of the paper (inductive bias). We thus score the $body$ sentences against the $abstract$ sentences using ROUGE-L score to select the $n$ highest-scoring sentences. For the specific dataset and based on the distribution of the abstract sentences we selected $n = 3$, since at this value there appeared to be a golden ratio between redundancy and noise. We assigned label 1 to sentences selected in the oracle summary and 0 otherwise, and fine-tuned the initial BERTSUM model on this dataset.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.3.">Vectorised embedding representations for IR</head><p>To retrieve documents relevant to a query, we employ bert-as-a-service <ref type="bibr" target="#b21">[22]</ref> and calculate the BERT representation of the user query and each candidate document. We then rank the documents' relevance for each query by calculating the distance between the two vector representations. We use cosine similarity for the distance estimation, so if we assume that $d_i$ is the document vector and $q_j$ is the query vector then the relevance score is calculated as:</p><formula xml:id="formula_0">$relevance(d_i, q_j) = \frac{d_i \cdot q_j}{\|d_i\| \cdot \|q_j\|}$<label>(1)</label></formula></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.3.1.">BERT-based embeddings</head><p>We opted for BERT embeddings trained on different domains, with demonstrated high performance in downstream classification tasks. Specifically we chose the following: (1) the original BERT model <ref type="bibr" target="#b0">[1]</ref>, trained on Wikipedia and BookCorpus, hence fine-tuned for the generic domain, (2) BioBERT <ref type="bibr" target="#b22">[23]</ref> trained on PubMed papers, hence fine-tuned on the biomedical domain and (3) BlueBERT <ref type="bibr" target="#b23">[24]</ref>, trained in a multi-task setting on a combination of biomedical and clinical data, hence still fine-tuned in the biomedical domain, but accounting for a wider scope of text. We expect the latter to have broader coverage and thus better performance on the queries, and generally we expect BioBERT and BlueBERT to outperform BERT due to the expected overlap between the documents they were trained on and the scientific papers in the evaluation datasets. The base model was used across cases (12-layer, 768-hidden, 12-head).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.4.">Semantic text enrichment</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.4.1.">External knowledge source: SNOMED-CT</head><p>SNOMED-CT <ref type="bibr" target="#b24">[25]</ref> is a collection of medical terms, and their synonyms, descriptions, etc., with an underlying description logic formal model. It contains clinical knowledge that can complement textual information, and help us process new documents. Its core components include concept hierarchy, descriptions, relations and reference sets, of which we focus on:</p><p>• Concept hierarchy: encoded clinical terms, organised in hierarchies. The hierarchical structure is particularly useful in the case of searching in a corpus of texts with multilevel information (high-level, general concepts vs more technical/specific information). One can move down the hierarchy in order to fetch specific results, or move up the hierarchy in the case of queries that do not match exactly any of the documents' terms.</p><p>• Descriptions: textual descriptions of concepts. These can be either synonyms or periphrastic definitions of the corresponding terms. Given that a term can appear with multiple surface forms, by enhancing a concept with its associated description (and the terms mentioned in it) we can map additional relevant text spans to a given concept.</p><p>For the purposes of our work, we make use of the concept hierarchy, where we collect the parents of medical concepts in the SNOMED-CT hierarchy and the descriptions, where we incorporate alternative, equivalent ways of describing the same medical concept. One of the challenges in terms of transferring these rich SNOMED-CT concepts to raw text is to be able to identify the relevant terms in text. For this purpose, we employ the MetaMap tool <ref type="bibr" target="#b25">[26]</ref>, which maps biomedical text to the UMLS metathesaurus. Upon identifying the text spans that correspond to UMLS concepts, we use a mapping between UMLS and SNOMED concepts in order to incorporate the SNOMED knowledge.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.4.2.">Filtering</head><p>For this preliminary study we focus on the existence of SNOMED related concepts in the text as a re-ranking and filtering method for the vector based similarity ranking. More specifically, assume a given user query $q_j$ and an ordered list of documents $D$, ranked by the cosine similarity scoring described in Eq. 1. Upon obtaining the list, we identify a set of text spans $CD_i$ in each document $d_i \in D$ that correspond to SNOMED concepts, using the process described in Section 3.4.1. Similarly, we identify a set of SNOMED concepts $CQ_j$ corresponding to the query $q_j$. For each identified SNOMED concept $c$ such that $c \in CD_i$ and/or $c \in CQ_j$, we navigate the SNOMED-CT hierarchy to identify the hyper-concepts (parents) $c_{hyp}$ as well as the terms contained in the description of concept $c$, $c_{desc}$, and then expand the $CD_i$ and $CQ_j$ sets with the respective $c_{hyp}$ and $c_{desc}$ concepts. We then calculate the concept intersection between the two sets as specified in Eq. 2 and attribute each document with a renewed score, calculated as:</p><formula xml:id="formula_1">$concept\_filter(d_i, q_j) = \|CD_i \cap CQ_j\|$<label>(2)</label></formula><formula xml:id="formula_2">$document\_score(d_i, q_j) = relevance(d_i, q_j) \cdot concept\_filter(d_i, q_j)$<label>(3)</label></formula><p>We then investigate the use of different thresholds on the value of the $concept\_filter$ to filter the initial set of documents $D$. For the experiments presented in the following sections, when the $concept\_filter$ falls below the threshold its value is set to 1 in Eq. 3. We show that this simplified filtering technique can consistently boost performance on different BERT models, and we identify the optimal threshold on the OHSUMED dataset, based on the NDCG metric (Eq. 4).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.">Results and Discussion</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.">Intrinsic evaluation results on OHSUMED dataset</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.1.">OHSUMED dataset</head><p>The OHSUMED test collection is a subset of the MEDLINE database. We consider a commonly used subset <ref type="bibr" target="#b26">[27]</ref>, consisting of the first 20,000 documents from the 50,216 medical abstracts. In Table <ref type="table" target="#tab_0">1</ref> we present a more detailed breakdown of query-document sets. As we discuss in the results, the low number of related documents for some queries has a negative impact on performance, which we aim to address in future work.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.2.">Evaluation</head><p>We base our evaluation on the normalised discounted cumulative gain (NDCG) metric, used to assess the model's ranking of relevant papers pertaining to a set of queries $Q$. It is defined for position $p \in \{0, 1, \ldots, N\}$ as described in Eq. 4:</p><formula xml:id="formula_3">$NDCG = \frac{1}{Q} \sum_{q=1}^{Q} \frac{DCG_p(q)}{IDCG_p(q)}, \quad \text{for } DCG_p(q) = rel_1(q) + \sum_{i=2}^{N} \frac{rel_i(q)}{\log_2(i)}$<label>(4)</label></formula><p>where $IDCG$ denotes the ideal and highest possible $DCG$ and $rel_i(q)$ refers to the relevance of the $i^{th}$ result ranked according to query $q$.</p><p>While NDCG is our main metric, we also discuss the results for Precision@N and Recall@N, to provide better insights on the performance of the proposed methods (see Section 4.1.3). The Precision and Recall functions are presented below, assuming an ordered set of relevant documents $RelD = \{rel_1, \ldots, rel_k\}$ and an ordered set of retrieved documents $RetD = \{ret_1, \ldots, ret_l\}$:</p><formula xml:id="formula_4">$Precision@N = \frac{|RelD \cap RetD_1^N|}{N}, \quad Recall@N = \frac{|RelD \cap RetD_1^N|}{|RelD|}$<label>(5)</label></formula></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.3.">Results on OHSUMED</head><p>In the experiments, we first identified the optimal threshold value for filtering, which is consistently 𝑡ℎ𝑟𝑒𝑠ℎ𝑜𝑙𝑑 = 5 for all models. We can then see that, when filtering is applied with this threshold, NDCG performance improves consistently across models. As explained in Section 4.1.1, the OHSUMED-91 dataset contains only a subset of documents of the original dataset, hence some queries contain only a single relevant document in the ground truth set, rendering the IR problem more demanding. However, as we show in Figure <ref type="figure" target="#fig_1">2</ref>, once we constrain the dataset to contain only queries with a higher number of relevant documents (𝐺𝑇𝐷), performance improves further for all models. Thus we reach NDCG@100 performance approximating to 0.25 which is comparable with other unsupervised IR methods on the same dataset <ref type="bibr" target="#b15">[16]</ref>. BioBERT and BlueBERT models outperform BERT in all versions, with BlueBERT reaching the best performance for most dataset subsets, confirming our initial hypothesis. If we cast a closer glance at the increase over the plain model version compared to the SNOMED-CT filtering, we can see that we get a greater performance increase for the less domain specific models (see Figure <ref type="figure" target="#fig_2">3a</ref>). Moreover, we can see that this improvement boost is consistent for all filtering thresholds (Figure <ref type="figure" target="#fig_2">3b</ref>) and dataset subsets (Figure <ref type="figure" target="#fig_3">4</ref>). This observation underlines the potential of semantic enhancement across domains. 
More importantly, it demonstrates that a variation of semantic filtering based on a knowledge graph can successfully be applied to adapt out-of-domain models to a new domain, thus motivating our future research into transfer learning via knowledge graphs.</p><p>Regarding the impact of the filtering threshold, we can see that the initial performance of all BERT models improves consistently with the application of filtering with increasing thresholds, until we reach the optimal threshold value. The improvement holds across different dataset subsets (GTD) even if we apply the more relaxed filtering option (𝑡ℎ𝑟𝑒𝑠ℎ𝑜𝑙𝑑 = 1) which demands that there is at least one concept co-occurrence between the document and the query, for the document to be considered valid. Moreover, performance increases consistently until 𝑡ℎ𝑟𝑒𝑠ℎ𝑜𝑙𝑑 = 5 for all models, with a sole exception to the trend for BlueBERT at 𝑡ℎ𝑟𝑒𝑠ℎ𝑜𝑙𝑑 = 2. Performance drops for larger thresholds but we have to note that the optimal threshold is related to the length of the queries and documents and needs to be studied separately for different dataset setups. Additionally, the low number of related documents per query significantly impacts the Recall@N values shown in Figure <ref type="figure" target="#fig_5">6</ref> which start from low values for all model and threshold variations. However, we can see that still, comparing the baseline BERT models when there is no concept-based filtering and the case where we use the previously identified optimal threshold for filtering (𝑡ℎ𝑟𝑒𝑠ℎ𝑜𝑙𝑑 = 5), we get a significant improvement for recall, with emphasis on 𝑁 &lt; 200 (see Figure <ref type="figure" target="#fig_5">6a</ref>). Similar observations were seen in precision, presented in Table <ref type="table" target="#tab_1">2</ref>, where we should note that for the BlueBERT model we obtain better performance than the one reported in <ref type="bibr" target="#b14">[15]</ref> for P@5 and comparable for P@10. 
We expect that we would see considerably higher values for datasets with a larger number of related documents per query.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.2.">Exploratory results on COVID-19 queries</head><p>To demonstrate the direct applicability of the described methodology on other datasets without further fine-tuning, we also experiment with an early version of the CORD-19 dataset, as used for the respective kaggle challenge<ref type="foot" target="#foot_1">2</ref> . The dataset is a snapshot of the 10th of April 2020, and contains 51045 documents accompanied with their full text and metadata information.</p><p>Where available, the abstract is provided as a separate metadata element, however approx. 27K articles had invalid abstracts, based on the criterion described in Section 3.2. Documents are selected based on their expected relevance to the COVID-19 pandemic, covering a wide range of biomedical, clinical and socioeconomic aspects, spanning the period from 1985 to 2020. Since our initial motivation was the kaggle CORD-19 challenge and associated queries, we present below representative examples of query outputs showing also the difference between the highlighted queries. We show the process and output for identifying the relevant passages for one of the main kaggle questions in Figure <ref type="figure" target="#fig_6">7</ref>. We added the coronavirus concept to the query when it was not explicitly stated to further adapt to the domain. We show the identified concepts and sample SNOMED-CT mappings (Figure <ref type="figure" target="#fig_6">7</ref>), as well as the start of the produced BERTSUM summaries. We provide an interactive query UI on kaggle<ref type="foot" target="#foot_2">3</ref> .</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.">Conclusions and Future Work</head><p>Motivated by the rapid evolution of COVID-related publications and query topics, we explored options for improving unsupervised IR on emerging queries in the biomedical domain. This is preliminary work, exploring the use of SNOMED-CT to further filter the relevant documents, ranked by BERT model variations. We showed that even with a simple co-occurrence filtering method, we can significantly improve the initial results and achieve comparable performance to other unsupervised work on the same dataset. Specifically, we show that for multi-document queries and using the BlueBERT model as a basis, the filtering method reaches 0.23 for the NDCG@100 metric. Additionally, we show that we can get meaningful gains across different metrics even for models trained on generic data. Indeed, BERT-based results filtered using SNOMED-CT surpass the performance of unfiltered BioBERT results.</p><p>The aforementioned outcomes provide a solid basis for further experimentation into better exploitation of knowledge graphs and concept hierarchies as a means of boosting IR on new topics in an unsupervised manner. We intend to further establish our findings by applying the described approach to the full OHSUMED corpus, as well as other biomedical datasets (CLEF, TREC CDS, TREC CORD-19, etc.). Moreover, we intend to experiment with neural network architectures other than BERT, such as XLNET and ELECTRA <ref type="bibr" target="#b27">[28]</ref>. Upon completion, our future work will be focused on two main tracks. Firstly, we will explore in more detail the potential of the SNOMED-CT hierarchy. More specifically, in this work we only incorporated the description and first parent node of each identified concept, without further traversing the concept graph. 
We hypothesise that the position of a concept in the hierarchy as well as the neighbourhood size and type we consider for each concept would impact the size of intersecting concepts between queries and documents (hence impacting the threshold value), and would potentially allow us to identify further connections between missed documents. Additionally, the incorporation of different types of neighbours for each concept would allow us to come up with more elaborate re-ranking formulas, taking into account multiple variables to produce the final document score. The second line of future work concerns the use of external knowledge sources such as SNOMED-CT not in order to obtain a re-ranking functionality, but as a means to achieve transfer learning and distant supervision to better adapt deep neural networks and pretrained embeddings to new domains.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head>Figure 1 :</head><label>1</label><figDesc>Figure 1: Summary of system architecture</figDesc><graphic coords="4,89.29,84.19,416.70,134.33" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_1"><head>Figure 2 :</head><label>2</label><figDesc>Figure 2: Performance improvement for NDCG@100 over initial BERT models with the addition of SNOMED-based filtering. We can see the improvement (light blue) for the full dataset (left) and different subsets of the dataset (right) split based on the minimum number of ground truth documents for each query (GTD).</figDesc><graphic coords="9,130.96,84.19,333.36,199.42" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_2"><head>Figure 3 :</head><label>3</label><figDesc>Figure 3: Analysis of (%) performance improvement for different models for the NDCG@100 metric</figDesc><graphic coords="9,289.30,452.58,216.68,123.74" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_3"><head>Figure 4 :</head><label>4</label><figDesc>Figure 4: Percentage increase for NDCG@100 varying by the min number of ground-truth documents per query (GTD)</figDesc><graphic coords="10,130.96,75.13,333.36,232.53" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_4"><head>Figure 5 :</head><label>5</label><figDesc>Figure 5: Performance for NDCG@100 for dataset subsets varying by the minimum number of ground truth documents per query (GTD).</figDesc><graphic coords="11,130.96,177.59,333.36,201.16" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_5"><head>Figure 6 :</head><label>6</label><figDesc>Figure 6: Recall comparison between different models for the optimal threshold (5) and the baseline, no concept-filtering version</figDesc><graphic coords="11,89.29,414.12,204.18,123.35" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_6"><head>Figure 7 :</head><label>7</label><figDesc>Figure 7: Processing sample and output for Covid-19 query related to vaccines</figDesc><graphic coords="12,89.29,84.19,416.70,251.12" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1</head><label>1</label><figDesc>Query -document set size distribution for the OHSUMED-91 dataset It comprises 13,929 abstracts focusing on cardiovascular disease, classified under a set of 23 Medical Subject Headings (MeSH) categories. We henceforth refer to this subset of the dataset as OHSUMED-91 dataset. The TREC-09 IR task on the OHSUMED dataset contains a total of 108 queries, each query associated with a set of relevant documents. Out of the 108 OHSUMED queries, 56 had at least one document belonging in the OHSUMED-91 dataset. Hence we use these 56 queries for the evaluation. In Table</figDesc><table><row><cell>max #documents in OHSUMED-91</cell><cell>1</cell><cell>2</cell><cell>3</cell><cell>4</cell><cell>5</cell><cell>6</cell><cell>&gt;6</cell><cell>Total</cell></row><row><cell>queries</cell><cell>11</cell><cell>8</cell><cell>10</cell><cell>5</cell><cell>6</cell><cell>5</cell><cell>11</cell><cell>56</cell></row><row><cell>percentage (%)</cell><cell>20</cell><cell>15</cell><cell>17</cell><cell>9</cell><cell>10</cell><cell>8</cell><cell>20</cell><cell>100</cell></row><row><cell>published in the year 1991.</cell><cell></cell><cell></cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_1"><head>Table 2</head><label>2</label><figDesc>Precision comparison for different models between no threshold and the optimal threshold (5) version</figDesc><table><row><cell></cell><cell cols="2">Precision@5</cell><cell cols="2">Precision@10</cell></row><row><cell></cell><cell cols="4">no threshold threshold=5 no threshold threshold=5</cell></row><row><cell>BERT</cell><cell>0.036</cell><cell>0.063</cell><cell>0.022</cell><cell>0.054</cell></row><row><cell>BioBERT</cell><cell>0.054</cell><cell>0.100</cell><cell>0.036</cell><cell>0.090</cell></row><row><cell>BlueBERT</cell><cell>0.127</cell><cell>0.163</cell><cell>0.090</cell><cell>0.113</cell></row></table></figure>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="1" xml:id="foot_0">https://www.snomed.org/snomed-ct/five-step-briefing</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="2" xml:id="foot_1">https://www.kaggle.com/allen-institute-for-ai/CORD-19-research-challenge</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="3" xml:id="foot_2">https://www.kaggle.com/georgefila/semantic-enrichment-of-pre-trained-embeddings</note>
		</body>
		<back>
			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<monogr>
		<author>
			<persName><forename type="first">J</forename><surname>Devlin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M.-W</forename><surname>Chang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Lee</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Toutanova</surname></persName>
		</author>
		<idno type="arXiv">arXiv:1810.04805</idno>
		<title level="m">Bert: Pre-training of deep bidirectional transformers for language understanding</title>
				<imprint>
			<date type="published" when="2018">2018</date>
		</imprint>
	</monogr>
	<note type="report_type">arXiv preprint</note>
</biblStruct>

<biblStruct xml:id="b1">
	<monogr>
		<title level="m" type="main">Attention is all you need</title>
		<author>
			<persName><forename type="first">A</forename><surname>Vaswani</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Shazeer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Parmar</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Uszkoreit</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Jones</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">N</forename><surname>Gomez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Kaiser</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Polosukhin</surname></persName>
		</author>
		<idno type="arXiv">arXiv:1706.03762</idno>
		<imprint>
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">Harnessing different knowledge sources to measure semantic relatedness under a uniform model</title>
		<author>
			<persName><forename type="first">Z</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">L</forename><surname>Gentile</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Ciravegna</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2011 Conference on Empirical Methods in Natural Language Processing</title>
				<meeting>the 2011 Conference on Empirical Methods in Natural Language Processing</meeting>
		<imprint>
			<date type="published" when="2011">2011</date>
			<biblScope unit="page" from="991" to="1002" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">Semantic enrichment and exploration of open dataset tags</title>
		<author>
			<persName><forename type="first">B</forename><forename type="middle">P C</forename><surname>De Castro</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><forename type="middle">F</forename><surname>Rodrigues</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><forename type="middle">R</forename><surname>Lopes</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">L M</forename><surname>Campos</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 25th Brazillian Symposium on Multimedia and the Web</title>
				<meeting>the 25th Brazillian Symposium on Multimedia and the Web</meeting>
		<imprint>
			<date type="published" when="2019">2019</date>
			<biblScope unit="page" from="417" to="424" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">Classification of short-texts by utilizing an external knowledge source</title>
		<author>
			<persName><forename type="first">M</forename><surname>Calisan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">O</forename><surname>Sakar</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Science and Engineering</title>
		<imprint>
			<biblScope unit="volume">19</biblScope>
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">Bio-semantic relation extraction with attention-based external knowledge reinforcement</title>
		<author>
			<persName><forename type="first">Z</forename><surname>Li</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Lian</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Ma</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Li</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">BMC Bioinformatics</title>
		<imprint>
			<biblScope unit="volume">21</biblScope>
			<biblScope unit="page" from="1" to="18" />
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">Deep short text classification with knowledge powered attention</title>
		<author>
			<persName><forename type="first">J</forename><surname>Chen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Hu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Liu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Xiao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Jiang</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the AAAI Conference on Artificial Intelligence</title>
				<meeting>the AAAI Conference on Artificial Intelligence</meeting>
		<imprint>
			<date type="published" when="2019">2019</date>
			<biblScope unit="volume">33</biblScope>
			<biblScope unit="page" from="6252" to="6259" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">Enriching contextualized language model from knowledge graph for biomedical information extraction</title>
		<author>
			<persName><forename type="first">H</forename><surname>Fei</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Ren</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Ji</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Liang</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Briefings in Bioinformatics</title>
		<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">Multimodal medical information retrieval with unsupervised rank fusion</title>
		<author>
			<persName><forename type="first">A</forename><surname>Mourão</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Martins</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Magalhaes</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Computerized Medical Imaging and Graphics</title>
		<imprint>
			<biblScope unit="volume">39</biblScope>
			<biblScope unit="page" from="35" to="45" />
			<date type="published" when="2015">2015</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">Integration of association rules and ontologies for semantic query expansion</title>
		<author>
			<persName><forename type="first">M</forename><surname>Song</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I.-Y</forename><surname>Song</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Hu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">B</forename><surname>Allen</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Data &amp; Knowledge Engineering</title>
		<imprint>
			<biblScope unit="volume">63</biblScope>
			<biblScope unit="page" from="63" to="75" />
			<date type="published" when="2007">2007</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">A novel fuzzy-pso term weighting automatic query expansion approach using combined semantic filtering</title>
		<author>
			<persName><forename type="first">Y</forename><surname>Gupta</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Saini</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Knowledge-Based Systems</title>
		<imprint>
			<biblScope unit="volume">136</biblScope>
			<biblScope unit="page" from="97" to="120" />
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title level="a" type="main">A taxonomy and survey of semantic approaches for query expansion</title>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">A</forename><surname>Raza</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Mokhtar</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Ahmad</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Pasha</surname></persName>
		</author>
		<author>
			<persName><forename type="first">U</forename><surname>Pasha</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">IEEE Access</title>
		<imprint>
			<biblScope unit="volume">7</biblScope>
			<biblScope unit="page" from="17823" to="17833" />
			<date type="published" when="2019">2019</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<analytic>
		<title level="a" type="main">Learning unsupervised knowledge-enhanced representations to reduce the semantic gap in information retrieval</title>
		<author>
			<persName><forename type="first">M</forename><surname>Agosti</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Marchesin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Silvello</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">ACM Transactions on Information Systems (TOIS)</title>
		<imprint>
			<biblScope unit="volume">38</biblScope>
			<biblScope unit="page" from="1" to="48" />
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b13">
	<analytic>
		<title level="a" type="main">Constraining word embeddings by prior knowledge – application to medical information retrieval</title>
		<author>
			<persName><forename type="first">X</forename><surname>Liu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J.-Y</forename><surname>Nie</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Sordoni</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Asia information retrieval symposium</title>
				<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2016">2016</date>
			<biblScope unit="page" from="155" to="167" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b14">
	<analytic>
		<title level="a" type="main">An empirical study of word sense disambiguation for biomedical information retrieval system</title>
		<author>
			<persName><forename type="first">M</forename><surname>Rais</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Lachkar</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">International Conference on Bioinformatics and Biomedical Engineering</title>
				<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2018">2018</date>
			<biblScope unit="page" from="314" to="326" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b15">
	<analytic>
		<title level="a" type="main">Cluster-based query expansion using external collections in medical information retrieval</title>
		<author>
			<persName><forename type="first">H.-S</forename><surname>Oh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Jung</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of biomedical informatics</title>
		<imprint>
			<biblScope unit="volume">58</biblScope>
			<biblScope unit="page" from="70" to="79" />
			<date type="published" when="2015">2015</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b16">
	<monogr>
		<author>
			<persName><forename type="first">Y</forename><surname>Liu</surname></persName>
		</author>
		<idno type="arXiv">arXiv:1903.10318</idno>
		<title level="m">Fine-tune bert for extractive summarization</title>
				<imprint>
			<date type="published" when="2019">2019</date>
		</imprint>
	</monogr>
	<note type="report_type">arXiv preprint</note>
</biblStruct>

<biblStruct xml:id="b17">
	<analytic>
		<title level="a" type="main">Enhancing pre-trained language representation for multi-task learning of scientific summarization</title>
		<author>
			<persName><forename type="first">R</forename><surname>Jia</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Cao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Fang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Li</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Liu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Yin</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">International Joint Conference on Neural Networks (IJCNN), IEEE</title>
				<imprint>
			<date type="published" when="2020">2020</date>
			<biblScope unit="page" from="1" to="8" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b18">
	<analytic>
		<title level="a" type="main">Cited text span identification for scientific summarisation using pre-trained encoders</title>
		<author>
			<persName><forename type="first">C</forename><surname>Zerva</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M.-Q</forename><surname>Nghiem</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><forename type="middle">T</forename><surname>Nguyen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Ananiadou</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Scientometrics</title>
		<imprint>
			<biblScope unit="page" from="1" to="29" />
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b19">
	<monogr>
		<author>
			<persName><forename type="first">A</forename><surname>Nikiforovskaya</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Kapralov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Vlasova</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Shpynov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Shpilman</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2010.04147</idno>
		<title level="m">Automatic generation of reviews of scientific papers</title>
				<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
	<note type="report_type">arXiv preprint</note>
</biblStruct>

<biblStruct xml:id="b20">
	<analytic>
		<title level="a" type="main">The Stanford CoreNLP natural language processing toolkit</title>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">D</forename><surname>Manning</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Surdeanu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Bauer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Finkel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">J</forename><surname>Bethard</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Mcclosky</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Association for Computational Linguistics (ACL) System Demonstrations</title>
				<imprint>
			<date type="published" when="2014">2014</date>
			<biblScope unit="page" from="55" to="60" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b21">
	<monogr>
		<author>
			<persName><forename type="first">H</forename><surname>Xiao</surname></persName>
		</author>
		<ptr target="https://github.com/hanxiao/bert-as-service" />
		<title level="m">bert-as-service</title>
				<imprint>
			<date type="published" when="2018">2018</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b22">
	<analytic>
		<title level="a" type="main">Biobert: a pre-trained biomedical language representation model for biomedical text mining</title>
		<author>
			<persName><forename type="first">J</forename><surname>Lee</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><surname>Yoon</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Kim</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Kim</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Kim</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">H</forename><surname>So</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Kang</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Bioinformatics</title>
		<imprint>
			<biblScope unit="volume">36</biblScope>
			<biblScope unit="page" from="1234" to="1240" />
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b23">
	<monogr>
		<author>
			<persName><forename type="first">Y</forename><surname>Peng</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Q</forename><surname>Chen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Lu</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2005.02799</idno>
		<title level="m">An empirical study of multi-task learning on bert for biomedical text mining</title>
				<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
	<note type="report_type">arXiv preprint</note>
</biblStruct>

<biblStruct xml:id="b24">
	<analytic>
		<title level="a" type="main">Snomed-ct: The advanced terminology and coding system for ehealth</title>
		<author>
			<persName><forename type="first">K</forename><surname>Donnelly</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Studies in health technology and informatics</title>
		<imprint>
			<biblScope unit="volume">121</biblScope>
			<biblScope unit="page">279</biblScope>
			<date type="published" when="2006">2006</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b25">
	<analytic>
		<title level="a" type="main">An overview of metamap: historical perspective and recent advances</title>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">R</forename><surname>Aronson</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F.-M</forename><surname>Lang</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of the American Medical Informatics Association</title>
		<imprint>
			<biblScope unit="volume">17</biblScope>
			<biblScope unit="page" from="229" to="236" />
			<date type="published" when="2010">2010</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b26">
	<analytic>
		<title level="a" type="main">Incorporating knowledge graph embeddings into topic modeling</title>
		<author>
			<persName><forename type="first">L</forename><surname>Yao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Wei</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Jin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Q</forename><surname>Chen</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">31st AAAI conference</title>
				<imprint>
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b27">
	<monogr>
		<author>
			<persName><forename type="first">K</forename><surname>Clark</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M.-T</forename><surname>Luong</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Q</forename><forename type="middle">V</forename><surname>Le</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">D</forename><surname>Manning</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2003.10555</idno>
		<title level="m">Electra: Pre-training text encoders as discriminators rather than generators</title>
				<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
	<note type="report_type">arXiv preprint</note>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
