<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">Context aware Named Entity Recognition and Relation Extraction with Domain-specific language model</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Youngrok</forename><surname>Jang</surname></persName>
							<affiliation key="aff0">
								<orgName type="institution">LG AI Research</orgName>
								<address>
									<addrLine>30, Magokjungang 10-ro, Gangseo-gu</addrLine>
									<postCode>07796</postCode>
									<settlement>Seoul</settlement>
									<country key="KR">Korea</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Hosung</forename><surname>Song</surname></persName>
							<email>hosung.song@lgresearch.ai</email>
							<affiliation key="aff0">
								<orgName type="institution">LG AI Research</orgName>
								<address>
									<addrLine>30, Magokjungang 10-ro, Gangseo-gu</addrLine>
									<postCode>07796</postCode>
									<settlement>Seoul</settlement>
									<country key="KR">Korea</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Junho</forename><surname>Lee</surname></persName>
							<affiliation key="aff1">
								<orgName type="department">LG Display</orgName>
								<address>
									<addrLine>30, Magokjungang 10-ro, Gangseo-gu</addrLine>
									<postCode>07796</postCode>
									<settlement>Seoul</settlement>
									<country key="KR">Korea</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Gyeonghun</forename><surname>Kim</surname></persName>
							<affiliation key="aff0">
								<orgName type="institution">LG AI Research</orgName>
								<address>
									<addrLine>30, Magokjungang 10-ro, Gangseo-gu</addrLine>
									<postCode>07796</postCode>
									<settlement>Seoul</settlement>
									<country key="KR">Korea</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Yireun</forename><surname>Kim</surname></persName>
							<email>yireun.kim@lgresearch.ai</email>
							<affiliation key="aff0">
								<orgName type="institution">LG AI Research</orgName>
								<address>
									<addrLine>30, Magokjungang 10-ro, Gangseo-gu</addrLine>
									<postCode>07796</postCode>
									<settlement>Seoul</settlement>
									<country key="KR">Korea</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Stanley</forename><forename type="middle">Jungkyu</forename><surname>Choi</surname></persName>
							<email>stanleyjk.choi@lgresearch.ai</email>
							<affiliation key="aff0">
								<orgName type="institution">LG AI Research</orgName>
								<address>
									<addrLine>30, Magokjungang 10-ro, Gangseo-gu</addrLine>
									<postCode>07796</postCode>
									<settlement>Seoul</settlement>
									<country key="KR">Korea</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Honglak</forename><surname>Lee</surname></persName>
							<email>honglak@lgresearch.ai</email>
							<affiliation key="aff0">
								<orgName type="institution">LG AI Research</orgName>
								<address>
									<addrLine>30, Magokjungang 10-ro, Gangseo-gu</addrLine>
									<postCode>07796</postCode>
									<settlement>Seoul</settlement>
									<country key="KR">Korea</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Kyunghoon</forename><surname>Bae</surname></persName>
							<email>k.bae@lgresearch.ai</email>
							<affiliation key="aff0">
								<orgName type="institution">LG AI Research</orgName>
								<address>
									<addrLine>30, Magokjungang 10-ro, Gangseo-gu</addrLine>
									<postCode>07796</postCode>
									<settlement>Seoul</settlement>
									<country key="KR">Korea</country>
								</address>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff2">
								<orgName type="department">Evaluation Forum</orgName>
								<address>
									<addrLine>September 5-8</addrLine>
									<postCode>2022</postCode>
									<settlement>Bologna</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">Context aware Named Entity Recognition and Relation Extraction with Domain-specific language model</title>
					</analytic>
					<monogr>
						<idno type="ISSN">1613-0073</idno>
					</monogr>
					<idno type="MD5">F02F9720F515A77B8A0BD1F70B7B524C</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2023-03-24T03:27+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>Language Model</term>
					<term>Named Entity Recognition</term>
					<term>Relation Extraction</term>
					<term>Event Extraction</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>ChEMU 2022 tasks 1a and 1b are NER (Named Entity Recognition) and EE (Event Extraction) benchmarks. EE is RE (relation extraction) between a trigger word and an entity. We develop context-aware NER and RE models based on the domain-specific language model and achieve state-of-the-art performance in ChEMU 2022: the public exact match f1 score of task 1a is 96.33, and that of task 1b is 92.82. For the domain-specific language model, we post-train the Bio-linkBert model with various corpora. We then select the best performing model from domain-specific benchmark datasets consisting of BLURB (Biomedical Language Understanding &amp; Reasoning Benchmark) and ChEMU 2020. For the NER model, we choose a sequence tagging model that outperforms the span-based model in CHEMU 2022 task 1a. For the RE model, we train the model to classify the relation types or no relation between every pair of trigger words and entities in the snippet. Furthermore, we train both models using inputs that contain multiple sentences rather than a single sentence so that the model can utilize contextual information. For the ensemble, we train the best-performing model with 10-fold cross-validation and then predict the results with soft-voting. Finally, we apply rule-based post-processing to the prediction results.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1.">Introduction</head><p>Named Entity Recognition (NER) <ref type="bibr" target="#b0">[1]</ref> and Relation Extraction (RE) <ref type="bibr" target="#b1">[2]</ref> are well-known tasks in the field of information extraction research. Previous research has focused on diverse domain datasets, such as ACE05 1 from Newswire and online forums, and SciERC <ref type="bibr" target="#b2">[3]</ref> from scientific papers. Both NER and RE models are based on either a general domain language model <ref type="bibr" target="#b3">[4]</ref> or a domain-specific language model <ref type="bibr" target="#b4">[5]</ref>, <ref type="bibr" target="#b5">[6]</ref>, <ref type="bibr" target="#b6">[7]</ref>, depending on the dataset. And most of the works employ either a pipeline approach or a joint approach. A pipeline approach is training one model to extract entities and another model to classify relations between them. A joint approach is training the model for both tasks simultaneously.</p><p>ChEMU (Cheminformatics Elsevier Melbourne University) 2022 introduces 5 tasks to extract information from the snippet of chemical patents to support the drug discovery process. Among these tasks, we focus on NER task 1a and EE (event extraction) task 1b. Task 1a aims to extract chemical entities from the snippet. Task 1b aims to extract trigger words and relations between trigger words and entities from the snippet. Task 1b includes both NER and RE tasks. The extraction of trigger words is the NER task and the relation between trigger word and entity is the RE task. We develop context-aware NER and RE models based on the domain-specific language model and achieve state-of-the-art performance in ChEMU 2022, the exact match f1 score of task 1a is 96.33, and task 1b is 92.82. 
<ref type="foot" target="#foot_0">2</ref> In this paper, we explain our contributions to improving the performance of ChEMU 2022 : <ref type="bibr" target="#b0">(1)</ref> domain-specific language model, (2) best performing NER and RE models, (3) context-aware model with input consisting of multiple sentences, (4) post-processing to the model prediction, <ref type="bibr" target="#b4">(5)</ref> cross-validation and ensemble. Finally, we experiment and analyze our contributions in section 4.</p><p>For the domain-specific language model, we post-train the Bio-linkBert <ref type="bibr" target="#b6">[7]</ref> model with various chemical corpora. We then select the best performing model from domain-specific benchmark datasets consisting of BLURB (Biomedical Language Understanding &amp; Reasoning Benchmark) <ref type="bibr" target="#b7">[8]</ref> and ChEMU 2020 <ref type="bibr" target="#b8">[9]</ref>. Among the pipeline approach and the joint approach, we choose the pipeline approach because PURE <ref type="bibr" target="#b9">[10]</ref> reports it gets higher performance than the joint approach. For the NER model, we experiment with two popular approaches, the sequence tagging approach <ref type="bibr" target="#b3">[4]</ref>, <ref type="bibr" target="#b10">[11]</ref> and the span-based approach <ref type="bibr" target="#b9">[10]</ref>, <ref type="bibr" target="#b11">[12]</ref>. And finally, we choose the sequence tagging approach that shows higher performance in ChEMU 2022 task 1a. The NER model is trained to predict both entities in task 1a and trigger words in task 1b. For the RE model, we train the model to classify the relation types or no relation between every pair of trigger words and entities in the snippet. In task 1b, the RE model predicts the relation between entities and trigger words predicted by the NER model. 
Furthermore, we train both models using inputs that contain multiple sentences rather than a single sentence so that the model can utilize contextual information. For the ensemble, we train the best-performing model with 10-fold cross-validation and then predict the results with soft-voting. Finally, we apply rule-based post-processing to the prediction results.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.">Related Work</head><p>Named Entity Recognition (NER) <ref type="bibr" target="#b0">[1]</ref> and Relation Extraction (RE) <ref type="bibr" target="#b1">[2]</ref> are well-known tasks in the field of information extraction research. These tasks have lots of applications in various domains such as news, social media, biomedical and chemical domains. There are two areas of study. The first is to improve the language model, and the second is to improve the NER and RE models based on that model.</p><p>Recently, pre-trained language models such as BERT <ref type="bibr" target="#b3">[4]</ref> and Roberta <ref type="bibr" target="#b12">[13]</ref> have improved all NLP task performance. To improve the language model for NER and RE tasks, LUKE <ref type="bibr" target="#b13">[14]</ref> and KeBioLM <ref type="bibr" target="#b14">[15]</ref> use additional information such as named entity labels to pre-train the model. However, it is not easy to prepare a lot of labeled data in the chemical domain. Other approaches such as BioBert <ref type="bibr" target="#b4">[5]</ref>, PubmedBert <ref type="bibr" target="#b5">[6]</ref> and Bio-linkBert <ref type="bibr" target="#b6">[7]</ref> are pre-trained using domain-specific corpora. Similarly, we train a domain-specific language model with a chemical domain corpus and ultimately improve the performance of the ChEMU 2022 task.</p><p>For NER tasks, there are the sequence tagging approach such as BERT <ref type="bibr" target="#b3">[4]</ref> and the span-based approach such as PURE <ref type="bibr" target="#b9">[10]</ref> and PL-Marker <ref type="bibr" target="#b11">[12]</ref>. For the sequence tagging approach, BERT uses a BIO scheme to encode each token into a tag and trains a model to classify each token into its tag. 
For the span-based approach, PURE and PL-Marker generate entity span candidates whose length is shorter than the maximum span length and then train a model to classify them as entity type or no-entity. We experiment with both approaches and then select the model with the best performance in the ChEMU 2022 task.</p><p>For NER and RE tasks, there are the pipeline approach <ref type="bibr" target="#b9">[10]</ref>, <ref type="bibr" target="#b11">[12]</ref> and joint approach <ref type="bibr" target="#b15">[16]</ref>. In a pipeline approach, the NER model predicts entities and then the RE model predicts the relation between them. On the other hand, in the joint approach, a single model learns the NER and RE tasks simultaneously. Because PURE <ref type="bibr" target="#b9">[10]</ref> reports the pipeline approach performs better than the joint approach, we adopt the pipeline approach.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.">Method</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.1.">Domain-specific language model</head><p>We use transformer encoder-based models and these models are published in huggingface <ref type="foot" target="#foot_1">3</ref> , such as BERT, Roberta, BioBert, PubmedBert, Bio-megatron <ref type="bibr" target="#b16">[17]</ref>, and Bio-linkBert. Most of the publicly available pre-trained language models (PLM) are trained using general domain or biomedical domain knowledge. However, ChEMU 2022 data is composed of text based on chemical patents. Therefore, when fine-tuning publicly available pre-trained models, the gap between the chemical domain and other domains reduces the utilization of pre-trained knowledge. For example, if a word such as chemical compound is split into several tokens because the tokenizer is not trained with chemical domain texts, the language model may not capture its original meaning. And the understanding of the context is lowered due to a homonym problem in the different domains.</p><p>To overcome this problem, the one way is to learn a model from scratch using only the chemical domain texts, as PubmedBert and Bio-linkBert did in the biomedical domain, but it was difficult due to the lack of time. We try to solve the problems mentioned above by applying domain transfer to the set using the post-training method. In this paper, we post-train Bio-linkBert with various chemical corpora and then select the best performing model from domain-specific benchmark datasets consisting of BLURB (Biomedical Language Understanding &amp; Reasoning Benchmark) and ChEMU 2020. Since we believe that the Pubmed dataset used by the pre-trained Bio-linkBERT has some chemical information, we want to put additional data information into the model without losing the already learned information. The same methodology as Mix-Review <ref type="bibr" target="#b17">[18]</ref>, a rehearsal-based continual learning approach, is applied for domain transfer.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2.">Named Entity Recognition</head><p>Using the domain-specific language model mentioned above, we experiment with the sequence tagging approach and the span-based approach. And then we compare which is better for the entity and trigger word recognition of ChEMU 2022 tasks 1a and 1b. In the case of the sequence tagging approach, a bio scheme is used, each token is encoded with the BEGIN and IN tag of a specific entity or OUT tag. And then the model is trained to classify each token into its corresponding tag. To classify tags, the output representation of each token is simply fed into a linear layer. We also experiment with CRF or Bi-LSTM+CRF layers in appendix A.1, but there is no performance improvement. In the case of the span-based approach, we consider token sequences shorter than the maximum span length <ref type="foot" target="#foot_3">4</ref> as entity span candidates and then train the model to classify them to corresponding entity type or no entity. However, there are chemical entities much longer than the maximum sequence length in ChEMU 2022 task 1a. Therefore, we use several heuristic approaches to add long entity span candidates. One simple way we used is to add a space-split sequence of tokens. A span representation for classifying an entity is a concatenation of the first token, last token representation and width embedding to capture entity length information. <ref type="foot" target="#foot_4">5</ref> After experiments, we finally decide to go with the sequence tagging approach that shows higher performance.</p><p>According to the error analysis part of the ChEMU2020 <ref type="bibr" target="#b8">[9]</ref>, in some cases, contextual information from other sentences is necessary to extract trigger words. So we train the context-aware model with input including multiple sentences rather than a single sentence. It goes through several processing steps to generate the input data. 
First, we split the snippet into sentences with the spacy 6 library. Second, by sliding the sentences from left to right, we generate inputs that contain as many sentences as possible without exceeding the maximum sequence length of the model. For each pair of entity span and trigger span, special tokens are inserted before and after the entity and trigger span. Each special token indicates the type of entity or trigger and whether it is inserted before or after the span. After input is fed to the model, output representations of special tokens before entity and before trigger are concatenated. And then, this concatenated representation is fed to the linear layer to classify relations.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.4.">Ensemble</head><p>Ensemble methods combine predictions from multiple models to improve performance. We train NER and RE models using 10-fold cross-validation on the merged training and development datasets. We apply the soft voting ensemble method to output results from 10 models.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.5.">Post-processing</head><p>We apply three post-processing methods to correct the results mispredicted by NER &amp; RE models. The first method is to correct the entity misclassification of STARTING_MATERIAL as REAGENT_CATALYST. According to ChEMU 2020 <ref type="bibr" target="#b8">[9]</ref>, misclassifying STARTING_MATERIAL as REAGENT_CATALYST is one of the most common errors in the NER task. We design the rules according to the definition of the entities or trigger words. By definition, the difference between STARTING_MATERIAL and REAGENT_CATALYST is that STARTING_MATERIAL is consumed during the chemical reaction, while REAGENT_CATALYST is not consumed and only increases the reaction rate. In other words, unlike REAGENT_CATALYST, the molecular structure of STARTING_MATERIAL is similar to REACTION_PRODUCT. Therefore, we measure similarity between STARTING_MATERIAL or REAGENT_CATALYST and REACTION_PRODUCT in the snippet and then correct the entity type if it appears to be misclassified. <ref type="foot" target="#foot_6">7</ref> The second method is to correct mispredicted trigger word or entity spans. Sometimes the model predicts different spans for the same word in different sentences. For example, the model predicts "taken up" as a trigger word span, but sometimes only "taken" without "up" in other sentences. Sometimes this can happen because the labels for the same word are different from each other in the dataset. We apply post-processing that modifies all "taken" to "taken up". In the same manner, several spans of the entities are post-processed.</p><p>The third method is to correct the relation misclassification of WORKUP as REACTION_STEP. If the RE model predicts that a trigger word is related to REACTION_PRODUCT, YIELD_PERCENT, YIELD_OTHER at one time, the trigger word should be REACTION_STEP rather than WORKUP. 
The rule should capture the sentence at the end of the snippet that describes the material synthesis in which the product is finally formed. Thus, we force the trigger word WORKUP to be replaced by REACTION_STEP in this case.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.">Experiments</head><p>We evaluate our domain-specific language model with the BLURB benchmark and ChEMU 2020 dataset. We then evaluate NER and RE models with ChEMU 2022 task 1a and 1b datasets. Among them, we focus on tasks 1a and 1b. Task 1a is to extract chemical entities and task 1b is to extract both the trigger words and the relations between trigger words and chemical entities. The dataset of task 1b is a superset of task 1a. Table <ref type="table" target="#tab_0">1</ref> shows the overall statistics of the train and development datasets in ChEMU 2022 task 1b.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.2.">BLURB benchmark dataset</head><p>The BLURB benchmark dataset consists of six tasks as follows : named entity recognition, PICO (patient population, interventions, comparator, and outcomes), relation extraction, sentence similarity, document classification, and question answer. Among them, we use only NER and RE datasets, which are the target tasks of ChEMU 2022. Table <ref type="table" target="#tab_1">2</ref> summarizes the dataset we use. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.2.">Implementation</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.2.1.">Domain-specific language model</head><p>We post-train Bio-LinkBert with different corpus combinations and then select the bestperforming model. Based on the architecture and weight of Bio-LinkBert large <ref type="foot" target="#foot_7">8</ref> , we post-train Bio-LinkBert on a task of masked language modeling <ref type="bibr" target="#b12">[13]</ref>. We experiment with three corpora:</p><p>(1) Google patent: 23 GB of chemical domain patents we crawled using chemical keywords, (2) Journal: 22 GB of chemical journal abstracts and body text (3) Pubmed abstract <ref type="foot" target="#foot_8">9</ref> : used by training BioBert <ref type="bibr" target="#b4">[5]</ref>, 38 GB of biomedical domain data. For the Pubmed abstract corpus, we use 12 GB, which is 30% of the total data. This is because Bio-LinkBert has already been trained with Pubmed abstract and the post-training aims to learn new information without losing what has been learned. The corpus used for our best-performing model is the combination of Journal and Pubmed abstract. We train our model for 15,000 steps (approx. 2 epochs) with sequence length 512, batch size 2k, weight reduction 0.01, warm-up 3000 steps, and learning rate 5e-5.</p><p>The training time is about 13 hours using DeepSpeed<ref type="foot" target="#foot_9">10</ref> with 16 Nvidia A100 40GB GPUs.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.2.2.">NER &amp; RE models</head><p>We train a NER model to predict both entities and trigger words and a RE model to predict relations between them. At inference time, the RE model predicts the relation using the results predicted by the NER model. Although the code for ChEMU 2022 task 1a and task 1b is published, we implement all codes for pre-processing, post-processing, and modeling for NER and RE. As we will discuss in section 4.4, we achieve higher performance than the other participants in tasks 1a and 1b even using publicly available domain-specific language models such as Bio-linkBert and PubmedBert. For the ensemble, we train a model with 10-fold cross-validation. And these 10 trained models predict entities or relations by soft-voting. Finally, post-processing is applied to the prediction results of the ensemble model.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.3.">Evaluation Result</head><p>We post-train Bio-linkBert with various corpora to obtain the domain-specific language model that achieves high performance in ChEMU 2022 tasks 1a and 1b. The performance verification of this model is performed using the CHEMU 2020 dataset and the BLURB dataset.</p><p>Table <ref type="table" target="#tab_2">3</ref> shows the entity and trigger extraction performance of post-trained models in ChEMU2020 task 1a. Table <ref type="table" target="#tab_3">4</ref> shows the results of the NER and RE performance of the BLURB dataset. In Tables <ref type="table" target="#tab_3">3 and 4</ref>, the post-training with the journal and Pubmed abstract on Bio-linkBert large achieves the highest overall score, so we choose this model as our final model. We train these models with the input data generated from each paragraph. Training the model with input generated at the document level gives a slight performance improvement. However, in ChEMU 2022, the score eventually drops slightly, so it is not used.</p><p>Table <ref type="table" target="#tab_4">5</ref> and Table <ref type="table" target="#tab_5">6</ref> show the evaluation results of task 1a and 1b in ChEMU2022, respectively. The public and private scores are calculated from 30% and 70% of the test data set, respectively. <ref type="foot" target="#foot_10">11</ref>Both exact match and relaxed match require predicted entity or trigger word type to match the label. For span, exact match requires that predicted span exactly matches gold span. However, the relaxed match only requires that the predicted span overlap the gold span. Both metrics use f1 score which is the harmonic mean of the precision and recall. In Table <ref type="table" target="#tab_4">5</ref>, our single model achieves public and private exact match f1 scores improvement of +2.32 and +1.74 compared to Hokkaido University which achieves the highest score among other participants. 
Also, our final model with ensemble and post-processing achieves +3.13 and +2.68.</p><p>In Table <ref type="table" target="#tab_5">6</ref>, the evaluation method of task 1b in ChEMU 2022 is very similar to 1a, except that the relation type must match the label as well. To check whether the performance of the RE model is higher than that of other participants, we predicted the relation using the entity and trigger prediction results of the model<ref type="foot" target="#foot_11">12</ref> that achieved the lowest performance among the NER models we submitted. Even in this case, the public and private exact match f1 scores are +2.04 and +1.26 higher than the ChEMU Baseline, which has the highest performance among participants. Therefore, the proposed RE model also affects the performance improvement. The highest score is obtained by training an ensemble RE model using the prediction results of the best performing NER and applying post-processing to the prediction results. In this case, the public and private exact match f1 scores are +4.4 and +2.9 higher than the ChEMU Baseline.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.4.">Analysis</head><p>This section explains how PLM and data pre-processing methods affect the performance of ChEMU 2022 task 1a and 1b. Table <ref type="table" target="#tab_6">7</ref> shows the exact match f1 score of the publicly available PLMs and our language model, which is post-trained on Bio-linkBert large with journal and pubmed abstract data; the best performing model is selected by measuring the Blurb and CHEMU20 performance at every 500 steps within 2 epochs. In task 1a, our language model outperforms all other PLMs. However, although it is not common, the performance of Bio-linkBert large is lower than Bio-linkBert base. Therefore, post-training Bio-linkBert base as we did for Bio-linkBert large may improve the performance. In task 1b, our model outperforms the Bio-linkBert large model, but PubmedBert base gets the highest score. As a result, we use our language model for task 1a and PubmedBert base for task 1b.</p><p>Table <ref type="table" target="#tab_7">8</ref> shows the comparison of the exact match f1 score in ChEMU 2022 according to the pre-processing methods that generate the input data to train the model. A single snippet txt file of ChEMU 2022 data consists of multiple lines. We apply pre-processing methods mentioned in section 3.2 and 3.3 in two different ways. The first is to pre-process the data line by line and the second is to pre-process the entire snippet. If the input is generated only on each line, the model cannot predict the result by referencing the context information given in the other lines. Furthermore, for task 1b, the first deals with relations that occur on a single line, while the second also includes relations that occur in multiple lines. Therefore, the second outperforms the first. 
Table <ref type="table" target="#tab_8">9</ref> shows the ablation over three post-processing methods: (1) similarity: to correct the entity misclassification of STARTING_MATERIAL as REAGENT_CATALYST, (2) trigger &amp; entity span: to correct mispredicted trigger word or entity spans. (3) strict relation: to correct the relation misclassification of WORKUP as REACTION_STEP in some cases. In the NER model, the similarity method improves public exact and relaxed f1 by +0.03 and +0.04, respectively. The entity span method improves public exact f1 by +0.03. As a result, the score after applying all post-processing methods shows +0.07 and +0.04 improvement in public exact and relaxed f1, respectively. In the EE model public score, the most effective method is the trigger span method, which improves public exact f1 by +0.33. The next most effective method is strict relation, which improves public exact f1 by +0.17 and relaxed f1 by +0.16. This rule drops the private score a bit, but it is still effective in the public score. Similarity improves public exact f1 by +0.04 and relaxed f1 by +0.04, and the entity span method improves public exact f1 by +0.04. Finally, with all post-processing methods applied, the EE model shows public exact and relaxed f1 scores that are +0.59 and +0.21 higher than those of the EE base model.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.">Conclusion</head><p>In this paper, we present the domain-specific language model and context-aware NER and RE models for ChEMU 2022. For the best performing domain-specific language model, We post-train Bio-linkBert with various corpora. Based on this language model, we present the NER model using the sequence tagging method and the RE model using the PURE approach. Pre-processing methods where the input contains multiple lines of sentences help the model to be context-aware, which ultimately improves performance. Finally, we train the ensemble model and apply some rules as post-processing.</p><p>We achieve state-of-the-art performance on ChEMU 2022 tasks 1a and 1b and analyze contributions to performance. However, there is still room for improvement. First, because our language model has a maximum sequence length of only 512 tokens, the model can not predict entities and relations referencing the entire snippet. Second, some chemical entities consist of too many tokens, which can degrade the performance of the model. These will be our future works to develop improved language models.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1</head><label>1</label><figDesc>Overall statistics of train and development datasets in ChEMU 2022 task1b. ChEMU 2022 test dataset may also consist of unseen data, we want to choose a domainspecific language model that generally performs well for the unseen data. This is why we use the BLURB benchmark dataset together rather than just ChEMU 2020. BLURB benchmark dataset is based on the biomedical domain, which has some relevance to the chemical domain. Furthermore, it also includes chemical domain data, such as BC5-chem and ChemProt. The train dataset of ChEMU 2020 is the same as that of ChEMU 2022, but the development and test datasets of ChEMU 2020 are the same as the development dataset of ChEMU 2022. 
The test data set of ChEMU 2020 is public, while that of ChEMU 2022 is not. In order to get the score of the ChEMU 2022 test data set, the model must be uploaded to the ChEMU website. For convenience, we use the ChEMU 2020 data set to evaluate domain-specific language models, but the ChEMU 2022 data set to evaluate our NER and RE models.</figDesc><table><row><cell>Feature</cell><cell>Value</cell></row><row><cell># Patent snippets</cell><cell>1500</cell></row><row><cell># Entities</cell><cell>26857</cell></row><row><cell># Trigger Words</cell><cell>11236</cell></row><row><cell># Relations</cell><cell>23445</cell></row></table><note>the 4.1. Dataset 4.1.1. ChEMU 2022 dataset ChEMU 2022 includes five tasks: named entity recognition (task 1a), event extraction (task 1b), anaphora resolution (task 1c), chemical reaction reference resolution (task 2a), and table semantic classification (task 2b</note></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_1"><head>Table 2</head><label>2</label><figDesc>The NER and RE datasets in BLURB benchmark.</figDesc><table><row><cell>Dataset</cell><cell>Task</cell><cell>Train</cell><cell>Dev</cell><cell>Test</cell><cell>Evaluation Metrics</cell></row><row><cell>BC5-chem</cell><cell>NER</cell><cell>5203</cell><cell>5547</cell><cell>5385</cell><cell>F1 entity-level</cell></row><row><cell>BC5-disease</cell><cell>NER</cell><cell>4182</cell><cell>4244</cell><cell>4424</cell><cell>F1 entity-level</cell></row><row><cell>NCBI-disease</cell><cell>NER</cell><cell>5134</cell><cell>787</cell><cell>960</cell><cell>F1 entity-level</cell></row><row><cell>BC2GM</cell><cell>NER</cell><cell>15197</cell><cell>3061</cell><cell>6325</cell><cell>F1 entity-level</cell></row><row><cell>JNLPBA</cell><cell>NER</cell><cell>46750</cell><cell>4551</cell><cell>8662</cell><cell>F1 entity-level</cell></row><row><cell>ChemProt</cell><cell>RE</cell><cell>18035</cell><cell>11268</cell><cell>15745</cell><cell>Micro F1</cell></row><row><cell>DDI</cell><cell>RE</cell><cell>22233</cell><cell>5559</cell><cell>5716</cell><cell>Micro F1</cell></row><row><cell>GAD</cell><cell>RE</cell><cell>4261</cell><cell>534</cell><cell>535</cell><cell>Micro F1</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_2"><head>Table 3</head><label>3</label><figDesc>The Exact match results of the ChEMU 2020 test set. "GP" indicates training with Google Patent corpus we crawl. "PM" refers PubMed abstract corpus and "J" refers to Journal data. "p", "r" and "f1" means precision, recall and f1 score, respectively. We train these models with the input data generated from each paragraph, but (doc) means model trained at the document level.PubmedBert and Bio-linkBert on huggingface. We train the NER model for 20 epochs with a learning rate of 5e-5. At each epoch, we evaluate it on the development dataset and choose the best-performing model. In the same manner, we train the RE model for 10 epochs with a learning rate of 2e-5 and choose the best-performing model. The training time of the NER and RE model is about 20 minutes and 24 hours with 1 Nvidia A100 40GB GPU. Because the RE model is trained to classify all pairs of trigger words and entities in the snippet, it takes longer than the NER model only to classify each token.</figDesc><table><row><cell>Model</cell><cell></cell><cell>Trigger</cell><cell></cell><cell></cell><cell>Entity</cell></row><row><cell></cell><cell>p</cell><cell>r</cell><cell>f1</cell><cell>p</cell><cell>r</cell><cell>f1</cell></row><row><cell cols="7">PubmedBert-base 96.1 94.9 95.5 95.6 94.3 95.0</cell></row><row><cell cols="2">Bio-linkBert-large 96.3</cell><cell>95</cell><cell cols="4">95.6 95.9 94.2 95.1</cell></row><row><cell>+GP</cell><cell cols="6">95.9 97.4 96.6 95.8 96.1 95.9</cell></row><row><cell>+GP+PM</cell><cell cols="4">96.3 97.2 96.8 95.6</cell><cell>96</cell><cell>95.8</cell></row><row><cell>+J</cell><cell>96</cell><cell cols="4">97.3 96.6 95.8 96.1</cell><cell>96</cell></row><row><cell>+J+PM</cell><cell>96.6</cell><cell>96</cell><cell cols="3">96.3 95.9 96.1</cell><cell>96</cell></row><row><cell>+J+PM (doc)</cell><cell cols="6">96.2 97.1 96.7 96.1 96.3 
96.2</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_3"><head>Table 4</head><label>4</label><figDesc>The evaluation results of NER and RE tasks in BLURB, the f1 score of the test dataset.</figDesc><table><row><cell>Model</cell><cell>BC5</cell><cell>BC5</cell><cell>NCBI</cell><cell cols="2">BC2GM JNLPBA</cell><cell>Chem</cell><cell>DDI GAD</cell><cell>Average</cell></row><row><cell></cell><cell>-chem</cell><cell>-disease</cell><cell>-disease</cell><cell></cell><cell></cell><cell>Prot</cell><cell></cell><cell>score</cell></row><row><cell cols="2">PubmedBert-base 92.95</cell><cell>85.35</cell><cell>87.57</cell><cell>84.36</cell><cell>79.13</cell><cell cols="3">77.02 82.74 81.89 83.87</cell></row><row><cell cols="2">Bio-linkBert-large 93.33</cell><cell>85.65</cell><cell>87.62</cell><cell>84.61</cell><cell>79.08</cell><cell cols="3">77.68 82.03 84.15 84.26</cell></row><row><cell>+GP</cell><cell>94.1</cell><cell>85.79</cell><cell>88.46</cell><cell>84.9</cell><cell cols="4">79.97 77.85 82.74 85.62 84.92</cell></row><row><cell>+GP+PM</cell><cell>93.66</cell><cell>85.93</cell><cell>88.06</cell><cell>84.67</cell><cell>79.38</cell><cell cols="3">79.91 82.82 85.42 84.98</cell></row><row><cell>+J</cell><cell cols="2">94.13 85.64</cell><cell>88.61</cell><cell>84.51</cell><cell>79.53</cell><cell cols="3">79.49 83.39 84.66 84.99</cell></row><row><cell>+J+PM</cell><cell cols="2">94.11 86.65</cell><cell>88.11</cell><cell>85.03</cell><cell>79.79</cell><cell cols="3">79.92 83.17 85.04 85.24</cell></row><row><cell>+J+PM (doc)</cell><cell>93.92</cell><cell>85.58</cell><cell>89.32</cell><cell>85.06</cell><cell cols="4">79.53 79.97 84.79 84.33 85.31</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_4"><head>Table 5</head><label>5</label><figDesc>ChEMU 2022 task 1a: named entity recognition evaluation results of the test dataset.</figDesc><table><row><cell>Model</cell><cell cols="2">Exact F1 public private</cell><cell cols="2">Relaxed F1 public private</cell></row><row><cell>Hokkaido University</cell><cell>93.20</cell><cell>94.12</cell><cell>94.58</cell><cell>95.35</cell></row><row><cell>ChEMU Baseline</cell><cell>93.20</cell><cell>93.67</cell><cell>95.28</cell><cell>95.72</cell></row><row><cell>Virginia Commonwealth University</cell><cell>77.80</cell><cell>76.86</cell><cell>87.19</cell><cell>87.45</cell></row><row><cell>Ours (single)</cell><cell>95.52</cell><cell>95.86</cell><cell>97.10</cell><cell>97.33</cell></row><row><cell>Ours (Ensemble)</cell><cell>96.26</cell><cell>96.73</cell><cell>97.55</cell><cell>97.93</cell></row><row><cell>Ours (Ensemble) + post processing</cell><cell>96.33</cell><cell>96.80</cell><cell>97.59</cell><cell>97.93</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_5"><head>Table 6</head><label>6</label><figDesc>ChEMU 2022 task 1b: Event extraction evaluation results of the test dataset.</figDesc><table><row><cell>Model</cell><cell cols="2">Exact F1 public private</cell><cell cols="2">Relaxed F1 public private</cell></row><row><cell>Hokkaido University</cell><cell>87.00</cell><cell>88.68</cell><cell>89.63</cell><cell>90.28</cell></row><row><cell>ChEMU Baseline</cell><cell>88.42</cell><cell>89.25</cell><cell>90.36</cell><cell>91.04</cell></row><row><cell>Virginia Commonwealth University</cell><cell>74.08</cell><cell>74.73</cell><cell>78.93</cell><cell>79.46</cell></row><row><cell>Ours (single, worst ner)</cell><cell>90.46</cell><cell>90.51</cell><cell>92.34</cell><cell>92.07</cell></row><row><cell>Ours (single)</cell><cell>92.00</cell><cell>91.84</cell><cell>93.75</cell><cell>93.48</cell></row><row><cell>Ours (Ensemble)</cell><cell>92.23</cell><cell>91.99</cell><cell>94.03</cell><cell>93.63</cell></row><row><cell>Ours (Ensemble) + post processing</cell><cell>92.82</cell><cell>92.15</cell><cell>94.24</cell><cell>93.61</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_6"><head>Table 7</head><label>7</label><figDesc>Ablation over the pre-trained language models. PLMs publicly available on huggingface and our best-performing PLM are evaluated.</figDesc><table><row><cell>Model</cell><cell cols="2">task1a : NER public private</cell><cell cols="2">task1b: EE public private</cell></row><row><cell>PubmedBert base</cell><cell>94.73</cell><cell>95.55</cell><cell>92</cell><cell>91.84</cell></row><row><cell>Bio-linkBert base</cell><cell>95.2</cell><cell>94.91</cell><cell>91.53</cell><cell>91.66</cell></row><row><cell>Bio-linkBert large</cell><cell>95.05</cell><cell>95.34</cell><cell>91.02</cell><cell>91.28</cell></row><row><cell>Ours</cell><cell>95.52</cell><cell>95.86</cell><cell>91.86</cell><cell>91.78</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_7"><head>Table 8</head><label>8</label><figDesc>Ablation over pre-processing methods, how to generate input data. For each task 1a and 1b, the best performance PLM is used respectively.</figDesc><table><row><cell>Pre-processing</cell><cell cols="2">task1a : NER public private</cell><cell cols="2">task1b : EE public private</cell></row><row><cell>line</cell><cell>93.89</cell><cell>94.6</cell><cell>91.89</cell><cell>91.63</cell></row><row><cell>snippet</cell><cell>95.52</cell><cell>95.86</cell><cell>92</cell><cell>91.84</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_8"><head>Table 9</head><label>9</label><figDesc>Ablation over post-processing methods. Similarity refers to post-processing using molecular similarity between REACTION_PRODUCT and STARTING_MATERIAL or REACTION_PRODUCT and REAGENT_CATALYST. Entity span and trigger span indicate post-processing of entity and trigger span mismatches, respectively. Strict relation refers to post-processing that forbids WORKUP relating to REACTION_PRODUCT, YIELD_PERCENT and YIELD_OTHER at once. Post-processed means applying all post-processing methods.</figDesc><table><row><cell>Model</cell><cell></cell><cell>public</cell><cell></cell><cell>private</cell></row><row><cell></cell><cell cols="4">Exact F1 Relaxed F1 Exact F1 Relaxed F1</cell></row><row><cell>NER base</cell><cell>96.26</cell><cell>97.55</cell><cell>96.73</cell><cell>97.93</cell></row><row><cell>NER similarity</cell><cell>96.29</cell><cell>97.59</cell><cell>96.73</cell><cell>97.93</cell></row><row><cell>NER entity span</cell><cell>96.29</cell><cell>97.55</cell><cell>96.80</cell><cell>97.93</cell></row><row><cell>NER post-processed</cell><cell>96.33</cell><cell>97.59</cell><cell>96.80</cell><cell>97.93</cell></row><row><cell>EE base</cell><cell>92.23</cell><cell>94.03</cell><cell>91.99</cell><cell>93.63</cell></row><row><cell>EE similarity</cell><cell>92.27</cell><cell>94.07</cell><cell>91.99</cell><cell>93.63</cell></row><row><cell>EE trigger span</cell><cell>92.56</cell><cell>94.03</cell><cell>92.15</cell><cell>93.63</cell></row><row><cell>EE entity span</cell><cell>92.27</cell><cell>94.03</cell><cell>92.01</cell><cell>93.63</cell></row><row><cell>EE strict relation</cell><cell>92.40</cell><cell>94.19</cell><cell>91.98</cell><cell>93.61</cell></row><row><cell>EE post-processed</cell><cell>92.82</cell><cell>94.24</cell><cell>92.15</cell><cell>93.61</cell></row></table></figure>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="2" xml:id="foot_0">ChEMU 2022 website shows public and private exact match f1 score and the score mentioned above is the public score. The meaning of public and private scores is explained in section 4</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="3" xml:id="foot_1">https://huggingface.co/</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="3" xml:id="foot_2">.3. Relation Extraction. In this paper, we use the PURE<ref type="bibr" target="#b9">[10]</ref> approach to extract the relation between entity and trigger word extracted from the NER model. For every entity and trigger word pair in the snippet, we generate input data to train the model to classify as a specific relation type or no-relation. Note that this input includes relations that occur in a single sentence as well as relations that occur in cross sentences. The pre-processing step to generate this input is as follows. First, we split the snippet into sentences as we did in the pre-processing step. Second, we add the sentences in which the entity or the trigger word occurs to the input. Third, if there are intermediate sentences between added sentences, we add them as well. If the generated input is longer than the maximum sequence length, we skip it. Since this input consists of a trigger word and an entity that is far from it, there doesn't seem to be any relation between them. So skipping this input doesn't affect the performance. However, if the generated input is shorter than the maximum sequence length, we add as many left and right sentences as possible to the input to train a context-aware RE model. Finally, in order to include information about the entity</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="4" xml:id="foot_3">The maximum span length used in the PURE paper is 8. We use the same value</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="5" xml:id="foot_4">Since entity span candidates can be too long, 9 width embeddings are used: embeddings for 1 to 8 tokens and an embedding for tokens longer than 8.</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="6" xml:id="foot_5">https://spacy.io/usage/spacy-101</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="7" xml:id="foot_6">the details are described in appendix A.2</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="8" xml:id="foot_7">https://huggingface.co/michiyasunaga/BioLinkBERT-large</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="9" xml:id="foot_8">https://github.com/EleutherAI/the-pile</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="10" xml:id="foot_9">https://github.com/microsoft/DeepSpeed</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="11" xml:id="foot_10">The private score was published before the submission deadline, and the public score was published after that time.</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="12" xml:id="foot_11">EM F1 public score = 93.65, private score = 94.3</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="13" xml:id="foot_12">https://github.com/mcs07/PubChemPy</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="14" xml:id="foot_13">The RDKit: Open-Source Cheminformatics Software, version 2022.03.2. http://www.rdkit.org</note>
		</body>
		<back>
			<div type="annex">
<div xmlns="http://www.tei-c.org/ns/1.0"><p>A. Appendix</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>A.1. Additional Ablation Studies for the NER Model</head><p>Table <ref type="table">9</ref> shows the ablation studies over the NER approach and classification layer in ChEMU 2020. As mentioned in section 3, the sequence tagging approach outperforms the span-based approach. However, the performance difference between the dense layer and the CRF layer is not significant.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>A.2. Post-processing to correct the entity misclassification of STARTING_MATERIAL as REAGENT_CATALYST</head><p>As we mentioned in section 3.5, we measure the similarity between STARTING_MATERIAL or REAGENT_CATALYST and REACTION_PRODUCT in the snippet and then correct the entity type if it appears to be misclassified. We use Pubchem <ref type="bibr" target="#b18">[19]</ref> Python package pubchempy 13 to parse chemical entities from text, and then the similarity is measured using Python package RDKit 14 with Tanimoto coefficient <ref type="bibr" target="#b19">[20]</ref> and Tversky index <ref type="bibr" target="#b20">[21]</ref>.</p></div>			</div>
			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">Introduction to the CoNLL-2003 shared task: Language-independent named entity recognition</title>
		<author>
			<persName><forename type="first">E</forename><forename type="middle">F</forename><surname>Tjong Kim Sang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><forename type="middle">De</forename><surname>Meulder</surname></persName>
		</author>
		<ptr target="https://aclanthology.org/W03-0419" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Seventh Conference on Natural Language Learning at HLT-NAACL 2003</title>
				<meeting>the Seventh Conference on Natural Language Learning at HLT-NAACL 2003</meeting>
		<imprint>
			<date type="published" when="2003">2003</date>
			<biblScope unit="page" from="142" to="147" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">Kernel methods for relation extraction</title>
		<author>
			<persName><forename type="first">D</forename><surname>Zelenko</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Aone</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Richardella</surname></persName>
		</author>
		<idno type="DOI">10.3115/1118693.1118703</idno>
		<ptr target="https://aclanthology.org/W02-1010.doi:10.3115/1118693.1118703" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2002 Conference on Empirical Methods in Natural Language Processing (EMNLP 2002), Association for Computational Linguistics</title>
				<meeting>the 2002 Conference on Empirical Methods in Natural Language Processing (EMNLP 2002), Association for Computational Linguistics</meeting>
		<imprint>
			<date type="published" when="2002">2002</date>
			<biblScope unit="page" from="71" to="78" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">Multi-task identification of entities, relations, and coreference for scientific knowledge graph construction</title>
		<author>
			<persName><forename type="first">Y</forename><surname>Luan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>He</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Ostendorf</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Hajishirzi</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/D18-1360</idno>
		<ptr target="https://aclanthology.org/D18-1360.doi:10.18653/v1/D18-1360" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, Association for Computational Linguistics</title>
				<meeting>the 2018 Conference on Empirical Methods in Natural Language Processing, Association for Computational Linguistics<address><addrLine>Brussels, Belgium</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2018">2018</date>
			<biblScope unit="page" from="3219" to="3232" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">BERT: Pre-training of deep bidirectional transformers for language understanding</title>
		<author>
			<persName><forename type="first">J</forename><surname>Devlin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M.-W</forename><surname>Chang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Lee</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Toutanova</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/N19-1423</idno>
		<ptr target="https://aclanthology.org/N19-1423.doi:10.18653/v1/N19-1423" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</title>
		<title level="s">Long and Short Papers</title>
		<meeting>the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies<address><addrLine>Minneapolis, Minnesota</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2019">2019</date>
			<biblScope unit="volume">1</biblScope>
			<biblScope unit="page" from="4171" to="4186" />
		</imprint>
	</monogr>
	<note>Association for Computational Linguistics</note>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<author>
			<persName><forename type="first">J</forename><surname>Lee</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><surname>Yoon</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Kim</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Kim</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Kim</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">H</forename><surname>So</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Kang</surname></persName>
		</author>
		<idno type="DOI">10.1093/bioinformatics/btz682</idno>
		<ptr target="https://doi.org/10.1093/bioinformatics/btz682.doi:10.1093/bioinformatics/btz682" />
	</analytic>
	<monogr>
		<title level="m">BioBERT: a pre-trained biomedical language representation model for biomedical text mining</title>
				<imprint>
			<date type="published" when="2019">2019</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<monogr>
		<title level="m" type="main">Domain-specific language model pretraining for biomedical natural language processing</title>
		<author>
			<persName><forename type="first">Y</forename><surname>Gu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Tinn</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Cheng</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Lucas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Usuyama</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Liu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Naumann</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Gao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Poon</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2007.15779</idno>
		<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">Linkbert: Pretraining language models with document links</title>
		<author>
			<persName><forename type="first">M</forename><surname>Yasunaga</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Leskovec</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Liang</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Association for Computational Linguistics (ACL)</title>
				<imprint>
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">Domain-specific language model pretraining for biomedical natural language processing</title>
		<author>
			<persName><forename type="first">Y</forename><surname>Gu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Tinn</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Cheng</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Lucas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Usuyama</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Liu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Naumann</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Gao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Poon</surname></persName>
		</author>
		<idno type="DOI">10.1145/3458754</idno>
		<ptr target="https://doi.org/10.1145%2F3458754.doi:10.1145/3458754" />
	</analytic>
	<monogr>
		<title level="j">ACM Transactions on Computing for Healthcare</title>
		<imprint>
			<biblScope unit="volume">3</biblScope>
			<biblScope unit="page" from="1" to="23" />
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">Natural language processing methods are effective for information extraction from chemical patents</title>
		<author>
			<persName><forename type="first">J</forename><surname>He</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">Q</forename><surname>Nguyen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">A</forename><surname>Akhondi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Druckenbrodt</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Thorne</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Hoessel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Afzal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Zhai</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Fang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Yoshikawa</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Albahem</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Cavedon</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Cohn</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Baldwin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Verspoor</surname></persName>
		</author>
		<idno type="DOI">10.3389/frma.2021.654438</idno>
		<ptr target="https://www.frontiersin.org/article/10.3389/frma.2021.654438.doi:10.3389/frma.2021.654438" />
	</analytic>
	<monogr>
		<title level="j">Frontiers in Research Metrics and Analytics</title>
		<imprint>
			<biblScope unit="volume">6</biblScope>
			<date type="published" when="2020">2020. 2021</date>
		</imprint>
	</monogr>
	<note>Chemu</note>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">A frustratingly easy approach for entity and relation extraction</title>
		<author>
			<persName><forename type="first">Z</forename><surname>Zhong</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Chen</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">North American Association for Computational Linguistics (NAACL)</title>
				<imprint>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">Melaxtech: A report for CLEF 2020 -chemu task of chemical reaction extraction from patent</title>
		<author>
			<persName><forename type="first">J</forename><surname>Wang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Ren</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Zhang</surname></persName>
		</author>
		<ptr target="http://ceur-ws.org/Vol-2696/paper_238.pdf" />
	</analytic>
	<monogr>
		<title level="m">Working Notes of CLEF 2020 -Conference and Labs of the Evaluation Forum</title>
		<title level="s">CEUR Workshop Proceedings</title>
		<editor>
			<persName><forename type="first">L</forename><surname>Cappellato</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">C</forename><surname>Eickhoff</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><surname>Névéol</surname></persName>
		</editor>
		<meeting><address><addrLine>Thessaloniki, Greece</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2020">September 22-25, 2020. 2696. 2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title level="a" type="main">Packed levitated marker for entity and relation extraction</title>
		<author>
			<persName><forename type="first">D</forename><surname>Ye</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Lin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Li</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Sun</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/2022.acl-long.337</idno>
		<ptr target="https://aclanthology.org/2022.acl-long.337.doi:10.18653/v1/2022.acl-long.337" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics</title>
		<title level="s">Long Papers</title>
		<meeting>the 60th Annual Meeting of the Association for Computational Linguistics<address><addrLine>Dublin, Ireland</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2022">2022</date>
			<biblScope unit="volume">1</biblScope>
			<biblScope unit="page" from="4904" to="4917" />
		</imprint>
	</monogr>
	<note>Association for Computational Linguistics</note>
</biblStruct>

<biblStruct xml:id="b12">
	<monogr>
		<author>
			<persName><forename type="first">Y</forename><surname>Liu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Ott</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Goyal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Du</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Joshi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Chen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Levy</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Lewis</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Zettlemoyer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Stoyanov</surname></persName>
		</author>
		<idno type="arXiv">arXiv:1907.11692</idno>
		<title level="m">Roberta: A robustly optimized bert pretraining approach</title>
				<imprint>
			<date type="published" when="2019">2019</date>
		</imprint>
	</monogr>
	<note type="report_type">arXiv preprint</note>
</biblStruct>

<biblStruct xml:id="b13">
	<analytic>
		<title level="a" type="main">LUKE: Deep contextualized entity representations with entity-aware self-attention</title>
		<author>
			<persName><forename type="first">I</forename><surname>Yamada</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Asai</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Shindo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Takeda</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Matsumoto</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/2020.emnlp-main.523</idno>
		<ptr target="https://aclanthology.org/2020.emnlp-main.523" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), Association for Computational Linguistics</title>
				<meeting>the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), Association for Computational Linguistics</meeting>
		<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b14">
	<analytic>
		<title level="a" type="main">Improving biomedical pretrained language models with knowledge</title>
		<author>
			<persName><forename type="first">Z</forename><surname>Yuan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Liu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Tan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Huang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Huang</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/2021.bionlp-1.20</idno>
		<ptr target="https://aclanthology.org/2021.bionlp-1.20" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 20th Workshop on Biomedical Language Processing, Association for Computational Linguistics</title>
				<meeting>the 20th Workshop on Biomedical Language Processing, Association for Computational Linguistics</meeting>
		<imprint>
			<date type="published" when="2021">2021</date>
			<biblScope unit="page" from="180" to="190" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b15">
	<analytic>
		<title level="a" type="main">Joint entity and relation extraction from scientific documents: Role of linguistic information and entity types</title>
		<author>
			<persName><forename type="first">P</forename><surname>Chakraborty</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Dutta</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">K</forename><surname>Sanyal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><forename type="middle">P</forename><surname>Das</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">EEKE@JCDL</title>
				<imprint>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b16">
	<monogr>
		<title level="m" type="main">Biomegatron: Larger biomedical domain language model</title>
		<author>
			<persName><forename type="first">H.-C</forename><surname>Shin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Bakhturina</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Puri</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Patwary</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Shoeybi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Mani</surname></persName>
		</author>
		<idno type="DOI">10.48550/ARXIV.2010.06060</idno>
		<ptr target="https://arxiv.org/abs/2010.06060" />
		<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b17">
	<analytic>
		<title level="a" type="main">Analyzing the forgetting problem in pretrain-finetuning of open-domain dialogue response models</title>
		<author>
			<persName><forename type="first">T</forename><surname>He</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Liu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Cho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Ott</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Liu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Glass</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Peng</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">EACL</title>
				<imprint>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b18">
	<analytic>
		<title level="a" type="main">Pubchem substance and compound databases</title>
		<author>
			<persName><forename type="first">S</forename><surname>Kim</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><forename type="middle">A</forename><surname>Thiessen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><forename type="middle">E</forename><surname>Bolton</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Chen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Fu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Gindulyte</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Han</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>He</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>He</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><forename type="middle">A</forename><surname>Shoemaker</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Nucleic acids research</title>
		<imprint>
			<biblScope unit="volume">44</biblScope>
			<biblScope unit="page" from="D1202" to="D1213" />
			<date type="published" when="2016">2016</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b19">
	<monogr>
		<author>
			<persName><forename type="first">T</forename><surname>Tanimoto</surname></persName>
		</author>
		<title level="m">IBM internal report, 17th November</title>
				<meeting><address><addrLine>Armonk, NY, USA</addrLine></address></meeting>
		<imprint>
			<publisher>IBM Company</publisher>
			<date type="published" when="1957">1957</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b20">
	<analytic>
		<title level="a" type="main">Features of similarity</title>
		<author>
			<persName><forename type="first">A</forename><surname>Tversky</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Psychological review</title>
		<imprint>
			<biblScope unit="volume">84</biblScope>
			<biblScope unit="page">327</biblScope>
			<date type="published" when="1977">1977</date>
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
