<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">CLEF eHealth 2017 Multilingual Information Extraction task overview: ICD10 coding of death certificates in English and French</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Aurélie</forename><surname>Névéol</surname></persName>
							<affiliation key="aff0">
								<orgName type="laboratory">LIMSI</orgName>
								<orgName type="institution" key="instit1">CNRS</orgName>
								<orgName type="institution" key="instit2">Université Paris-Saclay</orgName>
								<address>
									<settlement>Orsay</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<author role="corresp">
							<persName><forename type="first">Robert</forename><forename type="middle">N</forename><surname>Anderson</surname></persName>
							<email>rnanderson@cdc.gov</email>
							<affiliation key="aff1">
								<orgName type="department">National Center for Health Statistics</orgName>
								<address>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">K</forename><forename type="middle">Bretonnel</forename><surname>Cohen</surname></persName>
							<affiliation key="aff0">
								<orgName type="laboratory">LIMSI</orgName>
								<orgName type="institution" key="instit1">CNRS</orgName>
								<orgName type="institution" key="instit2">Université Paris-Saclay</orgName>
								<address>
									<settlement>Orsay</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
							<affiliation key="aff2">
								<orgName type="institution">University of Colorado</orgName>
								<address>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Cyril</forename><surname>Grouin</surname></persName>
							<affiliation key="aff0">
								<orgName type="laboratory">LIMSI</orgName>
								<orgName type="institution" key="instit1">CNRS</orgName>
								<orgName type="institution" key="instit2">Université Paris-Saclay</orgName>
								<address>
									<settlement>Orsay</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Thomas</forename><surname>Lavergne</surname></persName>
							<affiliation key="aff0">
								<orgName type="laboratory">LIMSI</orgName>
								<orgName type="institution" key="instit1">CNRS</orgName>
								<orgName type="institution" key="instit2">Université Paris-Saclay</orgName>
								<address>
									<settlement>Orsay</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
							<affiliation key="aff3">
								<orgName type="institution">Université Paris-Sud</orgName>
								<address>
									<settlement>Orsay</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Grégoire</forename><surname>Rey</surname></persName>
							<affiliation key="aff4">
								<orgName type="institution">INSERM-CépiDc</orgName>
								<address>
									<settlement>Le Kremlin-Bicêtre</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Aude</forename><surname>Robert</surname></persName>
							<affiliation key="aff4">
								<orgName type="institution">INSERM-CépiDc</orgName>
								<address>
									<settlement>Le Kremlin-Bicêtre</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Claire</forename><surname>Rondet</surname></persName>
							<affiliation key="aff4">
								<orgName type="institution">INSERM-CépiDc</orgName>
								<address>
									<settlement>Le Kremlin-Bicêtre</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Pierre</forename><surname>Zweigenbaum</surname></persName>
							<affiliation key="aff0">
								<orgName type="laboratory">LIMSI</orgName>
								<orgName type="institution" key="instit1">CNRS</orgName>
								<orgName type="institution" key="instit2">Université Paris-Saclay</orgName>
								<address>
									<settlement>Orsay</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">CLEF eHealth 2017 Multilingual Information Extraction task overview: ICD10 coding of death certificates in English and French</title>
					</analytic>
					<monogr>
						<imprint>
							<date/>
						</imprint>
					</monogr>
					<idno type="MD5">390E51712C5C4BA70100B9833CDADD70</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2023-03-24T20:31+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>Natural Language Processing</term>
					<term>Entity Linking</term>
					<term>Text Classification</term>
					<term>French</term>
					<term>Biomedical Text</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>This paper reports on Task 1 of the 2017 CLEF eHealth evaluation lab which extended the previous information extraction tasks of ShARe/CLEF eHealth evaluation labs. The task continued with coding of death certificates, as introduced in CLEF eHealth 2016. This large-scale classification task consisted of extracting causes of death as coded in the International Classification of Diseases, tenth revision (ICD10). The languages offered for the task this year were English and French. Participant systems were evaluated against a blind reference standard of 31,690 death certificates in the French dataset and 6,665 certificates in the English dataset using Precision, Recall and F-measure. In total, eleven teams participated: 10 teams submitted runs for the English dataset and 9 for the French dataset. Five teams submitted their systems to the reproducibility track. For death certificate coding, the highest performance was 0.8674 F-measure for French and 0.8501 for English.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1">Introduction</head><p>This paper describes an investigation of information extraction and normalization (also called "entity linking") from French and English-language health documents conducted as part of the CLEF eHealth 2017 lab <ref type="bibr" target="#b0">[1]</ref>. The task addressed is the automatic coding of death certificates using the International Classification of Diseases, 10th revision (ICD10) <ref type="bibr" target="#b1">[2]</ref>. This is an essential task in epidemiology, as the determination and analysis of causes of death at a global level informs public health policies.</p><p>The methodology applied is the shared task model. In shared tasks, multiple groups agree on a "shared" task definition, a shared data set, and a shared evaluation metric. The idea is to allow evaluation of multiple approaches to a problem while minimizing avoidable differences related to the task definition, the data used, and the figure of merit applied <ref type="bibr" target="#b2">[3,</ref><ref type="bibr" target="#b3">4]</ref>.</p><p>Over the past four years, CLEF eHealth offered challenges addressing several aspects of clinical information extraction (IE) including named entity recognition, normalization <ref type="bibr" target="#b4">[5]</ref><ref type="bibr" target="#b5">[6]</ref><ref type="bibr" target="#b6">[7]</ref> and attribute extraction <ref type="bibr" target="#b7">[8]</ref>. Initially, the focus was on a widely studied type of corpus, namely written English clinical text <ref type="bibr" target="#b4">[5,</ref><ref type="bibr" target="#b7">8]</ref>. Starting in 2015, the lab's IE challenge evolved to address lesser studied corpora, including biomedical texts in a language other than English i.e., French <ref type="bibr" target="#b5">[6]</ref>. 
This year, we continue to offer a shared task based on a large set of gold standard annotated corpora in French with a coding task that required normalized entity extraction at the sentence level. We also provided an equivalent dataset in English.</p><p>The significance of this work comes from the observation that challenges and shared tasks have had a significant role in advancing Natural Language Processing (NLP) research in the clinical and biomedical domains <ref type="bibr" target="#b8">[9,</ref><ref type="bibr" target="#b9">10]</ref>, especially for the extraction of named entities of clinical interest and entity normalization.</p><p>One of the goals for this shared task is to foster research addressing multiple languages for the same task in order to encourage the development of multilingual and language adaptation methods. This year's lab suggests that the task of coding can be addressed reproducibly with comparable performance in French and in English without relying on translation.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2">Material and Methods</head><p>In the CLEF eHealth 2017 Evaluation Lab Task 1, two datasets were used. The French dataset was supplied by the French CépiDc<ref type="foot" target="#foot_0">1</ref> and the English dataset was supplied by the American CDC <ref type="foot" target="#foot_1">2</ref> . Both datasets refer to the International Classification of Diseases, tenth revision (ICD10), a reference classification of about 14,000 diseases and related concepts managed by the World Health Organization and used worldwide, to register causes of death and reasons for hospital admissions. Further details on the datasets, tasks and evaluation metrics are given below.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.1">Datasets</head><p>The CépiDc corpus was provided by the French institute for health and medical research (INSERM) for the task of ICD10 coding in CLEF eHealth 2017 (Task 1). It consists of free text death certificates collected from physicians and hospitals in France over the period of 2006-2014 <ref type="bibr" target="#b10">[11]</ref>.</p><p>The CDC corpus was provided by the American Center for Disease Control (CDC). It consists of free text death certificates collected electronically in the United States during the year 2015. These are all records due to natural causes, i.e., there are no injury-related deaths included.</p><p>Dataset excerpts. Death certificates are standardized documents filled by physicians to report the death of a patient. The content of the medical information reported in a death certificate and subsequent coding for public health statistics follows complex rules described in a document that was supplied to participants <ref type="bibr" target="#b10">[11]</ref>. Tables <ref type="table" target="#tab_1">1 and 2</ref> present excerpts of the CépiDC and CDC corpora that illustrate the heterogeneity of the data that participants had to deal with. While some of the text lines were short and contained a term that could be directly linked to a single ICD10 code (e.g., "choc septique"), other lines could contain non-diacritized text (e.g., "peritonite..." missing the diacritic on the first "e"), abbreviations (e.g., "DM II" instead of "diabetes mellitus, type 2"). Other challenges included run-on narratives or mixed text alternating between upper case non-diacritized text and lower-case diacritized text. Descriptive statistics. Tables <ref type="table" target="#tab_3">3 and 4</ref> present statistics for the specific sets provided to participants. 
For both languages, the dataset construction was time-oriented in order to reflect the practical use case of coding death certificates, where historical data is available to train systems that can then be applied to current data to assist with new document curation. For French, the training set covered the 2006-2012 period, and the development set contained death certificates from 2013 and the test set from 2014. For English, data was only available for the year 2015, but the training and test sets were nonetheless divided chronologically during that year. While the French dataset offers more documents spread over an eight year period, it also reflects changes in the coding rules and practices over the period. In contrast, the English dataset is smaller but more homogeneous. Dataset format. In compliance with the World Health Organization (WHO) international standards, death certificates comprise two parts: Part I is dedicated to the reporting of diseases related to the main train of events leading directly to death, and Part II is dedicated to the reporting of contributory conditions not directly involved in the main death process. <ref type="foot" target="#foot_9">10</ref> According to WHO recommendations, the completion of both parts is free of any automatic assistance that might influence the certifying physician. The processing of death certificates, including ICD10 coding, is performed independently of physician reporting. In France and in the United States, coding of death certificates is performed within 18 months of reporting using the IRIS system <ref type="bibr" target="#b11">[12]</ref>. In the course of coding practice, the data is stored in different files: a file that records the native text entered in the death certificates (referred as 'raw causes' thereafter) and a file containing the result of ICD code assignment (referred as 'computed causes' thereafter). 
The 'computed causes' file may contain normalized text that supports the coding decision and can be used in the creation of dictionaries for the purpose of coding assistance. We found that the formatting of the data into raw and computed causes made it difficult to directly relate the codes assigned to original death certificate texts. This makes the datasets more suitable for approaching the coding problem as a text classification task at the document level rather than a named entity recognition and normalization task. We have reported separately on the challenges presented by the separation of data into raw and computed causes, and proposed solutions to merge the French data into a single 'aligned' format, relying on the normalized text supplied with the French raw causes <ref type="bibr" target="#b12">[13]</ref>. Table <ref type="table" target="#tab_0">1</ref> presents a sample of French death certificate in 'raw' and 'aligned' format. It illustrates the challenge of alignment with the line 2 in the raw file "péritonite stercorale sur perforation colique" which has to be mapped to line 4 "peritonite stercorale" (code K65.9) and line 5 "perforation colique" (code K63.1) in the computed file.</p><p>As can be seen in Table <ref type="table" target="#tab_1">2</ref> similar alignment challenges can be encountered in the English dataset. In Sample certificate 2, line 1 in the raw file "STROKE IN SEPTEMBER LEFT HEMIPARESIS" has to be mapped to line 1 (code I64, "Stroke, not specified") and line 2 (code G819, "Hemiplegia, unspecified") in the computed file. However, no normalized text was available for English and we were not able to offer an aligned version of the raw and computed files for the American dataset in this edition of the shared task. Data files. 
Table <ref type="table" target="#tab_4">5</ref> presents a description of the files that were provided to the participants: training (train) and development (dev, French only) files were distributed early in the challenge (in January 2017); test files (test, with no gold standard) were distributed at test time (at the end of April 2017); and the gold standard for test files (test+g in aligned format, test, computed in raw format) were disclosed to the participants after the test phase (in May 2017) just before the submission of their workshop papers, so that participants could reproduce the performance measures announced by the organizers.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.2">Tasks ICD10 coding</head><p>The coding task consisted of mapping lines in the death certificates to one or more relevant codes from the International Classification of Diseases, tenth revision (ICD10). For the raw datasets, codes were assessed at the certificate level. For the aligned dataset, codes were assessed at the line level. Replication. The replication task invited lab participants to submit a system used to generate one or more of their submitted runs, along with instructions to install and use the system. Then, two of the organizers independently worked with the submitted material to replicate the results submitted by the teams as their official runs.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.3">Evaluation metrics</head><p>System performance was assessed by the usual metrics of information extraction: precision (Formula 1), recall (Formula 2) and F-measure (Formula 3; specifically, we used β=1.).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Precision = true positives true positives + false positives (1)</head><p>Recall = true positives true positives + false negatives</p><p>(2)</p><formula xml:id="formula_0">F-measure = (1 + β 2 ) × precision × recall β 2 × precision + recall<label>(3)</label></formula><p>Results were computed using two perl scripts, one for the raw datasets (in English and in French) and one for the aligned dataset (in French only). The evaluation tools were supplied to task participants along with the training data.</p><p>Measures were computed for "ALL" causes in the datasets as our main evaluation reference for the task. In this case the evaluation is performed for all ICD codes. Measures were also computed for "EXTERNAL" causes as our secondary reference for the task. In this case, the evaluation is limited to ICD codes addressing a particular type of deaths, called "external causes" or violent deaths. These causes are of particular interest for two reasons: first, they are considered as "avoidable" and public health policies can target them specifically, e.g., suicide prevention. Second, the context associated with these deaths is often quite different from other deaths in terms of comorbidity, population affected and terminology used to describe the event. 
In practice, external causes are characterized by codes V01 to Y98.</p><p>For the raw datasets, matches (true positives) were counted for each ICD10 full code supplied that matched the reference for the associated document.</p><p>For the aligned dataset, matches (true positives) were counted for each ICD10 full code supplied that matched the reference for the associated document line.</p><p>The evaluation of the submissions to the replication task was essentially qualitative: we used a scoring grid to record the ease of installing and running the systems, the time spent to obtain results with the systems (analysts were committed to spend at most one working day-or 8 hours-to work with each system), and whether we managed to obtain the exact same results submitted as official runs.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3">Results and Discussion</head><p>Participating teams included between one and twelve team members and resided in Australia (team UNSW), France (teams LIMSI, LIRMM, LITL, Mondeca and SIBM), Germany (teams TUC and WBI), Italy (Team UNIPD) and Russia (team KFU). Teams often comprised members with a variety of backgrounds and drew from computer science, informatics, statistics, information and library science, clinical practice. It can be noted that one team (LITL) participated in the challenge as a master-level class project. One team (LIMSI) was composed of members of the organization team and submitted unofficial runs due to conflict of interest. One team submitted baseline runs.</p><p>For the English raw dataset, we received 15 official runs from 9 teams, including one baseline run and one invalid run (due to formatting issues). For the French raw dataset, we received 7 official runs from 4 teams. For the French aligned dataset, we received 9 official runs from 6 teams, including one baseline run.</p><p>Five systems were submitted to the replication track, allowing us to attempt replicating a total of 22 system runs.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.1">Methods implemented in the participants' systems</head><p>Participants used a variety of methods, many of which relied on lexical sources including the dictionaries supplied as part of the training data as well as other medical terminologies and ontologies. Some of these knowledge-based methods exploited the gold standard training data as an additional knowledge source.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>IMS-UNIPD.</head><p>The UNIPD team submitted official runs for the English dataset and later submitted unofficial runs for the French datasets as well <ref type="bibr" target="#b13">[14]</ref>. This team implemented a minimal expert system based on rules to translate acronyms together with a binary weighting approach (run 1) and a tf-idf approach (run 2) to retrieve the items in the dictionary most similar to the portion of the certificate of death. For both configurations, a basic approach was used to select the class with the highest weight.</p><p>KFU. The KFU team submitted two runs for the English dataset <ref type="bibr" target="#b14">[15]</ref>. They used sequence to sequence deep learning models based on recurrent neural networks. As input sequence, the method takes the raw text and outputs sequence of ICD10 codes. Both the supplied corpus and dictionary were used for training, exclusive of any additional data.</p><p>LITL. The LITL team submitted runs for the French dataset in the raw and aligned formats <ref type="bibr" target="#b15">[16]</ref>. The LITL team system was specifically designed by master's students (LITL programme, university of Toulouse) and their teachers for the challenge. The system is based on the search platform SOLR. Training data was indexed using the SolrXML format. The core is organized into ICD codes associated with the corresponding "raw Texts", "diagnostic Texts", ICD headings and SNOMED labels. The raw Texts from the test dataset were automatically transformed into queries and submitted to SOLR. The two runs submitted are based on the same collection and SOLR configuration. For Run 1, raw texts were automatically split into several queries when different causes were detected by using a custom-made rule-based system. For Run 2, each query corresponds to the entire raw text of each CépiDC line.</p><p>LIMSI. 
The LIMSI team submitted unofficial runs for all datasets <ref type="bibr" target="#b16">[17]</ref>. The starting point for these submissions is their last published system <ref type="bibr" target="#b17">[18]</ref>, which relied upon dictionary projection and supervised multi-class, mono-label text classification using simple features (bag of normalized tokens, character trigrams, and coding year). They extended this system to multi-label classification and the use of dictionary and token bigram features in the classifier. Character n-grams did not improve the F1-score on the training set and were discarded. Coding year was kept for the French data, but not for the English data, because it only spans year 2015. Because it only relies on the material provided by the task organizers, the same system could be applied to both the French and English datasets. In each case, Run 1 used a supervised machine learning method (multi-label SVM, with unigrams, bigrams and [for French] coding year), and Run 2 used a hybrid method: union of calibrated dictionary and multi-label SVM.</p><p>LIRMM. The LIRMM team submitted runs for all datasets <ref type="bibr" target="#b18">[19]</ref>. They annotated death certificate text through the SIFR Bioportal Annotator (http: //bioportal.lirmm.fr/annotator) using different configurations of the web service. For French, Simple Knowledge Organization System (SKOS) was built using ICD10 content from the CISMeF portal, the set of dictionaries provided in the challenge, as well as the training corpus. For the first run, the ontology was generated with a heuristic, where labels that correspond to multiple codes are assigned to the most frequent code only. For the second run, a fall back strategy relaxes the most frequent code heuristic for lines that were not assigned any codes initially. For English, in the first run, the SKOS was built using the American dictionary supplied with training data. 
In the second run the dictionary was combined with an owl version of ICD10 and ICD10CM (extracted from the Unified Medical Language System).</p><p>Mondeca. The Mondeca team submitted unofficial runs<ref type="foot" target="#foot_11">11</ref> for all datasets <ref type="bibr" target="#b19">[20]</ref>. They approached multilingual extraction of ICD10 codes by combining semantic web technology and NLP concepts in four steps: (i) transform all the datasets into RDF for a graph-based manipulation; (ii) transform the dictionaries for all the years into SKOS for better enrichment across the knowledge-bases; (iii) design a GATE workflow to annotate the RDF datasets based on gazetteers extracted from the dictionaries; and (iv) work on both French (raw data) and English corpus within a unique workflow, in a multilingual approach thus enabling simultaneous processing of multiple languages.</p><p>SIBM. The SIBM team submitted runs for all datasets <ref type="bibr" target="#b20">[21]</ref>. Their approach of term extraction is performed at the phrase level using natural language processing. The system is built using Python and Python/C extensions and produces the following output for each identified concept: (i) the entry text, (ii) the offset of the first and the final word contained in the health concept, (iii) the ICD10 identifier and (iv) the ICD10 term. Three main steps lead to the identification of ICD10 concepts for a given text: During tokenization, the input text is sliced into phrases, then words. Stop words are filtered and spell checking is performed using the Enchant library. Next, during ICD10 candidate selection, a method based on the phonetic encoding algorithm Double Metaphone (DM) is used for approximate term search. This system relies on a database storing pre-computed DM codes for each word available in the ICD10 dictionaries. 
Finally, during candidate ranking, a combination of the longest common substring and fuzzy match algorithms provides the candidate ranking. The most likely term having the highest score is retained as the matching ICD10 code for the phrase.</p><p>TUC. The TUC team submitted runs for all datasets <ref type="bibr" target="#b21">[22]</ref>. Their approach is focused on the exploration of relevant feature groups for multilingual text classification regarding ICD10 codes. First, a large scale brute-force feature set is constructed using the groups bag of words, bag of bigrams, bag of trigrams, latent Dirichlet allocation, and the ontologies of WordNet and UMLS. In the development phase, three different strategies were evaluated in conjunction with support vector machines for the English and French corpus: each feature group separately, early fusion of all feature groups, and late fusion. For English, early fusion (run 1) and the feature group bag of bigrams (run 2) achieved the best results. For French, average late fusion concerning bag of words and bag of bigrams (run 1), and the feature group bag of bigrams (run 2) performed best.</p><p>UNSW. The UNSW team submitted runs for the American dataset <ref type="bibr" target="#b22">[23]</ref>. They deployed a knowledge-based approach to tackle the task by solely using dictionary lookup. The first step is to index manually coded ICD10 lexicon followed by dictionary matching. Priority rules are applied to retrieve the relevant entity/entities and their corresponding ICD10 code(s) given free text cause of death description. Two priority methods were implemented in the submitted runs: the first one relied on BM25 and the second one on direct term match. The advantages of a knowledge-based method include speed and no need for training data.</p><p>WBI. The WBI team submitted runs for the English raw dataset and for the French aligned dataset <ref type="bibr" target="#b23">[24]</ref>. 
They combined standard rule-based methods for Named Entity Recognition (NER) with machine-learning approaches for candidate ranking. For NER rule-based dictionary lookup and fuzzy matching using Lucene Solr was applied. Preference was on generating potential candidates for each match to increase recall. Candidates were then ranked using a machine-learning approach. Based on the hierarchy of the ICD10 terminology (chapters, blocks, sub-chapters) combined with ICD10-Codes and Text available from the provided dictionaries a classifier was developed for ranking candidates.</p><p>Baselines. To provide a better assessment of the task difficulty and system performance, this year we offer baseline results using two methods: 1/ the ICD baseline consisted of exact string matching between the terms in the ICD and the death certificate text. 2/ the frequency baseline consisted in assigning to a certificate line from the test set the top 2 most frequently associated ICD10 codes in the training and development sets, using case and diacritic insensitive line matching.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2">System performance on death certificate coding</head><p>Tables 6 to 8 present system performance on the ICD10 coding task for each dataset. Team KFU obtained the best performance in terms of F-measure both overall and for the external causes of death on the English dataset. Team SIBM obtained the best official performance in terms of F-measure both overall and for the external causes of death on the French datasets. It is interesting to note that the participants who obtained the best scores on the French datasets (SIBM and LIMSI) are returning teams who also participated in the coding task in 2016. Team SIBM's performance improved from an F-measure of .680 in 2016 to an F-measure of .804 this year while team LIMSI's performance improved from an Table <ref type="table">6</ref>. System performance for ICD10 coding on the English raw test corpus in terms of Precision (P), recall (R) and F-measure (F). The top part of the table displays official runs, while the bottom part displays non-official and baseline runs. F-measure of .652 in 2016 to an F-measure of .867 this year, which also exceeds the best performance of 2016 obtained by team Erasmus with F-measure of .848. <ref type="foot" target="#foot_12">12</ref> This suggests that there is room for improvement on this task, and that iterations of the task are useful to help identify the best ideas and methods to address the task.</p><formula xml:id="formula_1">ALL EXTERNAL Team P R F Team P R F</formula><p>To provide a more in-depth analysis of results, this year we also introduced a measure of system performance on the external causes of death, which are of specific interest to public-health specialists, and are also thought to be more difficult to code. This hypothesis was confirmed by the results, as system performance was much lower on the external causes vs. all causes for all systems, both for the English and French datasets. 
Interestingly, some systems offered very good performance overall, but comparatively quite low performance on external causes, and vice-versa. We also note that the performance of the frequency baseline was much higher on the French aligned dataset, compared to the French raw dataset and English dataset. This suggests that there is value to the alignment Table <ref type="table">7</ref>. System performance for ICD10 coding on the French raw test corpus in terms of Precision (P), recall (R) and F-measure (F). A horizontal dashed line indicates the frequency baseline performance. The top part of the table displays official runs, while the bottom part displays non-official and baseline runs. step of data preparation, and to the size of the dataset (the French dataset was significantly larger than the English dataset).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>ALL</head><p>The results show that both knowledge-based and statistical methods can perform well on the task. For English, the best performance is obtained from a statistical neural method (team KFU) and the second best is obtained by a machine learning method relying on knowledge-based sources (team LIMSI). For French, the best performance is obtained from a machine learning method relying on knowledge-based sources (team LIMSI), while the second best is obtained with a combination of knowledge-based and Natural Language Processing methods (team SIBM). In addition, many teams relied on a system architecture that was the same for both languages and utilized language-specific features or knowledge sources, requiring little language adaptation. The results are very encouraging from a practical perspective and indicate that a coding assistance system could prove very useful for the effective processing of death certificates in multiple languages.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.3">Replication track and replicability of the results</head><p>Five teams submitted systems to our replication track. Only one of these teams had also participated in the replication track last year. Four systems covered both French and English, and one system only processed English. In addition, the replication track also included the simple scripts used to produce baseline runs.</p><p>Most of the baseline and system runs could be replicated by at least one analyst. However, the analysts still experienced varying degrees of difficulty in installing and running the systems. Differences were mainly due to the technical set-up of the computers used to replicate the experiments. Analysts also report that additional information on system requirements, installation procedure and practical use would be useful for all the systems submitted, although documentation was overall more abundant and detailed compared to last year's experiments. In some cases, system authors were contacted for help. They were responsive and helped to facilitate the use of their system. The results of the experiments suggest that replication is achievable. However, it continues to be more of a challenge than one would hope.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.4">Limitations</head><p>Formatting issues. In the French dataset, a formatting issue affected the certificates whose narratives contained a semicolon. The data export from IRIS to csv failed to adequately protect the text field with quotes, so that some of the data instances were made difficult to parse. Nonetheless, this problem affected less than 1% of the lines so we believe it had limited impact on the results. The export format will be corrected in future releases of the dataset. However, we would like to note that this type of issue fits within the practical 'real life' element of this challenge. While it certainly may have made system development more difficult, it also advocated for systems with strategies for dealing with potentially less-than-perfect data. While unintended, we believe this situation in fact makes for a robust evaluation because this kind of data would also be present in a practical workflow.</p><p>Did smoking contribute to the death? In the American dataset, the assignment of code F179 "Mental and behavioral disorders due to use of tobacco, unspecified" may be supported by information supplied by the reporting physician either in certificate narrative or in a structured data form. As a result, the gold standard assignment of F179 is sometimes unsupported by text. The prevalence of F179 due to form filling vs. text report is unknown and the two cases are currently indistinguishable in the dataset. The sample document shown in Table <ref type="table" target="#tab_1">2</ref> illustrates the case of F179 assignment supported by data form and not by text. The prevalence of the code is 4.7% in the training set and 3.9% in the test set, which creates a bias for all evaluated systems. We estimate that the bias could create differences of up to 2% in the overall F-measure. 
However, we note that the external causes evaluation is not impacted because F179 does not belong to the external cause of death category.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4">Conclusion</head><p>We released a new set of death certificates to evaluate systems on the task of ICD10 coding in multiple languages. This is the third edition of a biomedical NLP challenge that provides large gold-standard annotated corpora in French. Results show that high performance can be achieved by NLP systems on the task of coding for death certificates in French and in English. The level of performance observed shows that there is potential for integrating automated assistance in the death certificate coding workflow in both languages. We hope that continued efforts towards reproducibility will support the shift from research prototypes to operational production systems. The corpus used and the participating team system results are an important contribution to the research community. In addition, the focus on a language other than English (French) remains a rare initiative in the biomedical NLP community.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1 .</head><label>1</label><figDesc>A sample document from the CépiDC French Death Certificates Corpus: the raw causes (Raw) and computed causes (Computed) are aligned into line-level mappings to ICD codes (Aligned). 
English translations for each text line are provided in footnotes</figDesc><table><row><cell cols="2">line text</cell><cell>normalized text</cell><cell>ICD codes</cell></row><row><cell></cell><cell>1 choc septique 3</cell><cell></cell><cell>-</cell></row><row><cell>Raw</cell><cell>2 peritonite stercorale sur perforation colique 4 3 Syndrome de détresse respiratoire aiguë 5 4 defaillance multivicerale 6</cell><cell></cell><cell>---</cell></row><row><cell></cell><cell>5 HTA 7</cell><cell></cell><cell>-</cell></row><row><cell></cell><cell>1</cell><cell>defaillance multivicerale</cell><cell>R57.9</cell></row><row><cell>Computed</cell><cell>2 3 4 5</cell><cell>syndrome détresse respi-ratoire aiguë choc septique peritonite stercorale perforation colique</cell><cell>J80.0 A41.9 K65.9 K63.1</cell></row><row><cell></cell><cell>6</cell><cell>hta</cell><cell>I10.0</cell></row><row><cell></cell><cell>1 choc septique</cell><cell>choc septique</cell><cell>A41.9</cell></row><row><cell></cell><cell cols="2">2 peritonite stercorale sur perforation colique peritonite stercorale</cell><cell>K65.9</cell></row><row><cell>Aligned</cell><cell cols="2">2 peritonite stercorale sur perforation colique perforation colique 3 Syndrome de détresse respiratoire aiguë syndrome détresse respi-ratoire aiguë</cell><cell>K63.1 J80.0</cell></row><row><cell></cell><cell>4 defaillance multivicerale</cell><cell cols="2">défaillance multiviscérale R57.9</cell></row><row><cell></cell><cell>5 HTA</cell><cell>hta</cell><cell>I10.0</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_1"><head>Table 2 .</head><label>2</label><figDesc>Two sample documents from the American CDC Death Certificates Corpus</figDesc><table><row><cell>type line text</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_2"><head>Table 3 .</head><label>3</label><figDesc>Descriptive statistics of the CépiDc French Death Certificates Corpus</figDesc><table><row><cell></cell><cell cols="3">Training (2006-2012) Development (2013) Test (2014)</cell></row><row><cell>Certificates</cell><cell>65,844</cell><cell>27,850</cell><cell>31,690</cell></row><row><cell>Aligned lines</cell><cell>195,204</cell><cell>80,899</cell><cell>91,962</cell></row><row><cell>Tokens 8</cell><cell>1,176,994</cell><cell>496,649</cell><cell>599,127</cell></row><row><cell>Total ICD codes</cell><cell>266,808</cell><cell>110,869</cell><cell>131,426</cell></row><row><cell>Unique ICD codes</cell><cell>3,233</cell><cell>2,363</cell><cell>2,527</cell></row><row><cell>Unique unseen ICD codes</cell><cell>-</cell><cell>224</cell><cell>266</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_3"><head>Table 4 .</head><label>4</label><figDesc>Descriptive statistics of the CDC American Death Certificates Corpus</figDesc><table><row><cell></cell><cell cols="2">Training (2015) Test (2015)</cell></row><row><cell>Certificates</cell><cell>13,330</cell><cell>6,665</cell></row><row><cell>Non-aligned lines</cell><cell>32,714</cell><cell>14,834</cell></row><row><cell>Tokens 9</cell><cell>90,442</cell><cell>42,819</cell></row><row><cell>Total ICD codes</cell><cell>39,334</cell><cell>18,928</cell></row><row><cell>Unique ICD codes</cell><cell>1,256</cell><cell>900</cell></row><row><cell>Unique unseen ICD codes</cell><cell>-</cell><cell>157</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_4"><head>Table 5 .</head><label>5</label><figDesc>Data files. Files after the dashed lines are test files; files after the dotted lines contain the gold test data. L = language (fr = French, en = English).</figDesc><table><row><cell></cell><cell>L. Split</cell><cell>Type</cell><cell>Year</cell><cell>File name</cell></row><row><cell>Aligned</cell><cell cols="4">fr train aligned 2006-2012 corpus/train/AlignedCauses 2006-2012full.csv fr dev aligned 2013 corpus/dev/AlignedCauses 2013full.csv fr test aligned 2014 aligned/corpus/AlignedCauses 2014test.csv fr test+g aligned 2014 aligned/corpus/AlignedCauses 2014 full.csv</cell></row><row><cell></cell><cell>fr train</cell><cell>raw</cell><cell cols="2">2006-2012 corpus/train/CausesBrutes FR training.csv</cell></row><row><cell></cell><cell>fr train</cell><cell cols="3">ident 2006-2012 corpus/train/Ident FR training.csv</cell></row><row><cell></cell><cell cols="4">fr train computed 2006-2012 corpus/train/CausesCalculees FR training.csv</cell></row><row><cell>Raw</cell><cell cols="2">fr dev fr dev fr dev computed raw ident</cell><cell>2013 2013 2013</cell><cell>corpus/dev/CausesBrutes FR dev.csv corpus/dev/Ident FR dev full.csv corpus/dev/CausesCalculees FR dev.csv</cell></row><row><cell></cell><cell>fr test</cell><cell>raw</cell><cell>2014</cell><cell>raw/corpus/CausesBrutes FR test2014.csv</cell></row><row><cell></cell><cell>fr test</cell><cell>ident</cell><cell>2014</cell><cell>raw/corpus/Ident FR test2014.csv</cell></row><row><cell></cell><cell cols="2">fr test computed</cell><cell>2014</cell><cell>raw/corpus/CausesCalculees FR test2014 full.csv</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_7"><head>Table 8 .</head><label>8</label><figDesc>System performance for ICD10 coding on the French aligned test corpus in terms of Precision (P), recall (R) and F-measure (F). A horizontal dash line places the frequency baseline performance. The top part of the table displays official runs, while the bottom part displays non-official and baseline runs.</figDesc><table><row><cell></cell><cell>ALL</cell><cell></cell><cell></cell><cell cols="2">EXTERNAL</cell><cell></cell></row><row><cell></cell><cell>Team</cell><cell>P</cell><cell>R</cell><cell>F Team</cell><cell>P</cell><cell>R</cell><cell>F</cell></row><row><cell></cell><cell>SIBM-run1</cell><cell cols="3">.835 .775 .804 SIBM-run1</cell><cell cols="3">.534 .472 .501</cell></row><row><cell>Official runs</cell><cell>WBI-run1 TUC-MI-run2 LITL-run1 LIRMM-run1 LIRMM-run2 LITL-run2</cell><cell cols="3">.780 .751 .765 TUC-MI-run2 .874 .611 .719 LIRMM-run1 .612 .550 .579 LIRMM-run2 .506 .530 .518 LITL-run1 .505 .530 .517 LITL-run2 .646 .402 .495 WBI-run1</cell><cell cols="3">.740 .318 .445 .412 .403 .407 .412 .403 .407 .482 .348 .404 .534 .275 .363 .709 .151 .249</cell></row><row><cell></cell><cell>TUC-MI-run1</cell><cell cols="3">.426 .297 .350 TUC-MI-run1</cell><cell cols="3">.218 .119 .154</cell></row><row><cell></cell><cell>average</cell><cell cols="3">.648 .555 .593 average</cell><cell cols="3">.505 .311 .366</cell></row><row><cell></cell><cell>median</cell><cell cols="3">.629 .540 .548 median</cell><cell cols="3">.508 .333 .406</cell></row><row><cell>Non-official</cell><cell cols="7">LIMSI-run2 LIMSI-run1 TUC-MI-run1-corrected .875 .614 .722 TUC-MI-run1-corrected .748 .323 .452 .854 .881 .867 LIMSI-run2 .630 .674 .651 .865 .865 .865 LIMSI-run1 .640 .636 .638 UNIPD-run1 .604 .517 .557 UNIPD-run2 .320 .402 .356 UNIPD-run2 .488 .418 .451 UNIPD-run1 .376 .265 .311</cell></row><row><cell></cell><cell>Frequency baseline</cell><cell cols="3">.640 .470 
.542 Frequency baseline</cell><cell cols="3">.508 .338 .406</cell></row><row><cell></cell><cell>ICD baseline</cell><cell cols="3">.346 .041 .073 ICD baseline</cell><cell cols="3">.000 .000 .000</cell></row></table></figure>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="1" xml:id="foot_0">Centre d'épidémiologie sur les causes médicales de décès, Unité Inserm US10, http: //www.cepidc.inserm.fr/.</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="2" xml:id="foot_1">American Center for Disease Control, https://www.cdc.gov/</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="3" xml:id="foot_2">septic shock</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="4" xml:id="foot_3">colon perforation leading to stercoral peritonitis</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="5" xml:id="foot_4">Acute Respiratory Distress Syndrome</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="6" xml:id="foot_5">multiple organ failure</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="7" xml:id="foot_6">HBP: High Blood Pressure</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="8" xml:id="foot_7">These numbers were obtained using the linux wc -w command</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="9" xml:id="foot_8">These numbers were obtained using the linux wc -w command applied to the fourth field</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="10" xml:id="foot_9">As can be seen in the sample documents, the line numbering in the raw causes file may (Table2) or may not (Table1) be the same in the computed causes file. In some cases, the ordering in the computed causes file was changed to follow the causal chain of events leading to death.</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" xml:id="foot_10">Rawen train raw 2015 corpus/CausesBrutes EN training.csv en train ident 2015 corpus/Ident EN training.csv en train computed 2015 corpus/CausesCalculees EN training.csv en test raw 2015 raw/corpus/CausesBrutes EN test.csv en test ident 2015 raw/corpus/Ident EN test.csv en test computed 2015 raw/corpus/CausesCalculees EN test full.csv</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="11" xml:id="foot_11">One official run was submitted but did not comply with the challenge required format and could not be evaluated.</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="12" xml:id="foot_12">We note that these comparisons are indicative since the data sets used in 2016 and 2017 are not identical; specifically, the 2016 test set was distributed in 2017 as a development set and the 2017 test set consisted of new data (unreleased in 2016).</note>
		</body>
		<back>

			<div type="acknowledgement">
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Acknowledgements</head><p>We want to thank all participating teams for their effort in addressing new and challenging tasks. The organization work for CLEF eHealth 2017 task 1 was supported by the Agence Nationale pour la Recherche (French National Research Agency) under grant number ANR-13-JCJC-SIMI2-CABeRneT. The CLEF eHealth 2016 evaluation lab has been supported in part by the CLEF Initiative and Data61.</p></div>
			</div>

			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">CLEF 2017 eHealth Evaluation Lab Overview</title>
		<author>
			<persName><forename type="first">Lorraine</forename><surname>Goeuriot</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Liadh</forename><surname>Kelly</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Hanna</forename><surname>Suominen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Aurélie</forename><surname>Névéol</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Aude</forename><surname>Robert</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Evangelos</forename><surname>Kanoulas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Rene</forename><surname>Spijker</surname></persName>
		</author>
		<author>
			<persName><forename type="first">João</forename><surname>Palotti</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Guido</forename><surname>Zuccon</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">CLEF 2017 -8th Conference and Labs of the Evaluation Forum</title>
		<title level="s">Lecture Notes in Computer Science (LNCS</title>
		<meeting><address><addrLine>September</addrLine></address></meeting>
		<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">World Health Organization</title>
	</analytic>
	<monogr>
		<title level="m">International Statistical Classification of Diseases and Related Health Problems</title>
				<imprint>
			<date type="published" when="2011">2011</date>
			<biblScope unit="volume">2</biblScope>
		</imprint>
	</monogr>
	<note>ICD-10. 10th Revision. Instruction manual</note>
</biblStruct>

<biblStruct xml:id="b2">
	<monogr>
		<title level="m" type="main">Evaluating natural language processing systems: An analysis and review</title>
		<author>
			<persName><forename type="first">K</forename><forename type="middle">S</forename><surname>Jones</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">R</forename><surname>Galliers</surname></persName>
		</author>
		<imprint>
			<date type="published" when="1995">1995</date>
			<publisher>Springer Science &amp; Business Media</publisher>
			<biblScope unit="page">1083</biblScope>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<monogr>
		<author>
			<persName><forename type="first">E</forename><forename type="middle">M</forename><surname>Voorhees</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Harman</forename><surname>Dk</surname></persName>
		</author>
		<author>
			<persName><surname>Others</surname></persName>
		</author>
		<title level="m">TREC: Experiment and evaluation in information retrieval</title>
				<imprint>
			<publisher>MIT press Cambridge</publisher>
			<date type="published" when="2005">2005</date>
			<biblScope unit="volume">1</biblScope>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">Overview of the ShARe/CLEF eHealth Evaluation Lab</title>
		<author>
			<persName><forename type="first">H</forename><surname>Suominen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Salantera</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Velupillai</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><forename type="middle">W</forename><surname>Chapman</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Savova</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Elhadad</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Pradhan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><forename type="middle">R</forename><surname>South</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">L</forename><surname>Mowery</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Gjf</forename><surname>Jones</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Leveling</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Kelly</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Goeuriot</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Martinez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Zuccon</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Information Access Evaluation. Multilinguality, Multimodality, and Visualization. LNCS</title>
				<editor>
			<persName><forename type="first">P</forename><surname>Forner</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">H</forename><surname>Müller</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">R</forename><surname>Paredes</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">P</forename><surname>Rosso</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">B</forename><surname>Stein</surname></persName>
		</editor>
		<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2013">2013. 2013</date>
			<biblScope unit="volume">8138</biblScope>
			<biblScope unit="page" from="212" to="231" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">Overview of the CLEF eHealth Evaluation Lab</title>
		<author>
			<persName><forename type="first">L</forename><surname>Goeuriot</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Kelly</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Suominen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Hanlen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Névéol</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Grouin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Palotti</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Zuccon</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Information Access Evaluation. Multilinguality, Multimodality, and Interaction</title>
				<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2015">2015. 2015</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">Overview of the CLEF eHealth Evaluation Lab</title>
		<author>
			<persName><forename type="first">L</forename><surname>Kelly</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Goeuriot</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Suominen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Névéol</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Palotti</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Zuccon</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Experimental IR Meets Multilinguality, Multimodality, and Interaction. CLEF 2016</title>
		<title level="s">Lecture Notes in Computer Science</title>
		<editor>
			<persName><forename type="first">N</forename><surname>Fuhr</surname></persName>
		</editor>
		<meeting><address><addrLine>Cham</addrLine></address></meeting>
		<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2016">2016. 2016. 9822</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">Overview of the ShARe/CLEF eHealth Evaluation Lab</title>
		<author>
			<persName><forename type="first">L</forename><surname>Kelly</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Goeuriot</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Suominen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Schreck</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Leroy</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">L</forename><surname>Mowery</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Velupillai</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><forename type="middle">W</forename><surname>Chapman</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Martinez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Zuccon</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Palotti</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Information Access Evaluation. Multilinguality, Multimodality, and Interaction. LNCS</title>
				<editor>
			<persName><forename type="first">E</forename><surname>Kanoulas</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">M</forename><surname>Lupu</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">P</forename><surname>Clough</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">M</forename><surname>Sanderson</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">M</forename><surname>Hall</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><surname>Hanbury</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">E</forename><surname>Toms</surname></persName>
		</editor>
		<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2014">2014. 2014</date>
			<biblScope unit="volume">8685</biblScope>
			<biblScope unit="page" from="172" to="191" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">Overcoming barriers to NLP for clinical text: the role of shared tasks and the need for additional creative solutions</title>
		<author>
			<persName><forename type="first">W</forename><forename type="middle">W</forename><surname>Chapman</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><forename type="middle">M</forename><surname>Nadkarni</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Hirschman</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Avolio</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">W</forename><surname>Savova</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><forename type="middle">K</forename><surname>Uzuner</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">J Am Med Inform Assoc</title>
		<imprint>
			<biblScope unit="volume">18</biblScope>
			<biblScope unit="issue">5</biblScope>
			<biblScope unit="page" from="540" to="543" />
			<date type="published" when="2011">2011</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">Community challenges in biomedical text mining over 10 years: success, failure and the future</title>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">C</forename><surname>Huang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Lu</surname></persName>
		</author>
		<idno>pii: bbv024</idno>
	</analytic>
	<monogr>
		<title level="j">Brief Bioinform</title>
		<imprint>
			<date type="published" when="2015-05-01">2015. 2015 May 1</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">Certification et codification des causes médicales de décès</title>
		<author>
			<persName><forename type="first">G</forename><surname>Pavillon</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Laurent</surname></persName>
		</author>
		<ptr target="http://opac.invs.sante.fr/doc_num.php?explnum_id=2065" />
	</analytic>
	<monogr>
		<title level="j">Bulletin Epidémiologique Hebdomadaire -BEH</title>
		<imprint>
			<biblScope unit="page" from="134" to="138" />
			<date type="published" when="2003">2003. 2016-06-06</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title level="a" type="main">IRIS: A language-independent coding system based on the NCHS system MMDS</title>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">A</forename><surname>Johansson</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Pavillon</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">WHO-FIC Network Meeting</title>
				<meeting><address><addrLine>Tokyo, Japan</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2005">2005</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<analytic>
		<title level="a" type="main">A Dataset for ICD-10 Coding of Death Certificates: Creation and Usage</title>
		<author>
			<persName><forename type="first">T</forename><surname>Lavergne</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Névéol</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Robert</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Grouin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Rey</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Zweigenbaum</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Fifth Workshop on Building and Evaluating Ressources for Health and Biomedical Text Processing -BioTxtM2016</title>
				<meeting>the Fifth Workshop on Building and Evaluating Ressources for Health and Biomedical Text Processing -BioTxtM2016</meeting>
		<imprint>
			<date type="published" when="2016">2016</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b13">
	<analytic>
		<title level="a" type="main">A Lexicon Based Approach to Classification of ICD10 Codes</title>
		<author>
			<persName><forename type="first">G</forename><forename type="middle">M</forename><surname>Di Nunzio</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Beghini</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Vezzani</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Henrot</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">IMS Unipd at CLEF eHealth Task 1</title>
				<imprint>
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
	<note>CLEF 2017. Online Working Notes. CEUR-WS</note>
</biblStruct>

<biblStruct xml:id="b14">
	<analytic>
		<title level="a" type="main">KFU at CLEF eHealth 2017 Task 1: ICD-10 Coding of English Death Certificates with Recurrent Neural Networks</title>
		<author>
			<persName><forename type="first">Z</forename><surname>Miftakhutdinov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Tutubalina</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">CLEF 2017</title>
				<imprint>
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
	<note>Online Working Notes</note>
</biblStruct>

<biblStruct xml:id="b15">
	<analytic>
		<title level="a" type="main">LITL at CLEF eHealth2017: automatic classification of death reports</title>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">M</forename><surname>Ho-Dac</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Fabre</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Birski</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Boudraa</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Bourriot</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Cassier</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Delvenne</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Garcia-Gonzalez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><forename type="middle">B</forename><surname>Kang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Piccinini</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Rohrbacher</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Séguier</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">CLEF 2017</title>
				<imprint>
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
	<note>Online Working Notes. CEUR-WS</note>
</biblStruct>

<biblStruct xml:id="b16">
	<analytic>
		<title level="a" type="main">Multiple methods for multi-class, multilabel ICD-10 coding of multi-granularity, multilingual death certificates</title>
		<author>
			<persName><forename type="first">P</forename><surname>Zweigenbaum</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Lavergne</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">CLEF 2017</title>
				<imprint>
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
	<note>Online Working Notes. CEUR-WS</note>
</biblStruct>

<biblStruct xml:id="b17">
	<analytic>
		<title level="a" type="main">Hybrid methods for ICD-10 coding of death certificates</title>
		<author>
			<persName><forename type="first">P</forename><surname>Zweigenbaum</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Lavergne</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Seventh International Workshop on Health Text Mining and Information Analysis</title>
				<meeting><address><addrLine>Austin, Texas, USA</addrLine></address></meeting>
		<imprint>
			<publisher>EMNLP</publisher>
			<date type="published" when="2016-11">November 2016. 2016</date>
			<biblScope unit="page" from="96" to="105" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b18">
	<analytic>
		<title level="a" type="main">ICD10 coding of death certificates with the NCBO and SIFR Annotator(s) at CLEF eHealth 2017 Task 1</title>
		<author>
			<persName><forename type="first">A</forename><surname>Tchechmedjiev</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Abdaoui</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Emonet</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Jonquet</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">CLEF 2017 Online Working Notes</title>
				<imprint>
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b19">
	<analytic>
		<title level="a" type="main">NoNLP: Annotating Medical Domain by using Semantic Techologies</title>
		<author>
			<persName><forename type="first">G</forename><forename type="middle">A</forename><surname>Atemezing</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">CLEF 2017</title>
				<imprint>
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
	<note>Online Working Notes</note>
</biblStruct>

<biblStruct xml:id="b20">
	<analytic>
		<title level="a" type="main">SIBM at CLEF eHealth Evaluation Lab 2017: Multilingual Information Extraction with CIM-IND</title>
		<author>
			<persName><forename type="first">C</forename><surname>Cabot</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">F</forename><surname>Soualmia</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">J</forename><surname>Darmoni</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">CLEF 2017</title>
				<imprint>
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
	<note>Online Working Notes. CEUR-WS</note>
</biblStruct>

<biblStruct xml:id="b21">
	<analytic>
		<title level="a" type="main">Fusion Methods for ICD10 Code Classification of Death Certificates in Multilingual Corpora</title>
		<author>
			<persName><forename type="first">M</forename><surname>Ebersbach</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Herms</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Eibl</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">CLEF 2017</title>
				<imprint>
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
	<note>Online Working Notes</note>
</biblStruct>

<biblStruct xml:id="b22">
	<analytic>
		<title level="a" type="main">Automatic coding of death certificates to ICD-10 terminology</title>
		<author>
			<persName><forename type="first">J</forename><surname>Jonnagaddala</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Hu</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">CLEF 2017</title>
				<imprint>
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
	<note>Online Working Notes. CEUR-WS</note>
</biblStruct>

<biblStruct xml:id="b23">
	<analytic>
		<title level="a" type="main">Multi-lingual ICD-10 coding using a hybrid rule-based and supervised classification approach at CLEF eHealth</title>
		<author>
			<persName><forename type="first">J</forename><surname>Ševa</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Kittner</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Roller</surname></persName>
		</author>
		<author>
			<persName><forename type="first">U</forename><surname>Leser</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">CLEF 2017</title>
				<imprint>
			<date type="published" when="2017">2017. 2017</date>
		</imprint>
	</monogr>
	<note>Online Working Notes</note>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
