<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">PRNA at ImageCLEF 2017 Caption Prediction and Concept Detection Tasks</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Sadid</forename><forename type="middle">A</forename><surname>Hasan</surname></persName>
							<affiliation key="aff0">
								<orgName type="laboratory">Artificial Intelligence Laboratory</orgName>
								<orgName type="institution">Philips Research North America</orgName>
								<address>
									<settlement>Cambridge</settlement>
									<region>MA</region>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Yuan</forename><surname>Ling</surname></persName>
							<affiliation key="aff0">
								<orgName type="laboratory">Artificial Intelligence Laboratory</orgName>
								<orgName type="institution">Philips Research North America</orgName>
								<address>
									<settlement>Cambridge</settlement>
									<region>MA</region>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Joey</forename><surname>Liu</surname></persName>
							<affiliation key="aff0">
								<orgName type="laboratory">Artificial Intelligence Laboratory</orgName>
								<orgName type="institution">Philips Research North America</orgName>
								<address>
									<settlement>Cambridge</settlement>
									<region>MA</region>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Rithesh</forename><surname>Sreenivasan</surname></persName>
							<affiliation key="aff1">
								<orgName type="institution">Philips Innovation Campus</orgName>
								<address>
									<settlement>Bengaluru</settlement>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Shreya</forename><surname>Anand</surname></persName>
							<affiliation key="aff1">
								<orgName type="institution">Philips Innovation Campus</orgName>
								<address>
									<settlement>Bengaluru</settlement>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Tilak</forename><forename type="middle">Raj</forename><surname>Arora</surname></persName>
							<affiliation key="aff1">
								<orgName type="institution">Philips Innovation Campus</orgName>
								<address>
									<settlement>Bengaluru</settlement>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Vivek</forename><surname>Datla</surname></persName>
							<affiliation key="aff0">
								<orgName type="laboratory">Artificial Intelligence Laboratory</orgName>
								<orgName type="institution">Philips Research North America</orgName>
								<address>
									<settlement>Cambridge</settlement>
									<region>MA</region>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Kathy</forename><surname>Lee</surname></persName>
							<email>kathy.lee1@philips.com</email>
							<affiliation key="aff0">
								<orgName type="laboratory">Artificial Intelligence Laboratory</orgName>
								<orgName type="institution">Philips Research North America</orgName>
								<address>
									<settlement>Cambridge</settlement>
									<region>MA</region>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Ashequl</forename><surname>Qadir</surname></persName>
							<affiliation key="aff0">
								<orgName type="laboratory">Artificial Intelligence Laboratory</orgName>
								<orgName type="institution">Philips Research North America</orgName>
								<address>
									<settlement>Cambridge</settlement>
									<region>MA</region>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Christine</forename><surname>Swisher</surname></persName>
							<affiliation key="aff0">
								<orgName type="laboratory">Artificial Intelligence Laboratory</orgName>
								<orgName type="institution">Philips Research North America</orgName>
								<address>
									<settlement>Cambridge</settlement>
									<region>MA</region>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Oladimeji</forename><surname>Farri</surname></persName>
							<email>dimeji.farri@philips.com</email>
							<affiliation key="aff0">
								<orgName type="laboratory">Artificial Intelligence Laboratory</orgName>
								<orgName type="institution">Philips Research North America</orgName>
								<address>
									<settlement>Cambridge</settlement>
									<region>MA</region>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">PRNA at ImageCLEF 2017 Caption Prediction and Concept Detection Tasks</title>
					</analytic>
					<monogr>
						<imprint>
							<date/>
						</imprint>
					</monogr>
					<idno type="MD5">78D4D5B3159796E856BCD229873F6980</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2023-03-24T20:27+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>Caption Prediction</term>
					<term>Concept Detection</term>
					<term>Encoder-Decoder Framework</term>
					<term>Attention Mechanism</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>In this paper, we describe our caption prediction and concept detection systems submitted for the ImageCLEF 2017 challenge. We submitted four runs for the caption prediction task and three runs for the concept detection task by using an attention-based image caption generation framework. The attention mechanism automatically learns to emphasize on salient parts of the medical image while generating corresponding words in the output for the caption prediction task and corresponding clinical concepts for the concept detection task. Our system was ranked first in the caption prediction task while showed a decent performance in the concept detection task. We present the evaluation results with detailed comparison and analysis of our different runs.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1">Introduction</head><p>Automatically understanding the content of an image and describing in natural language is a challenging task which has gained a lot of attention from computer vision and natural language processing researchers in recent years through various challenges for visual recognition and caption generation <ref type="bibr" target="#b0">[1,</ref><ref type="bibr" target="#b1">2]</ref>. Due to the ever-increasing number of images in the medical domain that are generated across the clinical diagnostic pipeline, automated understanding of the image content could especially be beneficial for clinicians to provide useful insights and reduce the significant burden on the overall workflow across the care continuum. Motivated by this need for automated image understanding methods in the healthcare domain, ImageCLEF<ref type="foot" target="#foot_0">3</ref> organized its first caption prediction and concept detection tasks in 2017 <ref type="bibr" target="#b2">[3,</ref><ref type="bibr" target="#b3">4]</ref>. The main objective of the concept detection task was to retrieve the relevant clinical concepts that are reflected in a medical image whereas in the caption prediction task, participants were supposed to leverage the clinical concept vocabulary created in the concept detection task towards generating a coherent caption for each medical image.</p><p>The recent advances in deep neural networks have been shown to work well for large scale image processing, classification and captioning tasks. Specifically, the combined use of deep convolutional neural networks (CNN) with recurrent neural networks (RNN) has demonstrated superior performance for these tasks <ref type="bibr" target="#b4">[5]</ref><ref type="bibr" target="#b5">[6]</ref><ref type="bibr" target="#b6">[7]</ref><ref type="bibr" target="#b7">[8]</ref><ref type="bibr" target="#b8">[9]</ref><ref type="bibr" target="#b9">[10]</ref><ref type="bibr" target="#b10">[11]</ref> based on the use of sequence to sequence learning and encoder-decoderbased frameworks for neural machine translation <ref type="bibr" target="#b11">[12]</ref><ref type="bibr" target="#b12">[13]</ref><ref type="bibr" target="#b13">[14]</ref>.</p><p>Motivated by the success of such prior works, we use an encoder-decoder based deep neural network architecture for the caption prediction task <ref type="bibr" target="#b8">[9]</ref>, where the encoder uses a deep CNN <ref type="bibr" target="#b4">[5]</ref> to encode a raw medical image to a feature representation, which is in turn decoded using an attention-based RNN to generate the most relevant caption for the given image. We follow a similar approach to address the concept detection task by treating it as a text generation problem. Our system was ranked first in the caption prediction task while showed a decent performance in the concept detection task. In the next sections, we discuss the experimental settings, present the evaluation results with analysis, and conclude the paper.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2">Experimental Setup</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.1">Data</head><p>The training data contains 164,614 biomedical images with associated clinical concepts or captions extracted from PubMed Central<ref type="foot" target="#foot_1">4</ref> . Furthermore, 10K images per task are provided as the validation set while 10K additional images are provided as the test set for both tasks.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.2">Training</head><p>We use an encoder-decoder-based framework that uses a CNN-based architecture to extract the image feature representation and a RNN-based architecture with an attention-based mechanism to translate the image feature representation to relevant captions <ref type="bibr" target="#b8">[9]</ref>. We use the VGGnet-19 <ref type="bibr" target="#b4">[5]</ref> deep CNN model pre-trained on the ImageNet dataset <ref type="bibr" target="#b5">[6]</ref> with fine tuning on the given ImageCLEF training dataset to extract the image feature representation from a lower convolution layer such that the decoder can focus on the salient aspects of the image via an attention mechanism.</p><p>The decoder uses a long short-term memory (LSTM) network <ref type="bibr" target="#b14">[15]</ref> with a soft attention mechanism <ref type="bibr" target="#b11">[12,</ref><ref type="bibr" target="#b8">9]</ref> that generates a caption by predicting one word at every time step based on a context vector (which represents the important parts of the image to focus on), the previous hidden state, and the previously generated words.</p><p>Our models are trained with stochastic gradient descent using Adam <ref type="bibr" target="#b15">[16]</ref> as the adaptive learning rate algorithm and dropout <ref type="bibr" target="#b16">[17]</ref> as the regularization mechanism. Our models were trained with two NVIDIA Tesla M40 GPUs.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.3">Run Description</head><p>For the caption prediction task, we submitted four runs as follows:</p><p>-Run1: This run does not consider any semantic pre-processing of the captions; the entire training and validation set are used to train the model as described in Section 2.2. -Run2: This run considers semantic pre-processing of captions using MetaMap <ref type="bibr" target="#b17">[18]</ref> and the Unified Medical Language System (UMLS) metathesaurus <ref type="bibr" target="#b18">[19]</ref>, initially trained on the modified VGG19 model with a randomly selected subset of 20K ImageCLEF training images to automatically generate image features and classify the imaging modality, and then finally trained as described in Section 2.2 with a random subset of 24K training images and 2K validation images to minimize time and computational complexity. -Run3: This run is similar to Run1 with automatic generation of UMLS concept unique identifiers (CUIs) using the training dataset for the concept detection task, instead of the captions from the caption prediction task, and then replacing the CUIs (generated for the test set) with the longest relevant clinical terms from the UMLS metathesaurus as the caption. -Run4: This run is similar to Run3 where we replace the CUIs (generated for the test set) with all relevant clinical terms (including synonyms) from the UMLS metathesaurus as the caption.</p><p>For the concept detection task, we submitted three runs as follows:</p><p>-Run1: In this run, we consider the task as a sequence-to-sequence generation problem similar to caption generation, where the CUIs associated with an image are simply treated as a sequence of concepts; the entire training and validation set are used to train the model as described in Section 2.2. -Run2: This run is created by simply transforming the generated captions (for the test set) from Run1 of the caption prediction task by replacing clinical terms with the best possible CUIs from the UMLS metathesaurus. -Run3: This run is created by simply transforming the generated captions (for the test set) from Run2 of the caption prediction task by replacing clinical terms with the best possible CUIs from the UMLS metathesaurus.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.4">Evaluation and Analysis</head><p>The evaluation for the caption prediction task is conducted using the well-known metric for machine translation, BLEU <ref type="bibr" target="#b19">[20]</ref> whereas F1 score is used to evaluate the concept detection systems. Table <ref type="table" target="#tab_0">1</ref> and Table <ref type="table" target="#tab_1">2</ref> show the evaluation results. We can see that for the caption prediction task, Run4 and Run1 achieved high scores denoting the effectiveness of our approach. Overall, our system was ranked first in the caption prediction task. Run4 is better as it includes all possible terms from the ontologies in the generated caption but trades-off the coherence of the caption. Hence, this approach increases the BLEU scores, which essentially computes exact word overlaps between the generated caption and the ground truth caption. Run2 likely suffered from the limited training data whereas Run3 has a lower score as it accepts only the longest possible clinical term as a replacement for a CUI in the caption.</p><p>For the concept detection task, Run1 performed reasonably well, but shows that there is still room for improvement. We may consider treating the task as a multi-label classification problem to achieve possible improvements. Run2 and Run3 were limited due to the 2-step translation of clinical terms to CUIs from the generated captions of the other task, which potentially indicates propagation of errors in learning the captions to the downstream task. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3">Conclusion</head><p>We presented the details of our participation in the caption prediction and concept detection tasks of the ImageCLEF 2017 challenge. Our system was ranked first in the caption prediction task and showed decent performance in the concept detection task. Overall, evaluation results showed the effectiveness of our approach. We highlighted potential reasons for errors in our submissions and discussed future work to consider for improved results.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1 .</head><label>1</label><figDesc>Evaluation of caption prediction runs</figDesc><table><row><cell cols="2">Runs Mean BLEU score</cell></row><row><cell>Run1</cell><cell>0.2638</cell></row><row><cell>Run2</cell><cell>0.1107</cell></row><row><cell>Run3</cell><cell>0.1801</cell></row><row><cell>Run4</cell><cell>0.3211</cell></row><row><cell cols="2">Runs Mean F1 score</cell></row><row><cell>Run1</cell><cell>0.1208</cell></row><row><cell>Run2</cell><cell>0.0234</cell></row><row><cell>Run3</cell><cell>0.0215</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_1"><head>Table 2 .</head><label>2</label><figDesc>Evaluation of concept detection runs</figDesc><table /></figure>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="3" xml:id="foot_0">http://www.imageclef.org/2017/caption</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="4" xml:id="foot_1">https://www.ncbi.nlm.nih.gov/pmc/</note>
		</body>
		<back>
			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">ImageNet Large Scale Visual Recognition Challenge</title>
		<author>
			<persName><forename type="first">Olga</forename><surname>Russakovsky</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Jia</forename><surname>Deng</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Hao</forename><surname>Su</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Jonathan</forename><surname>Krause</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Sanjeev</forename><surname>Satheesh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Sean</forename><surname>Ma</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Zhiheng</forename><surname>Huang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Andrej</forename><surname>Karpathy</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Aditya</forename><surname>Khosla</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Michael</forename><forename type="middle">S</forename><surname>Bernstein</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Alexander</forename><forename type="middle">C</forename><surname>Berg</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Fei-Fei</forename><surname>Li</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">International Journal of Computer Vision</title>
		<imprint>
			<biblScope unit="volume">115</biblScope>
			<biblScope unit="issue">3</biblScope>
			<biblScope unit="page" from="211" to="252" />
			<date type="published" when="2015">2015</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">Show and Tell: Lessons Learned from the 2015 MSCOCO Image Captioning Challenge</title>
		<author>
			<persName><forename type="first">Oriol</forename><surname>Vinyals</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Alexander</forename><surname>Toshev</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Samy</forename><surname>Bengio</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Dumitru</forename><surname>Erhan</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">IEEE Trans. Pattern Anal. Mach. Intell</title>
		<imprint>
			<biblScope unit="volume">39</biblScope>
			<biblScope unit="issue">4</biblScope>
			<biblScope unit="page" from="652" to="663" />
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">Overview of ImageCLEF 2017: Information extraction from images</title>
		<author>
			<persName><forename type="first">Bogdan</forename><surname>Ionescu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Henning</forename><surname>Mller</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Mauricio</forename><surname>Villegas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Helbert</forename><surname>Arenas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Giulia</forename><surname>Boato</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Duc-Tien</forename><surname>Dang-Nguyen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Yashin</forename><surname>Dicente Cid</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Carsten</forename><surname>Eickhoff</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Alba</forename><surname>Garcia Seco De Herrera</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Cathal</forename><surname>Gurrin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Bayzidul</forename><surname>Islam</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Vassili</forename><surname>Kovalev</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Vitali</forename><surname>Liauchuk</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Josiane</forename><surname>Mothe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Luca</forename><surname>Piras</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Michael</forename><surname>Riegler</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Immanuel</forename><surname>Schwall</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Experimental IR Meets Multilinguality, Multimodality, and Interaction 8th International Conference of the CLEF Association, CLEF 2017</title>
		<title level="s">Springer LNCS</title>
		<imprint>
			<date type="published" when="2017">2017</date>
			<biblScope unit="volume">10456</biblScope>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">Overview of ImageCLEFcaption 2017 -Image Caption Prediction and Concept Detection for Biomedical Images</title>
		<author>
			<persName><forename type="first">Carsten</forename><surname>Eickhoff</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Immanuel</forename><surname>Schwall</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Alba</forename><surname>Garca Seco De Herrera</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Henning</forename><surname>Mller</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">CLEF 2017 Labs Working Notes, CEUR Workshop Proceedings</title>
				<imprint>
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<monogr>
		<title level="m" type="main">Very Deep Convolutional Networks for Large-Scale Image Recognition</title>
		<author>
			<persName><forename type="first">Karen</forename><surname>Simonyan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Andrew</forename><surname>Zisserman</surname></persName>
		</author>
		<idno type="arXiv">arXiv:1409.1556</idno>
		<imprint>
			<date type="published" when="2014">2014</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">ImageNet Classification with Deep Convolutional Neural Networks</title>
		<author>
			<persName><forename type="first">Alex</forename><surname>Krizhevsky</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Ilya</forename><surname>Sutskever</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Geoffrey</forename><forename type="middle">E</forename><surname>Hinton</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">NIPS</title>
		<imprint>
			<biblScope unit="volume">2012</biblScope>
			<biblScope unit="page" from="1106" to="1114" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">Show and tell: A neural image caption generator</title>
		<author>
			<persName><forename type="first">Oriol</forename><surname>Vinyals</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Alexander</forename><surname>Toshev</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Samy</forename><surname>Bengio</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Dumitru</forename><surname>Erhan</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">CVPR</title>
		<imprint>
			<biblScope unit="volume">2015</biblScope>
			<biblScope unit="page" from="3156" to="3164" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">Long-term recurrent convolutional networks for visual recognition and description</title>
		<author>
			<persName><forename type="first">Jeff</forename><surname>Donahue</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Lisa</forename><forename type="middle">Anne</forename><surname>Hendricks</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Sergio</forename><surname>Guadarrama</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Marcus</forename><surname>Rohrbach</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Subhashini</forename><surname>Venugopalan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Trevor</forename><surname>Darrell</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Kate</forename><surname>Saenko</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">CVPR</title>
		<imprint>
			<biblScope unit="volume">2015</biblScope>
			<biblScope unit="page" from="2625" to="2634" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<monogr>
		<title level="m" type="main">Show, Attend and Tell: Neural Image Caption Generation with Visual Attention</title>
		<author>
			<persName><forename type="first">Kelvin</forename><surname>Xu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Jimmy</forename><surname>Ba</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Ryan</forename><surname>Kiros</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Kyunghyun</forename><surname>Cho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Aaron</forename><surname>Courville</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Ruslan</forename><surname>Salakhutdinov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Richard</forename><surname>Zemel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Yoshua</forename><surname>Bengio</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2015">2015</date>
			<publisher>ICML</publisher>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<monogr>
		<title level="m" type="main">Multiple Object Recognition with Visual Attention</title>
		<author>
			<persName><forename type="first">Jimmy</forename><surname>Ba</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Volodymyr</forename><surname>Mnih</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Koray</forename><surname>Kavukcuoglu</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2015">2015</date>
			<publisher>ICLR</publisher>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<monogr>
		<title level="m" type="main">Recurrent Models of Visual Attention</title>
		<author>
			<persName><forename type="first">Volodymyr</forename><surname>Mnih</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Nicolas</forename><surname>Heess</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Alex</forename><surname>Graves</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Koray</forename><surname>Kavukcuoglu</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2014">2014</date>
			<publisher>NIPS</publisher>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<monogr>
		<title level="m" type="main">Neural machine translation by jointly learning to align and translate</title>
		<author>
			<persName><forename type="first">Dzmitry</forename><surname>Bahdanau</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Kyunghyun</forename><surname>Cho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Yoshua</forename><surname>Bengio</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2015">2015</date>
			<publisher>ICLR</publisher>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<analytic>
		<title level="a" type="main">Sequence to Sequence Learning with Neural Networks</title>
		<author>
			<persName><forename type="first">Ilya</forename><surname>Sutskever</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Oriol</forename><surname>Vinyals</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Quoc</forename><forename type="middle">V</forename><surname>Le</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">NIPS</title>
		<imprint>
			<biblScope unit="volume">2014</biblScope>
			<biblScope unit="page" from="3104" to="3112" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b13">
	<analytic>
		<title level="a" type="main">Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation</title>
		<author>
			<persName><forename type="first">Kyunghyun</forename><surname>Cho</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Bart</forename><surname>Van Merrienboer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Caglar</forename><surname>Gulcehre</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Dzmitry</forename><surname>Bahdanau</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Fethi</forename><surname>Bougares</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Holger</forename><surname>Schwenk</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Yoshua</forename><surname>Bengio</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">EMNLP</title>
		<imprint>
			<biblScope unit="volume">2014</biblScope>
			<biblScope unit="page" from="1724" to="1734" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b14">
	<analytic>
		<title level="a" type="main">Long Short-Term Memory</title>
		<author>
			<persName><forename type="first">Sepp</forename><surname>Hochreiter</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Jrgen</forename><surname>Schmidhuber</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Neural Computation</title>
		<imprint>
			<biblScope unit="volume">9</biblScope>
			<biblScope unit="issue">8</biblScope>
			<biblScope unit="page" from="1735" to="1780" />
			<date type="published" when="1997">1997</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b15">
	<monogr>
		<title level="m" type="main">Adam: A Method for Stochastic Optimization</title>
		<author>
			<persName><forename type="first">Diederik</forename><forename type="middle">P</forename><surname>Kingma</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Jimmy</forename><surname>Ba</surname></persName>
		</author>
		<idno>CoRR abs/1412.6980</idno>
		<imprint>
			<date type="published" when="2014">2014</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b16">
	<analytic>
		<title level="a" type="main">Dropout: a simple way to prevent neural networks from overfitting</title>
		<author>
			<persName><forename type="first">Nitish</forename><surname>Srivastava</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Geoffrey</forename><forename type="middle">E</forename><surname>Hinton</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Alex</forename><surname>Krizhevsky</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Ilya</forename><surname>Sutskever</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Ruslan</forename><surname>Salakhutdinov</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Machine Learning Research</title>
		<imprint>
			<biblScope unit="volume">15</biblScope>
			<biblScope unit="issue">1</biblScope>
			<biblScope unit="page" from="1929" to="1958" />
			<date type="published" when="2014">2014</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b17">
	<analytic>
		<title level="a" type="main">Effective mapping of biomedical text to the UMLS Metathesaurus: the MetaMap program</title>
		<author>
			<persName><forename type="first">Alan</forename><forename type="middle">R</forename><surname>Aronson</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">AMIA</title>
		<imprint>
			<date type="published" when="2001">2001</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b18">
	<analytic>
		<title level="a" type="main">The unified medical language system (umls): integrating biomedical terminology</title>
		<author>
			<persName><forename type="first">Olivier</forename><surname>Bodenreider</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Nucleic acids research</title>
		<imprint>
			<biblScope unit="volume">32</biblScope>
			<biblScope unit="issue">1</biblScope>
			<biblScope unit="page" from="D267" to="D270" />
			<date type="published" when="2004">2004</date>
		</imprint>
	</monogr>
	<note>suppl</note>
</biblStruct>

<biblStruct xml:id="b19">
	<analytic>
		<title level="a" type="main">BLEU: A Method for Automatic Evaluation of Machine Translation</title>
		<author>
			<persName><forename type="first">Kishore</forename><surname>Papineni</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Salim</forename><surname>Roukos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Todd</forename><surname>Ward</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Wei-Jing</forename><surname>Zhu</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">ACL</title>
		<imprint>
			<biblScope unit="page" from="311" to="318" />
			<date type="published" when="2002">2002</date>
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
