<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">Towards Evaluating the Impact of Semantic Support for Curating the Fungus Scientific Literature</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Marie-Jean</forename><surname>Meurs</surname></persName>
							<email>mjmeurs@encs.concordia.ca</email>
							<affiliation key="aff0">
								<orgName type="department">Department of Computer Science and Software Engineering</orgName>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Caitlin</forename><surname>Murphy</surname></persName>
							<email>cmurphy@gene.concordia.ca</email>
							<affiliation key="aff1">
								<orgName type="department">Centre for Structural and Functional Genomics</orgName>
							</affiliation>
							<affiliation key="aff2">
								<orgName type="department">Department of Biology</orgName>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Nona</forename><surname>Naderi</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">Department of Computer Science and Software Engineering</orgName>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Ingo</forename><surname>Morgenstern</surname></persName>
							<email>imorgenstern@gene.concordia.ca</email>
							<affiliation key="aff1">
								<orgName type="department">Centre for Structural and Functional Genomics</orgName>
							</affiliation>
							<affiliation key="aff2">
								<orgName type="department">Department of Biology</orgName>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Carolina</forename><surname>Cantu</surname></persName>
							<email>ccantut@gene.concordia.ca</email>
							<affiliation key="aff1">
								<orgName type="department">Centre for Structural and Functional Genomics</orgName>
							</affiliation>
							<affiliation key="aff2">
								<orgName type="department">Department of Biology</orgName>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Shary</forename><surname>Semarjit</surname></persName>
							<affiliation key="aff1">
								<orgName type="department">Centre for Structural and Functional Genomics</orgName>
							</affiliation>
							<affiliation key="aff3">
								<orgName type="department">Department of Chemistry and Biochemistry</orgName>
								<orgName type="institution">Concordia University</orgName>
								<address>
									<settlement>Montréal</settlement>
									<region>QC</region>
									<country key="CA">Canada</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Greg</forename><surname>Butler</surname></persName>
							<email>gregb@encs.concordia.ca</email>
							<affiliation key="aff0">
								<orgName type="department">Department of Computer Science and Software Engineering</orgName>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="department">Centre for Structural and Functional Genomics</orgName>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Justin</forename><surname>Powlowski</surname></persName>
							<affiliation key="aff1">
								<orgName type="department">Centre for Structural and Functional Genomics</orgName>
							</affiliation>
							<affiliation key="aff3">
								<orgName type="department">Department of Chemistry and Biochemistry</orgName>
								<orgName type="institution">Concordia University</orgName>
								<address>
									<settlement>Montréal</settlement>
									<region>QC</region>
									<country key="CA">Canada</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Adrian</forename><surname>Tsang</surname></persName>
							<email>tsang@gene.concordia.ca</email>
							<affiliation key="aff1">
								<orgName type="department">Centre for Structural and Functional Genomics</orgName>
							</affiliation>
							<affiliation key="aff2">
								<orgName type="department">Department of Biology</orgName>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">René</forename><surname>Witte</surname></persName>
							<email>rwitte@cse.concordia.ca</email>
							<affiliation key="aff0">
								<orgName type="department">Department of Computer Science and Software Engineering</orgName>
							</affiliation>
						</author>
						<title level="a" type="main">Towards Evaluating the Impact of Semantic Support for Curating the Fungus Scientific Literature</title>
					</analytic>
					<monogr>
						<imprint>
							<date/>
						</imprint>
					</monogr>
					<idno type="MD5">43D5D99F7F0CFA3D17778DCEFACF2FB8</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2023-03-24T11:22+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>We present our ongoing development of a semantic infrastructure supporting biofuel research. Part of this effort is the automatic curation of knowledge from the massive amount of information on fungal enzymes that is available in genomics. Working closely with biologists who manually curate the existing literature, we developed ontological NLP pipelines, integrated through Web-based interfaces, to help them in two main tasks: spending less time to mine the literature for facts, while also being provided with richer and semantically linked information. An ongoing challenge is to measure precisely how much the developed semantic technologies benefit the end users and what their overall impact on the quality of the curated data is. We present preliminary evaluation results that show a significant reduction in manual curation time.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1">Introduction</head><p>Producing sustainable liquid fuels with low environmental impact is one of the major technological challenges the world is facing today. Industrialized and developing countries consider biofuels, fuels produced from biomass, as a promising alternative to fossil based fuels. Extracting sugars from cellulose to produce biofuels requires to break down cellulose by using specific molecules called enzymes. Therefore, in the current race for replacing petroleum based fuels with renewable biofuels, discovering the most efficient enzymes for the cellulose degradation is a key challenge.</p><p>The largest knowledge source available to biofuel researchers is the PubMed bibliographic database, containing more than 19 million citations from over 21,000 life science journals. PubMed is linked to other databases, like Entrez Genome, which provides access to genomic sequences or BRENDA, The Comprehensive Enzyme Information System <ref type="bibr" target="#b8">[9]</ref>, which is the main collection of enzyme functional data available to the scientific community. A biology researcher querying PubMed using keywords collects an often long list of relevant papers. The way to analyze this collection is reading all the abstracts and sometimes the full text papers: this task is time consuming, difficult to handle and significant knowledge can be easily missed.</p><p>To address this problem, Natural Language Processing (NLP) and Semantic Web approaches are increasingly adopted in biomedical research <ref type="bibr" target="#b1">[2,</ref><ref type="bibr" target="#b9">10]</ref>. The work-in-progress we present in this paper focuses on the automatic extraction of knowledge from the massive amount of information on enzymes in fungi available from genome research. Text mining systems, like the one we developed here, are typically evaluated with intrinsic metrics, such as precision and recall. 
However, while these metrics can give insight into the accuracy of a system, they do not necessarily correspond to their extrinsic performance <ref type="bibr" target="#b0">[1,</ref><ref type="bibr" target="#b3">4]</ref>: How much does the system actually improve the tasks performed by users? Thus, in this work we are interested in also evaluating the impact of our semantic systems on the work performed by our biologists and the quality of the curated data.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2">Project Context and System Architecture</head><p>Before we describe our overall architecture and the text mining pipelines, we briefly introduce the user groups involved and the semantic entities we analyse.</p><p>User Groups. The identification and the development of effective fungal enzyme cocktails are key elements of the biorefinery industry. In this context, the manual curation of fungal genes provides the thorough knowledge required for guiding research and experiments. The biology researchers involved in this curation are filling the mycoCLAP database <ref type="bibr" target="#b7">[8]</ref>, which is a searchable database of fungal genes encoding lignocellulose-active proteins that have been biochemically characterized. The curators are therefore the first user group of our system. The biology researchers who make decisions about the experiments to conduct and the experimenters executing them represent two further user groups. They are mainly interested in the ability to combine multiple semantic queries to the curated data, thereby integrating the various knowledge resources. Semantic Entities. The system we are developing has to support the manual curation process; therefore, the semantic annotation types have been defined by the curators according to the information they need to store in the mycoCLAP database. Entities include information such as organisms, enzymes, assays, genes, kinetic properties, reactions, substrates, and environmental conditions. To facilitate semantic discovery, linking and querying these concepts across literature and databases, these entities are modeled in OWL ontologies, which are automatically populated from documents. As an example, Fig. <ref type="figure" target="#fig_0">1</ref> shows two main entities encoded in our ontology, organisms <ref type="bibr" target="#b12">[13]</ref> and enzymes. 
The ontology is used both during the text mining process and for querying the extracted information.</p><p>Semantic Resources. In terms of knowledge sources, the system relies on external and internal processing resources and ontologies. The Taxonomy database <ref type="bibr" target="#b5">[6]</ref> from NCBI is used for initializing the NLP resources supporting the organism recognition. BRENDA <ref type="bibr" target="#b8">[9]</ref> provides the enzyme knowledge along with the UniProtKB/SwissProt <ref type="bibr" target="#b10">[11]</ref>. References to the original sources are integrated into the curated data. This facilitates semantic connections through standard Linked Data techniques, e.g., from an organism mention in a research paper to its corresponding entry in the NCBI Taxonomy database. System Architecture. With the large number of different user groups and their diverging requirements, as well as the existing and continuously updated project infrastructure, we needed to find solutions for incrementally adding semantic support without disrupting day-to-day work. Our solution deploys a loosely-coupled, service-oriented architecture that provides semantic services through existing and new clients. To connect these individual services and their results, we rely on standard semantic data formats, like OWL and RDF, which provide both loose coupling and semantic integration, as new data can be browsed and queried as soon as it is added to the framework (Fig. <ref type="figure" target="#fig_1">2</ref>).</p><p>NLP services are provided by the Semantic Assistants architecture <ref type="bibr" target="#b11">[12]</ref>, which facilitates the publication of NLP pipelines through standard Web services with WSDL descriptions. Users can access these Semantic Assistants services from their desktop through client plug-ins for common tools, such as the Firefox Web browser or the OpenOffice word processor.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3">Text Mining Pipelines</head><p>Our text mining pipelines are based on the General Architecture for Text Engineering (GATE) <ref type="bibr" target="#b4">[5]</ref>. All documents first undergo basic preprocessing steps using off-the-shelf components, such as tokenization, sentence splitting, and part-of-speech tagging. Custom pipelines then extract the semantic entities mentioned above and populate the OWL ontologies using the OwlExporter component. The same pipeline can be run for automatic (batch) ontology population, embedded in Teamware (described below) for manual annotation, or brokered to desktop clients through Web services for literature mining and curation.</p><p>Organism Recognition. The organism tagging and extraction relies on external resources that are automatically translated for reuse in our system, thereby providing users with the ability to update their installation when the NCBI Taxonomy database changes. Additionally, a custom built organism ontology, presented in Fig. <ref type="figure" target="#fig_0">1</ref>, formally describes the linguistic structure of organism entities at different levels of the taxonomic hierarchy <ref type="bibr" target="#b12">[13]</ref>. The GATE pipeline consists of modules for organism entity detection based on pattern matching to the NCBI reference taxonomy, providing scientific names and the NCBI Taxonomy Identifier. Strain mentions are extracted using a specific text tokenization and a machine learning based approach.</p><p>Enzyme Recognition. Despite the standards published by the Enzyme Commission <ref type="bibr" target="#b6">[7]</ref>, enzymes are often described by the authors under various formats. An enzyme-specific text tokenization, along with grammar rules written in the JAPE language, analyses tokens with the -ase enzyme suffix. Then, the enzyme entity recognition relies on automatically extracted knowledge from the BRENDA database. 
A pattern matching approach provides enzyme name identification. The detected enzyme mentions are associated with their EC number, their Recommended Name, their Systematic Name and their URL on the BRENDA website.</p><p>Temperature and pH Facts. Temperature and pH mentions are involved in several biological facts, like the temperature and pH dependence/stability or the description of the activity and kinetic assay conditions. Our GATE pipeline contains PRs based on JAPE rules and gazetteer lists of specific vocabulary that enable the detection of these key mentions at the sentence level.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4">Intrinsic and Extrinsic Evaluation</head><p>As explained above, text mining systems require an evaluation showing their efficiency and effectiveness, both intrinsically and from an end user's point of view. In this section, we first discuss the development of the gold standard corpus and present preliminary evaluation results of our system.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1">The Manual Annotation Process</head><p>For the intrinsic evaluation, we are building a gold standard corpus of freely accessible full-text articles by manually annotating them using GATE Teamware <ref type="bibr" target="#b2">[3]</ref>, a Web-based management platform for collaborative annotation and curation. The annotation team is composed of four biology researchers. The researcher in charge of the curation task and an annotator having a strong background in fungus literature curation are considered as expert annotators. Their inter-annotator agreement is over 80%, hence their annotation sets are always defined as the most reliable sets during the adjudication process. The corpus is composed of ten papers related to a class of enzymes. Hydrolase papers and lipase papers each represent 40% of the articles, whereas 20% are related to peroxidases.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.2">Intrinsic Evaluation: Precision and Recall</head><p>The correctness of our text mining pipelines is evaluated in terms of precision, recall and F-measure. The reference is provided by the manually annotated (gold standard) corpus. The preliminary results on the four most common entities (Enzyme, Organism, pH and Temperature) are shown in Table <ref type="table" target="#tab_0">1</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.3">Extrinsic Evaluation: Literature Mining and Annotation</head><p>The impact of the system on the curation and annotation tasks is evaluated in terms of required time (range and average) per paper and measured in minutes.</p><p>Paper selection. Since the beginning of the curation task, approximately 1000 papers have been examined. The time needed to examine an unannotated full paper and to make a decision about its selection for curation, without any semantic support, previously ranged from 2 to 3 minutes. With added support through the text mining services, the required time decreased to 1-2 minutes.</p><p>Paper curation. Among the 1000 examined papers, around 600 were already selected for curation. The time needed to curate an unannotated full paper, i.e., extracting salient facts for entry into the mycoCLAP database, ranged from 30 to 45 minutes for the fully manual workflow. With added semantic support through the text mining pipelines, the required time decreased to 20-30 minutes.</p><p>Paper annotation. For full paper annotation, we investigated the impact of different levels of semantic support on the time required to add annotations (Table <ref type="table" target="#tab_1">2</ref>). All sets have been manually annotated by four annotators. The 4 papers of the first set (SET 1) were annotated without any semantic support.</p><p>The second set (SET 2) is composed of 3 papers, which have been pre-annotated by a degraded version of the system, using only generic tools, such as simple gazetteering list, resulting in lower precision and recall. 
The third set (SET 3) contains 3 papers, pre-annotated using the complete text mining pipelines, including the specialized tools and external resources as described above.</p><p>From the preliminary results, we can conclude that (1) there is a significant reduction of the average time required for paper selection, curation and annotation and (2) the level of support has a measurable impact as well. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5">Conclusions</head><p>We presented our ongoing development of a semantic infrastructure for enzyme data management. In the context of biofuel research, our system targets the automatic extraction of knowledge on fungal enzymes from genome research literature. Preliminary experiments show that semantic support allows for a significant decrease in manual curation time. However, future work is needed to evaluate the impact of such a system on the quality of the curated data.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head>Fig. 1 .</head><label>1</label><figDesc>Fig. 1. Domain Ontology: Organism and Enzyme Entities</figDesc><graphic coords="2,134.77,107.33,345.84,148.59" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_1"><head>Fig. 2 .</head><label>2</label><figDesc>Fig. 2. Integrating Semantic Support in Curation, Analysis, and Retrieval</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1 .</head><label>1</label><figDesc>Text Mining Pipelines: Precision, Recall and F-measure</figDesc><table><row><cell></cell><cell cols="3">Strict (overlaps discarded)</cell><cell cols="3">Lenient (overlaps included)</cell></row><row><cell></cell><cell>Recall</cell><cell>Precision</cell><cell>F-m</cell><cell>Recall</cell><cell>Precision</cell><cell>F-m</cell></row><row><cell>Enzyme</cell><cell>0.64</cell><cell>0.55</cell><cell>0.59</cell><cell>0.78</cell><cell>0.67</cell><cell>0.72</cell></row><row><cell>Organism</cell><cell>0.84</cell><cell>0.81</cell><cell>0.82</cell><cell>0.88</cell><cell>0.83</cell><cell>0.85</cell></row><row><cell>pH</cell><cell>0.74</cell><cell>0.76</cell><cell>0.75</cell><cell>0.95</cell><cell>0.99</cell><cell>0.97</cell></row><row><cell>Temperature</cell><cell>0.64</cell><cell>0.67</cell><cell>0.65</cell><cell>0.90</cell><cell>0.93</cell><cell>0.91</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_1"><head>Table 2 .</head><label>2</label><figDesc>Average annotation time per paper with different levels of semantic support</figDesc><table><row><cell>set and level of semantic support</cell><cell>available tags</cell><cell>t (min)</cell></row><row><cell>SET 1 (no semantic support)</cell><cell>∅</cell><cell>90</cell></row><row><cell>SET 2 (partial semantic support)</cell><cell>enzyme, organism, pH, temperature</cell><cell>65</cell></row><row><cell>SET 3 (full semantic support)</cell><cell>enzyme, organism, pH, temperature</cell><cell>56</cell></row></table></figure>
		</body>
		<back>

			<div type="acknowledgement">
<div xmlns="http://www.tei-c.org/ns/1.0"><p>Acknowledgments. Funding for this work was provided by Genome Canada and Génome Québec.</p></div>
			</div>

			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">Assisted curation: does text mining really help</title>
		<author>
			<persName><forename type="first">B</forename><surname>Alex</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Grover</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Haddow</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Kabadjov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Klein</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Wang</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Pacific Symposium on Biocomputing</title>
				<imprint>
			<date type="published" when="2008">2008</date>
			<biblScope unit="volume">13</biblScope>
			<biblScope unit="page" from="556" to="567" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<monogr>
		<author>
			<persName><forename type="first">S</forename><surname>Ananiadou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>McNaught</surname></persName>
		</author>
		<title level="m">Text Mining for Biology And Biomedicine</title>
				<meeting><address><addrLine>Norwood, MA, USA</addrLine></address></meeting>
		<imprint>
			<publisher>Artech House, Inc</publisher>
			<date type="published" when="2005">2005</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">Web-based Collaborative Corpus Annotation: Requirements and a Framework Implementation</title>
		<author>
			<persName><forename type="first">K</forename><surname>Bontcheva</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Cunningham</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Roberts</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Tablan</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">New Challenges for NLP Frameworks</title>
				<meeting><address><addrLine>Valletta, Malta</addrLine></address></meeting>
		<imprint>
			<publisher>ELRA</publisher>
			<date type="published" when="2010-05-22">May 22 2010</date>
			<biblScope unit="page" from="20" to="27" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">Intrinsic evaluation of text mining tools may not predict performance on realistic tasks</title>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">G</forename><surname>Caporaso</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Deshpande</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">L</forename><surname>Fink</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><forename type="middle">E</forename><surname>Bourne</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><forename type="middle">B</forename><surname>Cohen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Hunter</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Pacific Symposium on Biocomputing</title>
				<imprint>
			<publisher>World Scientific Publishing</publisher>
			<date type="published" when="2008">2008</date>
			<biblScope unit="volume">13</biblScope>
			<biblScope unit="page" from="640" to="651" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">GATE: A Framework and Graphical Development Environment for Robust NLP Tools and Applications</title>
		<author>
			<persName><forename type="first">H</forename><surname>Cunningham</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Maynard</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Bontcheva</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Tablan</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. 40th Anniversary Meeting of the ACL</title>
				<meeting>40th Anniversary Meeting of the ACL</meeting>
		<imprint>
			<date type="published" when="2002">2002</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">The Taxonomy Project</title>
		<author>
			<persName><forename type="first">S</forename><surname>Federhen</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">The NCBI Handbook</title>
				<editor>
			<persName><forename type="first">J</forename><surname>McEntyre</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">J</forename><surname>Ostell</surname></persName>
		</editor>
		<imprint>
			<publisher>National Center for Biotechnology Information</publisher>
			<date type="published" when="2003">2003</date>
			<biblScope unit="volume">4</biblScope>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
	</analytic>
	<monogr>
		<title level="m">Enzyme Nomenclature</title>
				<meeting><address><addrLine>San Diego, California</addrLine></address></meeting>
		<imprint>
			<publisher>Academic Press</publisher>
			<date type="published" when="1992">1992</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">Curation of characterized glycoside hydrolases of fungal origin</title>
		<author>
			<persName><forename type="first">C</forename><surname>Murphy</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Powlowski</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Wu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Butler</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Tsang</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Database</title>
		<imprint>
			<biblScope unit="volume">2011</biblScope>
			<date type="published" when="2011">2011</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">BRENDA, the enzyme information system in 2011</title>
		<author>
			<persName><forename type="first">M</forename><surname>Scheer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Grote</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Chang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Schomburg</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Munaretto</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Rother</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Söhngen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Stelzer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Thiele</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Schomburg</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Nucleic Acids Res</title>
		<imprint>
			<biblScope unit="volume">39</biblScope>
			<biblScope unit="page" from="D670" to="D676" />
			<date type="published" when="2011">2011</date>
		</imprint>
	</monogr>
	<note>Database issue</note>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">The semantic web revisited</title>
		<author>
			<persName><forename type="first">N</forename><surname>Shadbolt</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Berners-Lee</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><surname>Hall</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">IEEE Intelligent Systems</title>
		<imprint>
			<biblScope unit="volume">21</biblScope>
			<biblScope unit="issue">3</biblScope>
			<biblScope unit="page" from="96" to="101" />
			<date type="published" when="2006">2006</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">The UniProt Consortium: The Universal Protein Resource (UniProt)</title>
	</analytic>
	<monogr>
		<title level="j">Nucleic Acids Research</title>
		<imprint>
			<biblScope unit="volume">37</biblScope>
			<biblScope unit="page" from="169" to="174" />
			<date type="published" when="2009">2009</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title level="a" type="main">Semantic Assistants - User-Centric Natural Language Processing Services for Desktop Clients</title>
		<author>
			<persName><forename type="first">R</forename><surname>Witte</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Gitzinger</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">3rd Asian Semantic Web Conference (ASWC 2008). LNCS</title>
				<meeting><address><addrLine>Bangkok, Thailand</addrLine></address></meeting>
		<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2009">2009</date>
			<biblScope unit="volume">5367</biblScope>
			<biblScope unit="page" from="360" to="374" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<analytic>
		<title level="a" type="main">Ontology Design for Biomedical Text Mining</title>
		<author>
			<persName><forename type="first">R</forename><surname>Witte</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Kappler</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">J O</forename><surname>Baker</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Semantic Web: Revolutionizing Knowledge Discovery in the Life Sciences</title>
				<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2007">2007</date>
			<biblScope unit="volume">13</biblScope>
			<biblScope unit="page" from="281" to="313" />
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
