<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">Integration of variation data through SPARQL Micro-Services</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Frederic</forename><surname>Metereau</surname></persName>
							<email>frederic.metereau@etu.univ-cotedazur.fr</email>
							<affiliation key="aff0">
								<orgName type="laboratory">I3S (UMR 7271)</orgName>
								<orgName type="institution" key="instit1">Université Côte d&apos;Azur</orgName>
								<orgName type="institution" key="instit2">Inria</orgName>
								<orgName type="institution" key="instit3">CNRS</orgName>
								<address>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Franck</forename><surname>Michel</surname></persName>
							<email>fmichel@i3s.unice.fr</email>
							<affiliation key="aff0">
								<orgName type="laboratory">I3S (UMR 7271)</orgName>
								<orgName type="institution" key="instit1">Université Côte d&apos;Azur</orgName>
								<orgName type="institution" key="instit2">Inria</orgName>
								<orgName type="institution" key="instit3">CNRS</orgName>
								<address>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Pierre</forename><surname>Larmande</surname></persName>
							<email>pierre.larmande@ird.fr</email>
							<affiliation key="aff1">
								<orgName type="department">IRD</orgName>
								<orgName type="laboratory">DIADE</orgName>
								<orgName type="institution" key="instit1">Univ. Montpellier</orgName>
								<orgName type="institution" key="instit2">CIRAD</orgName>
								<address>
									<settlement>Montpellier</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
							<affiliation key="aff2">
								<orgName type="department" key="dep1">French Institute of Bioinformatics (IFB)-South Green Bioinformatics Platform</orgName>
								<orgName type="department" key="dep2">Bioversity</orgName>
								<orgName type="laboratory">CIRAD</orgName>
								<orgName type="institution" key="instit1">INRAE</orgName>
								<orgName type="institution" key="instit2">IRD</orgName>
								<address>
									<settlement>Montpellier</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Guilhem</forename><surname>Sempere</surname></persName>
							<email>guilhem.sempere@cirad.fr</email>
							<affiliation key="aff2">
								<orgName type="department" key="dep1">French Institute of Bioinformatics (IFB)-South Green Bioinformatics Platform</orgName>
								<orgName type="department" key="dep2">Bioversity</orgName>
								<orgName type="laboratory">CIRAD</orgName>
								<orgName type="institution" key="instit1">INRAE</orgName>
								<orgName type="institution" key="instit2">IRD</orgName>
								<address>
									<settlement>Montpellier</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
							<affiliation key="aff3">
								<orgName type="laboratory">Intertryp</orgName>
								<orgName type="institution" key="instit1">CIRAD</orgName>
								<orgName type="institution" key="instit2">INRAE</orgName>
								<orgName type="institution" key="instit3">IRD</orgName>
								<address>
									<settlement>Montpellier</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Catherine</forename><surname>Faron</surname></persName>
							<email>faron@i3s.unice.fr</email>
							<affiliation key="aff0">
								<orgName type="laboratory">I3S (UMR 7271)</orgName>
								<orgName type="institution" key="instit1">Université Côte d&apos;Azur</orgName>
								<orgName type="institution" key="instit2">Inria</orgName>
								<orgName type="institution" key="instit3">CNRS</orgName>
								<address>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">Integration of variation data through SPARQL Micro-Services</title>
					</analytic>
					<monogr>
						<idno type="ISSN">1613-0073</idno>
					</monogr>
					<idno type="MD5">8F22104FD8848D05DB8B283397A547A0</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2025-04-23T17:39+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>Knowledge Graphs</term>
					<term>MongoDB</term>
					<term>FAIR data</term>
					<term>Genetic variations</term>
					<term>Bioinformatics</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>Integrating genetic variation data is essential to understand the interactions involving multiple genes in complex diseases. However, managing and extracting meaningful information from a large volume of genotyping data is challenging. This work aims to efficiently interconnect a MongoDB database with an RDF database through SPARQL Micro-Services. We first developed an RDF model reusing existing ontologies and implemented it. Then, we evaluated some examples of queries interconnecting two applications, Gigwa (MongoDB) and AgroLD (SPARQL endpoint).</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>problem, as RDF enables data to be interconnected between several databases. This work aims to find a way to efficiently interconnect a MongoDB database with an RDF database.</p><p>As a proof of concept, we decided to use the Gigwa <ref type="bibr" target="#b0">[1]</ref> and AgroLD <ref type="bibr" target="#b1">[2]</ref> database applications to demonstrate the benefits of leveraging data semantics on a high volume of genomic data. Gigwa is a web application designed to store large volumes of genotypes (up to tens of billions), initially imported from VCF or other file formats, in a MongoDB <ref type="bibr" target="#b2">[3]</ref> database, and to provide a straightforward interface for filtering these data. It makes it possible to navigate within search results, visualize them in different ways, and re-export subsets of data into various common formats. AgroLD is a knowledge graph that exploits Semantic Web technologies to integrate data of interest for the plant science community. AgroLD is built incrementally spanning vast aspects of plant molecular interactions. The current phase covers information on genes, proteins, predictions of homologous genes, metabolic pathways, plant trait associations and genetic studies. For this work, we first developed an RDF model based on existing ontologies and inspired by DisGeNET <ref type="bibr" target="#b3">[4]</ref>. We extended it with some features needed for the Gigwa data model, which integrates gene annotation information. Then we developed some SPARQL Micro-Services <ref type="bibr" target="#b4">[5]</ref> using the Gigwa RESTful API. Finally, we developed and evaluated some queries interconnecting Gigwa and AgroLD through SPARQL query examples.</p></div>		</body>
		<back>
			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">Gigwa v2-Extended and improved genotype investigator</title>
		<author>
			<persName><forename type="first">G</forename><surname>Sempéré</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Pétel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Rouard</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Frouin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Hueber</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>De Bellis</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Larmande</surname></persName>
		</author>
		<idno type="DOI">10.1093/gigascience/giz051</idno>
	</analytic>
	<monogr>
		<title level="j">GigaScience</title>
		<imprint>
			<biblScope unit="volume">8</biblScope>
			<date type="published" when="2019">2019</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">Agronomic Linked Data (AgroLD): A knowledge-based system to enable integrative biology in agronomy</title>
		<author>
			<persName><forename type="first">A</forename><surname>Venkatesan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><forename type="middle">T</forename><surname>Ngompe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><forename type="middle">E</forename><surname>Hassouni</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Chentli</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Guignon</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Jonquet</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Ruiz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Larmande</surname></persName>
		</author>
		<idno type="DOI">10.1371/journal.pone.0198270</idno>
	</analytic>
	<monogr>
		<title level="j">PLOS ONE</title>
		<imprint>
			<biblScope unit="volume">13</biblScope>
			<biblScope unit="page">e0198270</biblScope>
			<date type="published" when="2018">2018</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">Adapting TPC-C benchmark to measure performance of multi-document transactions in MongoDB</title>
		<author>
			<persName><forename type="first">A</forename><surname>Kamsky</surname></persName>
		</author>
		<idno type="DOI">10.14778/3352063.3352140</idno>
	</analytic>
	<monogr>
		<title level="j">Proc. VLDB Endow</title>
		<imprint>
			<biblScope unit="volume">12</biblScope>
			<biblScope unit="page" from="2254" to="2262" />
			<date type="published" when="2019">2019</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">DisGeNET: A discovery platform for the dynamical exploration of human diseases and their genes</title>
		<author>
			<persName><forename type="first">J</forename><surname>Piñero</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Queralt-Rosinach</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Bravo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Deu-Pons</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Bauer-Mehren</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Baron</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Sanz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">I</forename><surname>Furlong</surname></persName>
		</author>
		<idno type="DOI">10.1093/database/bav028</idno>
	</analytic>
	<monogr>
		<title level="j">Database</title>
		<imprint>
			<biblScope unit="volume">2015</biblScope>
			<date type="published" when="2015">2015</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">Integration of Web APIs and Linked Data Using SPARQL Micro-Services-Application to Biodiversity Use Cases</title>
		<author>
			<persName><forename type="first">F</forename><surname>Michel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Faron</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Gargominy</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Gandon</surname></persName>
		</author>
		<idno type="DOI">10.3390/info9120310</idno>
	</analytic>
	<monogr>
		<title level="j">Information</title>
		<imprint>
			<biblScope unit="volume">9</biblScope>
			<biblScope unit="page">310</biblScope>
			<date type="published" when="2018">2018</date>
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
