<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.0 20120330//EN" "JATS-archivearticle1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <issn pub-type="ppub">1613-0073</issn>
    </journal-meta>
    <article-meta>
      <title-group>
        <article-title>Technologies for Simulation Neuroscience</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author">
          <string-name>Cristina E. González-Espinoza</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Anna-Kristin Kaufmann</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Eugenia Oshurko</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Alejandra Garcia Rojas</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Sarah Moufok</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Jayakrishnan Nair</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Patrycja Lurie</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Silvia Jimenez</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Pierre-Alexandre Fonta</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Huanxiang Lu</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Nabil Alibou</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Mohameth François Sy</string-name>
          <email>mohameth.sy@epfl.ch</email>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Bogdan Roman</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Samuel Kerrien</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Henry Markram</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Sean L. Hill</string-name>
          <email>sean.hill@epfl.ch</email>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff2">2</xref>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <aff id="aff0">
          <label>0</label>
          ,
          <addr-line>Konstantinos Platis</addr-line>
        </aff>
        <aff id="aff1">
          <label>1</label>
          <institution>Blue Brain Project, École polytechnique fédérale de Lausanne (EPFL)</institution>
          ,
          <addr-line>Biotech Campus, Geneva</addr-line>
          ,
          <country country="CH">Switzerland</country>
        </aff>
        <aff id="aff2">
          <label>2</label>
          <institution>Department of Psychiatry - Neuroscience and Clinical Translation, University of Toronto</institution>
          ,
          <addr-line>Toronto</addr-line>
          ,
          <country country="CA">Canada</country>
        </aff>
        <aff id="aff3">
          <label>3</label>
          <institution>Krembil Centre for Neuroinformatics, Centre for Addiction and Mental Health (CAMH)</institution>
          ,
          <addr-line>Toronto</addr-line>
          ,
          <country country="CA">Canada</country>
        </aff>
      </contrib-group>
      <pub-date>
        <year>2023</year>
      </pub-date>
      <fpage>6</fpage>
      <lpage>10</lpage>
      <abstract>
        <p>The Blue Brain Project, a Swiss neuroscience research initiative, has pioneered a data-driven approach to digitally building and simulating biologically detailed models of the mouse brain as a complementary approach to understanding the brain alongside experimental, theoretical and clinical neuroscience. One of the key steps of this approach involves acquiring, organizing, and integrating heterogeneous data describing the structural and functional organization of the brain at various levels, ranging from synapses and subcellular components to individual neurons, circuits, and entire brain regions. The data is acquired from many sources including neuroscience experiments, published scientific papers, and brain databases. To address many of the data organization, reuse, sparsity, and publishing challenges that arise alongside this approach, Blue Brain built an RDF-based large-scale knowledge graph bringing together RDFS/OWL ontologies, SHACL schemas, JSON-LD, as well as ontology-, rule-, and graph-based inference to complement classical neuroinformatics tools and methods. In this paper, we present how such a knowledge graph is built and used by the project's domain teams to go beyond high-quality and FAIR metadata cataloging. We describe how the knowledge graph serves a multifaceted role: it addresses the diversity, evolution, and quality assessment of data at the whole brain scale, while concurrently tracking data provenance to facilitate reproducibility and precise attribution. Additionally, it facilitates diverse use cases, including the inference of missing data through knowledge-graph-based methods.</p>
      </abstract>
      <kwd-group>
        <kwd>Neuroscience</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec id="sec-1">
      <title>-</title>
      <p>CEUR
ceur-ws.org</p>
    </sec>
    <sec id="sec-2">
      <title>1. Introduction</title>
      <p>The Blue Brain Project (BBP) has pioneered the new field of
Simulation Neuroscience using a
data-driven and supercomputer-based approach to build and simulate biologically detailed brain
tissue models<sup>1</sup>. The approach often involves the steps illustrated in high-level terms in Figure
1.</p>
      <p>In this iterative, data-driven modeling approach, each step informs and refines the subsequent
one, giving rise to a set of challenges concerning the storage, accessibility, and reusability
of data and models. These challenges manifest as data is acquired, generated, or published
both internally for users and to a wider scientific audience. For instance, during the data
organization step, the discovery, acquisition, preparation, and release of multi-scale,
multimodal, and heterogeneous data are essential for enabling access and facilitating reuse in model
development. Furthermore, tracking data and model provenance is key during the model
validation step to select validation data different from those used during model building, but
also during model publication to support reproducibility, contribution attribution, and
quality assessment.</p>
    </sec>
    <sec id="sec-3">
      <title>2. Knowledge graph as an approach for organizing neuroscience data</title>
      <p>The heterogeneity challenge often comes from data of different sizes, formats, generation
contexts and sources. For example, to build a single neuron model at a given brain region location
(e.g. somatosensory cortex, hippocampus, thalamus), a neuroscientist modeler would often
need to get neuron morphologies (i.e. reconstructed neuron 3D shape) and electrophysiological
recordings (i.e. measured electrical behaviour) from which to extract features. While many
neuroscience databases collect and enable the modeler to search and download neuron
morphologies (e.g. NeuroMorpho.org, Mouselight) or neuron electrophysiological recordings (e.g.
Allen Cell type DB), they greatly vary in terms of (meta)data formats and in how they account for the
data generation contexts.</p>
      <p>
        These challenges are not specific to simulation neuroscience and are summarized in the FAIR
(Findable, Accessible, Interoperable, Reusable) guiding principles for scientific data
management<sup>2</sup>. Addressing these challenges is a clear target and use case for Semantic Web
technologies [
        <xref ref-type="bibr" rid="ref1">1</xref>
        ]. BBP built a knowledge graph, the Blue Brain Nexus [
        <xref ref-type="bibr" rid="ref2">2</xref>
        ], by leveraging Semantic Web
technologies and deploying it as an ecosystem or platform, to support building and
simulating brain tissue models [
        <xref ref-type="bibr" rid="ref3">3</xref>
        ]. Nexus provides a complement to classical neuroinformatics for
organizing neuroscience data. Furthermore, Nexus is domain-agnostic and can be used in any
data-driven field as a knowledge graph technology stack.
      </p>
      <sec id="sec-3-1">
        <title>2.1. Building the knowledge graph from different data sources</title>
        <p>At Blue Brain, building a knowledge graph from different sources can be summarized in three
main steps: i) define, in the form of W3C SHACL<sup>3</sup> and ontologies, the schemas and formats of
the targeted neuroscience entities’ types, their metadata referring to the key scientific, technical
activities, protocols, and agents involved in their generation; ii) define simple declarative
JSON-based transformation or mapping rules to map source data to targeted schemas; and iii) apply
the mappings to the data from a given source and register the results in the knowledge graph.
These mappings<sup>4</sup> are used with Nexus Forge<sup>5</sup>, a Python framework for building knowledge
graphs.</p>
        <p>In order to exemplify the complexity of the data integration process, Figure 2 shows two
schematic diagrams of two very different generation contexts of the same type of neuroscience
data: a neuron morphology.</p>
      </sec>
      <sec id="sec-3-2">
        <title>2.2. Knowledge graph schema</title>
        <p>In Blue Brain, many SHACL shapes and ontologies<sup>6</sup> have been developed as the knowledge
graph schema, extending existing standards such as schema.org and W3C PROV-O. The shapes
and ontologies cover entities from the subcellular level to the whole brain such as neuron
morphologies, electrophysiological recordings, ion channel recordings, parameters from literature,
brain atlases, cell composition of the brain, brain regions, cell types, species, etc. The W3C
RDF format is leveraged, specifically its developer-friendly JSON-LD serialization, which eases
federated access and discoverability of distributed neuroscience (meta)data over the web.</p>
        <fn-group>
          <fn>
            <label>2</label>
            <p>https://doi.org/10.1038/sdata.2016.18</p>
          </fn>
          <fn>
            <label>3</label>
            <p>https://www.w3.org/TR/shacl/</p>
          </fn>
          <fn>
            <label>4</label>
            <p>https://github.com/BlueBrain/bbp-ontologies/tree/master/mappings</p>
          </fn>
          <fn>
            <label>5</label>
            <p>https://nexus-forge.readthedocs.io/en/latest/interaction.html#mapping</p>
          </fn>
          <fn>
            <label>6</label>
            <p>https://github.com/BlueBrain/bbp-ontologies</p>
          </fn>
        </fn-group>
      </sec>
      <sec id="sec-3-3">
        <title>2.3. Publishing neuroscience data on the web</title>
        <p>The organized, linked and curated data can be shared both internally and externally as web
portals dynamically built from SPARQL and ElasticSearch queries sourced from the knowledge
graph. The same data is also published as programmatically accessible knowledge graphs. An
example is the Thalamoreticular Microcircuitry web portal<sup>7</sup>, which allows users to browse, visualize,
query and download the experimental data (e.g. 3D neuron morphologies, electrophysiological
recordings, and interactive visualizations) used to build digital reconstructions (e.g. single cell
model and microcircuit reconstruction) as well as network simulations.</p>
      </sec>
      <sec id="sec-3-4">
        <title>2.4. Inference as a tool for neuroscience data generalization</title>
        <p>
          One of the main challenges in simulation neuroscience is the sparsity of the data. For instance,
certain brain regions have received very little attention from experimentalists and therefore very
few neuronal morphologies and electrophysiological recordings appear in the literature for
these areas. In this situation, scientists may want to find and adapt the same types of data
but from different but “similar” brain regions, cell types or species (e.g. borrow or adapt
rat data for building mouse models). These types of data generalization can be expressed in
the form of knowledge graph-based inference rules. Three main strategies are followed: 1)
classical ontology-based generalization (e.g. a parent brain region can be considered similar to
its descendants); 2) knowledge graph embeddings obtained using metadata and graph structures
(e.g. embeddings of neuron morphologies are built from their neighbours in the graph using
techniques such as RDF2vec [
          <xref ref-type="bibr" rid="ref4">4</xref>
          ]); and 3) similarity embeddings generated from entity features
(e.g. an embedding vector is built for each neuron morphology by vectorizing its 3D shape using
topological techniques [
          <xref ref-type="bibr" rid="ref5">5</xref>
          ]).
        </p>
        <fn-group>
          <fn>
            <label>7</label>
            <p>https://bbp.epfl.ch/portals/thalamoreticular</p>
          </fn>
        </fn-group>
      </sec>
    </sec>
    <sec id="sec-4">
      <title>3. Conclusion</title>
      <p>In this work, a complex use-case for semantic web technologies has been presented in the context
of the emerging field of simulation neuroscience. This particular domain poses a formidable
challenge due to the heterogeneous nature of data sources, the data sparsity, and the dynamic
nature of terminology and conceptual models.</p>
    </sec>
    <sec id="sec-5">
      <title>Acknowledgments</title>
      <p>This study was supported by funding to the Blue Brain Project, a research center of the École
polytechnique fédérale de Lausanne, from the Swiss government’s ETH Board of the Swiss
Federal Institutes of Technology. Funding has been provided in part from the European Union’s
Horizon 2020 Framework Programme for Research and Innovation under the Specific Grant
Agreement No. 785907 (Human Brain Project SGA2).</p>
    </sec>
  </body>
  <back>
    <ref-list>
      <ref id="ref1">
        <mixed-citation>
          [1]
          <string-name>
            <given-names>P.</given-names>
            <surname>Ristoski</surname>
          </string-name>
          ,
          <string-name>
            <given-names>H.</given-names>
            <surname>Paulheim</surname>
          </string-name>
          ,
          <article-title>Semantic Web in data mining and knowledge discovery: A comprehensive survey</article-title>
          ,
          <source>Journal of Web Semantics</source>
          <volume>36</volume>
          (
          <year>2016</year>
          )
          <fpage>1</fpage>
          -
          <lpage>22</lpage>
          . URL: http://www.sciencedirect.com/science/article/pii/S1570826816000020. doi:
          <pub-id pub-id-type="doi">10.1016/j.websem.2016.01.001</pub-id>
          .
        </mixed-citation>
      </ref>
      <ref id="ref2">
        <mixed-citation>
          [2]
          <string-name>
            <given-names>M. F.</given-names>
            <surname>Sy</surname>
          </string-name>
          ,
          <string-name>
            <given-names>B.</given-names>
            <surname>Roman</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Kerrien</surname>
          </string-name>
          ,
          <string-name>M. D. M.</string-name>
          ,
          <string-name>
            <given-names>H.</given-names>
            <surname>Genet</surname>
          </string-name>
          ,
          <string-name>
            <given-names>W.</given-names>
            <surname>Wajerowicz</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Dupont</surname>
          </string-name>
          ,
          <string-name>
            <given-names>I.</given-names>
            <surname>Lavriushev</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J.</given-names>
            <surname>Machon</surname>
          </string-name>
          ,
          <string-name>
            <given-names>K.</given-names>
            <surname>Pirman</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D. Neela</given-names>
            <surname>Mana</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Stafeeva</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.-K.</given-names>
            <surname>Kaufmann</surname>
          </string-name>
          ,
          <string-name>
            <given-names>H.</given-names>
            <surname>Lu</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L.</given-names>
            <surname>Jonathan</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.-A.</given-names>
            <surname>Fonta</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A. G. R.</given-names>
            <surname>Martinez</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A. D.</given-names>
            <surname>Ulbrich</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C.</given-names>
            <surname>Lindqvist</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Jimenez</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Rotenberg</surname>
          </string-name>
          ,
          <string-name>
            <given-names>H.</given-names>
            <surname>Markram</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S. L.</given-names>
            <surname>Hill</surname>
          </string-name>
          ,
          <article-title>Blue brain nexus: An open, secure, scalable system for knowledge graph management and data-driven science</article-title>
          ,
          <source>Semantic Web</source>
          <volume>14</volume>
          (
          <year>2022</year>
          )
          <fpage>697</fpage>
          -
          <lpage>727</lpage>
          . URL: http://doi.acm.org/10.3233/SW-222974. doi:
          <pub-id pub-id-type="doi">10.3233/SW-222974</pub-id>
          .
        </mixed-citation>
      </ref>
      <ref id="ref3">
        <mixed-citation>
          [3]
          <string-name>
            <given-names>F.</given-names>
            <surname>Schürmann</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J.-D.</given-names>
            <surname>Courcol</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Ramaswamy</surname>
          </string-name>
          ,
          <article-title>Computational Concepts for Reconstructing and Simulating Brain Tissue</article-title>
          , Springer International Publishing, Cham,
          <year>2022</year>
          , pp.
          <fpage>237</fpage>
          -
          <lpage>259</lpage>
          . URL: https://doi.org/10.1007/978-3-030-89439-9_10. doi:
          <pub-id pub-id-type="doi">10.1007/978-3-030-89439-9_10</pub-id>
          .
        </mixed-citation>
      </ref>
      <ref id="ref4">
        <mixed-citation>
          [4]
          <string-name>
            <given-names>P.</given-names>
            <surname>Ristoski</surname>
          </string-name>
          ,
          <string-name>
            <given-names>H.</given-names>
            <surname>Paulheim</surname>
          </string-name>
          ,
          <article-title>RDF2vec: RDF Graph Embeddings for Data Mining</article-title>
          , in: International Semantic Web Conference,
          <year>2016</year>
          . doi:
          <pub-id pub-id-type="doi">10.1007/978-3-319-46523-4_30</pub-id>
          .
        </mixed-citation>
      </ref>
      <ref id="ref5">
        <mixed-citation>
          [5]
          <string-name>
            <given-names>L.</given-names>
            <surname>Kanari</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Dłotko</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Scolamiero</surname>
          </string-name>
          ,
          <string-name>
            <given-names>R.</given-names>
            <surname>Levi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J.</given-names>
            <surname>Shillcock</surname>
          </string-name>
          ,
          <string-name>
            <given-names>K.</given-names>
            <surname>Hess</surname>
          </string-name>
          ,
          <string-name>
            <given-names>H.</given-names>
            <surname>Markram</surname>
          </string-name>
          ,
          <article-title>A topological representation of branching neuronal morphologies</article-title>
          ,
          <source>Neuroinformatics</source>
          <volume>16</volume>
          (
          <year>2018</year>
          ).
          doi:
          <pub-id pub-id-type="doi">10.1007/s12021-017-9341-1</pub-id>
          .
        </mixed-citation>
      </ref>
    </ref-list>
  </back>
</article>