<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.0 20120330//EN" "JATS-archivearticle1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta />
    <article-meta>
      <title-group>
        <article-title>Introducing the HOBBIT platform into the Ontology Alignment Evaluation Campaign</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author">
          <string-name>Ernesto Jiménez-Ruiz</string-name>
          <xref ref-type="aff" rid="aff14">14</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Tzanina Saveta</string-name>
          <xref ref-type="aff" rid="aff9">9</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Ondřej Zamazal</string-name>
          <xref ref-type="aff" rid="aff16">16</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Sven Hertling</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Michael Röder</string-name>
          <xref ref-type="aff" rid="aff13">13</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Irini Fundulaki</string-name>
          <xref ref-type="aff" rid="aff9">9</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Axel-Cyrille Ngonga Ngomo</string-name>
          <xref ref-type="aff" rid="aff13">13</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Mohamed Ahmed Sherif</string-name>
          <xref ref-type="aff" rid="aff13">13</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Amina Annane</string-name>
          <xref ref-type="aff" rid="aff11">11</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Zohra Bellahsene</string-name>
          <xref ref-type="aff" rid="aff11">11</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Sadok Ben Yahia</string-name>
          <xref ref-type="aff" rid="aff15">15</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Gayo Diallo</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Daniel Faria</string-name>
          <xref ref-type="aff" rid="aff10">10</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Marouen Kachroudi</string-name>
          <xref ref-type="aff" rid="aff15">15</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Abderrahmane Khiat</string-name>
          <xref ref-type="aff" rid="aff7">7</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Patrick Lambrix</string-name>
          <xref ref-type="aff" rid="aff12">12</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Huanyu Li</string-name>
          <xref ref-type="aff" rid="aff12">12</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Maximilian Mackeprang</string-name>
          <xref ref-type="aff" rid="aff7">7</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Majid Mohammadi</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Maciej Rybinski</string-name>
          <xref ref-type="aff" rid="aff5">5</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Booma Sowkarthiga Balasubramani</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Cassia Trojahn</string-name>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <aff id="aff0">
          <label>0</label>
          <institution>ADVIS Lab, Department of Computer Science, University of Illinois at Chicago</institution>
          ,
          <country country="US">USA</country>
        </aff>
        <aff id="aff1">
          <label>1</label>
          <institution>BPH Center - INSERM U1219, Team ERIAS &amp; LaBRI UMR5800, University of Bordeaux</institution>
        </aff>
        <aff id="aff2">
          <label>2</label>
          <institution>Data and Web Science Group, University of Mannheim</institution>
          ,
          <country country="DE">Germany</country>
        </aff>
        <aff id="aff3">
          <label>3</label>
          <institution>Delft University of Technology</institution>
          ,
          <country country="NL">The Netherlands</country>
        </aff>
        <aff id="aff4">
          <label>4</label>
          <institution>Department of Informatics, University of Oslo</institution>
          ,
          <country country="NO">Norway</country>
        </aff>
        <aff id="aff5">
          <label>5</label>
          <institution>Dpto. Lenguajes y Ciencias de la Computación, University of Malaga</institution>
          ,
          <country country="ES">Spain</country>
        </aff>
        <aff id="aff6">
          <label>6</label>
          <institution>École nationale Supérieure d'Informatique</institution>
          ,
          <addr-line>Alger, Algérie</addr-line>
        </aff>
        <aff id="aff7">
          <label>7</label>
          <institution>Human-Centered Computing, Freie Universität Berlin</institution>
          ,
          <addr-line>4195 Berlin</addr-line>
          ,
          <country country="DE">Germany</country>
        </aff>
        <aff id="aff8">
          <label>8</label>
          <institution>Institut de Recherche en Informatique de Toulouse</institution>
          ,
          <country country="FR">France</country>
        </aff>
        <aff id="aff9">
          <label>9</label>
          <institution>Institute of Computer Science - FORTH</institution>
          ,
          <country country="GR">Greece</country>
        </aff>
        <aff id="aff10">
          <label>10</label>
          <institution>Instituto Gulbenkian de Ciência</institution>
          ,
          <country country="PT">Portugal</country>
        </aff>
        <aff id="aff11">
          <label>11</label>
          <institution>LIRMM, Université de Montpellier</institution>
          ,
          <addr-line>CNRS, Montpellier</addr-line>
          ,
          <country country="FR">France</country>
        </aff>
        <aff id="aff12">
          <label>12</label>
          <institution>Linköping University &amp; Swedish e-Science Research Centre</institution>
          ,
          <addr-line>Linköping</addr-line>
          ,
          <country country="SE">Sweden</country>
        </aff>
        <aff id="aff13">
          <label>13</label>
          <institution>Paderborn University, Data Science Group</institution>
          ,
          <addr-line>Pohlweg 51, D-33098 Paderborn</addr-line>
          ,
          <country country="DE">Germany</country>
        </aff>
        <aff id="aff14">
          <label>14</label>
          <institution>The Alan Turing Institute</institution>
          ,
          <addr-line>London</addr-line>
          ,
          <country country="GB">United Kingdom</country>
        </aff>
        <aff id="aff15">
          <label>15</label>
          <institution>Université de Tunis El Manar</institution>
          ,
          <addr-line>LIPAH-LR 1 ES14, 2092, Tunis, Tunisie</addr-line>
        </aff>
        <aff id="aff16">
          <label>16</label>
          <institution>University of Economics</institution>
          ,
          <addr-line>Prague</addr-line>
          ,
          <country country="CZ">Czech Republic</country>
        </aff>
      </contrib-group>
      <abstract>
        <p>This paper describes the Ontology Alignment Evaluation Initiative 2017.5 pre-campaign. Like in 2012, when we transitioned the evaluation to the SEALS platform, we have also conducted a pre-campaign to assess the feasibility of moving to the HOBBIT platform. We report the experiences of this pre-campaign and discuss the future steps for the OAEI.</p>
      </abstract>
    </article-meta>
  </front>
  <body>
    <sec id="sec-1">
      <title>-</title>
      <p>The Ontology Alignment Evaluation Initiative1 (OAEI) is a coordinated international
initiative which organizes the evaluation of ontology matching systems [1, 2]. The main
goal of the OAEI is to compare systems and algorithms openly and on the same basis to
allow anyone to draw conclusions about the best matching strategies. Furthermore, our
ambition is to help tool developers to improve their systems through such evaluations.</p>
      <p>The initiative started in 2004, and from 2006 until the present, the OAEI campaigns
were held at the Ontology Matching workshop, collocated with the ISWC conference.
Since 2011, we have been using an environment for automatically processing
evaluations which was developed within the SEALS (Semantic Evaluation At Large Scale)</p>
    </sec>
    <sec id="sec-2">
      <title>1 http://oaei.ontologymatching.org</title>
      <p>project2. SEALS provided a software infrastructure for automatically executing
evaluations and evaluation campaigns for typical semantic web tools, including ontology
matching. In the OAEI 2017, a novel evaluation environment called HOBBIT was
adopted for the novel HOBBIT Link Discovery track. In that OAEI campaign, all
systems were executed under the SEALS client in all other tracks.</p>
      <p>The good experience of the 2017 Link Discovery (e.g., novel platform, online
evaluation, automatic generation of result tables, attraction of link discovery developers,
customization of the matching requirements of a benchmark task) track motivated the
interest in assessing the possibility of transitioning the whole OAEI evaluation to
HOBBIT. To that end, we decided to set up an OAEI pre-campaign, as happened in the OAEI
2011.5 when the OAEI moved to SEALS,3 to evaluate potential risks and challenges.
The nature of the link discovery tracks is different from the (traditional) OAEI tracks
and we foresaw sources of uncertainty with respect to: (i) the use of a new evaluation
environment, (ii) the adaptation of tracks with multiple tasks (like multifarm), (iii) the
introduction of Docker to organisers and participants, (iv) the inclusion of interactivity
capabilities, and (v) the storage of results. The objective of the Ontology Alignment
Evaluation Initiative 2017.5 pre-campaign was, therefore, to evaluate the feasibility of
moving some (traditional) OAEI tracks to the HOBBIT platform. In this paper, we report
the experiences of this pre-campaign and future steps of the OAEI.</p>
      <p>The remainder of the paper is organised as follows. Section 2 introduces the
HOBBIT platform. In Section 3, we present the overall evaluation methodology that has been
used. Section 4 describes the evaluation data sets and Section 5 the participating
systems. Section 6 overviews the lessons learned from the campaign; and finally, Section 7
summarizes the conclusions of this experience and discusses future plans for the OAEI.
2</p>
      <sec id="sec-2-1">
        <title>HOBBIT platform</title>
        <p>The HOBBIT platform is a generic, modular and distributed platform for Big Linked
Data systems. It was designed to enable Big Data practitioners and Linked Data users to
benchmark all steps of the data lifecycle at scale, i.e., with all necessary contemplations
of volume, velocity, value and veracity necessary to benchmark real applications. Some
of its most important features within the context of link discovery include the support of
(i) benchmarks that focus on the evaluation of the quality of a system using single
consecutive requests as well as (ii) benchmarks aiming at evaluating the efficiency of Big
Linked Data solutions, e.g., by generating distributed parallel requests leading to a high
workload. The HOBBIT project4 designed and develops the HOBBIT platform with the
aim of providing an open-source, extensible, FAIR5 and scalable evaluation platform
(in a fashion akin to GERBIL [3]) along with corresponding benchmarks and
mimicking algorithms for real data sources of industrial scale. The platform being open-source
means that it can be downloaded and installed locally for tests. The online instance of
the platform allows (i) running public challenges and (ii) making sure that even people
without the required infrastructure are able to run the benchmarks they are interested in.
The platform, as well as the benchmarks that are designed and implemented in HOBBIT
2 http://www.seals-project.eu
3 http://oaei.ontologymatching.org/2011.5/
4 http://project-hobbit.eu
5 Findable, Accessible, Interoperable and Reusable
are modelled as actors with which the platform interacts. The use cases relevant for end
users supported by the platform include:
– Benchmarking a System: the user can select a benchmark to test a system. The
platform loads appropriate configuration parameters for the benchmark, as well as
the list of available systems for this benchmark. The user configures the benchmark
and selects one of the available systems to benchmark.
– Showing and Comparing Benchmark Results: the user can view the results of a
single benchmark run or select multiple, e.g., to compare several systems that have
been evaluated with the same benchmark.
– Adding a System: the user adds the system that needs to be benchmarked in the
platform by providing a docker image of the system and a system adapter which
serves as a proxy between the benchmark and the system.</p>
        <p>The Platform Controller makes sure that the benchmark chosen by the user can
be started and ensures that all nodes of the cluster are available. It communicates with
the system to be benchmarked, ensures that it is working properly and generates the
benchmark controller that is responsible for producing the data and task generators as
well as the evaluation storage. The Data Generator produces the source dataset that is
sent to the Benchmarked System, and the target dataset as well as the Gold Standard
which are sent to the Task Generator. The Task Generator sends the target dataset to
the Benchmarked System and forwards the Gold Standard to the Evaluation Storage.
When the system finishes its task, it sends the answers to the Evaluation Storage. The
Evaluation Module receives the system and the Gold Standard answers and returns the
Key Performance Indicators (KPIs) for the experiment.</p>
      </sec>
      <sec id="sec-2-2">
        <title>Methodology</title>
        <p>The OAEI campaigns are typically divided into three phases: (i) preparation phase
(datasets are prepared), (ii) execution phase (systems are tuned), and (iii) evaluation
phase (systems are evaluated). In this OAEI pre-campaign, we focused on the
preparation and execution phases given the time constraints and the challenges encountered
during the migration to the HOBBIT platform.
3.1</p>
        <sec id="sec-2-2-1">
          <title>Preparation phase</title>
          <p>The preparation phase for the OAEI 2017.5 pre-campaign, unlike recent OAEI
campaigns, was more demanding as the OAEI track organisers were required to migrate the
SEALS datasets following the novel HOBBIT specifications. We provided the
benchmark definitions for the (i) Largebio6 and (ii) Link discovery7 tracks to make the
transition smoother. These (reference) datasets were made available by the end of
January 2018. Next, we provide a brief summary of the main components of a HOBBIT
benchmark.</p>
          <p>HOBBIT benchmark definition. The HOBBIT workflow and format of benchmarks is
generic as the platform was designed to accommodate benchmarks across the whole
of the Linked Data lifecycle. This flexibility adds some complexity with respect to the
SEALS benchmark generation. Note that, since in the OAEI multi-tasks benchmarks
the source dataset may change, we have slightly modified the general HOBBIT
workflow depicted in Figure 1. In the OAEI workflow, the TaskGenerator deals with both
the source and target datasets to generate a Task. A benchmark is composed of the
following classes:
BenchmarkController is the main class of the benchmark where the general
benchmark execution workflow is specified.</p>
          <p>DataGenerator generates the benchmark datasets (e.g., input ontologies and
alignments) and prepares the datasets for the TaskGenerator. For multiple-task
benchmarks it also deals with the preparation of queue names to be sent to the system.
Task includes the information of the source and target datasets and the expected results
together with some other parameters like which type of entity should be matched
(e.g., only classes).</p>
          <p>TaskGenerator deals with the generation of the task(s) and sends the task(s) to the
system and the EvaluationModule.</p>
          <p>EvaluationModule compares the expected results (e.g., reference alignment) provided
by the TaskGenerator and the computed results by a system, and generates the
KPIs.</p>
          <p>Each benchmark is also associated to a metadata file8 where the docker images of
the benchmark are referenced, the KPIs defined, and the name of the benchmark’s API
specified (e.g., bench:LargebioAPI).</p>
        </sec>
      </sec>
    </sec>
    <sec id="sec-3">
      <title>6 https://gitlab.com/ernesto.jimenez.ruiz/largebio 7 https://github.com/hobbit-project/SpatialBenchmark/ 8 Metadata for largebio: https://git.project-hobbit.eu/ernestoj/largebio</title>
      <p>Track API KPIs
Conference bench:ConferenceAPI Precision, Recall, F-measure
Anatomy bench:AnatomyAPI Precision, Recall, F-measure, Recall+
Largebio bench:LargebioAPI Precision, Recall, F-measure
Spimbench bench:spimbenchAPI Precision, Recall, F-measure</p>
      <p>Link discovery bench:LinkingAPI Precision, Recall, F-measure
OAEI 2017.5 tracks. The preparation phase was complete in early March 2018 and led
to four novel tracks running under the HOBBIT platform: conference, anatomy,
largebio, and instance matching - spimbench. Note that the link discovery track was already
running under HOBBIT in the OAEI 2017 campaign. The benchmarks are (briefly)
described in Section 4.
3.2</p>
      <sec id="sec-3-1">
        <title>Execution phase</title>
        <p>The execution phase also brought the new challenge to developers of implementing a
system compliant with the HOBBIT specifications. We provided the following sources
of instruction to support system developers with the integration with HOBBIT: (i)
General HOBBIT instructions,9 (ii) LogMap’s example implementing the interfaces for the
conference, anatomy, largebio and spimbench tracks10, and (iii) the Maven framework
to facilitate the wrapping of systems.11
HOBBIT system definition. The interface of a system is defined via the SystemAdapter
class (e.g., LogMapSystemAdapter). This class receives the dataset definition from the
DataGenerator of a benchmark (e.g. set of tasks and matching requirements) and the
individual tasks (source and target datasets) from the TaskGenerator of a benchmark. The
results (e.g., a file containing the mappings in RDF Alignment format) are sent to the
benchmark’s EvaluationModule. The system adapter class communicates to the
benchmark classes in a special way since it is submitted to the HOBBIT platform as a docker
image. Each system is also associated to a metadata file,12 which explicitly mentions
the APIs the system implements (e.g. hobbit:implementsAPI bench:LargebioAPI). This
enables the automation of the evaluation of the OAEI benchmarks.</p>
        <p>OAEI 2017.5 participation. Ten systems were registered to participate in the OAEI
2017.5 campaign in March 2018. Only eight of them reported results or experiences
during April and May: OntoIdea, LogMap, SANOM, DisMatch, KEPLER, YAM-BIO,
AML and RADON. The participating system and proof-of-concept results are (briefly)
presented in Section 5.
9 https://project-hobbit.eu/challenges/oaei2017-5/oaei2017-5-tasks/
10 LogMap [4]: https://gitlab.com/ernesto.jimenez.ruiz/logmap-hobbit
11 Maven framework: https://github.com/sven-h/ontMatchingHobbit
12 LogMap’s metadata: https://git.project-hobbit.eu/ernestoj/logmapsystem</p>
        <sec id="sec-3-1-1">
          <title>Benchmarks</title>
          <p>The OAEI 2017.5 pre-campaign included five tracks: conference, anatomy, largebio,
instance matching - spimbench, and link discovery. Table 1 provides a summary of
the benchmarks. This pre-campaign did not include the multifarm and the interactive
tracks. In the case of the multifarm track the main restriction was to move thousands of
matching tasks to a new environment. While for the interactive tracks the main
limitation was technological as the inclusion of an “oracle” requires significant modifications
on the HOBBIT pipeline. Next we briefly describe the datasets of the OAEI 2017.5
benchmarks.</p>
          <p>Anatomy track. This track consists of finding an alignment between the Adult Mouse
Anatomy ontology (AMA) and a part of the National Cancer Institute (NCI) Thesaurus
(NCI-A). This data set has been used since 2007 with some improvements over the
years [5]. The AMA ontology contains 2,744 concepts, while the NCI-A contains 3,304 concepts
describing the human anatomy. Systems participating in the anatomy track are evaluated
in terms of runtime, precision, recall and F-measure. In addition, the anatomy track
measures the systems’ ability to find non-trivial correspondences (recall+) and checks
whether the systems generate coherent alignments.</p>
          <p>Conference track. This track consists of 21 test cases with ontologies from the domain
of organising conferences. The conference track has been used since 2006 and it was
gradually improved [6]. The advantage of the conference domain is the fact that it is
generally understandable. The ontologies were developed independently and based on
different resources, thus they capture the issues in organising conferences from different
points of view and using different nomenclature. Finally, ontologies within this track are
of small-medium size and relatively rich in OWL 2 axioms.</p>
          <p>Largebio track. This track consists of finding alignments between the Foundational
Model of Anatomy (FMA), SNOMED CT, and the National Cancer Institute Thesaurus
(NCI) [7]. These ontologies are semantically rich and contain tens of thousands of
classes. UMLS Metathesaurus has been selected as the basis for the track reference
alignments (see [8] for details). UMLS is currently the most comprehensive effort for
integrating independently-developed medical thesauri and ontologies, including FMA,
SNOMED CT, and NCI. In this track we also pay special attention to the number of
unsatisfiabilities caused by the mappings computed by a participating system.
SPIMBENCH track. The datasets in this track are produced using SPIMBENCH
benchmark generator [9] with the aim to generate descriptions of the same entity where
valuebased, structure-based and semantics-aware transformations are employed on a source
dataset in order to create the target dataset(s). The value-based transformations
consider mainly typographical errors and different data formats, the structure-based
transformations implement transformations applied on the structure of object and datatype
properties and the semantics-aware transformations concern the instance level and take
into account schema information. The latter are used to examine if the matching
systems take into account RDFS and OWL constructs in order to discover correspondences
between instances that can be found only by considering schema information.
Link discovery track. This track is composed of two tasks called: linking and spatial.
The linking task measures how well systems can match traces that have been modified
using string-based approaches along with addition and deletion of intermediate points.</p>
          <p>The original datasets only contain coordinates, thus, we have replaced a number of those
points with labels retrieved from Linked Data spatial datasets using the Google Maps13,
Foursquare14 and Nominatim Openstreetmap15 APIs to be able to apply string-based
modifications implemented in LANCE [10]. This task also contains modifications of date
and coordinate formats.</p>
          <p>The spatial task measures how well systems can identify the DE-9IM
(Dimensionally Extended nine-Intersection Model) topological relations between LineStrings and
Polygons in two-dimensional spaces. The supported spatial relations are the following:
Equals, Disjoint, Touches, Contains/Within, Covers/CoveredBy, Intersects, Crosses,
Overlaps. The instances are represented in the Well-Known Text (WKT) format. For each
relation, a different pair of source and target datasets is given to the participants.
5</p>
        </sec>
        <sec id="sec-3-1-2">
          <title>Participation and proof-of-concept results</title>
          <p>In this section we introduce the systems contributing to the OAEI 2017.5 campaign
and provide an overview of how experiments are executed from the HOBBIT public
instance.
Experiments can be executed via the HOBBIT public instance16 by following the
Benchmarks menu. Note that, currently, only registered developers who are the owners of a
system which conforms to the specification (i.e., API) of one or more benchmarks can run
experiments. Figure 2 shows the interface to select a benchmark and evaluate a system
implementing its API within the HOBBIT platform.</p>
          <p>Every experiment is assigned a unique ID and, once they are finalized, registered
users can access its results (see Experiments menu). In addition, one can also select
several experiments for comparison purposes. For example, Figure 3 shows the results
of LogMap for all six tasks of the largebio track.
13 https://developers.google.com/maps/
14 https://developer.foursquare.com/
15 http://nominatim.openstreetmap.org/
16 https://master.project-hobbit.eu/
AgreementMakerLight (AML) [11, 12] is an all-purpose ontology alignment system
inspired by AgreementMaker [13] and sharing its focus on flexibility and extensibility
as main design paradigms. While initially primarily focused on the biomedical domain
and on the use of background knowledge, its tool suite and capabilities were gradually
extended to cover the full range of ontology matching tasks evaluated under the OAEI.
DisMatch [14] is an experimental ontology matching system built around the idea of
leveraging the recent advancements in semantic representations of texts within the
context of the ontology alignment problem. The lexical matcher uses semantic similarity
calculated from distributional representations of domain-specific words. In the
experiments several relatedness measures were tested, based on different text representation
methods, including DomESA [15] and Word2Vec’s Skip-Gram model [16].
Kepler [17] is an ontology alignment system able to deal with normal and large scale
ontologies. Kepler is also able to cope with multilingual ontologies thanks to its
translator module. Kepler exploits the expressiveness of the OWL language to detect and
compute the similarity between ontology entities through six modules: preprocessing,
partitioning, translation, indexation, candidate selection and final alignment generation.
LogMap [4] relies on lexical and structural indexes to enhance scalability. It also
incorporates approximate reasoning and repair techniques to minimise the number of
logical errors in the aligned ontology. LogMap comes with two variants:
LogMapBio [18], which uses BioPortal [19] as a (dynamic) provider of mediating ontologies;
and LogMapLt, a “lightweight” variant of LogMap that only applies (efficient) string
matching techniques.</p>
          <p>OntoIdea [20] is an instance matching tool implementing an enhanced version of the
STRIM algorithm proposed in previous work [21]. The new version of the OntoIdea
system identifies not only the “sameAs” relationships between instances, but also the
“topological” relationships (e.g., contains, equals, overlaps, covers, etc.) on geo-spatial
datasets. The type of relationship is driven by the information associated to the entities
(i.e., text or geometry).</p>
          <p>RADON [22] is one of the systems of the LIMES framework. It addresses the efficient
computation of topological relations on geo-spatial datasets, which belong to the largest
sources of Linked Data. The main innovation of the approach is a novel sparse index
for geo-spatial resources based on minimum bounding boxes (MBB). Based on this
index, it is able to discard unnecessary computations for DE-9IM relations. Extensive
experiments show that RADON scales well and outperforms the state of the art by up to
3 orders of magnitude w.r.t. to its runtime.</p>
          <p>SANOM [23] is an ontology alignment system that uses simulated annealing as the
principal technique to find correspondences between two given ontologies. The system
translates the alignment problem into a state optimization and then applies the simulated
annealing to find the optimal alignment of two given ontologies. The optimality of
a state is obtained by a complex fitness function which utilizes different lexical and
structural similarity metrics.</p>
          <p>YAM-BIO is an instance of a generic background knowledge based ontology-matching
framework [24] which is publicly available on GitHub17. YAM-BIO instance uses YAM++
[25] as matcher and the two biomedical ontologies UBERON and DOID as background
knowledge. In the OAEI 2017.5, YAM-BIO adopted a derivation with a specific
algorithm that reduces the path number by avoiding to reuse the same background
knowledge concept more than once, and the rule-based mapping selection strategy.
YAMBIO relies on the LogMap-Repair [26] module to eliminate the inconsistent mappings
in the generated alignments.
6</p>
        </sec>
        <sec id="sec-3-1-3">
          <title>Discussion and lessons learned</title>
          <p>We collected feedback from eight platform developers pertaining to the transition from
SEALS to HOBBIT. A common tenor found in most of the feedback from the systems
pertained to the balance between complexity and guarantees. The HOBBIT platform
requires (i) the systems to be implemented using the Docker stack of technologies,
(ii) the implementation of a single interface to ensure a set of standardized
communication processes and (iii) debugging by using log files collected by the platform across
the distributed infrastructure it employs for benchmarking.</p>
          <p>The use of Docker and associated technologies was largely regarded positively.
Whereas the developers unfamiliar with Docker suggested that the supplementary effort
necessary to create docker packages was considerable, most developers regarded the use
of this technology as a step towards a better integration of tools and more controlled run
of benchmarks. To ensure that the development with Docker can be carried efficiently,
HOBBIT allows for single Docker files to be run using different configurations such as
to ease the deployment and use. This feature will be made more prominent to ensure
that developers make more extensive use thereof.</p>
          <p>Participating systems had to implement an API defined by the benchmark to receive
the datasets which should be linked and to return the generated results. The participants
found a template for this step very helpful and would like to have an even simpler
template in the future to reduce the amount of adaptations. Especially when the benchmark
API is adapted to support even more complex tasks like the multifarm task, a provided
template eases the participation. The prepared template could take care of receiving the
different ontologies and storing them in single files following the predefined structure
of the multifarm task before starting the linking process.18 Such an extension would
enable a backwards compatibility to older solutions which are based on the directory
structure. Additionally, a clearer distinction of the necessary and optional steps when
implementing the system adapter was rated as helpful.</p>
          <p>
            The online instance of the HOBBIT platform is based on a cluster infrastructure
and offers its services to a public community. Since the final evaluation is carried out
17 https://github.com/AminaANNANE/GenericBKbasedMatcher
18 https://www.irit.fr/recherches/MELODI/multifarm/
on this instance, the participants were asked to make sure that their systems can be
deployed on the platform. The development process in itself can however be eased
significantly by testing locally. The HOBBIT platform provides two means for doing so:
a software development toolkit (SDK) and a recipe-based deployment infrastructure.
The HOBBIT SDK19 allows developers to develop and debug a system adapter locally. To this end,
the SDK simulates a platform running in a cluster and ensures rapid local development
without the overheads (e.g., long waiting times) created by a shared cluster. HOBBIT
also supports complete local deployments (instead of simulations) through the
recipe-based framework Exoframe20 for developers who would prefer not to use a simulation.
Therewith, it allows developers to follow a three-step process: (
            <xref ref-type="bibr" rid="ref1">1</xref>
            ) install the HOBBIT
SDK or the HOBBIT platform via Exoframe, (
            <xref ref-type="bibr" rid="ref2">2</xref>
            ) develop and test your system locally,
(
            <xref ref-type="bibr" rid="ref3">3</xref>
            ) upload the system image(s) and execute it (them) using the online instance.
7
          </p>
        </sec>
        <sec id="sec-3-1-4">
          <title>Conclusions and next OAEI steps</title>
          <p>The OAEI 2017.5 pre-campaign was instrumental to understand the strengths of
HOBBIT but also the challenges of moving to a new platform. The feedback obtained from
system developers has been very valuable for the next steps of the OAEI campaigns and
the future development of the HOBBIT platform (e.g., support for more complex tasks,
storage of computed alignment). The OAEI 2018 campaign21 will continue using the
HOBBIT platform together with the SEALS infrastructure, with some tracks like
largebio providing a dual evaluation mode (i.e., both HOBBIT and SEALS). This way, system
developers, organisers and HOBBIT developers will have additional time to guarantee
a successful migration to the new evaluation platform. From the infrastructure point
of view, the HOBBIT SDK will make the developing and debugging phase under the
HOBBIT platform easier. In addition, we will continue offering the Maven-based framework to
facilitate the submission to both HOBBIT and SEALS.</p>
          <p>
            Acknowledgements. EJR was funded by the Centre for Scalable Data Access
(SIRIUS), the RCN project BigMed, and the Alan Turing project AIDA. OZ is supported
by the CSF grant no. 18-23964S. DF was supported by the ELIXIR-EXCELERATE
project (INFRADEV-3-2015). The work of MR is supported by grant
TIN2017-86049R (Ministerio de Economía, Industria y Competitividad, Spain). This work has received
funding from the European Union’s Horizon 2020 research and innovation programme
under grant agreement No 688227 (HOBBIT).
19 https://github.com/hobbit-project/java-sdk
20 https://github.com/hobbit-project/exoframe-recipe-hobbit
21 oaei.ontologymatching.org/2018/
5. Dragisic, Z., Ivanova, V., Li, H., Lambrix, P.: Experiences from the anatomy track in the
ontology alignment evaluation initiative. Journal of biomedical semantics 8(
            <xref ref-type="bibr" rid="ref1">1</xref>
            ) (2017) 56
6. Zamazal, O., Svátek, V.: The Ten-Year OntoFarm and its Fertilization within the
Onto
          </p>
          <p>Sphere. J. Web Semantics 43 (2017) 46–53
7. Jiménez-Ruiz, E., Grau, B.C., Horrocks, I.: Is my ontology matching system similar to
yours? In: 8th International Workshop on Ontology Matching. (2013) 229–230
8. Jiménez-Ruiz, E., Grau, B.C., Horrocks, I., Llavori, R.B.: Logic-based assessment of the
compatibility of UMLS ontology sources. J. Biomedical Semantics 2(S-1) (2011) S2
9. Saveta, T., Daskalaki, E., Flouris, G., Fundulaki, I., Herschel, M., Ngomo, A.C.N.: Pushing
the limits of instance matching systems: A semantics-aware benchmark for linked data. In:
WWW, Companion Volume. (2015)
10. Saveta, T., Daskalaki, E., Flouris, G., Fundulaki, I., Herschel, M., Ngomo, A.C.N.: Lance:</p>
          <p>Piercing to the heart of instance matching tools. In: Int’l Semantic Web Conference. (2015)
11. Faria, D., Pesquita, C., Santos, E., Palmonari, M., Cruz, I.F., Couto, F.M.: The
Agreement</p>
          <p>MakerLight Ontology Matching System. In: OTM Conferences - ODBASE. (2013) 527–541
12. Faria, D., Pesquita, C., Santos, E., Palmonari, M., Cruz, I.F., Couto, F.M.: Results of AML
in OAEI 2017. In: ISWC International Workshop on Ontology Matching (OM). (2017)
13. Cruz, I.F., Palandri Antonelli, F., Stroe, C.: AgreementMaker: Efficient Matching for Large</p>
          <p>
            Real-World Schemas and Ontologies. PVLDB 2(
            <xref ref-type="bibr" rid="ref2">2</xref>
            ) (2009) 1586–1589
14. Rybinski, M., del Mar Roldán García, M., García-Nieto, J., Montes, J.F.A.: Dismatch results
for OAEI 2016. In: 11th International Workshop on Ontology Matching (OM). (2016)
15. Rybiński, M., Montes, J.F.A.: DomESA: a novel approach for extending domain-oriented
lexical relatedness calculations with domain-specific semantics. Journal of Intelligent
Information Systems 49(
            <xref ref-type="bibr" rid="ref3">3</xref>
            ) (2017) 315–331
16. Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in
vector space. arXiv preprint arXiv:1301.3781 (2013)
17. Kachroudi, M., Diallo, G., Ben Yahia, S.: OAEI 2017 results of KEPLER. In: 12th
International Workshop on Ontology Matching. Volume 2032 of CEUR-WS. (2017) 138–145
18. Chen, X., Xia, W., Jiménez-Ruiz, E., Cross, V.V.: Extending an ontology alignment system
with bioportal: a preliminary analysis. In: ISWC Posters &amp; Demonstrations Track. (2014)
19. Fridman Noy, N., Shah, N.H., Whetzel, P.L., Dai, B., Dorf, M., Griffith, N., Jonquet, C.,
Rubin, D.L., Storey, M.A.D., Chute, C.G., Musen, M.A.: BioPortal: ontologies and integrated
data resources at the click of a mouse. Nucleic Acids Research 37(Web-Server-Issue) (2009)
20. Khiat, A., Benaissa, M., Belfedhal, M.A.: STRIM results for OAEI 2015 instance matching
evaluation. In: 10th International Workshop on Ontology Matching. (2015) 208–215
21. Khiat, A., Mackeprang, M.: I-Match and OntoIdea results for OAEI 2017. In: 12th
International Workshop on Ontology Matching. (2017) 135–137
22. Sherif, M.A., Dreßler, K., Smeros, P., Ngomo, A.N.: RADON - Rapid Discovery of
Topological Relations. In: AAAI Conference on Artificial Intelligence. (2017) 175–181
23. Mohammadi, M., Atashin, A.A., Hofman, W., Tan, Y.: SANOM results for OAEI 2017. In:
12th International Workshop on Ontology Matching. (2017) 185–189
24. Annane, A., Bellahsene, Z., Azouaou, F., Jonquet, C.: Building an effective and efficient
background knowledge resource to enhance ontology matching. J. Web Semantics (2018)
25. Ngo, D., Bellahsene, Z.: Overview of YAM++ - (not) yet another matcher for ontology
alignment task. J. Web Semantics 41 (2016) 30–49
26. Jiménez-Ruiz, E., Meilicke, C., Grau, B.C., Horrocks, I.: Evaluating mapping repair systems
with large biomedical ontologies. In: 26th Workshop on Description Logics. (2013) 246–257
          </p>
        </sec>
      </sec>
    </sec>
  </body>
  <back>
    <ref-list>
      <ref id="ref1">
        <mixed-citation>
          1.
          <string-name>
            <surname>Euzenat</surname>
            ,
            <given-names>J.</given-names>
          </string-name>
          ,
          <string-name>
            <surname>Meilicke</surname>
            ,
            <given-names>C.</given-names>
          </string-name>
          ,
          <string-name>
            <surname>Shvaiko</surname>
            ,
            <given-names>P.</given-names>
          </string-name>
          ,
          <string-name>
            <surname>Stuckenschmidt</surname>
            ,
            <given-names>H.</given-names>
          </string-name>
          , Trojahn dos Santos,
          <string-name>
            <surname>C.</surname>
          </string-name>
          :
          <article-title>Ontology alignment evaluation initiative: six years of experience</article-title>
          .
          <source>J. Data Semantics XV</source>
          (
          <year>2011</year>
          )
        </mixed-citation>
      </ref>
      <ref id="ref2">
        <mixed-citation>
          2.
          <string-name>
            <surname>Euzenat</surname>
            ,
            <given-names>J.</given-names>
          </string-name>
          ,
          <string-name>
            <surname>Shvaiko</surname>
            ,
            <given-names>P.</given-names>
          </string-name>
          :
          <article-title>Ontology matching</article-title>
          .
          <source>2nd edn</source>
          . Springer-Verlag (
          <year>2013</year>
          )
        </mixed-citation>
      </ref>
      <ref id="ref3">
        <mixed-citation>
3. Röder,
          <string-name>
            <given-names>M.</given-names>
            ,
            <surname>Usbeck</surname>
          </string-name>
          ,
          <string-name>
            <surname>R.</surname>
          </string-name>
          ,
          <string-name>
            <given-names>Ngonga</given-names>
            <surname>Ngomo</surname>
          </string-name>
          ,
          <string-name>
            <surname>A.C.</surname>
          </string-name>
          :
          <article-title>Gerbil-benchmarking named entity recognition and linking consistently</article-title>
          .
          <source>Semantic Web (Preprint)</source>
          (
          <year>2017</year>
          )
          <fpage>1</fpage>
          -
          <lpage>21</lpage>
        </mixed-citation>
      </ref>
      <ref id="ref4">
        <mixed-citation>
          4.
          <string-name>
            <surname>Jime</surname>
          </string-name>
          <article-title>´nez-</article-title>
          <string-name>
            <surname>Ruiz</surname>
            ,
            <given-names>E.</given-names>
          </string-name>
          ,
          <string-name>
            <surname>Grau</surname>
            ,
            <given-names>B.C.</given-names>
          </string-name>
          ,
          <string-name>
            <surname>Zhou</surname>
            ,
            <given-names>Y.</given-names>
          </string-name>
          ,
          <string-name>
            <surname>Horrocks</surname>
            ,
            <given-names>I.</given-names>
          </string-name>
          :
          <article-title>Large-scale interactive ontology matching: Algorithms and implementation</article-title>
          .
          <source>In: Europ. Conf. Artif. Intell. (ECAI)</source>
          .
          <article-title>(</article-title>
          <year>2012</year>
          )
        </mixed-citation>
      </ref>
    </ref-list>
  </back>
</article>