<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.0 20120330//EN" "JATS-archivearticle1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-title-group>
        <journal-title>CEUR Workshop Proceedings</journal-title>
      </journal-title-group>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="doi">10.1093/nar/gkw918</article-id>
      <title-group>
        <article-title>Towards a Conceptual Model for FAIR Metadata Schemas</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author">
          <string-name>Filipi Miranda Soares</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff11">11</xref>
          <xref ref-type="aff" rid="aff13">13</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Luís Ferreira Pires</string-name>
          <xref ref-type="aff" rid="aff13">13</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Luiz Olavo Bonino da Silva Santos</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff13">13</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Rodrigo Fernandes Calhau</string-name>
          <xref ref-type="aff" rid="aff10">10</xref>
          <xref ref-type="aff" rid="aff13">13</xref>
          <xref ref-type="aff" rid="aff9">9</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Benildes Coura Moreira dos Santos Maculan</string-name>
          <email>benildes@gmail.com</email>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Karen Coyle</string-name>
          <email>kcoyle@kcoyle.net</email>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Shenghui Wang</string-name>
          <email>shenghui.wang@utwente.nl</email>
          <xref ref-type="aff" rid="aff5">5</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Erwin Folmer</string-name>
          <email>Erwin.folmer@han.nl</email>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Debora Pignatari Drucker</string-name>
          <email>debora.drucker@embrapa.br</email>
          <xref ref-type="aff" rid="aff2">2</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Maria Luiza de Almeida Campos</string-name>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Carlos Henrique Marcondes</string-name>
          <email>ch_marcondes@id.uff.br</email>
          <xref ref-type="aff" rid="aff12">12</xref>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Maurício Barcellos Almeida</string-name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Kelly Rosa Braghetto</string-name>
          <email>kellyrb@ime.usp.br</email>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Guilherme Ataíde Dias</string-name>
          <email>guilhermeataide@gmail.com</email>
        </contrib>
        <contrib contrib-type="author">
          <string-name>José Augusto Salim</string-name>
          <email>jasalim@unicamp.br</email>
          <xref ref-type="aff" rid="aff14">14</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Fernando Elias Corrêa</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Dilvan de Abreu Moreira</string-name>
          <email>dilvan@icmc.usp.br</email>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff7">7</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Alexandre Cláudio Botazzo Delbem</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff7">7</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Antonio Mauro Saraiva</string-name>
          <email>saraiva@usp.br</email>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff11">11</xref>
        </contrib>
        <aff id="aff0">
          <label>0</label>
          <institution>BioSemantics, Department of Human Genetics, Leiden University Medical Center</institution>
          ,
          <addr-line>Einthovenweg 20, Leiden 2333 ZC</addr-line>
          ,
          <country country="NL">The Netherlands</country>
        </aff>
        <aff id="aff1">
          <label>1</label>
          <institution>Center for Artificial Intelligence (C4AI)</institution>
          ,
          <addr-line>370 Av. Prof. Lúcio Martins Rodrigues, São Paulo - SP, 05508-020</addr-line>
          ,
          <country country="BR">Brazil</country>
        </aff>
        <aff id="aff2">
          <label>2</label>
          <institution>Embrapa Agricultura Digital</institution>
          ,
          <addr-line>209 Av. Andre Tosello, Campus da Unicamp, Campinas - SP, 13083-886</addr-line>
          ,
          <country country="BR">Brazil</country>
        </aff>
        <aff id="aff3">
          <label>3</label>
          <institution>Federal University of Minas Gerais</institution>
          ,
          <addr-line>6627 Av. Pres. Antônio Carlos, Pampulha, Belo Horizonte - MG, 31270-901</addr-line>
          ,
          <country country="BR">Brazil</country>
        </aff>
        <aff id="aff4">
          <label>4</label>
          <institution>HAN University of Applied Sciences</institution>
          ,
          <addr-line>26 Ruitenberglaan, Arnhem 6826 CC</addr-line>
          ,
          <country country="NL">The Netherlands</country>
        </aff>
        <aff id="aff5">
          <label>5</label>
          <institution>Human Media Interaction (HMI), University of Twente</institution>
          ,
          <addr-line>5 Drienerlolaan, Enschede 7522 NB</addr-line>
          ,
          <country country="NL">The Netherlands</country>
        </aff>
        <aff id="aff6">
          <label>6</label>
          <institution>Independent Researcher</institution>
          ,
          <addr-line>Berkeley, CA</addr-line>
          ,
          <country country="US">USA</country>
        </aff>
        <aff id="aff7">
          <label>7</label>
          <institution>Institute of Mathematics and Computer Sciences (ICMC), University of São Paulo</institution>
          ,
          <addr-line>400 Av. Trab. São Carlense, São Carlos - SP, 13566-590</addr-line>
          ,
          <country country="BR">Brazil</country>
        </aff>
        <aff id="aff8">
          <label>8</label>
          <institution>Institute of Mathematics and Statistics (IME), University of São Paulo</institution>
          ,
          <addr-line>1010 Rua do Matão, 05508-090 São Paulo - SP</addr-line>
          ,
          <country country="BR">Brazil</country>
        </aff>
        <aff id="aff9">
          <label>9</label>
          <institution>LEDS, Federal Institute of Espírito Santo (IFES)</institution>
          ,
          <addr-line>110 Av. Copacabana, Serra, 29173-087</addr-line>
          ,
          <country country="BR">Brazil</country>
        </aff>
        <aff id="aff10">
          <label>10</label>
          <institution>Ontology and Conceptual Modeling Research Group, Federal University of Espírito Santo</institution>
          ,
          <addr-line>514 Av. Fernando Ferrari, Vitória - ES, 29075-910</addr-line>
          ,
          <country country="BR">Brazil</country>
        </aff>
        <aff id="aff11">
          <label>11</label>
          <institution>Polytechnic School, University of São Paulo</institution>
          ,
          <addr-line>158 Av. Prof. Luciano Gualberto, São Paulo - SP, 05508-010</addr-line>
          ,
          <country country="BR">Brazil</country>
        </aff>
        <aff id="aff12">
          <label>12</label>
          <institution>Programa de Pós-graduação em Ciência da Informação (PPGCI), Universidade Federal Fluminense</institution>
          ,
          <addr-line>126 R. Lara Vilela, Niterói - RJ, 24210-590</addr-line>
          ,
          <country country="BR">Brazil</country>
        </aff>
        <aff id="aff13">
          <label>13</label>
          <institution>Semantics, Cybersecurity &amp; Services (SCS), Faculty of Electrical Engineering, Mathematics and Computer Science (EEMCS), University of Twente</institution>
          ,
          <addr-line>5 Drienerlolaan, Enschede 7522 NB</addr-line>
          ,
          <country country="NL">The Netherlands</country>
        </aff>
        <aff id="aff14">
          <label>14</label>
          <institution>University of Campinas</institution>
          ,
          <addr-line>255 Monteiro Lobato Street, Campinas - SP 13083-862</addr-line>
          ,
          <country country="BR">Brazil</country>
        </aff>
      </contrib-group>
      <pub-date>
        <year>2007</year>
      </pub-date>
      <volume>91</volume>
      <fpage>437</fpage>
      <lpage>452</lpage>
      <abstract>
        <p>This paper explores the design and creation of metadata schemas based on the FAIR Data Principles. We provide a clear interpretation of these principles, focusing on how they apply to metadata schemas. Leveraging the OntoUML language, we developed a conceptual model that explains the key components of a FAIR-compliant metadata schema. Through detailed discussion and provision of examples for each model component, this work aims to help metadata designers and curators better understand how to incorporate the FAIR Data principles into their schemas.</p>
      </abstract>
      <kwd-group>
        <kwd>Metadata schema</kwd>
        <kwd>FAIR Data Principles</kwd>
        <kwd>Conceptual Modeling</kwd>
        <kwd>Semantic Web</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec id="sec-1">
      <title>1. Introduction</title>
      <p>Metadata schemas are tools that provide a framework for creating consistent metadata records.
Essentially, these schemas define data structures and represent the semantic model of specific
domains. Ideally, their development should be community-driven to incorporate diverse insights
and ensure broad applicability.</p>
      <p>
        Over recent decades, many communities have developed a wide range of metadata schemas,
significantly contributing to data standardization across domains. One of the goals of employing
these schemas is to enhance data’s findability, accessibility, interoperability, and reusability, in
line with the FAIR principles. However, many metadata schemas do not adhere to the FAIR
Data principles themselves, making their reuse difficult [
        <xref ref-type="bibr" rid="ref1 ref2 ref3">1, 2, 3</xref>
        ]. To solve this, schema curators
should apply the FAIR principles in the development and management of these schemas. As
outlined in FAIR principle I2, each community should decide the level of FAIRness required for
the vocabularies they use. At a minimum, these vocabularies should have unique and permanent
identifiers (F1), be accessible using standard methods (A1), and be described in a clear, widely
understood language for knowledge representation (I1) [
        <xref ref-type="bibr" rid="ref4">4</xref>
        ].
      </p>
      <p>This paper explores the application of FAIR Data principles to the design and management of
metadata schemas. This research has been inspired by our work on agriculture and biodiversity
data management. We propose an OntoUML conceptual model that outlines key components
essential for metadata schemas to comply with these principles. The structure of the paper is as
follows: Section 2 covers related work, Section 3 provides an introduction to OntoUML, Section
4 outlines the methods, Section 5 presents a conceptual model for FAIR metadata schemas,
Section 6 focuses on assessing the FAIRness of metadata schemas, and Section 7 concludes with
final considerations.</p>
      <p>ER2024: Companion Proceedings of the 43rd International Conference on Conceptual Modeling: ER Forum, Special
Topics, Posters and Demos, October 28-31, 2024, Pittsburgh, Pennsylvania, USA</p>
      <p>https://research.utwente.nl/en/persons/filipi-miranda-soares (F. M. Soares);
https://research.utwente.nl/en/persons/luis-ferreira-pires (L. Ferreira Pires);
https://people.utwente.nl/l.o.boninodasilvasantos (L. O. Bonino da Silva Santos);
https://nemo.inf.ufes.br/equipe/rodrigo-calhau/ (R. F. Calhau); http://kcoyle.net (K. Coyle);
https://people.utwente.nl/shenghui.wang (S. Wang); https://www.han.nl/onderzoek/onderzoekers/erwin-folmer/
(E. Folmer); https://www.embrapa.br/equipe/-/empregado/353696/debora-pignatari-drucker (D. P. Drucker);
http://lattes.cnpq.br/9545682339961651 (M. L. d. A. Campos); https://mba.eci.ufmg.br/english/ (M. B. Almeida);
http://lattes.cnpq.br/7588616253679294 (J. A. Salim)</p>
      <p>0000-0002-0674-7960 (F. M. Soares); 0000-0001-7432-7653 (L. Ferreira Pires); 0000-0002-1164-1351 (L. O. Bonino
da Silva Santos); 0009-0006-6051-2165 (R. F. Calhau); 0000-0003-4303-9071 (B. C. M. d. S. Maculan);
0000-0002-1085-7089 (K. Coyle); 0000-0003-0583-6969 (S. Wang); 0000-0002-7845-1763 (E. Folmer);</p>
      <p>CEUR Workshop Proceedings (CEUR-WS.org), http://ceur-ws.org, ISSN 1613-0073</p>
    </sec>
    <sec id="sec-2">
      <title>2. Related Work</title>
      <p>The application of FAIR data principles to semantic artifact curation is an evolving field, crucial
for ensuring that resources like controlled vocabularies, taxonomies, ontologies, and metadata
schemas support data FAIRification. Several studies have explored this area.</p>
      <p>Berg-Cross and Arbor [5] highlight the importance of ontologies in making metadata
computer-processable by providing axiomatized definitions. However, adapting FAIR principles to these
resources introduces challenges, such as the need for community standards and meaningful
semantics.</p>
      <p>
        The FAIRsFAIR project report [
        <xref ref-type="bibr" rid="ref1">1</xref>
        ] proposes refined principles to improve the FAIRness of
semantic artifacts, building on existing community recommendations. Poveda-Villalón et al. [
        <xref ref-type="bibr" rid="ref3">3</xref>
        ]
examine the challenges of aligning ontologies with FAIR principles, emphasizing the need for
ontologies that support data interoperability and reuse. Amdouni and Jonquet [
        <xref ref-type="bibr" rid="ref2">2</xref>
        ] present a
framework for evaluating the FAIRness of semantic artifacts, offering a quantitative approach
to assess adherence to FAIR principles. These efforts underscore the importance of establishing
robust standards and practices for FAIR semantic artifacts construction; however, none of
them target metadata schemas specifically, nor do they present a conceptual model to elucidate the
requirements for FAIR metadata schemas. Considering the importance of community metadata
schemas for FAIR data, this paper proposes a series of recommendations to make this specific
type of semantic artifact FAIR.
      </p>
    </sec>
    <sec id="sec-3">
      <title>3. A (very) Brief Introduction to OntoUML Stereotypes</title>
      <p>OntoUML is a language for specifying ontologies that has been implemented as a UML extension.
Grounded in the Unified Foundational Ontology (UFO), OntoUML provides a robust conceptual
framework, making it an ideal tool for accurately representing complex domains [6, 7], such
as in this paper. UFO’s comprehensive approach to ontology modeling, which includes key
elements of real-world phenomena such as objects, events, and their interrelations, forms a
solid basis for OntoUML. Consequently, OntoUML leverages this foundational strength to create
“detailed and precise” syntactic models; however, semantics remain within the model users’
agreement [6, 7].</p>
      <p>OntoUML introduces several stereotypes to capture different kinds of ontological distinctions.
These stereotypes offer a clear and precise way to model various elements within a domain,
including, but not limited to: Kind, a natural category that provides identity to its instances;
Role, an entity’s role in a specific context; Subkind, a specialized kind that inherits properties
from a broader kind while introducing unique features; Mixin, a characteristic shared by entities
from different kinds; RoleMixin, which combines the concepts of role and mixin to represent a
role that can be assumed by entities from different kinds; Category, an abstract grouping of
mixins that does not provide identity; Relator, which mediates relationships between two or
more entities; and Quality, a property or characteristic that an entity possesses, which can be
measured or perceived [6].</p>
      <p>OntoUML is particularly useful in scenarios where precise and unambiguous domain
representations are crucial. For instance, in the development of FAIR metadata schemas, OntoUML
can help create models that allow us to understand all the components and their relationships
to define a FAIR metadata schema.</p>
    </sec>
    <sec id="sec-4">
      <title>4. Methods</title>
      <p>We defined our ontological conceptual model by following an approach similar to that proposed
by Guizzardi [8], known as “ontological unpacking”. This method involves analyzing a domain
to reveal its underlying ontological conceptual model. Collaboration with specialists provided
valuable insights that shaped this model. The following steps were undertaken:
Domain Analysis: This phase focused on identifying key concepts, entities, and relationships
within the domain through structured meetings, workshops, and communications. Key
discussions occurred during workshops promoted by the GO FAIR Agro Brazil Network<sup>1</sup>,
Brazil’s 5th National Action Plan on Open Government<sup>2</sup>, and the development of the
Almes Core Metadata Schema [9]. Additionally, collaboration with the TDWG Biological
Interaction Data Interest Group<sup>3</sup> during the development of the Plant-Pollinator
Interactions (PPI) Vocabulary [10] played a crucial role. Insights from the WorldFAIR Case Study
on Agricultural Biodiversity [11, 12] also helped us to understand the key components of
FAIR community metadata schemas.</p>
      <p>Literature Consultation: Literature suggestions from domain experts and related works were
also considered. The work of Marcia L. Zeng and collaborators [13, 14], and the ISO/IEC
11179 standards [15, 16, 17, 18] were key references for several concepts included in this
model, due to their solid foundation for metadata schema modeling. The domain analysis
combined with insights from the literature resulted in the requirements listed in Table 1.
Ontological Analysis: We examined the concepts in Table 1 to determine their ontological
nature, and then classified these concepts into appropriate ontological categories, such as
kinds, roles, phases, and mixins, in accordance with OntoUML.</p>
      <p>Model Development: The conceptual model was developed based on the insights gained
from the ontological analysis. This model represents the ontological structure of the
domain of FAIR metadata schemas, capturing the essential nature and relationships of its
concepts.</p>
      <p>Validation: We validated the model by conducting a FAIR assessment grounded in the concepts
outlined in this conceptual model.</p>
      <sec id="sec-4-1">
        <title>1https://go-fair-agro.github.io/ 2https://www.opengovpartnership.org/wp-content/uploads/2022/01/Brazil_Action-Plan_2021-2023_EN.pdf 3https://github.com/tdwg/interaction 4https://www.dcc.ac.uk/guidance/standards/metadata</title>
        <p>Each metadata schema should have a unique identifier (i.e., the schema namespace),
represented by a Uniform Resource Identifier (URI). Different versions of a metadata
schema should have distinct identifiers.</p>
        <p>The landing page of a metadata schema should contain a descriptive metadata record
with details about the schema. This metadata record should be machine-readable to
allow search engines and other automated tools to discover the schema [20].
To improve metadata schema findability, the schema and its metadata record should
be indexed in vocabulary catalogs such as the Linked Open Vocabularies (LOV) [21],
OntoBee [22], and the Digital Curation Centre List of Metadata Standards<sup>4</sup>.
The URIs implemented by the metadata schema should resolve to web pages accessible
through widely used, universally accessible and secure communication protocols, such
as HTTPS.</p>
        <p>The sustained accessibility of metadata schema documentation should be ensured,
regardless of whether the schema is still active or maintained. This is important for
applications that use past schema versions.</p>
        <p>Formal languages (e.g., RDF, OWL) should be used for the machine-readable
representation of metadata schemas.</p>
        <p>Conceptual models in UML, OntoUML, or other languages should be provided to help
humans understand the relationships between concepts.</p>
        <p>The range (data types) and the domain (a class) of datatype properties should be clearly
specified in the schema.</p>
        <p>The range (a class) and the domain (a class) for object properties should be clearly
specified. If the classes used as range are defined elsewhere, this external controlled
vocabulary should also adhere to the FAIR principles.</p>
        <p>Properties and classes should have clear definitions and names.</p>
        <p>Metadata schemas should be published under open licenses such as CC BY 4.0, and
the license information should be machine-readable.</p>
        <p>Schema modifications, such as reviewing, deleting, and adding new terms should be
documented.</p>
        <p>The authority responsible for curating the metadata schema should be indicated.
When developing new metadata schemas, reusing elements from existing schemas
should be prioritized to minimize duplication. Reuse is recommended only when the
elements have the same meaning, regardless of the schema context. The namespaces
of all reused terms should be listed on the schema’s landing page and included in its
serializations.</p>
      </sec>
    </sec>
    <sec id="sec-5">
      <title>5. A Conceptual Model for FAIR Metadata Schemas</title>
      <p>Drawing on the refined FAIR Data principles in Table 1, supplemented by insights from the
literature and the collective domain expertise of this paper’s collaborators, we present a
conceptual model for FAIR metadata schemas in Fig. 1. We further explain each component of the
conceptual model, providing examples.</p>
      <sec id="sec-5-1">
        <title>5.1. Schema Authority</title>
        <p>A FAIR metadata schema should be created through the collaborative efforts of specific
communities of practice, to ensure they encode their discipline-specific elements [23]. Examples of
community-developed schemas include DwC [24], Astronomy Visualization Metadata (AVM)
[25], and the Data Documentation Initiative (DDI) [26]. Expert groups or institutions, often
referred to as the schema authority [15, 17, 18, 27], should lead the development and curation
of metadata schemas. The authority is responsible for any modification in the schema, which
includes adding, reviewing, and removing metadata elements.</p>
        <p>The authority behind the metadata schema should be publicly declared on the schema’s
landing page and within the schema metadata record. This transparency ensures that the user
community knows who to contact for assistance, inquiries, suggestions, or participation in
the curation community. The authority is also responsible for indexing the schema (and its
descriptive metadata record) in vocabulary catalogs like Linked Open Vocabularies (LOV) and
OntoBee, so more users can find and reuse it.</p>
        <p>Additionally, maintaining a backup of the metadata schema in data repositories like GitHub
and Zenodo is crucial for its longevity, accessibility, and version control. Platforms like GitHub
provide tools for tracking changes, reverting to previous versions, and facilitating effective
collaboration.</p>
        <p>When setting up a backup on GitHub, including a README file that explains the schema’s
purpose and provides guidelines for use and contribution is helpful. Utilizing GitHub features
such as branches for ongoing developments, tags for stable releases, and releases for distributing
specific versions can improve schema management. Engaging the community through issues and
pull requests can also gather feedback and contributions, enriching the schema’s development
and adoption. Examples of community schemas using GitHub for this purpose include DwC<sup>5</sup>,
DDI-CDI<sup>6</sup>, and Almes Core (ALM)<sup>7</sup> [9].</p>
      </sec>
      <sec id="sec-5-2">
        <title>5.2. Landing Page of a Metadata Schema</title>
        <p>The landing page of a metadata schema is a type of website that consolidates all the
information describing a schema. It typically includes details such as the schema’s purpose, structure,
and usage guidelines. It should be accessible via a URL that supports open data transfer
protocols such as HTTPS, as recommended by RF5. This page should include at least the following
components:
• A schema metadata record, describing the schema in formats accessible to both humans
and machines.</p>
        <p>• The metadata schema itself, providing the structure and specifications.</p>
        <p>Furthermore, the landing page should declare the namespaces employed by the metadata
schema, ensuring consistent terms usage. This includes declaring namespaces for terms reused
from other schemas as well. For instance, the Data Catalog Vocabulary (DCAT) defines its
namespace as http://www.w3.org/ns/dcat#, reuses many terms from Dublin Core (DC), and
introduces a minimal set of its own classes and properties<sup>8</sup>.</p>
      </sec>
      <sec id="sec-5-3">
        <title>5.3. Schema’s Metadata Record</title>
        <p>A schema metadata record typically appears in the first section of the landing page, as in
the W3C standards, for instance. It should be accessible and readable to both humans and
machines. This record should include, at least: resource identifier (the schema URI), title,
distribution, license, resource type, description, authority, modified date, version, keywords,
and distributions. This aligns with the recommendations for metadata discovery by the Cross
Domain Interoperability Framework (CDIF) [20], which advises on using metadata terms from
established schemas like DCAT, Schema.org (SDO), and DC to create the metadata record, and
recommends using machine readable formats like JSON-LD. The metadata record should be
provided (or registered) by the schema’s authority.</p>
      </sec>
      <sec id="sec-5-4">
        <title>5.4. Metadata Schema</title>
        <p>A metadata schema is a type of semantic artifact aimed at organizing, categorizing, and
managing information and knowledge within a specific domain or context [28, 29]. A metadata
schema requires comprehensive documentation to ensure it is accessible and reusable for humans
and computer applications alike. This documentation comprises several components, as
detailed in Fig. 1. A key component of a metadata schema is its namespace, which distinguishes
it from other schemas.</p>
        <p>Metadata schemas have as basic components metadata elements and the relationships
between these elements [13]. In an ontological approach, these elements are usually classified
as classes or properties [13, 30]. They are typically described in textual form (as a glossary
of terms) on the schema’s landing page for human consumption. Additionally, these schemas
should be represented in both conceptual and machine-actionable models.</p>
        <p><sup>6</sup>https://github.com/ddialliance
<sup>7</sup>https://github.com/AlmesCore/
<sup>8</sup>https://www.w3.org/TR/2023/WD-vocab-dcat-3-20230307/</p>
      </sec>
      <sec id="sec-5-5">
        <title>5.5. Metadata Element</title>
        <p>A metadata element can be either a class or a property [13]. A class represents a category
or type of thing (e.g., “Book,” “Person,” “Organism”) and is a way to group similar items based
on shared characteristics [31]. Classes provide a high-level organizational structure to metadata.
They allow for categorizing different entities or concepts within the described domain. For
instance, the DwC metadata schema (developed for the biodiversity domain) includes classes
such as dwc:Event, dwc:Occurrence, and dwc:Taxon.</p>
        <p>Properties describe specific characteristics or aspects of an individual within a given class.
For the class dwc:Event, examples of properties include dwc:eventDate, dwc:eventID, and
dwc:EventMeasurement. Properties in metadata schemas, as in ontologies, can be classified
into two types: object property and data property.</p>
        <p>Object properties define relationships between two entities (or classes) within the metadata
schema. They link instances of one class to instances of another class [32]. For example, in a
bibliographic metadata schema:
:author a owl:ObjectProperty ;
rdfs:domain :Book ;
rdfs:range :Person .</p>
        <p>In this example, :author is an object property that relates a :Book to a :Person. This
indicates that a book can have one or more authors, and each author is a person.</p>
        <p>Data properties (also known as datatype properties) define relationships between an entity
and primitive data types. They link instances of a class to values such as strings, numbers, or
dates (i.e., primitive data types) [32, 33]. For example, in the bibliographic schema:
:publicationDate a owl:DatatypeProperty ;
rdfs:domain :Book ;
rdfs:range xsd:date .</p>
        <p>The domain of a property specifies the class of individuals to which the property can be applied.
In other words, if a property has a certain class as its domain, then any individual that possesses
this property is assumed to be a member of that class. For instance, if a property :hasAuthor
has :Book as its domain, then any resource with a :hasAuthor value is inferred to be a :Book [32].</p>
        <p>Defining domains and ranges between concepts in a metadata schema enhances the precision
and clarity of the relationships between data elements and enables machine inferences [34]. By
specifying which classes of objects a property can relate to (domain) and the type of values it
can take (range), we infer that data elements belong to the specified classes.</p>
        <p>In addition to the relationships between elements in a schema, each metadata element should
include a specification of semantics and content [13, 14, 15]. Semantics is about the “meanings
of the elements”, and content is related to providing “general instructions on what and how
values should be assigned to the elements in an application” [13, p.39].</p>
      </sec>
      <sec id="sec-5-6">
        <title>5.6. Element Semantic Specification</title>
        <p>Unclear semantic specification of the elements in a metadata schema hampers its reuse. A
combination of attributes can be used to define the semantics of an element. For instance, in DC,
“term name” assigns a specific, distinct name to the metadata element within the schema. A
“Label” is included for human readability and can be available in multiple languages. “Definition”
provides in-depth explanations regarding the element’s purpose, scope and use.</p>
        <p>The ISO/IEC 11179 standards, grounded on the Theory of Terminology, guide articulating
the semantics of metadata elements, focusing on formulating definitions in Part 4 [17] and term
names in Part 5 [18].</p>
        <p><italic>5.6.1. Formulating Metadata Definitions</italic></p>
        <p>Part 4 of ISO/IEC 11179 outlines a set of requirements and recommendations for metadata
definitions [17]. The requirements specify that definitions should focus on singular concepts,
express the essence of the concept rather than what it is not, and be formulated as
descriptive phrases or sentences. They should also avoid uncommon abbreviations and not embed
definitions of other data or underlying concepts. Additionally, the recommendations advise
that definitions convey the essential meaning of the concept, be precise, unambiguous, concise,
and able to stand alone. Definitions should also avoid embedding rationale, functional usage,
domain information, or procedural details. Circular reasoning should be avoided, and consistent
terminology and logical structure should be maintained for related definitions.</p>
        <p><italic>5.6.2. Formulating Metadata Names</italic></p>
        <p>In addition to providing clear definitions, metadata elements must have appropriate names.
ISO/IEC 11179-5 [18] provides naming conventions to ensure names are meaningful and easily
understood by users. In this standard, each name represents a concept in a metadata schema and
follows specific conventions, which include syntactic, semantic, and lexical rules that should be
documented. The guiding principles for naming include defining the scope (general or specific),
identifying the naming authority, specifying the components (semantic principle), arranging
them (syntactic principle), and determining their appearance (lexical principle). Additionally, the
uniqueness principle ensures names are distinct within the defined scope, with differentiation
rules if needed.</p>
        <p>ISO/IEC 11179-5 provides a flexible framework for nomenclature, adaptable across domains.
The semantics of name parts depend on the type of metadata term. While class-type terms may
be abstract, they must still be named and hierarchically organized within the schema, similar to
other metadata terms, in relation to their properties [18].</p>
        <p>Property name consistency follows two main rules: Discreteness, where each property term
has a unique meaning without overlap, and Completeness, ensuring that the set of property
terms fully describes an entity [18]. Property names can indicate value domains, such as Color,
Quantity, or Height, as seen in the Plant-Pollinator Interactions (PPI) schema, with terms like
ppi:styleLengthInMilimiters, ppi:ovuleQuantity, and ppi:visitedFlowersQuantity
[10].</p>
        <p>A property name may also include a qualifier term [18]. A qualifier can be used to distinguish
metadata terms with identical names or to represent more specific values, such as the qualifiers
‘start’ and ‘end’ in dcat:startDate and dcat:endDate.</p>
        <p>Finally, naming conventions should include the use of separators like hyphens, grouping
symbols, or formatting resources like CamelCase, to connect name parts, for both classes and
properties [18].</p>
      </sec>
      <sec id="sec-5-7">
        <title>5.7. Machine-Actionable Serialization</title>
        <p>Semantic Web languages like RDF and OWL allow the creation of machine-readable and
actionable serializations of metadata schemas. Initially, early metadata schemas, including DC
and VRA Core, were designed as purely XML schemas. Over time, particularly in the second
decade of the millennium, there was a shift towards RDF vocabularies [13].</p>
        <p>FAIR metadata schemas should be implemented as Semantic Web schemas, using languages
such as RDF and OWL, as outlined in principle RF7 in Table 1. These languages enable advanced
functions like semantic querying and automated reasoning. RDF provides a framework for
expressing information about resources in a graph form, which is highly beneficial for linking
data [35]. OWL adds a richer ontology layer, representing more complex relationships and
classifications, enhancing the depth and utility of the schema [36]. The choice of serialization
language depends on the goals of the governing authority of the schema. A persistent identifier (URI
or IRI) should be used as the schema namespace. Each metadata element should then have a
unique identifier within that namespace. There are services that help generate these identifiers,
such as the W3C Permanent Identifier Community Group<sup>9</sup> and purl.org<sup>10</sup>.</p>
      </sec>
      <sec id="sec-5-8">
        <title>5.8. Schema Conceptual Model</title>
        <p>A conceptual model is pivotal in developing and implementing a metadata schema. It is an
abstract representation of the phenomena the metadata schema aims to describe, capturing the
essential concepts and their relationships, sometimes referred to as metamodel or upper model.
A conceptual model may serve different purposes in a metadata schema:</p>
        <list list-type="bullet">
          <list-item>
            <p>Schema design: A conceptual model helps identify the key concepts, entities, and attributes
relevant to the described domain, ensuring that the schema covers all necessary aspects
of the data it intends to represent [37].</p>
          </list-item>
          <list-item>
            <p>Data integration and mapping: Conceptual models may provide a reference point for
mapping between schemas. They are a fundamental component for enabling semantic
interoperability between heterogeneous datasets [37].</p>
          </list-item>
          <list-item>
            <p>Human understanding: Conceptual models are useful for schema designers and end-users.
They can facilitate the understanding of the schema by providing a visual, conceptual
representation of its classes and properties. This also makes the schema easier to reuse
by users unfamiliar with it [37].</p>
          </list-item>
        </list>
        <sec id="sec-5-8-1">
          <p><sup>9</sup>https://w3id.org/ <sup>10</sup>https://purl.org/</p>
          <p>Many metadata schemas use conceptual models for these purposes. UML is one of the
most popular languages used for metadata schema conceptual models. For instance, EML<sup>11</sup>
is based on a UML conceptual model that defines the structure and semantics of metadata to
describe ecological data effectively. Besides EML, the DDI-CDI Framework is another example
of a schema that also uses a UML conceptual model, which is described as the “core” of their
schema<sup>12</sup>.</p>
        </sec>
      </sec>
    </sec>
    <sec id="sec-6">
      <title>6. Assessing the FAIRness of Metadata Schemas</title>
      <p>Assessing the FAIRness of digital objects is a complex, multi-layered task that involves evaluating
various elements contributing to compliance with the FAIR principles. This process can be
either automated or manual, each with its own set of advantages and limitations. Automated
assessments typically rely on widely accepted metadata schemas (e.g., DC, SDO, DCAT) and
identifier systems such as PURL and DOI. While these tools are efficient, they may overlook
domain-specific schemas or resources that do not fit within their predefined rules, potentially
resulting in lower FAIRness scores [38]. Manual assessments offer more flexibility to evaluate
domain-specific elements, but this approach can introduce bias due to subjective interpretation
[38].</p>
      <p>
        In this paper we introduce a methodology for manually assessing the FAIRness of metadata
schemas, applying principles similar to those in Amdouni and Jonquet’s FAIRness Assessment
Grid for Ontologies [
        <xref ref-type="bibr" rid="ref2">2</xref>
        ]. The proposed approach attributes scores to FAIR indicators, which are
then summed up to calculate an overall FAIR compliance percentage for each principle.
      </p>
      <p>
        Using this method, we assessed the FAIRness of three metadata schemas: Darwin Core
(DwC)<sup>13</sup>, DCAT<sup>14</sup>, and Almes Core (ALM)<sup>15</sup>. The overall FAIR compliance scores were as
follows: DwC: approximately 87%, DCAT: 100%, and ALM: 91.75%. The main reason DwC did
not achieve 100% was the lack of machine-actionable serializations and defined domains and
ranges between classes and properties. ALM’s score for findability was reduced because it is
not indexed in vocabulary catalogs. Detailed guidelines and examples of how this assessment
was applied to these specific metadata schemas are provided on Zenodo [
        <xref ref-type="bibr" rid="ref1">1</xref>
        ].
      </p>
      <p>These assessments illustrate the applicability of the proposed conceptual model and
demonstrate its utility in helping communities identify and address gaps in their metadata schemas to
achieve full FAIR compliance.</p>
    </sec>
    <sec id="sec-7">
      <title>7. Final Considerations</title>
      <p>
        The FAIR assessment proposed here is not intended as an indicator of schema quality. As
emphasized by GO FAIR [
        <xref ref-type="bibr" rid="ref4">4</xref>
        ] and mentioned in this paper’s introduction, the responsibility for
determining the FAIRness level of schemas rests with the goals set by the schema authorities.
This paper aims to support communities in applying FAIR Data principles to metadata schema
management, offering insights to enhance the findability, accessibility, interoperability, and
reusability of their schemas.
      </p>
      <p><sup>11</sup>https://eml.ecoinformatics.org
<sup>12</sup>https://ddialliance.org/Specification/ddi-cdi#uml_model
<sup>13</sup>http://rs.tdwg.org/dwc/terms/
<sup>14</sup>http://www.w3.org/ns/dcat#
<sup>15</sup>https://w3id.org/AlmesCore#</p>
      <p>Regarding limitations, the proposed FAIR assessment requires further refinement. Developing
an automated tool to streamline the FAIR assessment process would be a valuable enhancement.
For future work, additional empirical validation through multiple case studies or user feedback
from metadata schema designers is necessary to further mature and validate this conceptual
model.</p>
    </sec>
    <sec id="sec-8">
      <title>Data Availability</title>
    </sec>
    <sec id="sec-9">
      <title>Acknowledgments</title>
      <p>FMS received funding from the São Paulo Research Foundation - FAPESP (Grant n. 2021/15125-0
and 2022/08385-8). BCMSM received funding from Conselho Nacional de Desenvolvimento
Científico e Tecnológico - CNPq (Grant n. 307765/2023-7). KRB received funding from FAPESP
(Grant n. 2023/00779-0). FMS, DPD, KRB, FEC, DAM, ACBD, and AMS thank the Center for
Artificial Intelligence (C4AI), a partnership of USP, IBM, and FAPESP (Grant 2019/07665-4).
This study was financed in part by the Coordenação de Aperfeiçoamento de Pessoal de Nível
Superior – Brazil (CAPES) – Finance Code 001.</p>
      <p>[5] G. Berg-Cross, S. Arbor, Beyond simple fair principles for ontologies and semantic
resources: Grounding rich, meaningful metadata, Journal of the Washington Academy of
Sciences 108 (2022) 1–26. URL: https://www.jstor.org/stable/27281297, accessed 2 Aug.
2024.
[6] G. Guizzardi, Ontological Foundations for Structural Conceptual Models, Ph.D. thesis,
University of Twente, 2005.
[7] G. Guizzardi, G. Wagner, Conceptual simulation modeling with onto-uml advanced
tutorial, in: Proceedings of the 2012 Winter Simulation Conference (WSC), 2012, pp. 1–15.
doi:10.1109/WSC.2012.6465133.
[8] G. Guizzardi, A. Bernasconi, O. Pastor, V. C. Storey, Ontological unpacking as explanation:
The case of the viral conceptual model, in: A. Ghose, J. Horkof, V. E. Silva Souza, J. Parsons,
J. Evermann (Eds.), Conceptual Modeling, Springer International Publishing, Cham, 2021,
pp. 356–366.
[9] F. M. Soares, F. E. Corrêa, L. F. Pires, L. O. B. da Silva Santos, D. P. Drucker, et al., Building
a community-based fair metadata schema for brazilian agriculture and livestock trading
data, in: Proceedings of the SEMPW 2022 Workshop, CEUR Workshop Proceedings,
Rheinisch-Westfälische Technische Hochschule, 2022.
[10] J. A. Salim, A. M. Saraiva, P. F. Zermoglio, K. Agostini, M. Wolowski, et al., Data
standardization of plant–pollinator interactions, GigaScience 11 (2022) giac043.
doi:10.1093/gigascience/giac043.
[11] D. Drucker, J. Salim, J. Poelen, et al., WorldFAIR (D10.2) Agricultural Biodiversity Standards,
Best Practices and Guidelines Recommendations, Zenodo, 2024.
doi:10.5281/ZENODO.10666593.
[12] D. Drucker, J. Salim, J. Poelen, et al., WorldFAIR (D10.3) Agricultural biodiversity FAIR
data assessment rubrics, Zenodo, 2024. doi:10.5281/zenodo.10719265.
[13] M. L. Zeng, J. Qin, Metadata, third edition ed., ALA Neal-Schuman, Chicago, 2022.
[14] L. M. Chan, M. L. Zeng, Metadata interoperability and standardization - a study of
methodology part i: Achieving interoperability at the schema level, D-Lib Magazine 12
(2006). doi:10.1045/june2006-chan.
[15] ISO/IEC, ISO/IEC 11179-1:2023: Information technology: Metadata registries (MDR): Part
1: Framework, 2023.
[16] ISO, Building a Metadata Schema – Where to Start, ISO, Geneva, 2008.
[17] ISO/IEC, Information technology – metadata registries (mdr) – part 4: Formulation of data
definitions, 2004.
[18] ISO/IEC, Information technology – metadata registries (mdr) – part 5: Naming principles,
2015.
[19] M. D. Wilkinson, M. Dumontier, I. J. Aalbersberg, G. Appleton, M. Axton, et al., The fair
guiding principles for scientific data management and stewardship, Scientific Data 3 (2016)
160018. doi:10.1038/sdata.2016.18.
[20] Cross-Domain Interoperability Framework (CDIF) Working Group, S. Richard, A. Gregory,
S. Hodson, D. Fils, et al., Cross domain interoperability framework (cdif): Discovery module
(v01 draft for public consultation), 2023. doi: 10.5281/ZENODO.10252564.
[21] P.-Y. Vandenbussche, G. A. Atemezing, M. Poveda-Villalón, B. Vatant, Linked open
vocabularies (lov): A gateway to reusable semantic vocabularies on the web, Semantic Web 8</p>
    </sec>
  </body>
  <back>
    <ref-list>
      <ref id="ref1">
        <mixed-citation>
          [1]
          <string-name>
            <given-names>Y.</given-names>
            <surname>Le Franc</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J.</given-names>
            <surname>Parland-von Essen</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L.</given-names>
            <surname>Bonino</surname>
          </string-name>
          , et al.,
          <article-title>D2.2 fair semantics: First recommendations</article-title>
          ,
          <year>2020</year>
          . doi:
          <pub-id pub-id-type="doi">10.5281/ZENODO.5361930</pub-id>
          .
        </mixed-citation>
      </ref>
      <ref id="ref2">
        <mixed-citation>
          [2]
          <string-name>
            <given-names>E.</given-names>
            <surname>Amdouni</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C.</given-names>
            <surname>Jonquet</surname>
          </string-name>
          ,
          <article-title>Fair or fairer? an integrated quantitative fairness assessment grid for semantic resources and ontologies</article-title>
          , in: E. Garoufallou,
          <string-name>
            <given-names>M. A.</given-names>
            <surname>Ovalle-Perandones</surname>
          </string-name>
          , A. Vlachidis (Eds.),
          <source>Metadata and Semantic Research. MTSR</source>
          <year>2021</year>
          , volume
          <volume>1537</volume>
          of Communications in Computer and Information Science, Springer, Cham,
          <year>2022</year>
          . doi:
          <pub-id pub-id-type="doi">10.1007/978-3-030-98876-0_6</pub-id>
          .
        </mixed-citation>
      </ref>
      <ref id="ref3">
        <mixed-citation>
          [3]
          <string-name>
            <given-names>M.</given-names>
            <surname>Poveda-Villalón</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Espinoza-Arias</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Garijo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>O.</given-names>
            <surname>Corcho</surname>
          </string-name>
          ,
          <article-title>Coming to terms with fair ontologies</article-title>
          , in:
          <string-name>
            <given-names>C. M.</given-names>
            <surname>Keet</surname>
          </string-name>
          , M. Dumontier (Eds.),
          <article-title>Knowledge Engineering and Knowledge Management</article-title>
          .
          <source>EKAW</source>
          <year>2020</year>
          , volume
          <volume>12387</volume>
          of Lecture Notes in Computer Science, Springer, Cham,
          <year>2020</year>
          . doi:
          <pub-id pub-id-type="doi">10.1007/978-3-030-61244-3_18</pub-id>
          .
        </mixed-citation>
      </ref>
      <ref id="ref4">
        <mixed-citation>
          [4]
          <collab>GO FAIR</collab>
          ,
          <article-title>Fair principles</article-title>
          , https://www.go-fair.org/fair-principles/,
          <year>2023</year>
          . Accessed:
          <date-in-citation content-type="access-date">2024-08-02</date-in-citation>
          .
        </mixed-citation>
      </ref>
    </ref-list>
  </back>
</article>