<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">Results of SemTab 2021 ⋆</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Vincenzo</forename><surname>Cutrona</surname></persName>
							<email>vincenzo.cutrona@supsi.ch</email>
							<affiliation key="aff0">
								<orgName type="institution">SUPSI</orgName>
								<address>
									<country key="CH">Switzerland</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Jiaoyan</forename><surname>Chen</surname></persName>
							<email>jiaoyan.chen@cs.ox.ac.uk</email>
							<affiliation key="aff1">
								<orgName type="institution">University of Oxford</orgName>
								<address>
									<country key="GB">UK</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Vasilis</forename><surname>Efthymiou</surname></persName>
							<affiliation key="aff2">
								<orgName type="department">FORTH-ICS</orgName>
								<address>
									<country key="GR">Greece</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Oktie</forename><surname>Hassanzadeh</surname></persName>
							<email>hassanzadeh@us.ibm.com</email>
							<affiliation key="aff3">
								<orgName type="institution">IBM Research</orgName>
								<address>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Ernesto</forename><surname>Jiménez-Ruiz</surname></persName>
							<email>ernesto.jimenez-ruiz@city.ac.uk</email>
							<affiliation key="aff4">
								<orgName type="institution">University of London</orgName>
								<address>
									<settlement>City</settlement>
									<country key="GB">UK</country>
								</address>
							</affiliation>
							<affiliation key="aff5">
								<orgName type="institution" key="instit1">SIRIUS</orgName>
								<orgName type="institution" key="instit2">University of Oslo</orgName>
								<address>
									<country key="NO">Norway</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Juan</forename><surname>Sequeda</surname></persName>
						</author>
						<author>
							<persName><forename type="first">Kavitha</forename><surname>Srinivas</surname></persName>
							<email>kavitha.srinivas@ibm.com</email>
							<affiliation key="aff3">
								<orgName type="institution">IBM Research</orgName>
								<address>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Nora</forename><surname>Abdelmageed</surname></persName>
							<email>nora.abdelmageed@uni-jena.de</email>
							<affiliation key="aff6">
								<orgName type="institution">University of Jena</orgName>
								<address>
									<country key="DE">Germany</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Madelon</forename><surname>Hulsebos</surname></persName>
							<email>m.hulsebos@uva.nl</email>
							<affiliation key="aff7">
								<orgName type="institution">University of Amsterdam</orgName>
								<address>
									<country key="NL">The Netherlands</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Daniela</forename><surname>Oliveira</surname></persName>
							<email>dpoliveira@fc.ul.pt</email>
							<affiliation key="aff8">
								<orgName type="department">Faculdade de Ciências</orgName>
								<orgName type="laboratory">LASIGE</orgName>
								<orgName type="institution">Universidade de Lisboa</orgName>
								<address>
									<country key="PT">Portugal</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Catia</forename><surname>Pesquita</surname></persName>
							<email>clpesquita@fc.ul.pt</email>
							<affiliation key="aff8">
								<orgName type="department">Faculdade de Ciências</orgName>
								<orgName type="laboratory">LASIGE</orgName>
								<orgName type="institution">Universidade de Lisboa</orgName>
								<address>
									<country key="PT">Portugal</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">Results of SemTab 2021 ⋆</title>
					</analytic>
					<monogr>
						<imprint>
							<date/>
						</imprint>
					</monogr>
					<idno type="MD5">11D59919A6F09F3C2E6F38AD94EACECC</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2023-03-19T15:28+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>Tabular data</term>
					<term>Knowledge Graphs</term>
					<term>Matching</term>
					<term>SemTab</term>
					<term>Semantic Web Challenge</term>
					<term>Semantic Table Interpretation</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>SemTab 2021 was the third edition of the Semantic Web Challenge on Tabular Data to Knowledge Graph Matching, successfully collocated with the 20th International Semantic Web Conference (ISWC) and the 16th Ontology Matching (OM) Workshop. SemTab provides a common framework to conduct a systematic evaluation of state-of-the-art systems.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1">Motivation</head><p>Data in tabular format are the most frequent input to data analytics pipeline, thanks to their high storage and processing efficiency. Also, the tabular format allows users to represent the information in a compacted way, by exploiting the clear data structure defined by rows and columns. However, such clear structure does not imply a clear understanding of the semantic structure (e.g., relationships between columns), as well as the meaning of the content (e.g., if data are about a specific topic). The lack of understanding hinders data analytics processes, requiring additional effort to properly understand the data first. Gaining the semantic understanding is valuable for many applications, including data cleaning, data mining, data integration, data analysis and machine learning, and knowledge discovery. For example, the semantic understanding can help in assessing what kind of transformations are more appropriate for a dataset, or which datasets can be integrated to enable new analytics (e.g., marketing analysis) <ref type="bibr" target="#b9">[10]</ref>.</p><p>In addition to their efficiency, the huge availability of tabular data on the Web makes Web tables a valuable source to consider for data miners (e.g., open data CSV files). Adding semantic information to Web tables is useful for a wide range of applications, including web search, question answering, and knowledge base construction.</p><p>Tabular data to Knowledge Graph (KG) matching is the process of clarifying the semantic meaning of a table by mapping its elements (i.e., cells, columns, rows) to semantic tags (i.e., entities, classes, properties) from KGs (e.g., Wikidata, DBpedia). 
The task difficulty increases when table metadata (e.g., table captions, table description, or column names) is missing, incomplete or ambiguous.</p><p>The tabular data to KG matching process is typically broken down into the following tasks: (i) cell to KG entity matching (CEA task), (ii) column to KG class matching (CTA task), and (iii) column pair to KG property matching (CPA task).</p><p>Over the last decade several approaches made advances in addressing one or several of the above tasks, also constructing benchmark datasets ( <ref type="bibr" target="#b17">[18,</ref><ref type="bibr" target="#b21">22,</ref><ref type="bibr" target="#b16">17,</ref><ref type="bibr" target="#b10">11]</ref>). The creation of SemTab<ref type="foot" target="#foot_0">1</ref>  <ref type="bibr" target="#b14">[15,</ref><ref type="bibr" target="#b15">16]</ref> aimed at putting this significant amount of work into a common framework, enabling the systematic evaluation of state-of-the-art systems. The ambition is to make SemTab become the reference challenge in the Semantic Web community, in the same way the OAEI<ref type="foot" target="#foot_1">2</ref> is for the Ontology Matching community. <ref type="foot" target="#foot_2">3</ref></p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2">The Challenge</head><p>The SemTab 2021 challenge has been organised into 3 different tracks: the Accuracy Track, which is the standard track proposed in previous editions; the Usability Track, a new track addressing the lack of publicly available, easy-to-use and generic solutions; and the Applications Track, which focuses on applications in real-world settings where the output of matching systems can contribute. The application track was also open to the submission of novel benchmark datasets.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.1">Accuracy Track</head><p>The Accuracy Track included 3 rounds, running from June 30 to October 15. Different target KGs were used across rounds (see Table <ref type="table" target="#tab_0">1</ref>):</p><p>-DBpedia <ref type="bibr" target="#b2">[3]</ref>: http://downloads.dbpedia.org/wiki-archive/</p><p>(version 2016-10) -Wikidata <ref type="bibr" target="#b23">[24]</ref>: https://zenodo.org/record/6153449 -Schema.org <ref type="bibr" target="#b11">[12]</ref>: https://gittables.github.io/downloads/sche ma 20210528.pkl</p><p>The different rounds of SemTab 2021 have been organised to evaluate participating systems on different datasets with variable difficulty. All the rounds were run with the support of AIcrowd;<ref type="foot" target="#foot_3">4</ref> SemTab 2021 also used the STILTool system <ref type="bibr" target="#b7">[8,</ref><ref type="bibr" target="#b4">5]</ref> for getting additional insights about the submitted solutions. Table <ref type="table" target="#tab_2">3</ref> shows the participation per round. Compared with previous editions, we had 11 participants (vs 28 in 2020) submitting to at least one round. <ref type="foot" target="#foot_5">6</ref> We identified 6 core participants (vs 8 in 2020), which completed ∼14 tasks on average (out of 17 tasks). Seven participants submitted a system paper to the challenge: <ref type="bibr" target="#b25">[26]</ref>, Kepler-aSI <ref type="bibr" target="#b5">[6]</ref>, and DAGOBAH <ref type="bibr" target="#b13">[14]</ref>.</p><formula xml:id="formula_0">MTab [19], MAGIC [23], MantisTable V [4], JenTab [1], GBMTab</formula><p>Evaluation measures As per the previous editions, systems have been evaluated on a single annotation for each provided target, for all the tasks; i.e., in CEA, target cells are to be annotated with a single entity from the target KG; in CTA, target columns are to be annotated with a single type from the target KG (as fine-grained as possible).  
The evaluation measures for CEA, CPA and CTA (DBpedia and Schema.org) are the standard Precision, Recall and F1-score, as defined in Equation <ref type="formula" target="#formula_1">1</ref>:</p><formula xml:id="formula_1">P = |Correct Annotations| |System Annotations| , R = |Correct Annotations| |Target Annotations| , F 1 = 2 × P × R P + R<label>(1)</label></formula><p>where target annotations refer to the target cells for CEA, the target columns for CTA, and the target column pairs for CPA. We consider an annotation as correct when it is included within the ground truth set (a target cell usually has multiple annotations in the ground truth, because of redirect and same-as links in KGs).</p><p>Given the fine-grained type hierarchy in Wikidata, we adopted approximations of Precision and Recall in the CTA evaluation. Approximations adapt their numerators to consider partially correct annotations, i.e., annotations that are ancestors or descendants of the ground truth (GT) classes. The correctness score cscore of a CTA annotation α considers the distance between the annotation and the GT classes in the type hierarchy, and it is defined as </p><formula xml:id="formula_2">cscore(α) =      0.8 d(α) , if α is in GT, or an ancestor of the GT, with d(α) ≤ 5 0.7 d(α) , if α is a descendant of the GT, with d(α) ≤ 3 0, otherwise;<label>(2)</label></formula><formula xml:id="formula_3">AP = cscore(α) |System Annotations| , AR = cscore(α) |Target Annotations| , AF 1 = 2 × AP × AR AP + AR<label>(3)</label></formula><p>Results Table <ref type="table" target="#tab_3">4</ref> contains the average F1-score achieved by the 11 participating systems. The Tough Tables dataset still represents a challenge for almost all the systems, especially considering the fact that the dataset is the same as in SemTab 2020. The BiodivTab and GitTables datasets brought additional complexity in Round 3, highlighting that real-world tables are challenging.</p><p>CEA task. 
Results for the CEA task are reported in Figure <ref type="figure">1</ref> for all the datasets. Round 1 used the same 2T tables from last year's edition, 7 raising the difficulty bar at the very beginning. Most of the systems faced important challenges when dealing with 2T tables, with only 2 systems managing to achieve an F1-score over 0.8 and several of them participating in only one of the tasks. It is worth noting the work of the DAGOBAH team, which improved their system over the last year, being able to achieve higher scores on 2T this year. Starting from Round 2, systems have been evaluated on datasets never seen before. The AG datasets aimed at bringing new challenges in each round, and we can observe that only the best systems managed to maintain almost the same score on the two different versions of this dataset. Concerning bio-related datasets, performance in Round 2 was positive (slightly below 0.9 on average), confirming that tables with many rows (∼2,500 on average) do not represent a problem for most of the systems. Instead, the complexity brought by the (relatively small) tables in the BiodivTab dataset represented a new problem to solve, showing significantly reduced performance (none of the systems scored over 0.6). The JenTab system ranked 1 st over a very difficult dataset. It is worth noting, however, that members of the JenTab team are also the providers of the BiodivTab dataset.</p><p>CTA task. As shown in Figure <ref type="figure">2</ref>, the results in the CTA tasks resemble the trend already seen from the CEA results. This is an indicator that most of the systems solve the CTA tasks based on annotations found in the CEA. Additional challenges have been included in Round 3 with the GitTables dataset, where we can see a critical performance drop for all the involved systems. 
It is worth emphasising that, given the general picture provided by the results in CTA, more research is needed to make existing systems able to deal with real-world tables, where the cells may be missing a correspondence to the target KG.</p><p>CPA task. Results for the CPA tasks are plotted in Figure <ref type="figure">3</ref>. Currently, only BioTables and the AG datasets provide a GT for CPA. Results are overall positive for all the tasks, with a general improvement from Round 2 to Round 3 for all the involved systems, except for MAGIC, whose performance dropped a bit during the last round. Fig. <ref type="figure">3</ref>: Results in the CPA task for the core participants.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.2">Usability Track</head><p>Starting from SemTab 2021, the organisation committee agreed to include a new track focusing on system usability. The main goal of this track is to mitigate a pain point in the community: the lack of publicly available, easy-to-use, and generic solutions that will address the needs of a variety of applications and settings.</p><p>Evaluation measures Deeply evaluating the usability of a system requires user studies to monitor different parameters <ref type="bibr" target="#b20">[21]</ref>. Within the SemTab scope, we decided to simply verify the overall usability of tools as judged by a review panel. Participants' solutions were examined for the following criteria:</p><p>-Open source: open-source solutions make a great contribution to the community, especially when released with a permissive license. Publicly available resources can be used as a starting point for new tools or research investigations, and make experiments easily reproducible. -System dependencies: some tools may require specific platforms to be executed on premises, or have a huge resource consumption that may affect the use in common settings. For example, requiring many indexes/databases may prevent the usage of a tool by users with limited access to hardware. -Model generality: a tool may be considered general when it applies to different (and new) applications/domains, requiring near-zero adaptations; for example, tools employing machine learning techniques should not require extensive training and tuning to be adapted to different contexts. -Availability: tools may not be released as open source, but offered as publicly available services. In this case, a tool served as a public service supports further research activities, and represents a big contribution to the community. 
-User experience: the purpose of a tool is to help people in solving a task; for this reason, semantic table to graph matching tools should come with a well-designed user interface that makes the tool usable also by practitioners with limited experience in semantic matching. That is, the tool should not require extensive training to be mastered. </p><formula xml:id="formula_4">✓ ✓ MAGIC ✓ ✓ DAGOBAH ✓ MantisTable V ✓ ✓ JenTab ✓ Kepler-aSI</formula><p>Results Almost all the core participants obtained good results in this track, by performing well on one or more of the above evaluation criteria. Evaluation details are reported in Table <ref type="table" target="#tab_4">5</ref>. We exclude system dependencies and model generality because of the insufficient available evidence, which resulted in these two criteria not impacting the overall assessment strongly. Indeed, available data about system performance (i.e., accuracy) with reference to the different datasets and target KGs used in SemTab rounds do not allow us to draw any consistent conclusions. For example, it is not clear if tools were customized or tweaked (e.g., changing the lookup function for noisy data) to increase their accuracy in different rounds; we are not able to assess how hard a system adapts to a different context (e.g., changing the target KG).</p><p>The evaluation panel concluded that most of the tools are pre-configured and can potentially be used out of the box: for example, JenTab has been packaged in Docker containers to ease the deployment and execution of the tool on local premises. In general, tools' requirements vary in complexity, but they are reasonable overall (e.g., preprocessing required, like creating new indexes or embeddings).</p><p>Considering the other criteria, JenTab is the only system released as open source under a permissive license (Apache 2.0). 
The MTab tool has been made publicly available as a Web service, free to use (MIT license); but the back-end application has not been disclosed. However, having a public API enables MTab to serve third-party applications (with no rate limit), and this was a key point in declaring MTab the most usable tool. Systems like DAGOBAH and MantisTable delivered a framework with impressive GUIs, while others (e.g., MAGIC) opted for a lightweight application.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.3">Applications Track</head><p>This new track aims at addressing applications in real-world settings that take advantage of the output of the matching systems. Challenging dataset proposals have also been accepted and included within the SemTab 2021 rounds.</p><p>Results A specific application has been identified within the biological domain, where new data are constantly produced thanks to the advances in the field. The domain is particularly challenging from the semantics standpoint because of the complexity of the biological relations between entities. Within SemTab, the data representation significantly impacts the systems' performance since entities are usually represented by codes (e.g., chemical formulas or gene names). Two different datasets have been submitted related to the biological domain; the first one, BioTables, is a dataset focused on molecular biology data; the second, BiodivTab, is a dataset focused on biodiversity research data and data augmentation.</p><p>Alongside the above domain, a different dataset has been submitted to this track and also included in Round 3, GitTables. This dataset includes relational tables extracted from CSV files hosted at GitHub, and it comes with a peculiarity: the GT for CTA uses a mixture of classes and properties to annotate columns (both for the DBpedia and Schema.org versions).</p><p>The three datasets brought new complexity and contributed to increasing the data diversity among the SemTab benchmark datasets.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.4">Prizes</head><p>As in previous editions, IBM Research<ref type="foot" target="#foot_6">8</ref> sponsored SemTab 2021 and awarded the best systems in each track with the following prizes:</p><p>-Accuracy Track: DAGOBAH (1 st prize) was the top system in most of the tasks, showing appreciable improvements over the last years. Honorary mention to MTab. -Usability Track: MTab team (1 st prize), for providing the easy-to-use MTab tool <ref type="foot" target="#foot_7">9</ref>along with Web services to look up entities and annotate tables; JenTab (2 nd prize), for being the only open-source system with a permissive license. Honorary mentions to DAGOBAH, MAGIC and MantisTable. -Applications Track: BiodivTab dataset (1 st prize), for having brought new challenges in CEA and CTA tasks. Honorary mention to GitTables.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3">Lessons Learned and Future Work</head><p>Avoiding over-fitting to AG. We have been using the same automated dataset generation process, with some variations that make it more challenging, since the first SemTab challenge. This may be resulting in participating systems that explicitly target datasets with characteristics similar to those of the AG datasets. This becomes evident from the almost perfect results shown in Table <ref type="table" target="#tab_3">4</ref>. For that reason, this year we have introduced several new datasets, while we are also planning to use as much as possible real data, rather than synthetic, in the future versions of the challenge.</p><p>System generalizability beyond KGs. Many systems currently rely on matching table values to entities in KGs. In this version of SemTab, we challenged the participating systems on their ability to detect the semantic types of table columns even when their values are not linkable to KG entities. We conclude that most systems do not generalize well in this scenario as indicated by the performance drop on the CTA task for GitTables (see Section 2.1). Improving systems to this end would make them useful for expanding KG coverage by matching tables from novel data sources to KGs in order to populate the "unknown unknowns" <ref type="bibr" target="#b24">[25]</ref>. This generalizability would also benefit the applicability of the systems in offline databases. We plan to encourage and evaluate systems on their generalizability towards novel data sources in future versions of SemTab.</p><p>CTA vs CPA: the case of GitTables. Since the first edition of SemTab, we are used to considering CTA and CPA as two separate tasks, the former focuses on ontology classes, and the latter is dedicated to properties. However, GitTables annotations for CTA also include properties from DBpedia and Schema.org. 
The rationale behind this choice stands in the relational nature of the considered tables: columns typically correspond to the attributes of an entity, which are reflected by properties in DBpedia and Schema.org, for example. Also, this choice is very useful when annotating literal columns (i.e., columns not containing mentions of entities), avoiding annotations based on datatypes (e.g., xsd:string). Therefore, GitTables introduced a new technical challenge, which potentially contributed to the complexity observed from the results in Figure <ref type="figure">2</ref>. The case of GitTables may result in a new task to accomplish in the future, given that it enables table-to-KG matching with tables from alternative data sources and contexts (e.g., database dumps from industry).</p><p>Usability track. We believe that the introduction of the usability track has contributed to making participating systems publicly accessible. Our goal was exactly to encourage this, despite the competitive nature that a challenge may have. Thus, we consider this new track to be a very important one and we are planning to keep it in the next challenges. Next SemTab editions may consider improving the evaluation of this track, for example by adopting the System Usability Scale (SUS) <ref type="bibr" target="#b6">[7]</ref> to score the overall user experience. In particular, developing a systematic way to evaluate systems' generality and dependencies would definitely improve the evaluation of this track.</p><p>Applications track. We believe that the call of the application track has attracted more attention from the community by introducing their own datasets. Contributions from the community like BiodivTab, BioTable and GitTables help in extending the SemTab benchmark with new real-world challenges that are hard to reproduce in synthetic datasets such as AG. 
Thus, this new track has been an important addition to SemTab.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head>Fig. 1 :Fig. 2 :</head><label>12</label><figDesc>Fig. 1: Results in the CEA task for the core participants. MTab results on 2T are from 2020.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1 :</head><label>1</label><figDesc>Datasets used across SemTab 2021 rounds.</figDesc><table><row><cell></cell><cell>Rounds</cell><cell></cell><cell></cell><cell>Tasks</cell><cell></cell><cell cols="2">Target KGs</cell></row><row><cell></cell><cell cols="7">R1 R2 R3 CTA CPA CEA DBpedia Wikidata Schema.org</cell></row><row><cell>2T [9]</cell><cell>✓</cell><cell></cell><cell>✓</cell><cell></cell><cell>✓</cell><cell>✓</cell><cell>✓</cell></row><row><cell>BioTable [20]</cell><cell>✓</cell><cell></cell><cell>✓</cell><cell>✓</cell><cell>✓</cell><cell></cell><cell>✓</cell></row><row><cell>AG [15]</cell><cell>✓</cell><cell>✓</cell><cell>✓</cell><cell>✓</cell><cell>✓</cell><cell></cell><cell>✓</cell></row><row><cell>BiodivTab [2]</cell><cell></cell><cell>✓</cell><cell>✓</cell><cell></cell><cell>✓</cell><cell></cell><cell>✓</cell></row><row><cell>GitTables [13]</cell><cell></cell><cell>✓</cell><cell>✓</cell><cell></cell><cell></cell><cell>✓</cell><cell>✓</cell></row><row><cell cols="8">Datasets The different datasets used to run SemTab 2021 rounds are reported in Ta-</cell></row><row><cell cols="8">ble 1, with some statistics available in Table 2. All the datasets are available in Zenodo:</cell></row><row><cell cols="8">-Tough Tables (2T): a dataset featuring high-quality manually-curated tables with</cell></row><row><cell cols="8">non-obviously linkable cells, i.e., where values are ambiguous names, typos, and</cell></row><row><cell cols="8">misspelled entity names. 
These challenges are particularly relevant for the annota-</cell></row><row><cell cols="6">tion of structured legacy sources to existing KGs.</cell><cell></cell></row><row><cell cols="7">Link: https://doi.org/10.5281/zenodo.6211551</cell></row><row><cell cols="8">-BioTable: a dataset focused on molecular biology data covering different entities.</cell></row><row><cell cols="7">It has the larges number of rows per table in the challenge.</cell></row><row><cell cols="7">Link: https://doi.org/10.5281/zenodo.5606585</cell></row><row><cell cols="8">-Automatically Generated (AG): 5 a synthetic dataset with tables generated automat-</cell></row><row><cell cols="8">ically by means of SPARQL queries. AG is the largest dataset used in SemTab.</cell></row><row><cell cols="7">Link: https://zenodo.org/record/6154708</cell></row><row><cell cols="8">-BiodivTab: a dataset with tables from real-world biodiversity research datasets.</cell></row><row><cell cols="7">Original tables have been adapted for the SemTab challenge.</cell></row><row><cell cols="7">Link: https://doi.org/10.5281/zenodo.5584180</cell></row><row><cell cols="8">-GitTables: a large-scale corpus of relational tables extracted from CSV files in</cell></row><row><cell cols="8">GitHub. The main purpose of this dataset is to facilitate learning table represen-</cell></row><row><cell cols="8">tation models and applications in e.g., data management. A subset of tables has</cell></row><row><cell cols="8">been curated for benchmarking column type detection methods in SemTab.</cell></row><row><cell cols="7">Link: https://doi.org/10.5281/zenodo.5706316</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_1"><head>Table 2 :</head><label>2</label><figDesc>Statistics of the datasets in each SemTab 2021 round. For target values: W=Wikidata; D=DBpedia; S=Schema.org.</figDesc><table><row><cell></cell><cell>AG</cell><cell></cell><cell>2T</cell><cell>BioTables</cell><cell cols="2">BiodivTab GitTables</cell></row><row><cell></cell><cell cols="2">Round 2 Round 3</cell><cell>Round 1</cell><cell>Round 2</cell><cell>Round 3</cell><cell>Round 3</cell></row><row><cell>Tables #</cell><cell cols="2">1,750.00 7,207.00</cell><cell>180.00</cell><cell>110.00</cell><cell>50.00</cell><cell>1,101.00</cell></row><row><cell>Avg. Rows # (total)</cell><cell>16.73</cell><cell>8.18</cell><cell>1,080.21</cell><cell>2,449.08</cell><cell>259.06</cell><cell>58.20</cell></row><row><cell>Avg. Cols # (total)</cell><cell>3.19</cell><cell>2.48</cell><cell>4.46</cell><cell>5.97</cell><cell>23.96</cell><cell>15.87</cell></row><row><cell>Avg. Rows # (target CEA)</cell><cell>16.73W</cell><cell>8.18W</cell><cell>1, 080.19D 1, 080.21W</cell><cell>2, 449.08W</cell><cell>258.28W</cell><cell></cell></row><row><cell>Avg. Cols # (target CEA)</cell><cell>1.65W</cell><cell>1.00W</cell><cell>3.00D 3.00W</cell><cell>5.97W</cell><cell>13.60W</cell><cell></cell></row><row><cell>Avg. Cols # (target CTA)</cell><cell>1.25W</cell><cell>1.00W</cell><cell>3.00D 3.00W</cell><cell>5.97W</cell><cell>12.28W</cell><cell>3.08D 2.62S</cell></row><row><cell>Avg. Cols # (target CPA)</cell><cell>3.19W</cell><cell>2.48W</cell><cell></cell><cell>5.97W</cell><cell></cell><cell></cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_2"><head>Table 3 :</head><label>3</label><figDesc>Participation in the SemTab 2021 challenge.</figDesc><table><row><cell></cell><cell>Round 1</cell><cell>Round 2</cell><cell></cell><cell></cell><cell>Round 3</cell><cell></cell></row><row><cell></cell><cell>2T</cell><cell cols="5">BioTable AG AG BiodivTab GitTables</cell></row><row><cell>CEA</cell><cell>5D 7W</cell><cell>6</cell><cell>6</cell><cell>5</cell><cell>5</cell><cell>-</cell></row><row><cell>CTA</cell><cell>3D 7W</cell><cell>7</cell><cell>6</cell><cell>6</cell><cell>6</cell><cell>4D 2S</cell></row><row><cell>CPA</cell><cell>-</cell><cell>6</cell><cell>6</cell><cell>5</cell><cell>-</cell><cell>-</cell></row><row><cell>Total</cell><cell>11</cell><cell>7</cell><cell>6</cell><cell>6</cell><cell>6</cell><cell>4</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_3"><head>Table 4 :</head><label>4</label><figDesc>Average F1-score considering the 11 participating systems. We included MTab results for 2T from SemTab 2020.</figDesc><table><row><cell></cell><cell>Round 1</cell><cell cols="2">Round 2</cell><cell></cell><cell>Round 3</cell></row><row><cell></cell><cell>2T</cell><cell cols="2">BioTable AG</cell><cell cols="3">AG BiodivTab GitTables</cell></row><row><cell>CEA</cell><cell>0.51D 0.52W</cell><cell>0.82</cell><cell cols="2">0.91 0.90</cell><cell>0.41</cell><cell>-</cell></row><row><cell>CTA</cell><cell>0.35D 0.53W</cell><cell>0.78</cell><cell cols="2">0.91 0.80</cell><cell>0.23</cell><cell>0.04D 0.19S</cell></row><row><cell>CPA</cell><cell>-</cell><cell>0.88</cell><cell cols="2">0.96 0.95</cell><cell>-</cell><cell>-</cell></row></table><note>where d(α) is the shortest distance to one of the GT classes (as for CEA, also CTA GT columns may have multiple classes). For example, d(α) = 0 if α is a class in the ground truth (cscore(α) = 1), and d(α) = 2 if α is a grandchild of a class in the ground truth (cscore(α) = 0.49). Types in the higher level(s) of the KG type hierarchy are not considered in the GT (e.g., Q35120 [entity] in Wikidata). Given the correctness score cscore, approximated Precision (AP), Recall (AR), and F1-score (AF1) for the CTA evaluation are as follows:</note></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_4"><head>Table 5 :</head><label>5</label><figDesc>Usability evaluation details.</figDesc><table><row><cell>Open source</cell><cell>Availability as a Service</cell><cell>User Experience (GUI)</cell></row><row><cell>MTab</cell><cell></cell><cell></cell></row></table></figure>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="1" xml:id="foot_0">http://www.cs.ox.ac.uk/isg/challenges/sem-tab/</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="2" xml:id="foot_1">http://oaei.ontologymatching.org/</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="3" xml:id="foot_2">http://ontologymatching.org/</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="4" xml:id="foot_3">https://www.aicrowd.com/</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="5" xml:id="foot_4">In SemTab 2021, also referred to as Hard Tables.</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="6" xml:id="foot_5">AIcrowd leaderboard scores 23 participants because of test submissions.</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="8" xml:id="foot_6">https://www.research.ibm.com/</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="9" xml:id="foot_7">https://github.com/phucty/mtab tool</note>
		</body>
		<back>

			<div type="acknowledgement">
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Acknowledgements</head><p>We would like to thank the challenge participants, the ISWC &amp; OM organisers, the AIcrowd team, and our sponsor IBM Research that played a key role in the success of SemTab. We also thank Paul Groth and Çağatay Demiralp for their contributions to GitTables. Moreover, we would like to thank Sirko Schindler and Birgitta König-Ries for their contribution to BiodivTab. This work was also supported by the SIRIUS Centre for Scalable Data Access (Research Council of Norway), Samsung Research UK, the EPSRC projects UK FIRES and ConCur, and the HFRI project ResponsibleER (No 969). DO and CP were supported by FCT through LASIGE (UIDB/00408/2020 and UIDP/00408/2020). We would also like to acknowledge that the work of the challenge organisers was greatly simplified by using the EasyChair conference management system and the CEUR-WS.org open-access publication service.</p></div>
			</div>

			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">JenTab Meets SemTab 2021&apos;s New Challenges</title>
		<author>
			<persName><forename type="first">N</forename><surname>Abdelmageed</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Schindler</surname></persName>
		</author>
		<ptr target="https://ceur-ws.org" />
	</analytic>
	<monogr>
		<title level="m">Semantic Web Challenge on Tabular Data to Knowledge Graph Matching (SemTab)</title>
				<imprint>
			<publisher>CEUR-WS</publisher>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">BiodivTab: A Tabular Benchmark based on Biodiversity Research Data</title>
		<author>
			<persName><forename type="first">N</forename><surname>Abdelmageed</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Schindler</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>König-Ries</surname></persName>
		</author>
		<ptr target="https://ceur-ws.org" />
	</analytic>
	<monogr>
		<title level="m">Semantic Web Challenge on Tabular Data to Knowledge Graph Matching (SemTab)</title>
				<imprint>
			<publisher>CEUR-WS</publisher>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">DBpedia: A Nucleus for a Web of Open Data</title>
		<author>
			<persName><forename type="first">S</forename><surname>Auer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Bizer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Kobilarov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Lehmann</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Cyganiak</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Ives</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">The Semantic Web</title>
				<meeting><address><addrLine>Berlin Heidelberg</addrLine></address></meeting>
		<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2007">2007</date>
			<biblScope unit="page" from="722" to="735" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">MantisTable V: A novel and efficient approach to Semantic Table Interpretation</title>
		<author>
			<persName><forename type="first">R</forename><surname>Avogadro</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Cremaschi</surname></persName>
		</author>
		<ptr target="https://ceur-ws.org" />
	</analytic>
	<monogr>
		<title level="m">Semantic Web Challenge on Tabular Data to Knowledge Graph Matching (SemTab)</title>
				<imprint>
			<publisher>CEUR-WS</publisher>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">A Framework for Quality Assessment of Semantic Annotations of Tabular Data</title>
		<author>
			<persName><forename type="first">R</forename><surname>Avogadro</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Cremaschi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Jiménez-Ruiz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Rula</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">20th International Semantic Web Conference (ISWC)</title>
				<imprint>
			<date type="published" when="2021">2021</date>
			<biblScope unit="page" from="528" to="545" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">Kepler-aSI at SemTab</title>
		<author>
			<persName><forename type="first">W</forename><surname>Baazouzi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Kachroudi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Faiz</surname></persName>
		</author>
		<ptr target="https://ceur-ws.org" />
	</analytic>
	<monogr>
		<title level="m">Semantic Web Challenge on Tabular Data to Knowledge Graph Matching (SemTab)</title>
				<imprint>
			<publisher>CEUR-WS</publisher>
			<date type="published" when="2021">2021. 2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">SUS: a &apos;quick and dirty&apos; usability scale</title>
		<author>
			<persName><forename type="first">J</forename><surname>Brooke</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Usability evaluation in industry</title>
		<imprint>
			<biblScope unit="volume">189</biblScope>
			<biblScope unit="issue">3</biblScope>
			<date type="published" when="1996">1996</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">STILTool: A Semantic Table Interpretation evaLuation Tool</title>
		<author>
			<persName><forename type="first">M</forename><surname>Cremaschi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Siano</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Avogadro</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Jiménez-Ruiz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Maurino</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">ESWC 2020 Satellite Events</title>
				<imprint>
			<date type="published" when="2020">2020</date>
			<biblScope unit="page" from="61" to="66" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">Tough Tables: Carefully Evaluating Entity Linking for Tabular Data</title>
		<author>
			<persName><forename type="first">V</forename><surname>Cutrona</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Bianchi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Jiménez-Ruiz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Palmonari</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">19th International Semantic Web Conference (ISWC)</title>
				<imprint>
			<date type="published" when="2020">2020</date>
			<biblScope unit="page" from="328" to="343" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">Semantically-Enabled Optimization of Digital Marketing Campaigns</title>
		<author>
			<persName><forename type="first">V</forename><surname>Cutrona</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><forename type="middle">D</forename><surname>Paoli</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Košmerlj</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Nikolov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Palmonari</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Perales</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Roman</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">International Semantic Web Conference (ISWC)</title>
				<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2019">2019</date>
			<biblScope unit="page" from="345" to="362" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">Matching Web Tables with Knowledge Base Entities: From Entity Lookups to Entity Embeddings</title>
		<author>
			<persName><forename type="first">V</forename><surname>Efthymiou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Hassanzadeh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Rodriguez-Muro</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Christophides</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">ISWC</title>
				<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2017">2017</date>
			<biblScope unit="volume">10587</biblScope>
			<biblScope unit="page" from="260" to="277" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title level="a" type="main">Schema.Org: Evolution of Structured Data on the Web</title>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">V</forename><surname>Guha</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Brickley</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Macbeth</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Commun. ACM</title>
		<imprint>
			<biblScope unit="volume">59</biblScope>
			<biblScope unit="issue">2</biblScope>
			<biblScope unit="page" from="44" to="51" />
			<date type="published" when="2016-01">jan 2016</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<monogr>
		<title level="m" type="main">GitTables: A Large-Scale Corpus of Relational Tables</title>
		<author>
			<persName><forename type="first">M</forename><surname>Hulsebos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Ç</forename><surname>Demiralp</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Groth</surname></persName>
		</author>
		<idno>CoRR, abs/2106.07258</idno>
		<imprint>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b13">
	<analytic>
		<title level="a" type="main">DAGOBAH: Table and Graph Contexts For Efficient Semantic Annotation Of Tabular Data</title>
		<author>
			<persName><forename type="first">V.-P</forename><surname>Huynh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Liu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Chabot</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Deuzé</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Labbé</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Monnin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Troncy</surname></persName>
		</author>
		<ptr target="https://ceur-ws.org" />
	</analytic>
	<monogr>
		<title level="m">Semantic Web Challenge on Tabular Data to Knowledge Graph Matching (SemTab)</title>
				<imprint>
			<publisher>CEUR-WS</publisher>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b14">
	<analytic>
		<title level="a" type="main">Resources to Benchmark Tabular Data to Knowledge Graph Matching Systems</title>
		<author>
			<persName><forename type="first">E</forename><surname>Jimenez-Ruiz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Hassanzadeh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Efthymiou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Chen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Srinivas</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">The Semantic Web: ESWC</title>
				<imprint>
			<publisher>Springer International Publishing</publisher>
			<date type="published" when="2019">2019. 2020</date>
		</imprint>
	</monogr>
	<note>SemTab</note>
</biblStruct>

<biblStruct xml:id="b15">
	<analytic>
		<title level="a" type="main">Results of SemTab</title>
		<author>
			<persName><forename type="first">E</forename><surname>Jiménez-Ruiz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Hassanzadeh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Efthymiou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Chen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Srinivas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Cutrona</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Semantic Web Challenge on Tabular Data to Knowledge Graph Matching co-located with the 19th International Semantic Web Conference (ISWC 2020)</title>
				<meeting>the Semantic Web Challenge on Tabular Data to Knowledge Graph Matching co-located with the 19th International Semantic Web Conference (ISWC 2020)</meeting>
		<imprint>
			<date type="published" when="2020">2020. 2020</date>
			<biblScope unit="page" from="1" to="8" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b16">
	<analytic>
		<title level="a" type="main">A large public corpus of web tables containing time and context metadata</title>
		<author>
			<persName><forename type="first">O</forename><surname>Lehmberg</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Ritze</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Meusel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Bizer</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">WWW</title>
				<imprint>
			<date type="published" when="2016">2016</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b17">
	<analytic>
		<title level="a" type="main">Annotating and searching web tables using entities, types and relationships</title>
		<author>
			<persName><forename type="first">G</forename><surname>Limaye</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Sarawagi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Chakrabarti</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">VLDB Endowment</title>
		<imprint>
			<biblScope unit="volume">3</biblScope>
			<biblScope unit="issue">1-2</biblScope>
			<biblScope unit="page" from="1338" to="1347" />
			<date type="published" when="2010">2010</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b18">
	<analytic>
		<title level="a" type="main">Tabular Data Annotation with MTab Tool</title>
		<author>
			<persName><forename type="first">P</forename><surname>Nguyen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Yamada</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Kertkeidkachorn</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Ichise</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Takeda</surname></persName>
		</author>
		<author>
			<persName><surname>Semtab</surname></persName>
		</author>
		<ptr target="https://ceur-ws.org" />
	</analytic>
	<monogr>
		<title level="m">Semantic Web Challenge on Tabular Data to Knowledge Graph Matching (SemTab)</title>
				<imprint>
			<publisher>CEUR-WS</publisher>
			<date type="published" when="2021">2021. 2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b19">
	<analytic>
		<title level="a" type="main">SemTab</title>
		<author>
			<persName><forename type="first">D</forename><surname>Oliveira</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Pesquita</surname></persName>
		</author>
		<idno type="DOI">10.5281/zenodo.5606585</idno>
	</analytic>
	<monogr>
		<title level="j">BioTable Dataset</title>
		<imprint>
			<date type="published" when="2021-10">2021. Oct. 2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b20">
	<analytic>
		<title level="a" type="main">A framework to conduct and report on empirical user studies in semantic web contexts</title>
		<author>
			<persName><forename type="first">C</forename><surname>Pesquita</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Ivanova</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Lohmann</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Lambrix</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">European Knowledge Acquisition Workshop</title>
				<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2018">2018</date>
			<biblScope unit="page" from="567" to="583" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b21">
	<analytic>
		<title level="a" type="main">Matching HTML Tables to DBpedia</title>
		<author>
			<persName><forename type="first">D</forename><surname>Ritze</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Lehmberg</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Bizer</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 5th International Conference on Web Intelligence, Mining and Semantics, WIMS</title>
				<meeting>the 5th International Conference on Web Intelligence, Mining and Semantics, WIMS</meeting>
		<imprint>
			<publisher>ACM</publisher>
			<date type="published" when="2015">2015</date>
			<biblScope unit="volume">10</biblScope>
			<biblScope unit="page">6</biblScope>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b22">
	<analytic>
		<title level="a" type="main">MAGIC: Mining an Augmented Graph using INK, starting from a CSV</title>
		<author>
			<persName><forename type="first">B</forename><surname>Steenwinckel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><forename type="middle">D</forename><surname>Turck</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Ongenae</surname></persName>
		</author>
		<ptr target="https://ceur-ws.org" />
	</analytic>
	<monogr>
		<title level="m">Semantic Web Challenge on Tabular Data to Knowledge Graph Matching (SemTab)</title>
				<imprint>
			<publisher>CEUR-WS</publisher>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b23">
	<analytic>
		<title level="a" type="main">Wikidata: a free collaborative knowledge base</title>
		<author>
			<persName><forename type="first">D</forename><surname>Vrandečić</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Krötzsch</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Commun. ACM</title>
		<imprint>
			<biblScope unit="volume">57</biblScope>
			<biblScope unit="issue">10</biblScope>
			<biblScope unit="page" from="78" to="85" />
			<date type="published" when="2014">2014</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b24">
	<analytic>
		<title level="a" type="main">Knowledge Graphs 2021: A Data Odyssey</title>
		<author>
			<persName><forename type="first">G</forename><surname>Weikum</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. VLDB Endow</title>
				<meeting>VLDB Endow</meeting>
		<imprint>
			<date type="published" when="2021">2021</date>
			<biblScope unit="volume">14</biblScope>
			<biblScope unit="page" from="3233" to="3238" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b25">
	<analytic>
		<title level="a" type="main">GBMTab: A Graph-Based Method for Interpreting Semantic Table to Knowledge Graph</title>
		<author>
			<persName><forename type="first">L</forename><surname>Yang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Shen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Ding</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Jin</surname></persName>
		</author>
		<ptr target="https://ceur-ws.org" />
	</analytic>
	<monogr>
		<title level="m">Semantic Web Challenge on Tabular Data to Knowledge Graph Matching (SemTab)</title>
				<imprint>
			<publisher>CEUR-WS</publisher>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
