<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">A Principled Approach to Data Integration and Reconciliation in Data Warehousing</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Diego</forename><surname>Calvanese</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">Dipartimento di Informatica e Sistemistica</orgName>
								<orgName type="institution">Università di Roma &quot;La Sapienza&quot;</orgName>
								<address>
									<addrLine>Via Salaria 113</addrLine>
									<postCode>00198</postCode>
									<settlement>Roma</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
							<affiliation key="aff0">
								<orgName type="department">Dipartimento di Informatica e Sistemistica</orgName>
								<orgName type="institution">Università di Roma &quot;La Sapienza&quot;</orgName>
								<address>
									<addrLine>Via Salaria 113</addrLine>
									<postCode>00198</postCode>
									<settlement>Roma</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Giuseppe</forename><surname>De Giacomo</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">Dipartimento di Informatica e Sistemistica</orgName>
								<orgName type="institution">Università di Roma &quot;La Sapienza&quot;</orgName>
								<address>
									<addrLine>Via Salaria 113</addrLine>
									<postCode>00198</postCode>
									<settlement>Roma</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
							<affiliation key="aff0">
								<orgName type="department">Dipartimento di Informatica e Sistemistica</orgName>
								<orgName type="institution">Università di Roma &quot;La Sapienza&quot;</orgName>
								<address>
									<addrLine>Via Salaria 113</addrLine>
									<postCode>00198</postCode>
									<settlement>Roma</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Maurizio</forename><surname>Lenzerini</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">Dipartimento di Informatica e Sistemistica</orgName>
								<orgName type="institution">Università di Roma &quot;La Sapienza&quot;</orgName>
								<address>
									<addrLine>Via Salaria 113</addrLine>
									<postCode>00198</postCode>
									<settlement>Roma</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
							<affiliation key="aff0">
								<orgName type="department">Dipartimento di Informatica e Sistemistica</orgName>
								<orgName type="institution">Università di Roma &quot;La Sapienza&quot;</orgName>
								<address>
									<addrLine>Via Salaria 113</addrLine>
									<postCode>00198</postCode>
									<settlement>Roma</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Daniele</forename><surname>Nardi</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">Dipartimento di Informatica e Sistemistica</orgName>
								<orgName type="institution">Università di Roma &quot;La Sapienza&quot;</orgName>
								<address>
									<addrLine>Via Salaria 113</addrLine>
									<postCode>00198</postCode>
									<settlement>Roma</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
							<affiliation key="aff0">
								<orgName type="department">Dipartimento di Informatica e Sistemistica</orgName>
								<orgName type="institution">Università di Roma &quot;La Sapienza&quot;</orgName>
								<address>
									<addrLine>Via Salaria 113</addrLine>
									<postCode>00198</postCode>
									<settlement>Roma</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Riccardo</forename><surname>Rosati</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">Dipartimento di Informatica e Sistemistica</orgName>
								<orgName type="institution">Università di Roma &quot;La Sapienza&quot;</orgName>
								<address>
									<addrLine>Via Salaria 113</addrLine>
									<postCode>00198</postCode>
									<settlement>Roma</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
							<affiliation key="aff0">
								<orgName type="department">Dipartimento di Informatica e Sistemistica</orgName>
								<orgName type="institution">Università di Roma &quot;La Sapienza&quot;</orgName>
								<address>
									<addrLine>Via Salaria 113</addrLine>
									<postCode>00198</postCode>
									<settlement>Roma</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">A Principled Approach to Data Integration and Reconciliation in Data Warehousing</title>
					</analytic>
					<monogr>
						<imprint>
							<date/>
						</imprint>
					</monogr>
					<idno type="MD5">49AD8BB7F0C89C54C679A38A117A7A2E</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2023-03-24T08:40+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>Integration is one of the most important aspects of a Data Warehouse. When data passes from the sources of the application-oriented operational environment to the Data Warehouse, possible inconsistencies and redundancies should be resolved, so that the warehouse is able to provide an integrated and reconciled view of data of the organization. We describe a novel approach to data integration and reconciliation, based on a conceptual representation of the Data Warehouse application domain. The main idea is to declaratively specify suitable matching, conversion, and reconciliation operations to be used in order to solve possible conflicts among data in different sources. Such a specification is provided in terms of the conceptual model of the application, and is effectively used during the design of the software modules that load the data from the sources into the Data Warehouse.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1">Introduction</head><p>Information Integration is the problem of acquiring data from a set of sources that are available for the application of The copyright of this paper belongs to the paper's authors. Permission to copy without fee all or part of this material is granted provided that the copies are not made or distributed for direct commercial advantage.</p><p>of the organization is available.</p><p>The above considerations motivate the approach to information integration proposed in [CDGL • 98d], whose distinguishing feature is to exploit the possibility of representing the conceptual level of a Data Warehouse in a very expressive language and use reasoning tools to support the Data Warehouse construction, maintenance and evolution. In fact, the idea is to balance the effort of building a conceptual model of the Data Warehouse by improving the capabilities of the system in maintaining the Data Warehouse and support the incremental addition of information sources. The proposed approach follows a local as view paradigm, by explicitly requiring an enterprise conceptual model which is therefore regarded as a unified view of the data available within the organization. Most of the work on integration has been concerned with the intensional/schema level, while less attention has been devoted to the problem of data integration at the extensional level. Integration of data is, nonetheless, at the heart of Data Warehousing <ref type="bibr" target="#b13">[Inm96]</ref>. When data passes from the application-oriented operational environment to the Warehouse, possible inconsistencies and redundancies should be resolved, so that the Warehouse is able to provide an integrated and reconciled view of data of the organization. 
Thus, in the context of a Data Warehouse, data integration and reconciliation is the process of acquiring data from the sources and making them available within the Warehouse.</p><p>Given a request for data (e.g., for materializing a new relation in the Data Warehouse), which is formulated in terms of the global view of the corporate data, (i.e., not the language of the sources, but of the enterprise), there are several steps that enable for the acquisition of data from the sources:</p><p>1. Identification of the sources where the relevant information resides. Note that this task is typical of the local-as-view approach, and requires algorithms that are generally both sophisticated and costly <ref type="bibr" target="#b0">[AD98,</ref><ref type="bibr" target="#b17">LMSS95]</ref>.</p><p>2. Decomposition of the user request into queries to individual sources that would return the data of interest.</p><p>3. Interpretation of the data provided by a source. Interpreting data can be regarded as the task of casting them into a common representation, which can thereafter be used to manipulate the data.</p><p>4. Merging of the data. The data returned by various sources need to be combined to provide the Data Warehouse with the requested information.</p><p>In commercial environments for Data Warehouse design and management the above tasks are taken care of through ad-hoc components <ref type="bibr" target="#b16">[JLVV99]</ref>. In general, such a component provides the user with the capability of specifying the mapping between the sources and the Data Warehouse by browsing through a meta-level description of the relations of the sources. In addition, it generally provides both for automatic code generators and for the possibility of attaching procedures to accomplish ad hoc transformations and filtering of the data. 
Even though there are powerful and effective environments with the above features, their nature is inherently procedural and close to the notion of global as view, where the task of relating the sources with the Data Warehouse is done on a query-by-query basis.</p><p>Several recent research contributions address the same problem from a more formal perspective [HGMW • 95, Wid95, GM95, HZ96, ZHK96, ZHKF95, PGMW95, GMS94]. For example, a methodology for extracting, comparing and matching data and objects located in different sources is described in <ref type="bibr" target="#b18">[PGMW95]</ref>. The methodology is based on the Object Exchange Model, which requires the explicit semantic labeling of the objects, to support object exchange, and emphasizes the need for a tight interaction between the system and the user. However, the method remains of procedural nature, since it requires the user to build and maintain the relationship between the sources and the Data Warehouse on a query-by-query basis.</p><p>The approach proposed in <ref type="bibr" target="#b8">[GMS94]</ref> is more declarative in nature. Suitable data structures for reconciling different representations of the same data are represented in a context theory, which is used by the system to transform the queries as appropriate for gathering the data from the various sources. In such a declarative approach, the user is not directly concerned with the identification and resolution of semantic conflicts when formulating the requests for data. Rather, once the specification of the sources is available, conflicts are detected by the system, and conversion and filtering are automatically enforced. 
However, the method still follows the global-as-view approach, and the context theory is used as a description of reconciled data structures, rather than as the conceptual model of the corporate data.</p><p>In this paper we present the approach to data integration and reconciliation proposed within the DWQ (Data Warehouse Quality) project <ref type="bibr" target="#b15">[JJQV98,</ref><ref type="bibr" target="#b16">JLVV99]</ref>. In DWQ, the ultimate goal of source integration and data reconciliation is to represent the migration of the data from the sources to the Data Warehouse, in order to support the design of materialized views that meet user requirements, and have high quality with respect to correctness, interpretability, usefulness, believability. The method for data integration and reconciliation builds upon and extends the work in [CDGL+98d], therefore relying on the availability of a Conceptual Model to declaratively represent the relationship between the sources and the Data Warehouse. The declarative approach is further pursued in the task of data integration and reconciliation, where the system is given a declarative description of the data in the sources and provides automatic support in satisfying the data requests for populating the Data Warehouse.</p><p>Compared with the existing proposals mentioned above, • It uses such correspondences for supporting the task of specifying the correct mediators for the loading of the materialized views of the Data Warehouse.</p><p>Our methodology relies on a novel query rewriting algorithm, whose role is to reformulate the query that defines the view to materialize in terms of both the source relations and the interschema correspondences. The paper is organized as follows. In Section 2, we summarize the relevant features of the proposed approach to information integration. Section 3 illustrates the method we use to describe the content of the sources at the logical level. 
Section 4 is devoted to a discussion of the meaning and the role of interschema correspondences. Section 5 describes the query rewriting algorithm at the basis of our approach to the design of mediators. Section 6 concludes the paper.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2">The DWQ framework</head><p>In this section we briefly describe the general framework adopted in the DWQ project [CDGL+98d]. The proposed framework allows one to explicitly model data and information needs — i.e., a specification of the data that the Data Warehouse provides to the user — at various levels [CDGL+98b, CDGL+98d, CDGL+98c]:</p><p>• The conceptual level contains a conceptual representation of the corporate data.</p><p>• The logical level contains a representation in terms of a logical data model of the sources and of the data materialized in the Data Warehouse.</p><p>• The physical level contains a store for the materialized data, wrappers for the sources and mediators for loading the materialized data store.</p><p>The relationship between the conceptual and the logical, and between the logical and the physical level is represented explicitly by specifying mappings between corresponding objects of the different levels.</p><p>We briefly describe the conceptual and logical levels, referring to the abstract architecture of DWQ as depicted in Figure <ref type="figure">1</ref>.</p><p>The Conceptual Model is a conceptual representation of the data managed by the enterprise, including a conceptual representation of the data residing in sources, and of the global concepts and relationships that are of interest to the Data Warehouse application. The conceptual model is expressed in terms of an enriched Entity-Relationship model in which complex entity and relationship expressions can be constructed and used, and in which interdependencies between elements of different sources and of the enterprise are captured using intermodel assertions [CDGL+98b, CL93]. Intermodel assertions provide a simple and effective declarative mechanism to express the dependencies that hold between entities (i.e. 
classes and relationships) in different models <ref type="bibr" target="#b11">[Hul97]</ref>. The use of intermodel assertions allows for an incremental approach to the integration of the conceptual models of the sources and of the enterprise. Due to space limitations, we cannot consider this aspect in further detail, and refer the interested reader to [CDGL+98b].</p><p>The conceptual representation contains, besides entities and relationships, also a description of domains, which are used to typify the attributes of entities and relationships.</p><p>Rather than considering only concrete domains, such as strings, integers, and reals, our approach is based on the use of abstract domains. An abstract domain may have an underlying concrete domain, but allows the designer to distinguish between the different meanings that a value of the concrete domain may have. Additionally, also Boolean combinations of domains and the possibility to construct an ISA hierarchy between domains are supported.</p><p>Example 1 Consider two attributes $A_1$ in a source and $A_2$ in the Data Warehouse, both representing amounts of money. Rather than specifying that both attributes have values of type real, the designer may specify that the domain of attribute $A_1$ is MoneyInLire while the domain of attribute $A_2$ is MoneyInEuro, both of which have real as the underlying concrete domain. In this way, it becomes possible to specify declaratively the difference between values of the two attributes, and take into account such knowledge for loading data from the source to the Data Warehouse.</p><p>We provide an example of the form of the Conceptual Model, and refer to [CDGL+98b] for a more detailed description of the adopted formalism.</p><p>Example 2 As our running example we consider an enterprise and two sources containing information about contracts between customers and departments for services, and about registration of customers at departments. 
Source 1 contains information about customers registered at publicrelations departments. Source 2 contains information about contracts and complete information about services. Such situation can be represented by means of the ER diagrams shown in Figure <ref type="figure" target="#fig_0">2</ref>, together with the following intermodel assertions (Ú represents ISA while represents equivalence):</p><formula xml:id="formula_2">Department ½ PrDept ¼ REG-AT½ Ú REG-AT¼ Customer½ Customer¼ Ù ´ ½ °½ ´REG-AT¼ Ù ´°¾ PrDept ¼ µµµ Customer¼ Ù ´ ½ °½ CONTRACT¼µ Ú ´ ½ °½ PROMOTION½µ Customer¾ Ú Customer¼ Ù ´ ½ °½ CONTRACT¼µ Department ¾ Ú Department ¼ Service¾ Service¼ CONTRACT¾ Ú CONTRACT¼ Customer½ Customer¼ Department ½ Department ¼</formula><p>and the following domain hierarchy:</p><formula xml:id="formula_3">PersNameString Ú String DeptNameString Ú String SSNString Ú String DeptCodeInteger Ú Integer ServNoInteger Ú Integer</formula><p>At the logical level, the logical content of each source, called the Source Schema (see Section 3), is provided in terms of a set of relational tables using the relational model. The link between the logical representation and the conceptual representation of the source is formally defined by associating with each table a query that describes its content in terms of a query over the Conceptual Model. In other words, the logical content of a source table is described in terms of a view over the Conceptual Model. To map physical structures to logical structures we make use of suitable wrappers, which encapsulate the sources. The wrapper hides how the source actually stores its data, the data model it adopts, etc., and presents the source as a set of relational tables. In particular, we assume that all attributes in the tables are of interest to the Data Warehouse application (attributes that are not of interest are hidden by the wrapper). 
The logical content of the materialized views constituting the Data Warehouse, called the Data Warehouse Schema (see Section 4), is provided in terms of a set of relational tables. Similarly to the case of the sources, each table of the Data Warehouse Schema is described in terms of a view over the Conceptual Model. As we said before, the way in which a view is actually materialized, starting from the data in the sources, is specified by means of mediators.</p><p>In such a framework, we have devised suitable inference techniques, which allow for carrying out several reasoning services on both the conceptual representation, such as inferring inclusion between entities and relationships, satisfiability of entities, etc. [CDGL+98d], and the logical representation, such as query containment <ref type="bibr" target="#b1">[CDGL98a]</ref>, which is at the basis of query rewriting. The possibilities offered by such reasoning tools are used in the accomplishment of several activities concerning both the design and the operation of the Data Warehouse.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3">Source schema description</head><p>In this section we focus on the specification of the logical schemas of the sources. Such schemas are intended to provide a structural description of the content of the sources, which are encapsulated by suitable wrappers.</p><p>We describe a source Ë by associating to each relational table Ì of Ë an adorned query that is constituted by a head, a body, and an adornment:</p><p>¯The head defines the relational schema of the table in terms of a name, and the number of columns.</p><p>¯The body describes the content of the table in terms of a query over the Conceptual Model.</p><note type="other">ServNo ServNoInteger Name PersNameString SSN SSNString DOB Date Name DeptNameString</note><p>Code DeptCodeInteger ¯The adornment declares the domains of the columns of the table, and also which are the attributes of the table that are used to identify an entity of the Conceptual Model.</p><formula xml:id="formula_4">1 1 3 2 2 Customer ¼ Service ¼ CONTRACT ¼ Department ¼ PrDept ¼ REG-AT ¼ SSN SSNString Name DeptNameString 1 2 PROMOTION ½ 2 1 Customer ½ REG-AT ½ Department ½ ServNo ServNoInteger Name DeptNameString Name PersNameString DOB Date 1 2 3 Customer ¾ Service ¾ CONTRACT ¾ Department ¾</formula><p>We now present in detail the notions introduced above.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.1">Query over the Conceptual Model</head><p>Generally speaking, the connection to the Conceptual Model is established by defining each table as a relational query over the elements of the Conceptual Model.</p><p>A query $q$ for a Conceptual Model $M$ is a non-recursive Datalog query, written in the form:</p><formula xml:id="formula_5">q(\vec{x}) \leftarrow \mathit{conj}_1(\vec{x}, \vec{y}_1) \ \mathrm{OR} \ \cdots \ \mathrm{OR} \ \mathit{conj}_m(\vec{x}, \vec{y}_m)</formula><p>where each $\mathit{conj}_i(\vec{x}, \vec{y}_i)$ is a conjunction of atoms or negated atoms, and $\vec{x}, \vec{y}_i$ are all the variables appearing in the conjunct. Each atom is either of the form $E(t)$ or of the form $R(\vec{t})$, where $t$, $\vec{t}$, and $t'$ are variables in $\vec{x}, \vec{y}_i$ or constants, and $E$ and $R$ are entities and relationships of $M$ respectively.</p><p>The semantics of queries is as follows. Given an interpretation $I$ of a Conceptual Model $M$ with interpretation domain $\Delta^I$, a query $q$ of arity $n$ is interpreted as the set $q^I$ of $n$-tuples $(d_1, \ldots, d_n)$, with each $d_i \in \Delta^I$, such that, when substituting each $d_i$ for $x_i$, the formula</p><formula xml:id="formula_6">\exists \vec{y}_1 . \mathit{conj}_1(\vec{x}, \vec{y}_1) \ \mathrm{OR} \ \cdots \ \mathrm{OR} \ \exists \vec{y}_m . \mathit{conj}_m(\vec{x}, \vec{y}_m)</formula><p>evaluates to true in $I$.</p><p>The fact that a relation in a source is defined in terms of a query over the Conceptual Model confirms that we are following the local-as-view approach: each table is seen as a view of the virtual database represented by the Conceptual Model.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2">Adornment</head><p>To make the connection to the Conceptual Model precise, it is not sufficient to define each table as a relational query over the elements of the Conceptual Model. We need to make it explicit how the objects of the conceptual representation are coded into values of the logical representation. The notion of adorned query is introduced exactly for this purpose.</p><p>An adorned query is an expression of the form $T(\vec{x}) \leftarrow q(\vec{x}, \vec{y}) \mid \alpha_1, \ldots, \alpha_n$ where $T$ is the name of the relational table, $\vec{x}$ are its attributes (observe that attributes denote values and not objects), $q(\vec{x}, \vec{y})$ is a query as defined above, and each $\alpha_i$ is an annotation on variables appearing in $\vec{x}$. In particular:</p><p>1. For each $X \in \vec{x}$, we have an annotation of the form</p><formula xml:id="formula_7">X :: V</formula><p>where $V$ is a domain expression. Such an annotation is used to specify how values bound to $X$ are represented in the table at the logical level. For example, which currency is used for a real value denoting an amount of money.</p><p>2. For each tuple of variables $\vec{z} \subseteq \vec{x}$ that is used for identifying in $T$ an object $Y \in \vec{y}$ mentioned in $q(\vec{x}, \vec{y})$, we have an annotation of the form</p><formula xml:id="formula_8">\mathit{identify}([\vec{z}], Y)</formula><p>For example, the designer may assert that the attributes first name, last name, and date of birth in a table are used to identify students.</p><p>We point out that our method is able to cope with several schematic differences that may be present in the sources <ref type="bibr" target="#b19">[SK92]</ref>. We illustrate this point with the help of an example.</p><p>Example 3 Suppose that the Conceptual Model contains a relationship Service with three attributes, Date, ServiceNo, and Price, where $\mathit{Service}(D, S, P)$ means that at the date $D$ the service $S$ costs $P$ Euro. 
Suppose that Source Ë ½ represents the same kind of information only on Services Ú ½ and Ú ¾ , by means of two tables: v1 and v2, where v1´ Èµ means that service Ú ½ costs È Italian Lira at date , and v2´ Èµ means that service Ú ¾ costs È Italian Lira at date . Suppose that Source Ë ¾ represents the same kind of information only on Services Ú ¿ and Ú by means of a table Serv, where Serv´ µ means that services Ú ¿ and Ú cost and Euro respectively at date . Finally, suppose that Source Ë ¿ represents the information only for a certain date by means of another table Serv ¿ . The various tables in the three sources can be specified by means of the following adorned queries:</p><formula xml:id="formula_9">v1´ Èµ Service´ ¼ v1 ¼ È µ È ItalianLira Date v2´ Èµ Service´ ¼ v2 ¼ È µ È ItalianLira Date Serv´ µ Service´ ¼ v3 ¼ µ Service´ ¼ v4 ¼ µ Euro Euro Date Serv¿´Ë½ È µ Service´ Ë½ È µ Code´Ë Ë½µ È Euro ÒØ Ý´ Ë½ Ë µ Ë½ String</formula><p>The above example illustrates a case where there are various schematic differences, both among the sources, and between the sources and the Conceptual Model. The mechanisms used in our methodology for specifying adorned queries is able to cope with such differences.</p><p>The adorned query associated to a table in a source contains a lot of information that can be profitably used in analyzing the quality of the Data Warehouse design process. Indeed, the adorned query precisely formalizes the content of a source table in terms of a query over the Conceptual Model, the domains of each attribute of the table, and the attributes used to identify entities at the conceptual level. One important check that we can carry out over the logical specification of a source is whether the adorned query associated with a table in a source is consistent or not. Let É be an adorned query and let be its body. 
The query is said to be inconsistent with respect to the Conceptual Model Å, if for every database coherent with Å, the evaluation of with respect to is empty. An adorned query É is inconsistent with respect to the Conceptual Model Å either because the body of É is inconsistent with respect to Å, or because the annotations are incoherent with respect to what specified in Å. The inference techniques described in [CDGL • 98d] allow us to check the consistency of the relational tables defined for describing a source.</p><p>Example 2 (cont.) Assuming that in Source 1 a customer is actually identified by its social security number, and a department by its name, we can specify the relational table <ref type="table">TABLE</ref> ½ by the following adorned query:</p><formula xml:id="formula_10">TABLE½´Ë Å È µ REG-AT½´ µ PROMOTION½´ µ È false SSN´ Ë µ Name´ Åµ ÇÊ PROMOTION½´ µ È true SSN´ Ë µ Name½´ Åµ ÒØ Ý´ Ë µ Ë SSNString ÒØ Ý´ Å µ Å DeptNameString È Boolean</formula><p>Additionally, we assume that in Source 2 the actual data can be described in terms of a relational table </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4">Interschema Correspondences</head><p>We now describe how to define Interschema Correspondences, which are used to declaratively specify the correspondences between data in different schemas (either source schemas or data warehouse schema).</p><p>In our approach, Interschema Correspondences are defined in terms of relational tables, similarly to the case of the relations describing the sources at the logical level. The difference with source relations is that we conceive interschema correspondences as non-materialized relational tables, in the sense that their extension is computed by an associated program whenever it is needed. It follows that, to each interschema correspondence, we associate a head, a body, and an adornment. Differently from the case of a source relation, the adorment specifies which is the program that is able to compute the extension of the virtual table.</p><p>We distinguish among three types of correspondences, namely Conversion, Matching, and Reconciliation Correspondences.</p><p>Conversion Correspondences are used to specify that data in one source can be converted into data of a different source or of the data warehouse, and how this conversion is performed. They are used to anticipate several types of data conflicts that may occur in loading data.</p><p>As an example, suppose that in a table of a source costs are represented in Italian Lira, while in a table of the Data Warehouse we want to express them in Euro. 
Then, in order to use the source table in the rewriting of a query that defines the Data Warehouse table, it is necessary to know about the possibility of converting each amount in Italian Lira into an amount in Euro.</p><p>A Conversion Correspondence $\mathit{convert}$ has the following form:</p><formula xml:id="formula_11">\mathit{convert}(\vec{x}, \vec{y}) \leftarrow \mathit{conj}(\vec{x}, \vec{y}, \vec{z}) \ \text{through} \ \mathit{program}(\vec{x}, \vec{y}, \vec{z})</formula><p>where $\mathit{conj}$ is a conjunctive query, which specifies the conditions under which the conversion is applicable, and $\mathit{program}$ is a predicate that we assume associated to a program that performs the conversion. In general, the program needs to take into account the additional parameters specified in the condition to actually perform the conversion. The conversion has a direction. In particular, it operates from a tuple of values satisfying the conditions specified for $\vec{x}$ in $\mathit{conj}$ to a tuple of values satisfying the conditions specified for $\vec{y}$. This means that the conversion program receives as input a tuple $\vec{x}$, and returns the corresponding tuple $\vec{y}$, possibly using the additional parameter $\vec{z}$ to perform the conversion.</p><p>Matching Correspondences are used to specify how data in different sources can match. A Matching Correspondence $\mathit{match}$ has the following form:</p><formula xml:id="formula_12">\mathit{match}(\vec{x}_1, \ldots, \vec{x}_k) \leftarrow \mathit{conj}(\vec{x}_1, \ldots, \vec{x}_k, \vec{z}) \ \text{through} \ \mathit{program}(\vec{x}_1, \ldots, \vec{x}_k, \vec{z})</formula><p>where $\mathit{conj}$ specifies the conditions under which the matching is applicable, and $\mathit{program}$ is a predicate that we assume associated to a program that performs the matching. The program receives as input $k$ tuples of values satisfying the conditions (and possibly the additional parameters in the condition) and returns whether they match or not. Note that already specified Interschema Correspondences may be used to define new ones. As an example, the designer may want to define a Matching Correspondence between two tuples by using two already defined Conversion Correspondences, which convert to a common representation, and then by using equality. 
In this case, he could provide the following definition of the Matching Correspondence:</p><formula xml:id="formula_13">\mathit{match}([\vec{x}], [\vec{y}]) \leftarrow \mathit{convert}_1([\vec{x}], [\vec{z}]),\ \mathit{convert}_2([\vec{y}], [\vec{z}]),\ \mathit{conj}(\vec{x}, \vec{y}, \vec{z}, \vec{w}) \quad \text{through} \quad \mathit{none}</formula><p>Observe that, in this case, the program associated to the Matching Correspondence is empty, since the actual conversions are performed by the programs associated to the Conversion Correspondences.</p><p>Reconciliation Correspondences are used to assert how we can reconcile data in different sources into data of the data warehouse. A Reconciliation Correspondence $\mathit{reconcile}$ has the following form:</p><formula xml:id="formula_14">\mathit{reconcile}([\vec{x}_1], \ldots, [\vec{x}_k], [\vec{z}]) \leftarrow \mathit{conj}(\vec{x}_1, \ldots, \vec{x}_k, \vec{z}, \vec{w}) \quad \text{through} \quad \mathit{program}(\vec{x}_1, \ldots, \vec{x}_k, \vec{z}, \vec{w})</formula><p>where $\mathit{conj}$ specifies the conditions under which the reconciliation is applicable, and $\mathit{program}$ is a predicate that we assume associated to a program that performs the reconciliation. Such correspondence specifies that the tuples of values $\vec{x}_1, \ldots, \vec{x}_k$ coming from the sources are reconciled to the tuple $\vec{z}$ in the Data Warehouse. Therefore, the associated program receives as input tuples of values (and possibly the additional parameters in the condition) and returns a reconciled tuple. Again, a Reconciliation Correspondence could simply be defined as a combination of appropriate Matching and Conversion Correspondences, e.g.,</p><formula xml:id="formula_15">\mathit{reconcile}([\vec{x}], [\vec{y}], [\vec{z}]) \leftarrow \mathit{convert}_1([\vec{x}], [\vec{w}_1]),\ \mathit{convert}_2([\vec{y}], [\vec{w}_2]),\ \mathit{match}_1([\vec{w}_1], [\vec{w}_2]),\ \mathit{convert}_3([\vec{w}_1], [\vec{z}]),\ \mathit{conj}(\vec{x}, \vec{y}, \vec{w}_1, \vec{w}_2, \vec{z}) \quad \text{through} \quad \mathit{none}</formula><p>In practice, several of the Interschema Correspondences that must be specified will have a very simple form, since they will correspond simply to equality in the case of a matching and to identity in the case of a conversion. Therefore, in order to simplify the task of the designer in specifying the various interschema correspondences, we assume that several correspondences are automatically asserted by default by the system. 
In particular, for each domain $D$ in the conceptual model, the following Interschema Correspondences are specified by default:</p><formula xml:id="formula_16">\mathit{convert}_D([X], [Y]) \leftarrow D(X),\ D(Y) \quad \text{through} \quad \mathit{identity}([X], [Y]) \\ \mathit{match}_D([X], [Y]) \leftarrow D(X),\ D(Y),\ X = Y \quad \text{through} \quad \mathit{none} \\ \mathit{reconcile}_D([X], [Y], [Z]) \leftarrow D(X),\ D(Y),\ D(Z) \quad \text{through} \quad \mathit{identity}([X], [Y], [Z])</formula><p>where $\mathit{identity}$ is the program that computes the identity function for values of domain $D$, and the matching correspondence has no associated program.</p><p>The system allows the designer to inhibit the default correspondences for a certain domain, simply by providing an alternative interschema correspondence referring to that domain.</p><p>Moreover, we assume that for each Conversion Correspondence $\mathit{convert}_i$ asserted by the designer, the system automatically asserts a new Matching Correspondence $\mathit{match}_i$ as follows:</p><formula xml:id="formula_17">\mathit{match}_i([\vec{x}], [\vec{y}]) \leftarrow \mathit{convert}_i([\vec{x}], [\vec{z}]),\ \vec{y} = \vec{z} \quad \text{through} \quad \mathit{none}</formula><p>Moreover, for each Conversion Correspondence $\mathit{convert}_i$ asserted by the designer and for each Matching Correspondence $\mathit{match}_j$ asserted by the designer or by default, the system automatically asserts a new Reconciliation Correspondence $\mathit{reconcile}_{j,i}$ as follows: In other words, we are trying to reformulate $q$ in terms of the relations in the sources, and possibly in terms of the matching, conversion, and reconciliation predicates. If there are different rewritings, then we choose the best rewriting $r$ with respect to suitable quality parameters. There are several criteria to be taken into consideration when evaluating the quality of a rewriting, such as:</p><formula xml:id="formula_18">\mathit{reconcile}_{j,i}([\vec{x}], [\vec{y}], [\vec{z}]) \leftarrow \mathit{match}_j([\vec{x}], [\vec{y}]),\ \mathit{convert}_i([\vec{x}], [\vec{z}]) \quad \text{through} \quad \mathit{none} Example 2 (cont</formula><p>• Completeness of the rewriting. Obviously, the best situation is the one where the rewriting is complete, in the sense that the rewritten query is equivalent to the original query. 
Such a check can be done by exploiting the algorithm for query containment.</p><p>• Accuracy, confidence, freshness, and availability of data in the source relations that the rewriting requires to access.</p><p>The resulting query is the specification for the design of the mediator associated to $T$. The most critical step of the above method is the computation of the rewriting. Our rewriting algorithm is based on the method presented in <ref type="bibr" target="#b6">[DL97]</ref>, modified to take into account the following aspects:</p><p>• We deal with queries whose atoms refer to a conceptual model that includes ISA assertions and a limited form of functional dependencies. Such constraints have to be considered in the computation of the rewriting.</p><p>• We deal with queries that are disjunctions of conjunctions. It follows that the rewriting itself is in general a disjunction, and therefore, we need to deal with the problem of merging the results of several queries. This problem is addressed by the notion of merging clause.</p><p>In particular, if the query $r$ computed by the rewriting is an or-query (i.e., it is constituted by more than one disjunct), then the algorithm associates to $r$ a suitable set of so-called merging clauses, taking into account that the answers to the different or-parts of the query may contain objects and values that represent the same real world entity or the same value. A merging clause is an expression of the form merging tuple-spec$_1$ and $\cdots$ and tuple-spec$_n$ such that matching-condition into tuple-spec$_{t_1}$ and $\cdots$ and tuple-spec$_{t_m}$ where tuple-spec$_i$ denotes a tuple returned by the $i$-th disjunct of $r$, matching-condition specifies how to merge the various tuples denoted by tuple-spec$_1$, $\ldots$, tuple-spec$_n$, and tuple-spec$_{t_1}$, $\ldots$, tuple-spec$_{t_m}$ denote the tuples in $T$ resulting from the merging.</p><p>We observe that the rewriting algorithm is able to generate one merging clause template for each pair of disjuncts that are not disjoint. 
Starting from such templates, the designer may either specify the such that and the into parts, depending on the intended semantics, or change the templates in order to specify a different merging plan (for example for merging three disjuncts, rather than three pairs of disjuncts).</p><p>• The algorithm computes the maximally contained rewriting (i.e., every other rewriting is included in the one computed by the query), but we also want to inform the designer whether such a rewriting is equivalent or not to the original query. Indeed, we have devised an effective method for checking equivalence between the original query and the computed rewriting <ref type="bibr" target="#b1">[CDGL98a]</ref>.</p><p>• Besides the relational tables in the sources, our rewriting algorithm takes into account the matching, conversion, and reconciliation predicates defined in the interschema correspondences.</p><p>• Even when no rewriting exists for the query (i.e., when the maximally contained rewriting is empty), we want to provide the designer with useful indications on whether there is a method for enriching the Interschema Correspondences to get a non-empty rewriting. Indeed, our rewriting algorithm adopts a form of abductive reasoning that enables to single out the specification of which matching, conversion and reconciliation operations would allow to get a non-empty rewriting. This indication can be profitably used by the designer to check whether she/he can add new Interschema Correspondences in order to make the computed rewriting complete.</p><p>Example 2 (cont.) Suppose we want to store in the Data Warehouse a relation containing the information about customers that have a contract for a certain service with a department at which they are also registered, or that are eligible for a promotion. Independently from the fact that the customer has a contract, we want to include the information on whether he is eligible for a promotion. 
We can make use of a relational table TDW with four components, defined by the following adorned query, where we have assumed that in the Data Warehouse we want to identify customers by their SSN, services by their service number, and departments by their code: In this case the merging clause simply reduces to a disjunction.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="6">Conclusions</head><p>We have described a new approach to data integration and reconciliation in Data Warehousing. The approach is based on the availability of a Conceptual Model of the corporate data, and allows the designer to declaratively specify several types of correspondences between data in different sources. Such correspondences are used by a query rewriting algorithm that supports the task of specifying the correct mediators for the loading of the materialized views of the Data Warehouse.</p><p>Based on the described methodology, we are currently implementing a design tool within the DWQ project. The tool is based on the Concept Base System <ref type="bibr" target="#b14">[Jar92]</ref>, and provides support for both schema and data integration in Data Warehousing.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head>Figure 2 :</head><label>2</label><figDesc>Figure 2: Conceptual model of the application of Example 2</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_1"><head></head><label></label><figDesc>TABLE ¾ with four columns, two for the customer, one for the service the customer has registered, and one for the department. As in Source 1, in Source 2 departments are still identified by their name, but, differently from Source 1, customers are identified by their name and date of birth. Services are identified by a unique service number. Hence the following adorned query is used to specify TABLE ¾ : Name´ AEµ ServNo´Ë Á µ</figDesc><table><row><cell>TABLE¾´AE</cell><cell>Á Å µ</cell></row><row><cell cols="2">CONTRACT¾´ Ë µ Name´ Åµ</cell></row><row><cell>ÒØ Ý´ AE</cell><cell>µ AE PersNameString</cell></row><row><cell></cell><cell>Date</cell></row><row><cell cols="2">ÒØ Ý´ Á Ë µ Á ServNoInteger</cell></row><row><cell>ÒØ Ý´ Å</cell><cell>µ Å DeptNameString</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_2"><head></head><label></label><figDesc>.)  The following Conversion Correspondence specifies that the name and date of birth of a person can be converted into a Social Security Number through the program name to SSN:As we said in the introduction, the problem of data integration and reconciliation is crucial for the task of designing the mediators that load the data in the Data Warehouse.Note that the adorned query associated to a table in a source is the result of a reverse engineering analysis of the source, whereas in this case the adorned query is a specification of what we want to materialize in the table of the Data Warehouse. Note also that we express</figDesc><table><row><cell cols="6">¯a matching, conversion, or reconciliation pred-</cell></row><row><cell cols="6">icate defined in the Interschema Correspon-</cell><cell>and the Reconciliation Correspondences</cell></row><row><cell cols="2">dences.</cell><cell></cell><cell></cell><cell></cell></row><row><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell>Ö ÓÒ Ð ½ ½´ AE</cell><cell>Ë½</cell><cell>Ë¾ µ</cell></row><row><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell>Ö ÓÒ Ð ¿ ¾´ Å½ Å¾</cell><cell>µ</cell></row><row><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell>5 Specification of mediators</cell></row><row><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell>Such a task aims at specifying, for every relation in the Data</cell></row><row><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell>Warehouse Schema, how the tuples of the relation should</cell></row><row><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell>be constructed from a suitable set of tuples extracted from</cell></row><row><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell>the 
sources.</cell></row><row><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell>Suppose we have decided to materialize a new relation</cell></row><row><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell>Ì in the Data Warehouse. 1 Our goal is to support the de-</cell></row><row><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell>signer in providing a formal specification for the design</cell></row><row><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell>of the mediator used to extract the correct data from the</cell></row><row><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell>sources, and to load such data in Ì . The methodology we</cell></row><row><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell>propose is based on the following steps.</cell></row><row><cell>ÓÒÚ ÖØ ½´ AE</cell><cell>Ë µ</cell><cell></cell><cell></cell><cell></cell></row><row><cell cols="6">PersNameString´AE µ Date´ µ DOB´AE µ</cell><cell>1. We apply the method described in Section 3 to provide</cell></row><row><cell cols="3">SSNString´Ëµ through name to SSN´AE</cell><cell>Ë µ</cell><cell></cell><cell>the specification of the relation Ì . 
In other words, we specify Ì in terms of an adorned query</cell></row><row><cell cols="6">Moreover, we add the following Conversion Correspon-dence, which represents the fact that a department name</cell><cell>Õ</cell><cell>Õ ¼ ½</cell></row><row><cell cols="6">can be converted into a department code through the pro-</cell></row><row><cell cols="3">gram dept name to code:</cell><cell></cell><cell></cell></row><row><cell>ÓÒÚ ÖØ ¾´ Å</cell><cell>µ</cell><cell></cell><cell></cell><cell></cell></row><row><cell cols="6">DeptNameString´Å µ DeptCodeInteger´ µ</cell></row><row><cell cols="5">through dept name to code´Å µ</cell><cell>the semantics of Ì again in terms of the conceptual</cell></row><row><cell cols="6">According to the above rules, the system asserts automat-ically (among others) the Matching Correspondence and Conversion Correspondences</cell><cell>model. Not only the sources, but also the relations in the Data Warehouse are seen as views of such a con-ceptual model.</cell></row><row><cell>Ñ Ø ½´ AE</cell><cell>Ë µ</cell><cell cols="3">ÓÒÚ ÖØ ½´ AE through ÒÓÒ</cell><cell>Ë½ µ Ë Ë½</cell><cell>2. We look for a rewriting of Õ in terms of the queries Õ ½ Õ × that correspond to the materialized views in the Data Warehouse. If a complete, equivalent rewrit-</cell></row><row><cell cols="2">Ñ Ø ¾´ Å Ñ Ø ¿´ Å½ Å¾ µ µ</cell><cell cols="4">ÓÒÚ ÖØ ¾´ Å through ÒÓÒ DeptNameString´Å½µ ½ µ DeptNameString´Å¾µ Å½ Å¾ through ÒÓÒ</cell><cell>½</cell><cell>ing exists, then the new table can be derived from the existing tables in the Data Warehouse. Otherwise, the algorithm is able to single out the part that cannot be derived from the Data Warehouse, and that must be loaded from the sources. In the following, Õ denotes such part.</cell></row><row><cell cols="2">ÓÒÚ ÖØ ´ Ë½ Ë¾ µ</cell><cell cols="3">SSNString´Ë½µ</cell><cell>3. 
We look for a rewriting of Õ in terms of the queries cor-</cell></row><row><cell></cell><cell></cell><cell cols="3">SSNString´Ë¾µ</cell><cell>responding to the tables in the sources. The rewriting</cell></row><row><cell></cell><cell></cell><cell cols="2">through</cell><cell cols="2">ÒØ ØÝ ´Ë½ Ë ¾µ</cell><cell>aims at expressing the data in Ì in terms of a disjunc-</cell></row><row><cell cols="2">ÓÒÚ ÖØ ´ È½ È¾ µ</cell><cell cols="4">Boolean´È½µ Boolean´È¾µ</cell><cell>tion of conjunctive queries where each atom refers to</cell></row><row><cell cols="2">ÓÒÚ ÖØ ´ Á½ Á¾ µ</cell><cell cols="4">through ServNoInteger´Á½µ ÒØ ØÝ ´È½ È ¾µ</cell><cell>¯a table in a source, or</cell></row><row><cell></cell><cell></cell><cell cols="4">ServNoInteger´Á¾µ</cell><cell>1 To see how DWQ addresses the issue of deciding what to materialize</cell></row><row><cell></cell><cell></cell><cell cols="2">through</cell><cell cols="2">ÒØ ØÝ ´Á½ Á ¾µ</cell><cell>in the Data Warehouse, we refer to [TLS99].</cell></row></table><note>Ò</note></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_3"><head></head><label></label><figDesc>CONTRACT¼´ Ê µ PROMOTION½´ µ SSN´ Ë µ ServNo´Ê Áµ Code´ µ true ÇÊ CONTRACT¼´ Ê µ REG-AT½´ µ PROMOTION½´ µ SSN´ Ë µ ServNo´Ê Áµ Code´ µ È falseUsing the asserted and automatically derived Interschema Correspondences, the system is able to rewrite the above query in terms of TABLE ½ in Source 1 and TABLE ¾ in Source 2 (see Section 3) as follows: È½ È¼ µ ÓÒÚ ÖØ ´ Á¾ Á¼ µ ÇÊ TABLE½´Ë½ Å ½ È ½µ Ë ½ NULL È½ true ÓÒÚ ÖØ ¾´ Å½ µ ÓÒÚ ÖØ ´ Ë½ Ë¼ µ ÓÒÚ ÖØ ´ È½ È¼ µ</figDesc><table><row><cell cols="2">TDW´Ë Á</cell><cell>È µ</cell></row><row><cell>ÇÊ</cell><cell></cell><cell></cell></row><row><cell cols="3">PROMOTION½´</cell><cell>µ SSN´ Ë µ Code´</cell><cell>µ</cell></row><row><cell>È</cell><cell cols="3">true Á NULL</cell></row><row><cell cols="3">ÒØ Ý´ Ë</cell><cell>µ Ë SSNString</cell></row><row><cell cols="4">ÒØ Ý´ Á Ê µ Á ServNoInteger</cell></row><row><cell cols="2">ÒØ Ý´</cell><cell></cell><cell>µ</cell><cell>DeptCodeInteger</cell></row><row><cell cols="2">TDW´Ë¼ Á ¼</cell><cell cols="2">È ¼µ</cell></row><row><cell cols="4">TABLE½´Ë½ Å ½ È ½µ TABLE¾´AE¾</cell><cell>Á ¾ Å ¾µ</cell></row><row><cell cols="4">Ö ÓÒ Ð ½ ½´ AE¾</cell><cell>Ë½</cell><cell>Ë¼ µ</cell></row><row><cell cols="4">Ö ÓÒ Ð ¿ ¾´ Å½ Å¾</cell><cell>µ</cell></row><row><cell cols="3">ÓÒÚ ÖØ ´</cell></row></table><note>ÈBoolean</note></figure>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" xml:id="foot_0">D. Calvanese, G. De Giacomo, M. Lenzerini, D. Nardi, R. Rosati</note>
		</body>
		<back>
			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">Complexity of answering queries using materialized views</title>
		<author>
			<persName><forename type="first">Serge</forename><surname>Abiteboul</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Oliver</forename><surname>Duschka</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of the 17th ACM SIGACT SIGMOD SIGART Sym. on Principles of Database Systems (PODS&apos;98)</title>
				<meeting>of the 17th ACM SIGACT SIGMOD SIGART Sym. on Principles of Database Systems (PODS&apos;98)</meeting>
		<imprint>
			<date type="published" when="1998">1998</date>
			<biblScope unit="page" from="254" to="265" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">On the decidability of query containment under constraints</title>
		<author>
			<persName><forename type="first">Diego</forename><surname>Calvanese</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Giuseppe</forename><surname>De Giacomo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Maurizio</forename><surname>Lenzerini</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of the 17th ACM SIGACT SIGMOD SIGART Sym. on Principles of Database Systems (PODS&apos;98)</title>
				<meeting>of the 17th ACM SIGACT SIGMOD SIGART Sym. on Principles of Database Systems (PODS&apos;98)</meeting>
		<imprint>
			<date type="published" when="1998">1998</date>
			<biblScope unit="page" from="149" to="158" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">Description logic framework for information integration</title>
		<author>
			<persName><forename type="first">Diego</forename><surname>Calvanese</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Giuseppe</forename><surname>De Giacomo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Maurizio</forename><surname>Lenzerini</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Daniele</forename><surname>Nardi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Riccardo</forename><surname>Rosati</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of the 6th Int. Conf. on the Principles of Knowledge Representation and Reasoning (KR&apos;98)</title>
				<meeting>of the 6th Int. Conf. on the Principles of Knowledge Representation and Reasoning (KR&apos;98)</meeting>
		<imprint>
			<date type="published" when="1998">1998</date>
			<biblScope unit="page" from="2" to="13" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<monogr>
		<title level="m" type="main">Schema and data integration methodology for DWQ</title>
		<author>
			<persName><forename type="first">Diego</forename><surname>Calvanese</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Giuseppe</forename><surname>De Giacomo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Maurizio</forename><surname>Lenzerini</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Daniele</forename><surname>Nardi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Riccardo</forename><surname>Rosati</surname></persName>
		</author>
		<imprint>
			<date type="published" when="1998-09">September 1998</date>
		</imprint>
		<respStmt>
			<orgName>DWQ Consortium</orgName>
		</respStmt>
	</monogr>
	<note type="report_type">Technical Report DWQ-UNIROMA-004</note>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">Source integration in data warehousing</title>
		<author>
			<persName><forename type="first">Diego</forename><surname>Calvanese</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Giuseppe</forename><surname>De Giacomo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Maurizio</forename><surname>Lenzerini</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Daniele</forename><surname>Nardi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Riccardo</forename><surname>Rosati</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of the 9th Int. Workshop on Database and Expert Systems Applications (DEXA&apos;98)</title>
				<meeting>of the 9th Int. Workshop on Database and Expert Systems Applications (DEXA&apos;98)</meeting>
		<imprint>
			<publisher>IEEE Computer Society Press</publisher>
			<date type="published" when="1998">1998</date>
			<biblScope unit="page" from="192" to="197" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">Representing and using interschema knowledge in cooperative information systems</title>
		<author>
			<persName><forename type="first">Tiziana</forename><surname>Catarci</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Maurizio</forename><surname>Lenzerini</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">J. of Intelligent and Cooperative Information Systems</title>
		<imprint>
			<biblScope unit="volume">2</biblScope>
			<biblScope unit="issue">4</biblScope>
			<biblScope unit="page" from="375" to="398" />
			<date type="published" when="1993">1993</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">Recursive plans for information gathering</title>
		<author>
			<persName><forename type="first">Oliver</forename><forename type="middle">M</forename><surname>Duschka</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Alon</forename><forename type="middle">Y</forename><surname>Levy</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of the 15th Int. Joint Conf. on Artificial Intelligence (IJCAI&apos;97)</title>
				<meeting>of the 15th Int. Joint Conf. on Artificial Intelligence (IJCAI&apos;97)</meeting>
		<imprint>
			<date type="published" when="1997">1997</date>
			<biblScope unit="page" from="778" to="784" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">Maintenance of materialized views: Problems, techniques, and applications</title>
		<author>
			<persName><forename type="first">A</forename><surname>Gupta</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><forename type="middle">S</forename><surname>Mumick</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">IEEE Bulletin of the Technical Committee on Data Engineering</title>
		<imprint>
			<biblScope unit="volume">18</biblScope>
			<biblScope unit="issue">2</biblScope>
			<biblScope unit="page" from="3" to="18" />
			<date type="published" when="1995">1995</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">Context Interchange: Overcoming the challenges of large-scale interoperable database systems in a dynamic environment</title>
		<author>
			<persName><forename type="first">Cheng</forename><forename type="middle">Hian</forename><surname>Goh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Stuart</forename><forename type="middle">E</forename><surname>Madnick</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Michael</forename><surname>Siegel</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of the 3rd Int. Conf. on</title>
				<meeting>of the 3rd Int. Conf. on</meeting>
		<imprint/>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<monogr>
		<title/>
		<author>
			<persName><forename type="first">D</forename><surname>Calvanese</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>De Giacomo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Lenzerini</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Nardi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Rosati</surname></persName>
		</author>
		<imprint>
			<biblScope unit="page" from="16" to="26" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">The Stanford data warehousing project</title>
		<author>
			<persName><forename type="first">Joachim</forename><surname>Hammer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Hector</forename><surname>Garcia-Molina</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Jennifer</forename><surname>Widom</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Wilburt</forename><surname>Labio</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Yue</forename><surname>Zhuge</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Information and Knowledge Management (CIKM&apos;94)</title>
				<imprint>
			<date type="published" when="1994">1994. 1995</date>
			<biblScope unit="volume">18</biblScope>
			<biblScope unit="page" from="41" to="48" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title level="a" type="main">Managing semantic heterogeneity in databases: A theoretical perspective</title>
		<author>
			<persName><forename type="first">Richard</forename><surname>Hull</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of the 16th ACM SIGACT SIGMOD SIGART Sym. on Principles of Database Systems (PODS&apos;97)</title>
				<meeting>of the 16th ACM SIGACT SIGMOD SIGART Sym. on Principles of Database Systems (PODS&apos;97)</meeting>
		<imprint>
			<date type="published" when="1997">1997</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<analytic>
		<title level="a" type="main">A framework for supporting data integration using the materialized and virtual approaches</title>
		<author>
			<persName><forename type="first">Richard</forename><surname>Hull</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Gang</forename><surname>Zhou</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of the ACM SIGMOD Int. Conf. on Management of Data</title>
				<meeting>of the ACM SIGMOD Int. Conf. on Management of Data</meeting>
		<imprint>
			<date type="published" when="1996">1996</date>
			<biblScope unit="page" from="481" to="492" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b13">
	<monogr>
		<title level="m" type="main">Building the Data Warehouse</title>
		<author>
			<persName><forename type="first">W</forename><forename type="middle">H</forename><surname>Inmon</surname></persName>
		</author>
		<imprint>
			<date type="published" when="1996">1996</date>
			<publisher>John Wiley &amp; Sons</publisher>
		</imprint>
	</monogr>
	<note>second edition</note>
</biblStruct>

<biblStruct xml:id="b14">
	<monogr>
		<title level="m" type="main">Conceptbase V3.1 user manual</title>
		<author>
			<persName><forename type="first">M</forename><surname>Jarke</surname></persName>
		</author>
		<idno>92-17</idno>
		<imprint>
			<date type="published" when="1992">1992</date>
			<publisher>Aachener Informatik-Berichte</publisher>
			<pubPlace>Aachen, Germany</pubPlace>
		</imprint>
	</monogr>
	<note type="report_type">Technical Report</note>
</biblStruct>

<biblStruct xml:id="b15">
	<analytic>
		<title level="a" type="main">Architecture and quality in data warehouses</title>
		<author>
			<persName><forename type="first">Matthias</forename><surname>Jarke</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Manfred</forename><forename type="middle">A</forename><surname>Jeusfeld</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Christoph</forename><surname>Quix</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Panos</forename><surname>Vassiliadis</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of the 10th Conf. on Advanced Information Systems Engineering (CAiSE&apos;98)</title>
		<title level="s">Lecture Notes in Computer Science</title>
		<meeting>of the 10th Conf. on Advanced Information Systems Engineering (CAiSE&apos;98)</meeting>
		<imprint>
			<publisher>Springer-Verlag</publisher>
			<date type="published" when="1998">1998</date>
			<biblScope unit="volume">1413</biblScope>
			<biblScope unit="page" from="93" to="113" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b16">
	<monogr>
		<title level="m" type="main">Fundamentals of Data Warehouses</title>
		<author>
			<persName><forename type="first">Matthias</forename><surname>Jarke</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Maurizio</forename><surname>Lenzerini</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Yannis</forename><surname>Vassiliou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Panos</forename><surname>Vassiliadis</surname></persName>
		</author>
		<imprint>
			<date type="published" when="1999">1999</date>
			<publisher>Press</publisher>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b17">
	<analytic>
		<title level="a" type="main">Answering queries using views</title>
		<author>
			<persName><forename type="first">Alon</forename><forename type="middle">Y</forename><surname>Levy</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Alberto</forename><forename type="middle">O</forename><surname>Mendelzon</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Yehoshua</forename><surname>Sagiv</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Divesh</forename><surname>Srivastava</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of the 14th ACM SIGACT SIGMOD SIGART Sym. on Principles of Database Systems (PODS&apos;95)</title>
				<meeting>of the 14th ACM SIGACT SIGMOD SIGART Sym. on Principles of Database Systems (PODS&apos;95)</meeting>
		<imprint>
			<date type="published" when="1995">1995</date>
			<biblScope unit="page" from="95" to="104" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b18">
	<analytic>
		<title level="a" type="main">Object exchange across heterogeneous information sources</title>
		<author>
			<persName><forename type="first">Yannis</forename><surname>Papakonstantinou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Hector</forename><surname>Garcia-Molina</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Jennifer</forename><surname>Widom</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of the 11th IEEE Int. Conf. on Data Engineering (ICDE&apos;95)</title>
				<meeting>of the 11th IEEE Int. Conf. on Data Engineering (ICDE&apos;95)</meeting>
		<imprint>
			<date type="published" when="1995">1995</date>
			<biblScope unit="page" from="251" to="260" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b19">
	<monogr>
		<title level="m" type="main">So far (schematically) yet so near (semantically)</title>
		<author>
			<persName><forename type="first">Amit</forename><surname>Sheth</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Vipul</forename><surname>Kashyap</surname></persName>
		</author>
		<imprint/>
	</monogr>
</biblStruct>

<biblStruct xml:id="b20">
	<monogr>
		<title level="m">Proc. of the IFIP DS-5 Conf. on Semantics of Interoperable Database Systems</title>
				<meeting>of the IFIP DS-5 Conf. on Semantics of Interoperable Database Systems<address><addrLine>Amsterdam</addrLine></address></meeting>
		<imprint>
			<publisher>North-Holland</publisher>
			<date type="published" when="1992">1992</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b21">
	<analytic>
		<title level="a" type="main">Designing the global Data Warehouse with SPJ views</title>
		<author>
			<persName><forename type="first">Dimitri</forename><surname>Theodoratos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Spyros</forename><surname>Ligoudistianos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Timos</forename><surname>Sellis</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of the 11th Conf. on Advanced Information Systems Engineering (CAiSE&apos;99)</title>
				<meeting>of the 11th Conf. on Advanced Information Systems Engineering (CAiSE&apos;99)</meeting>
		<imprint>
			<date type="published" when="1999">1999</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b22">
	<analytic>
		<title level="a" type="main">Information integration using logical views</title>
		<author>
			<persName><forename type="first">Jeffrey</forename><forename type="middle">D</forename><surname>Ullman</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of the 6th Int. Conf. on Database Theory (ICDT&apos;97)</title>
		<title level="s">Lecture Notes in Computer Science</title>
		<meeting>of the 6th Int. Conf. on Database Theory (ICDT&apos;97)</meeting>
		<imprint>
			<biblScope unit="volume">1186</biblScope>
			<biblScope unit="page" from="19" to="40" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b23">
	<analytic>
		<title level="a" type="main">Special issue on materialized views and data warehousing</title>
		<author>
			<persName><forename type="first">Jennifer</forename><surname>Widom</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">IEEE Bulletin on Data Engineering</title>
		<imprint>
			<biblScope unit="volume">18</biblScope>
			<biblScope unit="issue">2</biblScope>
			<date type="published" when="1995">1995</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b24">
	<analytic>
		<title level="a" type="main">Mediators in the architecture of future information systems</title>
		<author>
			<persName><forename type="first">Gio</forename><surname>Wiederhold</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">IEEE Computer</title>
		<imprint>
			<biblScope unit="volume">25</biblScope>
			<biblScope unit="issue">3</biblScope>
			<biblScope unit="page" from="38" to="49" />
			<date type="published" when="1992">1992</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b25">
	<analytic>
		<title level="a" type="main">Generating data integration mediators that use materializations</title>
		<author>
			<persName><forename type="first">Gang</forename><surname>Zhou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Richard</forename><surname>Hull</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Roger</forename><surname>King</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">J. of Intelligent Information Systems</title>
		<imprint>
			<biblScope unit="volume">6</biblScope>
			<biblScope unit="page" from="199" to="221" />
			<date type="published" when="1996">1996</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b26">
	<analytic>
		<title level="a" type="main">Using object matching and materialization to integrate heterogeneous databases</title>
		<author>
			<persName><forename type="first">Gang</forename><surname>Zhou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Richard</forename><surname>Hull</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Roger</forename><surname>King</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Jean-Claude</forename><surname>Franchitti</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of the 3rd Int. Conf. on Cooperative Information Systems (CoopIS&apos;95)</title>
				<meeting>of the 3rd Int. Conf. on Cooperative Information Systems (CoopIS&apos;95)</meeting>
		<imprint>
			<date type="published" when="1995">1995</date>
			<biblScope unit="page" from="4" to="18" />
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
