<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">GenAll Algorithm: Decorating Galois lattice with minimal generators</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Ben</forename><surname>Sondess</surname></persName>
							<email>sondess.bentekaya@laposte.net</email>
							<affiliation key="aff0">
								<orgName type="department" key="dep1">Département des Sciences de l&apos;Informatique</orgName>
								<orgName type="department" key="dep2">Faculté des Sciences</orgName>
								<orgName type="institution">Tunis Campus Universitaire</orgName>
								<address>
									<postCode>1060</postCode>
									<settlement>Tunis</settlement>
									<country key="TN">Tunisie</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><roleName>Sadok</roleName><forename type="first">Ben</forename><surname>Tekaya</surname></persName>
							<affiliation key="aff0">
								<orgName type="department" key="dep1">Département des Sciences de l&apos;Informatique</orgName>
								<orgName type="department" key="dep2">Faculté des Sciences</orgName>
								<orgName type="institution">Tunis Campus Universitaire</orgName>
								<address>
									<postCode>1060</postCode>
									<settlement>Tunis</settlement>
									<country key="TN">Tunisie</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Yahya</forename><surname>Yahia</surname></persName>
							<affiliation key="aff0">
								<orgName type="department" key="dep1">Département des Sciences de l&apos;Informatique</orgName>
								<orgName type="department" key="dep2">Faculté des Sciences</orgName>
								<orgName type="institution">Tunis Campus Universitaire</orgName>
								<address>
									<postCode>1060</postCode>
									<settlement>Tunis</settlement>
									<country key="TN">Tunisie</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><surname>Slimani</surname></persName>
							<affiliation key="aff0">
								<orgName type="department" key="dep1">Département des Sciences de l&apos;Informatique</orgName>
								<orgName type="department" key="dep2">Faculté des Sciences</orgName>
								<orgName type="institution">Tunis Campus Universitaire</orgName>
								<address>
									<postCode>1060</postCode>
									<settlement>Tunis</settlement>
									<country key="TN">Tunisie</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">S</forename><forename type="middle">Ben</forename><surname>Tekaya</surname></persName>
							<affiliation key="aff1">
								<orgName type="department" key="dep1">Département des Sciences de l&apos;Informatique</orgName>
								<orgName type="department" key="dep2">Faculté des Sciences</orgName>
								<orgName type="institution">Tunis Campus Universitaire</orgName>
								<address>
									<postCode>1060</postCode>
									<settlement>Tunis</settlement>
									<country key="TN">Tunisie</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">S</forename><forename type="middle">Ben</forename><surname>Yahia</surname></persName>
							<affiliation key="aff1">
								<orgName type="department" key="dep1">Département des Sciences de l&apos;Informatique</orgName>
								<orgName type="department" key="dep2">Faculté des Sciences</orgName>
								<orgName type="institution">Tunis Campus Universitaire</orgName>
								<address>
									<postCode>1060</postCode>
									<settlement>Tunis</settlement>
									<country key="TN">Tunisie</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Y</forename><surname>Slimani</surname></persName>
							<email>yahya.slimani@fst.rnu.tn</email>
							<affiliation key="aff1">
								<orgName type="department" key="dep1">Département des Sciences de l&apos;Informatique</orgName>
								<orgName type="department" key="dep2">Faculté des Sciences</orgName>
								<orgName type="institution">Tunis Campus Universitaire</orgName>
								<address>
									<postCode>1060</postCode>
									<settlement>Tunis</settlement>
									<country key="TN">Tunisie</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">GenAll Algorithm: Decorating Galois lattice with minimal generators</title>
					</analytic>
					<monogr>
						<imprint>
							<date/>
						</imprint>
					</monogr>
					<idno type="MD5">FD65A2465D0997AAA053E19F3DF9892C</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2023-03-23T20:17+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>Data Mining</term>
					<term>Formal Concept Analysis</term>
					<term>Generic association rules</term>
					<term>Generator</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>The problem of relevance and the usefulness of extracted association rules is becoming of primary importance, since an overwhelming number of association rules may be derived. This paper proposes an algorithm, called GenAll, to build a formal concept lattice, in which each formal concept is "decorated" by its minimal generators. The main characteristic of this algorithm is to use a refinement process of upper cover lists to determine, in a simultaneous manner, the set of formal concepts, their underlying partial order and the set of minimal generators associated to each formal concept. Experimental results have showed that the proposed algorithm is specially efficient for dense formal contexts compared to that of Nourine et al.. Response times pointed out by GenAll algorithm largely outperform those of Nourine et al..</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1">Introduction</head><p>Data Mining is a discipline which aims to discover regularities, in voluminous datasets, commonly expressed by association rules <ref type="bibr" target="#b0">[1]</ref>. Several studies in particular underlined the prohibitive number of association rules drawn from even reasonably sized datasets <ref type="bibr" target="#b1">[2]</ref>. This fact bootstrapped the development of more acute techniques or methods to reduce the size of the reported rule sets. In this context, the battery of results provided by the Formal Concept Analysis (FCA) permitted to define "irreductible" nucleus of association rule subset better known as generic bases. These bases constitute reduced sets of informative rules allowing to preserve the most relevant rules, without loss of information. The generation of these informative association rules relies on the extraction of formal concepts, its associated minimal generators and the underlying partial order <ref type="bibr" target="#b2">[3]</ref>. In this paper, our main claim is that there is no single algorithm allowing to obtain the generic base of association rules. In fact, a critical survey of dedicated literature pointed out the following remarks:</p><p>1. Oriented Data Mining algorithms generate the set of closed itemsets 1  and the set of associated minimal generators <ref type="bibr" target="#b3">[4]</ref><ref type="bibr" target="#b4">[5]</ref><ref type="bibr" target="#b5">[6]</ref>. The underlying order between closed itemsets is badly missing or not of interest. To obtain the underlying partial relation, one can use the algorithm proposed by Valtchev et al. <ref type="bibr" target="#b6">[7]</ref> to construct the covering graph. 2. Oriented formal concept algorithms generate the set of formal concepts and the underlying order <ref type="bibr" target="#b7">[8]</ref>. Here, the set of associated minimal generators is missing. In this case, we can apply the JEN algorithm <ref type="bibr" target="#b8">[9]</ref> to catch out minimal generators given that the covering graph is already generated.</p><p>In this paper, we propose a new algorithm, called GenAll, merging the above mentioned views. Indeed, aiming to derive generic bases of association rules, we propose to build the graph in which each formal concept is decorated by its associated minimal generators. As a starting point, we choose to improve the algorithm presented by Nourine et al. <ref type="bibr" target="#b10">[10]</ref>. This choice can be justified by the fact that the latter presents the best theoretical complexity, even though practical experiments highlighted that it is the worst compared to other algorithms <ref type="bibr" target="#b7">[8,</ref><ref type="bibr" target="#b11">11]</ref>. Hence, a careful review of Nourine et al. algorithm showed that it operates in two steps: An original approach, based on a special trie, to discover and store formal concepts. A costly repeated access to originally resident disk to compute the underlying order between formal concepts. However, the bad results of Nourine et al. algorithm are not a fatality and we feel that its bad performances can be largely improved using the GenAll algorithm proposed in this paper. The remainder of the paper is organized as follows: Section 2 briefly sketches the basic FCA constructs and stresses on the link between the notion of minimal blocker and minimal generator. Section 3 discusses in depth the Genall algorithm. Results of experiments carried out on benchmarking datasets are reported in Section 4. Finally, section 5 concludes this paper and points out some research directions for future work. We define two functions summarizing links between subsets of objects and subsets of attributes induced by R, that map sets of objects to sets of attributes and vice versa. Thus, for a set O ⊆ O, we define:</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2">Basic notions</head><formula xml:id="formula_0">φ(O) = {a|∀o, o ∈ O ⇒ (o, a) ∈ R} and for a set A ⊆ I, ψ(A) = {o|∀a, a ∈ A ⇒ (o, a) ∈ R}.</formula><p>Both functions φ and ψ form a Galois connection between the sets P(I) and P(O) <ref type="bibr" target="#b12">[12]</ref>. Consequently, both compound operators of φ and ψ are closure operators, in particular ω = φ • ψ is a closure operator.</p><p>A formal concept is a pair C K = (O, A), where O is called extent, and A is a closed itemset<ref type="foot" target="#foot_0">2</ref> , called intent. Furthermore, both O and A are related through the Galois connection, i.e., φ(O) = A and ψ(A) = O. Let an itemset A ⊆ I, the support of the itemset A in the context K is defined by: support(A) = |ψ(A)| |O| . Face: Let C K = (O, A) be a formal concept and let pred i (C K ) be the i th immediate predecessor of C K in a Galois lattice extracted from a context K. The i th face of the formal concept C K corresponds to the difference between its intent and the intent of its i th predecessor <ref type="bibr" target="#b13">[13]</ref>.</p><p>Let p be the number of immediate predecessors of the formal concept C K . The family of faces F C K of the formal concept C K is expressed by the following relation: <ref type="bibr" target="#b13">[13]</ref>. For example, according to the formal concept lattice presented on the figure <ref type="figure" target="#fig_4">3</ref>, F (abde,23) K = {b, e}. Minimal blocker: Let G = {G 1 , ..., G n } be a family of n sets. A blocker B of the family G is a set where the intersection with all sets G i ∈ G is not empty <ref type="bibr" target="#b13">[13]</ref>. A blocker B of family of <ref type="bibr" target="#b13">[13]</ref>. Given the family of faces F (abde,23) K = {b, e}, the minimal blocker B = {be} where the intersection with the two faces is always different of the empty set. Generator: Let C K be a formal concept and F C K its family of faces. The set G of the generators associated with the intent A of the formal concept C K , corresponds to the minimal blockers associated to the family of faces F C K <ref type="bibr" target="#b13">[13]</ref>. Equivalently, An itemset g ⊆ I is called minimal generator of a formal concept C K = (O, A), if and only if ω(g) = A and ∃g 1 ⊂ g such that ω(g 1 ) = A <ref type="bibr" target="#b14">[14]</ref>. Minimal association rules: An association rule is an assorted statistical metric implication between itemsets of the form r : X ⇒ (Y − X) in which X and Y are frequent itemsets (their supports are at least equal to a minimal threshold, called minsup), and X ⊂ Y . Itemsets X and (Y − X) are called, respectively, antecedent and conclusion of the rule r. The valid association rules are those whose measure of confidence Conf (r) = support (Y )  support(X) is greater than or equal to the minimal threshold of confidence, named minconf . An association rule whose confidence is equal to 1 is called exact association rule. Otherwise it is called approximative association rule. <ref type="bibr">Bastide et al.</ref> characterized what they called "the generic base for exact association rules" (adapting the global implication base of Guigues and Duquenne <ref type="bibr" target="#b15">[15]</ref>). The generic base for exact association rules is defined as follows : Trie<ref type="foot" target="#foot_1">3</ref> : It is a research tree, whose elements are stored in a condensed way. The "trie" used by the Nourine algorithm presented in <ref type="bibr" target="#b10">[10]</ref> is a special trie, whose edges are labelled by attributes of formal concepts, and only some nodes are labelled by the extents of formal concepts. The way going from the root node towards a labelled node by an extent form a formal concept. As shown in Figure <ref type="figure" target="#fig_1">1</ref>, (acde, 34) is a formal concept. </p><formula xml:id="formula_1">F C K = {A − Intent(pred i (C K )}, i ∈ {1, . . . , p}</formula><formula xml:id="formula_2">G = {G 1 , ..., G n } is said to be minimal if ∃B 1 ⊂ B and ∀G i ∈ G, B 1 ∩ G i = ∅</formula></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3">The proposed GenAll algorithm</head><p>In this section, we present a new algorithm, called GenAll, to derive generic bases of association rules. To do so, it gathers in a single pass all the required informations which are : the set of formal concepts and their associated minimal generators, the underlying partial order.</p><p>The GenAll algorithm is inspired in particular from the first part of the Nourine et al. algorithm <ref type="bibr" target="#b10">[10]</ref>, stressing on the construction of the formal concept trie. The choice of this algorithm is motivated by the fact that it presents the best theoretical complexity <ref type="bibr" target="#b7">[8,</ref><ref type="bibr" target="#b10">10,</ref><ref type="bibr" target="#b11">11]</ref>. In addition to its linear complexity, the originality of this algorithm resides in its way to discover formal concepts and their storage in a special trie.</p><p>However, this algorithm presents some disadvantages, that we can summarize as follows:</p><p>1. A costly systematic back to the dataset, originally resident disk, to compute the covering graph of the formal concepts, 2. Ignore the determination of minimal generators associated to each formal concept.</p><p>To avoid these disadvantages, we present a new algorithm, relying on the construction of a trie to calculate on the fly, the set of formal concepts, their associated minimal generators and the partial order between these formal concepts. Notations used by GenAll algorithm are given in Table <ref type="table" target="#tab_0">1</ref>, while the pseudocode is illustrated by Algorithm 1. In each iteration, the algorithm builds the set of formal concepts, the set of candidate minimal generators and a potential order, which has to be later refined, between the already discovered formal concepts. Each transaction is passed only once. Each iteration consists of two steps. The first calculates the formal concepts and generates a potential order between formal concepts, whereas the second aims to refine the order and to compute the associated minimal generators. These two steps are discussed in the following. For Each concept Ci ∈ F Do 6: </p><formula xml:id="formula_3">C.intent = Ci.intent ∩ t 7: If C.intent ∈ F Then 8: F = F ∪ C 9: C.extent = Ci.extent ∪ t.T ID 10: C.ImmSucc = {t} ∪ {Ci} \ {C} 11: L = L ∪ C {/*</formula></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.1">Generation of formal concepts</head><p>In this step (lines 4-24), we start by initializing the L list with the empty set. This list will be useful for the refinement of the set of the immediate successors of each formal concept found in an iteration. To calculate the set of formal concepts, we perform an intersection between the intent of each formal concept of the family F and each transaction of the dataset. Two cases are distinguishable:</p><p>1. The intent does not exist in the family F (a new formal concept is found): it must then be added to the family F . Then, the associated formal concept extent is calculated (line 9). We can notice that all the transactions of the formal context are formal concepts. Hence, the result of the intersection of the transactions with the formal concept where its intent is equal to the set of attributes of the formal context, is equal to the attributes of the transaction.</p><p>Potential immediate successors of this new formal concept are firstly initialized with the formal concept utilized to generate it (line 10). Indeed, to determine potential immediate successor of a formal concept, we should distinguish two particular cases: If the generated formal concept is equal to the transaction, then only the formal concept used in this intersection can be an immediate successor. Otherwise both the transaction and the formal concept are considered as potential immediate successors of the formal concept. Thereafter, this new formal concept is added to the list L (line 11). It is important to mention that this insertion is performed by maintaining an ascending order of formal concept intents cardinality. 2. The intent already exists in the family F : the formal concept extent (line 13) should be updated, and we check whether the concept is already existing in the list L (lines 14-15). Aiming to update the immediate successors list ImmSucc of the formal concept, and given that for each formal concept we maintain an ImmSucc list, we build a list LP containing the formal concepts used to generate it. This list is necessary, to be able to make comparisons and to update the ImmSucc list. Indeed, for each element in LP and for each element in ImmSucc list, we test the inclusion of these formal concepts using the Compare-concept function (line 20). The Compare-concept function is applied to update the ImmSucc list of the formal concepts under consideration. This list has to be modified in two cases: the first case, the element of LP is smaller (in term of inclusion) than one element of ImmSucc. In this case, the old immediate successor will be replaced by the new one. The second case, the two elements are incomparable: a new successor will then be added to the ImmSucc list of the formal concept under concern.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2">Refinement of immediate successor list and determination of minimal generators</head><p>We notice that the formal concepts, found in an iteration, represent a branch of the lattice, i.e., for each formal concept (except the largest one), we find another formal concept calculated in the same iteration, which covers it. For that, we traverse the L list of formal concepts found in an iteration and for each formal concept we call the find-Succ function (lines 25-26). This function is based on the fact that a formal concept of cardinality n (i.e., the length of its intent is equal to n) is at least covered by a formal concept of cardinality (n + 1) or higher. For that, and since the L list is sorted according to the cardinality's intent ascending order, we seek for each formal concept of cardinality n, a formal concept which covers it in the list of cardinality (n+1). In the event of defect, we pass to the cardinality (n + 2), until we find a minimal covering formal concept. Then, we compare these formal concepts to update the ImmSucc list.</p><p>Once the list of the immediate successors (ImmSucc) of the current formal concept is built, we traverse this list and for each immediate successor we have to compute the set of its minimal generators. To obtain such result, we calculate the associated faces of successors (line 28), then we compare them to the corresponding list of faces by calling the Compare-face function (line 30). The set of faces list f ace of the formal concept (immediate successor) can be modified in two cases as illustrated by the following:</p><p>1. The f ace is smaller (in term of inclusion) than an element of list f ace: in this case, we calculate the dif f erence f ace, and we replace the old face by the new one. If the obtained dif f erence f ace does not exist in one of the faces of this formal concept, then we remove each generator containing this difference. This removal is necessary, since a non existing attribute in the faces cannot exist in the generators. 2. The f ace is incomparable with all the faces of list f ace: in this case, it will be added to list f ace.</p><p>When the faces list is updated, the Compute-Min-blockers function of the JEN algorithm <ref type="bibr" target="#b8">[9]</ref> is applied for determining the minimal generators.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Example</head><p>Let us consider the Formal context illustrated by Figure <ref type="figure" target="#fig_4">3</ref> with the set of attributes I = {a, b, c, d, e, f } and the set of transactions of the formal context, denoted from 1 to 4. Firstly, the family F is initialized with the set of attributes  The application of the intersection operation with the single formal concept of the family F gives raise to a new formal concept with an intent equal to {abd}. The extent of this new formal concept, denoted C 2 , will be later computed. Thus, the family F is updated by adding C 2 : F = {(abcdef, ∅), (abd, ∅)}. At first, the set of immediate successor of C 2 is initialized with C 1 = (abcdef, ∅) and the extent of C 2 is initialized with union of C 1 .extent and the transaction ID under concern. Hence, C 2 .extent = {1} and C 2 .ImmSucc = {(abcdef, ∅)} and the L list is initialized with {(abd, 1)}.</p><formula xml:id="formula_4">R a b c d e f 1 × × × 2 × × × × 3 × × × × × 4 × × × × ×</formula></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Second step: Refinement of immediate successor list and determination of minimal generators</head><p>For the single formal concept in L = {(abd, 1)} we haven't to update the ImmSucc list of this concept, and (abd, 1).ImmSucc = {(abcdef, ∅)}. For each immediate successor we compute its minimal generators set using the Compute-Min-blockers function of the JEN algorithm <ref type="bibr" target="#b8">[9]</ref>. The f ace of the formal concept (abcdef, ∅) = cef. Then C 1 .list g en = {c, e, f }.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Transaction 2:</head><p>Let now,consider the second transaction {abde}: {abcdef } ∩ {abde} = {abde}, {abde} is an intent of a new formal concept. F = {(abcdef, ∅), (abd, 1), (abde, ∅)}, C 3 .extent = {2}, then: F = {(abcdef, ∅), (abd, 1), (abde, 2)} C 3 .ImmSucc = {(abcdef, ∅)} and L = {(abde, 2)}. These process above is repeated for the second formal concept in the family F : {abd} ∩ {abde} = {abd}, this set is an intent of an existing formal concept. Then we update the extent of this formal concept (line 13): C 2 .extent = {12} and L = {(abd, 12), (abde, 2), } C 2 .LP = {abde}. Using Compare-concept function we have {abde} ⊂ {abcdef } then we update C 2 .ImmSucc = {abde}. The second step is performed and we obtain:</p><formula xml:id="formula_5">C 2 .ImmSucc = {(abde, 2)} and C 3 .ImmSucc = {(abcdef, ∅)}.</formula><p>At the end of the thirst transaction we obtain: F = {(abcdef, ∅), (abd, 123), (abde, 23), (abcde, 3)}  <ref type="bibr">(ad, 1234)</ref>. The later formal concept, has as immediate successor the formal concept (acdef, 4). But we find in L list, a formal concept with cardinality 3 who can cover the formal concept (ad, 1234). Then we replace (acdef, 4) by (ade, 234) because {ade} ⊂ {acdef }. The same process is done for the other formal concepts. We have at the end: </p><formula xml:id="formula_6">C 2 .ImmSucc = {(abde, 23)} C 3 .ImmSucc = {(abcde, 3)} C 4 .ImmSucc = {(abcdef, ∅)} C 1 .list gen = {f } C 2 .list gen = {a, b, d} C 3 .list gen = {e}</formula></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4">Experimental results</head><p>We implemented the Nourine et al. and GenAll algorithms in C on a Linux platform with a Mandrake distribution to assess their relative performances. Both algorithms used the same data structure. Our experiments were carried out on Pentium IV with CPU clock rate of 2Ghz and 512Mb of main memory. To examine the practical efficiency of our algorithm, we run experiments on real and synthetic datasets, whose characteristics are detailed in what follows.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1">Test data</head><p>Hence the results depend on the dataset density, algorithms were tested on two types of datasets: synthetic data, which mimic market basket, and dense datasets, which belong to the domain of statistical databases. All these test datasets are freely available on Internet<ref type="foot" target="#foot_2">4</ref> . Chess base is derived from steps of chess game. Typically, real datasets are very dense. We also chose some synthetic databases. Generally, synthetic datasets are sparse compared to real ones.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.2">Computational experiments</head><p>In the sequel, we assess performances of Nourine et al. and GenAll algorithms both on sparse and dense datasets. Case of sparse datasets: For this type of datasets, the list of successors associated to a given formal concept changes frequently. Thus, comparisons of all the list of immediate successors will be carried out. Given that the successor list of the formal concept is long, in sparse datasets compared to that of the dense datasets, the step of updating the immediate successor list consumes more time comparatively to the Nourine et al. algorithm <ref type="bibr" target="#b10">[10]</ref>, in which this comparison is not performed. Indeed, this algorithm calculates, in each step, only the immediate predecessor list of each formal concept. Hence,an important factor in the performance of the algorithm is the average length of the transaction. In fact, the smaller the number of items per transaction is, the faster the GenAll algorithm, for determining formal concepts and their underlying order.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Case of dense datasets:</head><p>The number of reported formal concepts is by far more important than that obtained for sparse datasets. This will slacken the corresponding execution time compared to that of sparse datasets. From the point of view of execution time, we noticed that the list of immediate successors does not change frequently as it is the case for sparse datasets.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.3">Relative performances of Nourine and GenAll algorithms</head><p>In what follows, we will put the focus on comparing performances pointed out by GenAll algorithm and those obtained by Nourine et al. algorithm <ref type="bibr" target="#b10">[10]</ref>, in the context of formal concept discovery and their underlying partial order. The behavior of both algorithms is somehow different for different levels of density.</p><p>According to Figure <ref type="figure" target="#fig_6">5</ref>, we note that the GenAll algorithm largely outperforms Nourine et al. algorithm, especially on dense datasets. Indeed, the time ratio is very large (the average is 40 and sometimes reaches 83). However, ac-  </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5">Conclusion</head><p>We proposed a new algorithm for the construction at the same time the set of formal concepts, its underlying order and the determination of the set of minimal generators associated to each formal concept. GenAll algorithm <ref type="bibr" target="#b16">[16]</ref> performs only one scan on the datasets. We conduced a comparison of performances of two algorithms : GenAll and Nourine et al. algorithms. The results shows that GenAll outperforms largely Nourine et al. algorithm on dense datasets. By using the algorithm output, the derivation of the generic bases for the rules of association can be performed in a straightforward manner. In the near future, we plan to tackle two issues. Firstly, we try to improve Genall performances by using advanced data structures to reduce the retrieval operations cost. Secondly, we propose to examine the potential benefits of a parallel version of GenAll algorithm on a MIMD machine (IBM SP2 with 32 processors).</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head>Formal Concept:</head><label></label><figDesc>Let us consider an formal context K = (O, I, R), where O is a set of objects, I is a set of attributes (or items) and R is a binary relation between the objects and the attributes (R ⊆ O × I). Each couple (o, a) ∈ R expresses that the transaction o ∈ O contains the attribute a ∈ I. Within a context, objects are denoted by numbers and attributes by letters.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_1"><head>Fig. 1 .</head><label>1</label><figDesc>Fig. 1. Example of trie structure</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_2"><head>Algorithm 1</head><label>1</label><figDesc>Construction of formal concept lattice decorated by the minimal generators 1: Algorithm GenAll Input : Formal context K Output : Lexicographic tree of F(trie), ImmSucc of each formal concept, list gen Begin 2: F = (∅, I) {/* step 1: Generation of formal concepts */} 3: For Each transaction t ∈ K Do 4: L = ∅ 5:</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_3"><head>Fig. 2 .</head><label>2</label><figDesc>Fig. 2. Formal context K.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_4"><head>Fig. 3 .</head><label>3</label><figDesc>Fig. 3. Formal concept lattice extracted from the context K decorated by some minimal generators.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_5"><head>C 4 .</head><label>4</label><figDesc>list gen = {c} Transaction 4= {acdef } At the first step we obtain: F = {(abcdef, ∅), (abd, 123), (abde, 23), (abcde, 3), (acdef, 4), (ad, 1234), (ade, 234), (acde, 34)} C 5 .ImmSucc = {(abcdef, ∅)} C 6 .ImmSucc = {(abd, 123), (acdef, 4)} C 7 .ImmSucc = {(abde, 23), ((acdef, 4)} C 8 .ImmSucc = {(abcde, 3), (acdef, 4)} L = {(ad, 1234), (ade, 234), (acde, 34), (acdef, 4)}. Let consider the first formal concept in L list,</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_6"><head>C 5 .</head><label>5</label><figDesc>Figure2depicts the formal concept lattice associated to the formal context K, presented in Figure3. A sketch of some minimal generators, indicated by dotted lines, are decorating formal concepts nodes.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_7"><head>Fig. 4 .</head><label>4</label><figDesc>Fig. 4. Execution time of Nourine et al. and GenAll algorithms (case of sparse dataset)</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_8"><head>Fig. 5 .</head><label>5</label><figDesc>Fig. 5. Execution time of Nourine et al. and GenAll algorithms (case of dense data)</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1 .</head><label>1</label><figDesc>ImmSucc List of immediate successors of the formal concept list face List of faces of the formal concept list gen List of the minimal generators of the formal concept Notations</figDesc><table><row><cell></cell><cell>End If</cell></row><row><cell></cell><cell>{/* Updating C.ImmSucc */}</cell></row><row><cell>17:</cell><cell>C.LP = {t} ∪ {Ci} \ {C}</cell></row><row><cell>18:</cell><cell>For Each Pi ∈ C.LP Do</cell></row><row><cell>19:</cell><cell>For Each Succ ∈ C.ImmSucc Do</cell></row><row><cell>20:</cell><cell>Compare-concept(Succ, Pi)</cell></row><row><cell>21:</cell><cell>End For</cell></row><row><cell>22:</cell><cell>End For</cell></row><row><cell>23:</cell><cell>End If</cell></row><row><cell>24:</cell><cell>End For{/* Refinement of immediate successor list and determination of minimal</cell></row><row><cell></cell><cell>generators*/}</cell></row><row><cell>25:</cell><cell>For Each concept Ci ∈ L Do</cell></row><row><cell>26:</cell><cell>Ci.ImmSucc = find-Succ(Ci, L)</cell></row><row><cell>27:</cell><cell>For Each Succ ∈ Ci.ImmSucc Do</cell></row><row><cell>28:</cell><cell>face = Succ \ Ci</cell></row><row><cell>29:</cell><cell>For Each facei ∈ Succ.list face Do</cell></row><row><cell>30:</cell><cell>Succ.list face = compare-face(face, facei)</cell></row><row><cell>31:</cell><cell>End For</cell></row><row><cell>32:</cell><cell>End For</cell></row><row><cell>33:</cell><cell>End For</cell></row><row><cell cols="2">34: End For</cell></row></table><note>Sorted insertion */} 12: Else 13: C.extent = C.extent ∪ Ci.extent ∪ t.T ID 14: If C ∈ L Then 15: L = L ∪ C {/* Sorted insertion */} 16:</note></figure>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="2" xml:id="foot_0">an itemset is a set of items or attributes.</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="3" xml:id="foot_1">From reTrieval.</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="4" xml:id="foot_2">http://fimi.cs.helsinki.fi/data</note>
		</body>
		<back>
			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">Fast algorithms for mining association rules</title>
		<author>
			<persName><forename type="first">R</forename><surname>Agrawal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Skirant</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 20th International Conference on Very Large Databases</title>
				<meeting>the 20th International Conference on Very Large Databases<address><addrLine>Santiago, Chile</addrLine></address></meeting>
		<imprint>
			<date type="published" when="1994">1994</date>
			<biblScope unit="page" from="478" to="499" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">Mining Non-Redundant Association Rules</title>
		<author>
			<persName><forename type="first">M</forename><surname>Zaki</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Data Mining and Knowledge Discovery</title>
				<imprint>
			<date type="published" when="2004">2004</date>
			<biblScope unit="page" from="223" to="248" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">Approches d&apos;extraction de règles d&apos;association basées sur la correspondance de Galois</title>
		<author>
			<persName><forename type="first">S</forename><surname>Benyahia</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><forename type="middle">M</forename><surname>Nguifo</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Ingénierie des Systèmes d&apos;Information</title>
		<imprint>
			<biblScope unit="volume">3</biblScope>
			<biblScope unit="issue">4</biblScope>
			<biblScope unit="page" from="23" to="55" />
			<date type="published" when="2004">2004</date>
			<publisher>ISI</publisher>
		</imprint>
	</monogr>
	<note>Hermès-Lavoisier</note>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">CHARM: An efficient algorithm for closed itemset mining</title>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">J</forename><surname>Zaki</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">J</forename><surname>Hsiao</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2nd SIAM International Conference on Data Mining</title>
				<meeting>the 2nd SIAM International Conference on Data Mining<address><addrLine>Arlington</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2002">2002</date>
			<biblScope unit="page" from="34" to="43" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">Closet: An efficient algorithm for mining frequent closed itemsets</title>
		<author>
			<persName><forename type="first">J</forename><surname>Pei</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Han</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Mao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Nishio</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Tang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Yang</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the ACM SIGMOD DMKD&apos;00</title>
				<meeting>the ACM SIGMOD DMKD&apos;00<address><addrLine>Dallas,TX</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2002">2002</date>
			<biblScope unit="page" from="21" to="30" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<monogr>
		<title level="m" type="main">Data Mining : algorithmes d&apos;extraction et de réduction des règles d&apos;association dans les bases de données</title>
		<author>
			<persName><forename type="first">N</forename><surname>Pasquier</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2000">2000</date>
			<pubPlace>France</pubPlace>
		</imprint>
		<respStmt>
			<orgName>Université de Clermont-Ferrand II</orgName>
		</respStmt>
	</monogr>
	<note type="report_type">Doctorat d&apos;université</note>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">A fast algorithm for building the hasse diagram of a galois lattice</title>
		<author>
			<persName><forename type="first">P</forename><surname>Valtchev</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Missaoui</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Lebrun</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Colloque LaCIM 2000</title>
				<meeting>the Colloque LaCIM 2000<address><addrLine>Montréal (CA</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2000">2000</date>
			<biblScope unit="page" from="293" to="306" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">Comparing performance of algorithms for generating concept lattices</title>
		<author>
			<persName><forename type="first">S</forename><surname>Kuznetsov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Obedkov</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">JETAI</title>
		<imprint>
			<biblScope unit="volume">14</biblScope>
			<biblScope unit="page" from="189" to="216" />
			<date type="published" when="2002">2002</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">JEN : un algorithme efficace de construction de générateurs pour l&apos;identification des règles d&apos;association</title>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">L</forename><surname>Floc'h</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Fisette</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Missaoui</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Valtchev</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Godin</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Numéro spécial de la revue des Nouvelles Technologies de l&apos;Information</title>
		<imprint>
			<biblScope unit="volume">1</biblScope>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title/>
	</analytic>
	<monogr>
		<title level="j">Editions Cépaduès</title>
		<imprint>
			<biblScope unit="volume">1</biblScope>
			<biblScope unit="page" from="135" to="146" />
			<date type="published" when="2003">2003</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">A fast algorithm for building lattices</title>
		<author>
			<persName><forename type="first">L</forename><surname>Nourine</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Raynaud</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Information Processing Letters</title>
		<imprint>
			<biblScope unit="page" from="199" to="214" />
			<date type="published" when="1999">1999</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title level="a" type="main">Etude et conception d&apos;algorithmes de génération de concepts formels</title>
		<author>
			<persName><forename type="first">H</forename><surname>Fu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><forename type="middle">M</forename><surname>Nguifo</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Revue Ingénierie des Systèmes d&apos;Information</title>
		<imprint>
			<biblScope unit="volume">9</biblScope>
			<biblScope unit="page" from="109" to="132" />
			<date type="published" when="2004">2004</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<monogr>
		<author>
			<persName><forename type="first">M</forename><surname>Barbut</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Monjardet</surname></persName>
		</author>
		<title level="m">Ordre et classification. Algèbre et Combinatoire</title>
				<meeting><address><addrLine>Tome</addrLine></address></meeting>
		<imprint>
			<publisher>Hachette</publisher>
			<date type="published" when="1970">1970</date>
			<biblScope unit="volume">II</biblScope>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b13">
	<analytic>
		<title level="a" type="main">Scientific discovery through iterative transformation of concept lattices</title>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">L</forename><surname>Pfaltz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">M</forename><surname>Taylor</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of Workshop on Discrete Applied Mathematics in conjunction with the 2nd SIAM International Conference on Data Mining</title>
				<meeting>Workshop on Discrete Applied Mathematics in conjunction with the 2nd SIAM International Conference on Data Mining<address><addrLine>Arlington</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2002">2002</date>
			<biblScope unit="page" from="65" to="74" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b14">
	<analytic>
		<title level="a" type="main">Mining minimal non-redundant association rules using frequent closed itemsets</title>
		<author>
			<persName><forename type="first">Y</forename><surname>Bastide</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Pasquier</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Taouil</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Lakhal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Stumme</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the International Conference DOOD&apos;2000</title>
		<title level="s">Lecture Notes in Computer Sciences</title>
		<meeting>the International Conference DOOD&apos;2000</meeting>
		<imprint>
			<publisher>Springer-Verlag</publisher>
			<date type="published" when="2000">2000</date>
			<biblScope unit="page" from="972" to="986" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b15">
	<analytic>
		<title level="a" type="main">Familles minimales d&apos;implications informatives résultant d&apos;un tableau de données binaires</title>
		<author>
			<persName><forename type="first">J</forename><surname>Guigues</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Duquenne</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Mathématiques et Sciences Humaines</title>
		<imprint>
			<biblScope unit="page" from="5" to="18" />
			<date type="published" when="1986">1986</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b16">
	<analytic>
		<title level="a" type="main">Algorithme de construction d&apos;un treillis des concepts formels et de détermination des générateurs minimaux</title>
		<author>
			<persName><forename type="first">S</forename><surname>Bentekaya</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Benyahia</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Slimani</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 7th African Conference on Research in Computer Science</title>
				<meeting>the 7th African Conference on Research in Computer Science<address><addrLine>Tunis</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2004">2004</date>
			<biblScope unit="page" from="247" to="254" />
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
