<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">Using Big Data Classification and Mining for the Decision-making 2.0 Process</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Rhizlane</forename><surname>Seltani</surname></persName>
							<email>sel.rhizlane@gmail.com</email>
							<affiliation key="aff0">
								<orgName type="department">LIROSA Laboratory Faculty of Science</orgName>
								<orgName type="laboratory">Information Technology and Modeling Systems Research Unit</orgName>
								<orgName type="institution">Abdelmalek Essaadi University Tetuan</orgName>
								<address>
									<country key="MA">Morocco</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="department">LIROSA Laboratory Faculty of Science</orgName>
								<orgName type="laboratory">Information Technology and Modeling Systems Research Unit</orgName>
								<orgName type="institution">Abdelmalek Essaadi University Tetuan</orgName>
								<address>
									<country key="MA">Morocco</country>
								</address>
							</affiliation>
							<affiliation key="aff2">
								<orgName type="laboratory">Information Technology and Modeling Systems Research</orgName>
							</affiliation>
							<affiliation key="aff3">
								<orgName type="department">Computer Science</orgName>
								<orgName type="laboratory" key="lab1">Operational Research and Applied Unit</orgName>
								<orgName type="laboratory" key="lab2">LIROSA Laboratory Statistics Laboratory Faculty of Science</orgName>
								<orgName type="institution">Abdelmalek Essaadi University</orgName>
							</affiliation>
							<affiliation key="aff4">
								<orgName type="department">Faculty of Science</orgName>
								<orgName type="institution">Abdelmalek Essaadi University Tetuan</orgName>
								<address>
									<settlement>Tetuan</settlement>
									<country key="MA">Morocco</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Noura</forename><surname>Aknin</surname></persName>
							<email>aknin@ieee.org</email>
							<affiliation key="aff0">
								<orgName type="department">LIROSA Laboratory Faculty of Science</orgName>
								<orgName type="laboratory">Information Technology and Modeling Systems Research Unit</orgName>
								<orgName type="institution">Abdelmalek Essaadi University Tetuan</orgName>
								<address>
									<country key="MA">Morocco</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="department">LIROSA Laboratory Faculty of Science</orgName>
								<orgName type="laboratory">Information Technology and Modeling Systems Research Unit</orgName>
								<orgName type="institution">Abdelmalek Essaadi University Tetuan</orgName>
								<address>
									<country key="MA">Morocco</country>
								</address>
							</affiliation>
							<affiliation key="aff2">
								<orgName type="laboratory">Information Technology and Modeling Systems Research</orgName>
							</affiliation>
							<affiliation key="aff3">
								<orgName type="department">Computer Science</orgName>
								<orgName type="laboratory" key="lab1">Operational Research and Applied Unit</orgName>
								<orgName type="laboratory" key="lab2">LIROSA Laboratory Statistics Laboratory Faculty of Science</orgName>
								<orgName type="institution">Abdelmalek Essaadi University</orgName>
							</affiliation>
							<affiliation key="aff4">
								<orgName type="department">Faculty of Science</orgName>
								<orgName type="institution">Abdelmalek Essaadi University Tetuan</orgName>
								<address>
									<settlement>Tetuan</settlement>
									<country key="MA">Morocco</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Souad</forename><surname>Amjad</surname></persName>
							<email>amjad_souad@uae.ma</email>
							<affiliation key="aff0">
								<orgName type="department">LIROSA Laboratory Faculty of Science</orgName>
								<orgName type="laboratory">Information Technology and Modeling Systems Research Unit</orgName>
								<orgName type="institution">Abdelmalek Essaadi University Tetuan</orgName>
								<address>
									<country key="MA">Morocco</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="department">LIROSA Laboratory Faculty of Science</orgName>
								<orgName type="laboratory">Information Technology and Modeling Systems Research Unit</orgName>
								<orgName type="institution">Abdelmalek Essaadi University Tetuan</orgName>
								<address>
									<country key="MA">Morocco</country>
								</address>
							</affiliation>
							<affiliation key="aff2">
								<orgName type="laboratory">Information Technology and Modeling Systems Research</orgName>
							</affiliation>
							<affiliation key="aff3">
								<orgName type="department">Computer Science</orgName>
								<orgName type="laboratory" key="lab1">Operational Research and Applied Unit</orgName>
								<orgName type="laboratory" key="lab2">LIROSA Laboratory Statistics Laboratory Faculty of Science</orgName>
								<orgName type="institution">Abdelmalek Essaadi University</orgName>
							</affiliation>
							<affiliation key="aff4">
								<orgName type="department">Faculty of Science</orgName>
								<orgName type="institution">Abdelmalek Essaadi University Tetuan</orgName>
								<address>
									<settlement>Tetuan</settlement>
									<country key="MA">Morocco</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Kamal</forename><forename type="middle">Eddine</forename><surname>El Kadiri</surname></persName>
							<email>elkadiri@uae.ma</email>
							<affiliation key="aff0">
								<orgName type="department">LIROSA Laboratory Faculty of Science</orgName>
								<orgName type="laboratory">Information Technology and Modeling Systems Research Unit</orgName>
								<orgName type="institution">Abdelmalek Essaadi University Tetuan</orgName>
								<address>
									<country key="MA">Morocco</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="department">LIROSA Laboratory Faculty of Science</orgName>
								<orgName type="laboratory">Information Technology and Modeling Systems Research Unit</orgName>
								<orgName type="institution">Abdelmalek Essaadi University Tetuan</orgName>
								<address>
									<country key="MA">Morocco</country>
								</address>
							</affiliation>
							<affiliation key="aff2">
								<orgName type="laboratory">Information Technology and Modeling Systems Research</orgName>
							</affiliation>
							<affiliation key="aff3">
								<orgName type="department">Computer Science</orgName>
								<orgName type="laboratory" key="lab1">Operational Research and Applied Unit</orgName>
								<orgName type="laboratory" key="lab2">LIROSA Laboratory Statistics Laboratory Faculty of Science</orgName>
								<orgName type="institution">Abdelmalek Essaadi University</orgName>
							</affiliation>
							<affiliation key="aff4">
								<orgName type="department">Faculty of Science</orgName>
								<orgName type="institution">Abdelmalek Essaadi University Tetuan</orgName>
								<address>
									<settlement>Tetuan</settlement>
									<country key="MA">Morocco</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">Using Big Data Classification and Mining for the Decision-making 2.0 Process</title>
					</analytic>
					<monogr>
						<imprint>
							<date/>
						</imprint>
					</monogr>
					<idno type="MD5">A40D1A34C903DD3877C32E27B3A8E078</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2023-03-24T11:56+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>Web 2.0</term>
					<term>Big Data</term>
					<term>Decision-making</term>
					<term>Data Classification</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>Web 2.0 is a revolution that has affected all areas, especially those of the new technology. Several new concepts have emerged, and a large number of innovative applications continue to come out every day. However, the social networking remains the racehorse of web 2.0, giving the user at the same time, a space for communication and for information sharing, which generates too much data, variable and characterized by a great creation speed. So, we can call them big data, and consider them a very rich and interesting basis for decision-making.</p><p>Big Data is a type of data which are characterized by the veracity, important volumes, and increasing variety and velocity, which makes their treatment and their processing by traditional database management tools a very difficult task. To overcome this problem, we opt for the big data classification process.</p><p>In this paper, we make a study of some big data classification methods, which are the most significant to be used to classify big data dedicated to decision-making, we detect their points of strength and weakness. Then we propose a framework summarizing the process of the formulation of the decision from the web 2.0 content, based on the big data classification, and we specify the criteria to be taken into account when choosing the big data classification methods intended for the decision-making.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>I. INTRODUCTION</head><p>The large variety of applications that appeared after the emergence of the web 2.0, produce a huge mass of various and diverse data. This wealth of information is a very important resource that we want to exploit to enrich our decision-making systems, to generate more meaningful and relevant decisions.</p><p>To classify and process data, various algorithms and techniques can be used. These methods differ depending on data types. In the case of big data, to retrieve information, there are various analysis techniques with different orientations and results, such as Representation-learning methods based geometric information, Stream Classification Algorithms, Associative Classifiers, etc.</p><p>In this paper, we discuss some methods that we can use to classify big data in order to elaborate decisions, report the strengths and the weaknesses. And therefore, present our global framework of decision-making 2.0 based on big data classification by describing the key pillars to be considered, to lead well the classification process for the purpose of decision-making.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>II. WEB 2.0</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>A. Definition</head><p>The web 2.0 is a combination of technologies, business plans and social skills, which allow users to create web content, and to be more involved in the process of the management of this content. It has brought many creative concepts and techniques that did not exist before and which made the electronic life simpler and more enjoyable <ref type="bibr" target="#b0">[1]</ref> <ref type="bibr" target="#b1">[2]</ref>. With the web 2.0, a new era of web use is born. Several applications have been developed and which have enriched our lives by allowing more of interactivity and collaboration, such as blogs and social networks <ref type="bibr" target="#b2">[3]</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>B. Architecture and Principles</head><p>Web 2.0 is based on a varied and robust architecture, founded on the introduction of new principles such as collaboration and interactivity, and the use of new applications like web interface design techniques, those of content syndication, XHTML, URL, etc. <ref type="bibr" target="#b3">[4]</ref>.</p><p>There are several emerging principles with the appearance of web 2.0, the most notable:</p><p>• Collaboration: This is an important aspect which describes when a user has the opportunity to contribute to the creation of the web content by creating their own content.</p><p>• Interactivity: one of the principles introduced by the web 2.0, interactivity is reflected by the interaction of the user with the web content and with other users.</p><p>These two principles constitute new trends that have changed our lives and our way of working; they are the basis of social networks, blogs, wikis, etc.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>III. BIG DATA</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>A. Definition</head><p>The term big data refers to data sets exchanged by connected objects in the web, and whose volumes are important and the variety and the velocity are increased <ref type="bibr" target="#b4">[5]</ref>. It is a compilation of data sets which are characterized by complexity and large volume, so their management and processing constitute a difficult task if we use traditional database management tools <ref type="bibr" target="#b5">[6]</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>B. Characteristics</head><p>Compared to other types of data, big data are different and have some specific characteristics. These differences concern several facets such as the data format, their volume, the time required for their creation, and their nature.</p><p>The principal features are: Data volume, data velocity, data variety, and data veracity. We can consider these elements as the characterizing pillars of big data (Fig. <ref type="figure" target="#fig_0">1</ref>), which make their processing and their analysis a special challenge.</p><p>• Data Volume: refers to a very important quantity of generated information. Data is considered as big data if their size is very large, so we cannot control them to make analysis easily.</p><p>• Data Variety: This makes analyzing this type of data a very difficult mission. We have more different data presentation formats: text, audio, image, etc.</p><p>• Data Velocity: It refers to the speed of creation and generation of data, which have been increased with the different new web applications.</p><p>• Data Veracity: Data veracity refers to the anomalies in data. Veracity in data analysis constitutes the biggest challenge to overcome, because veracity of data sources can largely affect the precision of analyses.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>IV. BIG DATA CLASSIFICATION FOR DECISION-MAKING</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>A. Clustering</head><p>Clustering (also called Cluster Analysis), is a task of data mining, which means the mission of assembling a set of objects, by the way that, objects which belong to the same group have more similarities than with those belonging to others groups. A group is called a cluster. The clustering was used for the first time in the classification tasks by Cattell in 1943 for personality psychology classification <ref type="bibr" target="#b6">[7]</ref>. Many clustering algorithms exist. Making the choice about which algorithm we must use, depends on the used cluster models <ref type="bibr" target="#b7">[8]</ref>. Among the most distinctive cluster models, we find: Centroid models, Distribution models, Group models, and Connectivity models.</p><p>In addition to its important role in the classification task, clustering has several advantages, such as the definition of information relating to the data, which were not revealed before, as associations, so we can look for new patterns. Also, clustering provides a logical structure which makes results read and interpreted easily. But it is not the case, if we opt for a large scale of clusters, because there are no definitive methods to determine precisely the suitable number of clusters.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>B. Decision Trees</head><p>The decision tree is a technique which we can use for classification tasks, by creating a model to predict the output value based on a number of input values <ref type="bibr" target="#b8">[9]</ref> <ref type="bibr" target="#b9">[10]</ref>. To use decision trees for classification, we construct trees starting from the root of the tree, and subsequently, proceeding down to its leaves.</p><p>A classification rule is developed based on example objects, which are known by their values of a collection of attributes. Then, the decision tree is expressed in function of the same attributes <ref type="bibr" target="#b10">[11]</ref>. Decision trees constitute a good way to represent decisions. An example of a decision tree form is shown in Fig. <ref type="figure" target="#fig_1">2</ref>. The decision trees are characterized by the robustness and the simplicity of understanding and interpreting. What is important about decision trees is that we can treat categorical and numerical data. On the other hand, decision trees are unstable, since a small change in the input data can affect the entire tree, by causing large changes in it.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>C. Support Vector Machines</head><p>Support vector machines, more usually SVMs, were introduced the first time for binary classification. They refer to a collection of methods used for regression and classification, to analyze data in order to verify to which category an element belongs <ref type="bibr" target="#b11">[12]</ref>. They can be used in several ways depending on the nature of their application, such as, text categorization, recognition of images, handwriting code, bioinformatics, etc. Some of the advantages of using SVM algorithms are: the robustness, the ability to learn well using a few parameters, and the computational efficiency. On the other hand, apply SVM can at times require taking into consideration many aspects of learning methods <ref type="bibr" target="#b12">[13]</ref>, SVM is oriented to be applicable directly in the case of two-class tasks. For that reason, when we deal with a multi-class task, we must use algorithms that can reduce it to a set of binary problems, or take account of all the classes at once by giving one formulation of optimization for all the data. Different methods of treating multi-class support vector machines continue to emerge <ref type="bibr" target="#b13">[14]</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>D. Associative Classification</head><p>Associative classification refers to a classification which is based on the use of association rules, by combining both classification and mining of associations <ref type="bibr">[15] [16]</ref>. Compared to other approaches, it is considered a highly accurate and competitive method, and can be applied in different ways <ref type="bibr">[17] [18]</ref> [19] <ref type="bibr" target="#b19">[20]</ref>. We can define three types of associative classification systems:</p><p>• Classification by Emerging Patterns: based on emerging patterns from a sample, which means event associations whose supports vary, depending on the dataset <ref type="bibr" target="#b20">[21]</ref>.</p><p>• Classification based on High-Order Pattern: is a classification system, which uses the algorithm of high-order pattern discovery, which detects considerable connection or association patterns using residual analysis in statistics <ref type="bibr" target="#b21">[22]</ref>.</p><p>• Associative Classifiers based on the Apriori Algorithm: the Apriori Algorithm is an algorithm which proceeds by determining the prevalent items in the database. So, we can define association rules to wrap up trends in the database. Many applications in various domains were done using this technique, such as market basket analysis <ref type="bibr" target="#b22">[23]</ref>.</p><p>Associative classification provides a high accuracy and it is easy to understand. However, it presents some challenges, like the lack of obvious criteria to classify objects. Since it is based on a large number of rules, the process of its elaboration is a time-consuming task, and it becomes a difficult task to select the suitable ones to develop the classifier.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>V. BIG DATA CLASSIFICATION AS A BASIS OF DECISION-MAKING 2.0</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>A. The Data Generation Process</head><p>Web 2.0 is a very important source of information. The user interacts continuously with the web content through collaborative applications, such as blogs, social networks, etc. With the increase of the number of actors on the web, the rate of information circulating on its channels increases. This large data flow generates the phenomenon of big data. Hence, web 2.0 is a rich platform of information, which can be treated to generate significant data. The user, primarily a passive actor, becomes in an instant an active actor, by transmitting opinions, which we propose to treat to ensure the mission of decision-making. These opinions can take, for example, the form of:</p><p>• A solution to a particular problem: a problem can be solved quickly and efficiently if the process of the generation of the solution is collaborative. So the reviews, including those of experts, about an issue may be of great use to make decisions to solve a given problem.</p><p>• A feedback to a given subject: any feedback contains in itself a notice that we can use to extract useful information which enriches the process of the decision making.</p><p>• A proposal for improvement: in any field, application, or system, we always look for ways of improvement, especially in the case of business. Opinions of clients and in particular those which are the most affected by the service, constitute a very important resource of inspiration to make the right decision of improvement.</p><p>• A complaint about a process, a product, a service: as with proposals for improvement, complaints also lead to the generation of significant decisions about a product, a process, a service, etc.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>B. Decision-Making 2.0 Based Big Data Classification Model</head><p>To exploit the generated data on the web 2.0, it is necessary to isolate the significant information. Circulating data through the web 2.0 applications such as social networks have the characteristics that make them a part of what is called big data. To process them, we proposed to adopt a classification process.</p><p>When we want to treat data based on the web 2.0 content in order to make decisions, a simple comment or tweet can generate a large data stream, through feedbacks of users. Taking account of these data in decision-making is very important to harness the collective intelligence.</p><p>After a preliminary process of data streams, to centralize those that meet our study needs, comes the classification phase to derive classified data according to specific parameters that depend on the issue in question. Finally, we get the basis of decision-making. The framework which presents the general process starting with the creation of the data on the web and ending with the decision-making is represented in Fig. <ref type="figure" target="#fig_2">3</ref>. In the decision-making 2.0 process, the classification serves as a passage from the raw data to the classified ones, which will be used later to generate decisions. Data which circulate across the web, especially in social networks, blogs, etc., are difficult to track and manage. So to overcome this problem, our classification process should follow some specifications to properly carry out this mission.</p><p>Taking into consideration our aim, which is decision-making based on the content reflected by the comments and the feedbacks of users, and to provide relevant decisions, which must be generated based on meaningful data, our classification process must be efficient and suit our purpose.</p><p>As already mentioned, the classification methods have drawbacks as well as advantages. That is why we opt for a combination, to elaborate a multiple classification model to exploit the strengths of the cited methods, taking into account different parameters, as shown in Fig. <ref type="figure" target="#fig_3">4</ref>.</p><p>• Accuracy: the classification process must guarantee high accuracy, to ensure the relevance of our decisions, which is a very important factor for the evaluation of the quality of the decision.</p><p>• Facility of understanding: it is essential that classification must be a process that provides results which are easy to understand. It means also, that results must be interpreted without difficulties.</p><p>• Flexibility: flexibility is represented by the fact that the classification can take into consideration categorical data, and not just the numerical ones, for more significant and common decisions.</p><p>In this paper, we gave a vision on the results of a developed study of the big data classification tools, we presented a summary of the results concerning the techniques that we can use to treat data coming from web 2.0, to ensure the decision-making mission. Then, we presented a general framework of the entire process and mentioned the criteria to take into consideration when choosing the classification method.</p><p>To exploit the strengths of the cited methods, we opt for a combination, to develop a multiple classification model, so that we can ensure three pillars of big data classification for a decision-making 2.0 process, which are accuracy, facility of understanding and flexibility.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head>Fig. 1 .</head><label>1</label><figDesc>Fig. 1. Characterizing Pillars of Big Data</figDesc><graphic coords="2,79.55,574.70,171.75,116.25" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_1"><head>Fig. 2 .</head><label>2</label><figDesc>Fig. 2. A General Form of a Decision Tree</figDesc><graphic coords="3,59.30,54.25,212.15,208.45" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_2"><head>Fig. 3 .</head><label>3</label><figDesc>Fig. 3. Process of the Generation of the Decision 2.0 Based on the Big Data Classification</figDesc><graphic coords="4,53.65,433.30,223.55,234.00" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_3"><head>Fig. 4 .</head><label>4</label><figDesc>Fig. 4. Pillars of Big Data Classification for a Decision-making 2.0 Process Model</figDesc><graphic coords="4,355.90,455.15,177.00,147.75" type="bitmap" /></figure>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" xml:id="foot_0">Proceedings of the International Conference on Big Data Cloud and Applications Tetuan, Morocco, May 25 -26, 2015</note>
		</body>
		<back>

			<div type="acknowledgement">
<div xmlns="http://www.tei-c.org/ns/1.0"><head>ACKNOWLEDGMENT</head><p>The authors of this paper would like to thank our Research Team, Information Technology and Modeling Systems Research Unit, and more generally, the Computer Science, Operational Research and Applied Statistics Laboratory, from the Faculty of Science, Abdelmalek Essaadi University of Tetuan, Morocco, for their great support.</p></div>
			</div>

			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">What is Web 2.0: Design patterns and business models for the next generation of software</title>
		<author>
			<persName><forename type="first">T</forename><surname>O&apos;Reilly</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Communications &amp; strategies</title>
		<imprint>
			<biblScope unit="issue">1</biblScope>
			<biblScope unit="page">17</biblScope>
			<date type="published" when="2007">2007</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">Web 2.0 principles and best practices</title>
		<author>
			<persName><forename type="first">T</forename><surname>O&apos;Reilly</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Musser</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">O&apos;Reilly Radar</title>
				<imprint>
			<date type="published" when="2006">2006</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">Structure and evolution of online social networks</title>
		<author>
			<persName><forename type="first">R</forename><surname>Kumar</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Novak</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Tomkins</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Link mining: models, algorithms, and applications</title>
				<meeting><address><addrLine>New York</addrLine></address></meeting>
		<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2010">2010</date>
			<biblScope unit="page" from="337" to="357" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<monogr>
		<author>
			<persName><forename type="first">T</forename><surname>O&apos;Reilly</surname></persName>
		</author>
		<title level="m">What is web 2.0</title>
				<imprint>
			<publisher>O&apos;Reilly Media, Inc</publisher>
			<date type="published" when="2009">2009</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<monogr>
		<author>
			<persName><forename type="first">P</forename><surname>Zikopoulos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Eaton</surname></persName>
		</author>
		<title level="m">Understanding big data. Analytics for enterprise class hadoop and streaming data</title>
				<imprint>
			<date type="published" when="2012">2012</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">Big data for development: challenges &amp; opportunities</title>
		<author>
			<persName><forename type="first">E</forename><surname>Letouzé</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">UN Global Pulse</title>
		<imprint>
			<biblScope unit="volume">47</biblScope>
			<date type="published" when="2012">2012</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">The description of personality: basic traits resolved into clusters</title>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">B</forename><surname>Cattell</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Abnormal and Social Psychology</title>
		<imprint>
			<biblScope unit="volume">38</biblScope>
			<biblScope unit="page" from="476" to="506" />
			<date type="published" when="1943">1943</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">Why so many clustering algorithms — a position paper</title>
		<author>
			<persName><forename type="first">V</forename><surname>Estivill-Castro</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">ACM SIGKDD Explorations Newsletter</title>
		<imprint>
			<biblScope unit="volume">4</biblScope>
			<biblScope unit="issue">1</biblScope>
			<biblScope unit="page" from="65" to="75" />
			<date type="published" when="2002">2002</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<monogr>
		<title level="m" type="main">Data mining with decision trees: theory and applications</title>
		<idno type="ISBN">978-9812771711</idno>
		<author>
			<persName><forename type="first">L</forename><surname>Rokach</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2008">2008</date>
			<publisher>World Scientific Pub Co Inc.</publisher>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">Decision trees: a recent overview</title>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">B</forename><surname>Kotsiantis</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Artificial Intelligence Review</title>
		<imprint>
			<biblScope unit="volume">39</biblScope>
			<biblScope unit="issue">4</biblScope>
			<biblScope unit="page" from="261" to="283" />
			<date type="published" when="2013">2013</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">Induction of decision trees</title>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">R</forename><surname>Quinlan</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Machine learning</title>
		<imprint>
			<biblScope unit="volume">1</biblScope>
			<biblScope unit="issue">1</biblScope>
			<biblScope unit="page" from="81" to="106" />
			<date type="published" when="1986">1986</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<monogr>
		<title level="m" type="main">The nature of statistical learning</title>
		<author>
			<persName><forename type="first">V</forename><forename type="middle">N</forename><surname>Vapnik</surname></persName>
		</author>
		<imprint>
			<date type="published" when="1995">1995</date>
			<publisher>Springer-Verlag</publisher>
			<pubPlace>New York</pubPlace>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<monogr>
		<title level="m" type="main">Support vector machines</title>
		<author>
			<persName><forename type="first">I</forename><surname>Steinwart</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Christmann</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2008">2008</date>
			<publisher>Springer Science &amp; Business Media</publisher>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b13">
	<analytic>
		<title level="a" type="main">A comparison of methods for multiclass support vector machines</title>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">W</forename><surname>Hsu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">J</forename><surname>Lin</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">IEEE Transactions on Neural Networks</title>
		<imprint>
			<biblScope unit="volume">13</biblScope>
			<biblScope unit="issue">2</biblScope>
			<biblScope unit="page" from="415" to="425" />
			<date type="published" when="2002">2002</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b14">
	<analytic>
		<title level="a" type="main">From association to classification: Inference using weight of evidence</title>
		<author>
			<persName><forename type="first">Y</forename><surname>Wang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">K C</forename><surname>Wong</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">IEEE Trans. On Knowledge and Data Engineering</title>
		<imprint>
			<biblScope unit="volume">15</biblScope>
			<biblScope unit="issue">3</biblScope>
			<biblScope unit="page" from="764" to="767" />
			<date type="published" when="2003">2003</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b15">
	<analytic>
		<title level="a" type="main">CPAR: Classification based on predictive association rules</title>
		<author>
			<persName><forename type="first">X</forename><surname>Yin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Han</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings 2003 SIAM International Conference on Data Mining (SDM&apos;03)</title>
				<meeting>2003 SIAM International Conference on Data Mining (SDM&apos;03)<address><addrLine>San Francisco, CA</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2003-05">May 2003</date>
			<biblScope unit="page" from="331" to="335" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b16">
	<analytic>
		<title level="a" type="main">CAEP: classification by aggregating emerging patterns</title>
		<author>
			<persName><forename type="first">G</forename><surname>Dong</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Wong</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Li</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of The Second International Conference on Discovery Science (DS&apos;99)</title>
				<meeting>The Second International Conference on Discovery Science (DS&apos;99)<address><addrLine>Japan</addrLine></address></meeting>
		<imprint>
			<date type="published" when="1999-12">December 1999</date>
			<biblScope unit="page" from="43" to="55" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b17">
	<analytic>
		<title level="a" type="main">DeEPS: a new instance-based lazy discovery and classification system</title>
		<author>
			<persName><forename type="first">J</forename><surname>Li</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Dong</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Ramamohanarao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Wong</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Machine Learning</title>
		<imprint>
			<biblScope unit="volume">54</biblScope>
			<biblScope unit="issue">2</biblScope>
			<biblScope unit="page" from="99" to="124" />
			<date type="published" when="2004">2004</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b18">
	<analytic>
		<title level="a" type="main">CMAR: accurate and efficient classification based on multiple class-association rules</title>
		<author>
			<persName><forename type="first">W</forename><surname>Li</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Han</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Pei</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of The 2001 IEEE International Conference on Data Mining (ICDM&apos;01)</title>
				<meeting>The 2001 IEEE International Conference on Data Mining (ICDM&apos;01)<address><addrLine>San Jose, CA</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2001-11">November 2001</date>
			<biblScope unit="page" from="369" to="376" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b19">
	<analytic>
		<title level="a" type="main">Integrating classification and association rule mining</title>
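		<author>
			<persName><forename type="first">B</forename><surname>Liu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><surname>Hsu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Ma</surname></persName>
		</author>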
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Fourth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</title>
				<meeting>the Fourth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining<address><addrLine>New York, NY</addrLine></address></meeting>
		<imprint>
			<date type="published" when="1998-08">August 1998</date>
			<biblScope unit="page" from="80" to="86" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b20">
	<analytic>
		<title level="a" type="main">Efficient mining of emerging patterns: discovering trends and differences</title>
		<author>
			<persName><forename type="first">G</forename><surname>Dong</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Li</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Fifth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</title>
				<editor>
			<persName><forename type="first">S</forename><surname>Chaudhuri</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">D</forename><surname>Madigan</surname></persName>
		</editor>
		<meeting>the Fifth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining<address><addrLine>San Diego, CA</addrLine></address></meeting>
		<imprint>
			<publisher>ACM Press</publisher>
			<date type="published" when="1999">1999</date>
			<biblScope unit="page" from="43" to="52" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b21">
	<monogr>
		<title level="m" type="main">High-order pattern discovery and analysis of discrete-valued data sets</title>
		<author>
			<persName><forename type="first">Y</forename><surname>Wang</surname></persName>
		</author>
		<imprint>
			<date type="published" when="1997">1997</date>
			<pubPlace>Waterloo, Ontario, Canada</pubPlace>
		</imprint>
		<respStmt>
			<orgName>University of Waterloo</orgName>
		</respStmt>
	</monogr>
	<note type="report_type">PhD thesis</note>
</biblStruct>

<biblStruct xml:id="b22">
	<analytic>
		<title level="a" type="main">Fast algorithms for mining association rules</title>
		<author>
			<persName><forename type="first">R</forename><surname>Agrawal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Srikant</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. 20th int. conf. very large data bases, VLDB</title>
				<meeting>20th int. conf. very large data bases, VLDB</meeting>
		<imprint>
			<date type="published" when="1994">1994</date>
			<biblScope unit="volume">1215</biblScope>
			<biblScope unit="page" from="487" to="499" />
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
