<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">Using SVM and Clustering Algorithms in IDS Systems</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Peter</forename><surname>Scherer</surname></persName>
							<email>peter.scherer@vsb.cz</email>
							<affiliation key="aff0">
								<orgName type="department">Department of Computer Science</orgName>
								<orgName type="institution" key="instit1">FEI</orgName>
								<orgName type="institution" key="instit2">VSB -Technical University of Ostrava</orgName>
								<address>
									<addrLine>17. listopadu 15, 708 33</addrLine>
									<settlement>Ostrava-Poruba</settlement>
									<country key="CZ">Czech Republic</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="department">Department of Computer Science</orgName>
								<orgName type="institution" key="instit1">FEI</orgName>
								<orgName type="institution" key="instit2">VSB -Technical University of Ostrava</orgName>
								<address>
									<addrLine>17. listopadu 15, 708 33</addrLine>
									<settlement>Ostrava-Poruba</settlement>
									<country key="CZ">Czech Republic</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Martin</forename><surname>Vicher</surname></persName>
							<email>martin.vicher@vsb.cz</email>
							<affiliation key="aff0">
								<orgName type="department">Department of Computer Science</orgName>
								<orgName type="institution" key="instit1">FEI</orgName>
								<orgName type="institution" key="instit2">VSB -Technical University of Ostrava</orgName>
								<address>
									<addrLine>17. listopadu 15, 708 33</addrLine>
									<settlement>Ostrava-Poruba</settlement>
									<country key="CZ">Czech Republic</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="department">Department of Computer Science</orgName>
								<orgName type="institution" key="instit1">FEI</orgName>
								<orgName type="institution" key="instit2">VSB -Technical University of Ostrava</orgName>
								<address>
									<addrLine>17. listopadu 15, 708 33</addrLine>
									<settlement>Ostrava-Poruba</settlement>
									<country key="CZ">Czech Republic</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Pavla</forename><surname>Dráždilová</surname></persName>
							<email>pavla.drazdilova@vsb.cz</email>
							<affiliation key="aff0">
								<orgName type="department">Department of Computer Science</orgName>
								<orgName type="institution" key="instit1">FEI</orgName>
								<orgName type="institution" key="instit2">VSB -Technical University of Ostrava</orgName>
								<address>
									<addrLine>17. listopadu 15, 708 33</addrLine>
									<settlement>Ostrava-Poruba</settlement>
									<country key="CZ">Czech Republic</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="department">Department of Computer Science</orgName>
								<orgName type="institution" key="instit1">FEI</orgName>
								<orgName type="institution" key="instit2">VSB -Technical University of Ostrava</orgName>
								<address>
									<addrLine>17. listopadu 15, 708 33</addrLine>
									<settlement>Ostrava-Poruba</settlement>
									<country key="CZ">Czech Republic</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Jan</forename><surname>Martinovič</surname></persName>
							<email>jan.martinovic@vsb.cz</email>
							<affiliation key="aff0">
								<orgName type="department">Department of Computer Science</orgName>
								<orgName type="institution" key="instit1">FEI</orgName>
								<orgName type="institution" key="instit2">VSB -Technical University of Ostrava</orgName>
								<address>
									<addrLine>17. listopadu 15, 708 33</addrLine>
									<settlement>Ostrava-Poruba</settlement>
									<country key="CZ">Czech Republic</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="department">Department of Computer Science</orgName>
								<orgName type="institution" key="instit1">FEI</orgName>
								<orgName type="institution" key="instit2">VSB -Technical University of Ostrava</orgName>
								<address>
									<addrLine>17. listopadu 15, 708 33</addrLine>
									<settlement>Ostrava-Poruba</settlement>
									<country key="CZ">Czech Republic</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Jiří</forename><surname>Dvorský</surname></persName>
							<email>jiri.dvorsky@vsb.cz</email>
							<affiliation key="aff0">
								<orgName type="department">Department of Computer Science</orgName>
								<orgName type="institution" key="instit1">FEI</orgName>
								<orgName type="institution" key="instit2">VSB -Technical University of Ostrava</orgName>
								<address>
									<addrLine>17. listopadu 15, 708 33</addrLine>
									<settlement>Ostrava-Poruba</settlement>
									<country key="CZ">Czech Republic</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="department">Department of Computer Science</orgName>
								<orgName type="institution" key="instit1">FEI</orgName>
								<orgName type="institution" key="instit2">VSB -Technical University of Ostrava</orgName>
								<address>
									<addrLine>17. listopadu 15, 708 33</addrLine>
									<settlement>Ostrava-Poruba</settlement>
									<country key="CZ">Czech Republic</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Václav</forename><surname>Snášel</surname></persName>
							<email>vaclav.snasel@vsb.cz</email>
							<affiliation key="aff0">
								<orgName type="department">Department of Computer Science</orgName>
								<orgName type="institution" key="instit1">FEI</orgName>
								<orgName type="institution" key="instit2">VSB -Technical University of Ostrava</orgName>
								<address>
									<addrLine>17. listopadu 15, 708 33</addrLine>
									<settlement>Ostrava-Poruba</settlement>
									<country key="CZ">Czech Republic</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="department">Department of Computer Science</orgName>
								<orgName type="institution" key="instit1">FEI</orgName>
								<orgName type="institution" key="instit2">VSB -Technical University of Ostrava</orgName>
								<address>
									<addrLine>17. listopadu 15, 708 33</addrLine>
									<settlement>Ostrava-Poruba</settlement>
									<country key="CZ">Czech Republic</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">Using SVM and Clustering Algorithms in IDS Systems</title>
					</analytic>
					<monogr>
						<imprint>
							<date/>
						</imprint>
					</monogr>
					<idno type="MD5">886DFF071ECE0F466C8E969BF48A4E03</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2023-03-24T18:44+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>Intrusion Detection System</term>
					<term>K-means</term>
					<term>Farthest First Traversal</term>
					<term>COBWEB/CLASSIT</term>
					<term>SVM</term>
					<term>clustering</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>Intrusion Detection System (IDS) is a system, that monitors network traffic and tries to detect suspicious activity. In this paper we discuss the possibilities of application of clustering algorithms and Support Vector Machines (SVM) for use in the IDS. There we used K-means, Far-thestFirst and COBWEB algorithms as clustering algorithms and SVM as classification SVM of type 1, known too as C-SVM. By appropriate choosing of kernel and SVM parameters we achieved improvements in detection of intrusion to system. Finally, we experimentally verified the efficiency of applied algorithms in IDS.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1">Introduction</head><p>Three criteria are important for computer systems security: confidentiality, integrity and availability. Computer security is defined as a protection against threads for these criteria. The major manners of computer security are techniques like user authentication, data encryption, avoiding programming errors and firewalls. They are known as first line of defense. The last line of defense is used Intrusion Detection System (IDS). An Intrusion Detection System is software application (device respectively) that monitors network and system activities for malicious attempts, threads or policy violations and produces reports and statistics. Several machine-learning paradigms including soft computing approach <ref type="bibr" target="#b1">[2]</ref>, neural networks and fuzzy inference system <ref type="bibr" target="#b10">[11]</ref>, genetic algorithms <ref type="bibr" target="#b12">[14]</ref>, Bayesian network, matrix factorization approach <ref type="bibr" target="#b14">[16]</ref>, multivariate adaptive regression splines etc. have been investigated for the design of IDS. In this paper we investigate and evaluate the performance of Farthest First Traversal, K-means, COBWEB/CLASSIT clustering algorithms and classification via Support Vector Machines. The motivation for using the clustering algorithms and SVM is to improve the accuracy of the Intrusion Detection System.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2">Clustering Algorithms and Their Classification</head><p>Cluster analysis is the process of grouping the objects (usually represented as a vector of measurements, or a point in a multidimensional space) so that the objects of one cluster are similar to each other whereas objects of different clusters are dissimilar.</p><p>Clustering is the unsupervised classification of objects (observations, data items, instances, cases, patterns, or feature vectors) into groups, clusters. In <ref type="bibr" target="#b3">[4]</ref> author cite that from a machine learning perspective, clusters correspond to hidden patterns, the search for clusters is unsupervised learning, and the resulting system represents a data concept. Therefore, clustering is unsupervised learning of a hidden data concept.</p><p>The applications of clustering often deal with large datasets and data with many attributes. Clustering is related to many other fields. The classic introduction to clustering in pattern recognition is given in <ref type="bibr" target="#b6">[7]</ref>. Machine learning clustering algorithms were applied to image segmentation and computer vision <ref type="bibr" target="#b11">[12]</ref>. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.1">Classification of Clustering Algorithms</head><p>The various clustering algorithms can be classified according to how they create clusters of objects. Such division of clustering algorithms is shown in Fig. <ref type="figure" target="#fig_0">1</ref>.</p><p>For our intention of using the clustering algorithms in an IDS, we need algorithms that can determine the jurisdiction of the object X to cluster, even if the object X was not included in the set of objects, from which we generate clusters. For this purpose we chose the algorithms K-means, Farthest First Traversal (they are partitional algorithms) and Cobweb/CLASSIT (this is a conceptual clustering algorithm).</p><p>Partitional Algorithms Partitional algorithms divide the objects into several disjoint sets and creates a one level of non-overlapping clusters. But the problem is to determine how many clusters has algorithm detect.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Algorithms of Conceptual Clustering</head><p>Algorithms of conceptual clustering create by incremental way, the structure of the data by division of observed objects into subclasses. The result of these algorithms is a classification tree. Each node of the tree contains the objects of its child nodes, so root of this tree contains a all objects. According to the above classification are a these algorithms hierarchical, incremental algorithms that combine both -aggregation and division approach.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.2">Farthest First Traversal</head><p>Farthest first traversal (FFT) algorithm is partitional clustering algorithm. This algorithm first select K objects as the centers of clusters and then assign other objects into the cluster (according to measure of dissimilarity to centers of the clusters). The first center of cluster is chosen randomly, the second center of cluster as most dissimilar to first center of cluster and every other center of cluster is chosen as the one whose value of measure of dissimilarity <ref type="bibr" target="#b8">[9]</ref> to the previously selected centers of the clusters is greatest.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.3">K-means</head><p>Algorithm K-means, according to the classification above is partitional clustering algorithm. The main idea of the algorithm is to find K centers (one for each cluster) of clusters. The question is, how choose these centers of clusters, because this choice will significantly affect the resulting clusters. The best would be to pick center of cluster least similar to each other. The next step is assign each object from data set to the center of cluster, to which is most similar. Once this occurs, the next step in the classification is to determine the new center of each cluster (centers are derived from clusters of objects). Again, is performed the classification of objects into different clusters according to their dissimilarity <ref type="bibr" target="#b8">[9]</ref> with new centers of clusters. These steps are repeated until we find out that centers of clusters no longer change or until is achieved maximum number of repetitions.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.4">COBWEB/CLASSIT</head><p>This incremental clustering algorithm creates a hierarchical structure of clusters by using four operators (operator for creating a new cluster, inserting an object into an existing cluster, union of two clusters into one cluster and splitting cluster into two clusters) <ref type="bibr" target="#b7">[8]</ref> and the categorization utility <ref type="bibr" target="#b13">[15]</ref>. When processing object into the cluster is always used one of the operators, but always are tested all four operators and categorization utility evaluate distribution of clusters after applying one of the operator. Finally, as the resulting distribution is chosen distribution that was evaluated (by using a categorization utility) as the best.</p><p>3 Classification SVM of type 1 (C-SVM) and their parameters  </p><formula xml:id="formula_0">1 2 w 2 + C i ε i (1)</formula><p>with respect to: 1</p><formula xml:id="formula_1">− ε i − y i (w • x i + b) ≤ 0, −ε i ≤ 0, i = 1, 2 . . . , m</formula><p>Regardless of having some elements misclassified (Fig. <ref type="figure" target="#fig_2">2</ref>) it is possible to balance between the incorrectly classified instances and the width of the separating margin. In this context, the positive slack variables ε i and the penalty parameter C are introduced. Slacks represents the distances of misclassified points to the initial hyper-plane, while parameter C models the penalty for misclassified training points, that trades-off the margin size for the number of erroneous classifications (bigger the C smaller the number of misclassifications and smaller the margin). The goal is to find a hyper-plane that minimizes misclassification errors while maximizing the margin between classes. This optimization problem is usually solved in its dual form (dual space of Lagrange multipliers):</p><formula xml:id="formula_2">w * = m i=1 α i y i x i<label>(2)</label></formula><p>where C ≥ α i ≥ 0, i = 1, . . . , m, and where w * is a linear combination of training examples for an optimal hyper-plane. However, it can be shown that w * represents a linear combination of Support Vectors x i for which the corresponding α i Langrangian multipliers are non-zero values. Support Vectors for which C &gt; α i &gt; 0 condition holds, belong either to h 1 or h −1 . Let x a and x b be two such Support Vectors (C &gt; α a , α b &gt; 0) for which y a = 1 and y b = −1. Now b could be calculated from b * = 0.5w * (x a + x b ), so that classification (decision) function finally becomes:</p><formula xml:id="formula_3">f (x) = sgn m i=1 α i y i (x i • x) + b *<label>(3)</label></formula><p>To solve non-linear classification , one can propose the mapping of instances to a so-called feature space of very high dimension: ϕ : R n → R d , n d i.e. x → ϕ(x). The basic idea of this mapping into a high dimensional space is to transform the non-linear case into linear and then use the general algorithm already explained above Eqs. (1), (2), and (3). In such space, dot-product from Eq. (3) transforms into ϕ(x i ) • ϕ(x). A certain class of functions called kernels <ref type="bibr" target="#b5">[6]</ref> for which k(x, y) = ϕ(x) • ϕ(y) holds, are called kernels. They represent dot-products in some high dimensional dot-product spaces (feature spaces), and yet could be easily recomputed into the original space. As example was chosen a Radial Basis Function Eq. 
( <ref type="formula" target="#formula_4">4</ref>), also known as Gaussian kernel <ref type="bibr" target="#b0">[1]</ref>, and was one of implemented kernels in the experimenting procedure.</p><formula xml:id="formula_4">k(x, y) = exp(−γ x − y 2 )<label>(4)</label></formula><p>Now Eq. ( <ref type="formula" target="#formula_3">3</ref>) becomes:</p><formula xml:id="formula_5">f (x) = sgn m i=1 α i y i k(x i • x) + b *<label>(5)</label></formula><p>After removing all training data that are not Support Vectors and retraining the classifier, the same result would be obtained <ref type="bibr" target="#b5">[6]</ref> by applying the function above. Thus, one depicted, Support Vectors could replace the entire training set, which is the central idea of SVM implementation.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4">Experiments</head><p>The data used for training and testing was prepared by the Agency DARPA intrusion detection evaluation program in 1998 at MIT Lincoln Labs [13]. Experiments were performed on a collection containing five pairs of data sets: the learning set (5092 vectors of 42 attributes) and testing set (6890 vectors of 42 attributes). Each pair represents a learning and testing data for one type of five classes of network attacks. Individual vectors describing the network traffic are described by 41 attributes (range 0 − 1, is therefore not necessary to normalization). The 42 nd attribute was used in learning process. The attribute determines type of network attack in the question. In the case of testing, the existence of the attribute was neglected. We measure only classification accuracy of the vector, that describes the network attack.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1">Classification Using SVM type 1 (C-SVM)</head><p>It is necessary to determine the appropriate combination of parameters C and γ for better efficiency. In our experiment, the parameter C is in the range of 2 −5 and 2 15 in increments of powers of 2 and a parameter γ is in the range of 2 −15 and 2 3 in increments of powers of 2. We used 110 combinations of parameters C γ in total. In the case of same results of prediction with different parameters C and γ, the combination of parameters with the lowest time-intensive calculation model was chosen. In Tables 1,2, 3, and 4 is possible to see the best result combination.</p><p>The four most utilized kernel functions (linear, polynomial, RBF and sigmoid) was used for process of learning. As technology, we used library LibSVM <ref type="bibr" target="#b4">[5]</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.2">Classification with Algorithm Farthest First Traversal</head><p>During experiments with the algorithm Farthest First Traversal we tried to reveal the effect of number of generated clusters on success rate of the classification of network traffic, and on training time. The measure used by this algorithm was cosine measure. Tables <ref type="table" target="#tab_5">5 and 6</ref> shows results of each experiments with algorithm FFT. Of these it is possible to deduce that the time of training increases with the number of generated clusters. We tried to optimize this algorithm by using data structure KD-tree. Training time of this algorithm with and without using of KD-tree is shown in Tables <ref type="table" target="#tab_5">5 and 6</ref>. As you can see in the Tables <ref type="table" target="#tab_5">5 and 6</ref> training time of this algorithm with using KD-tree was reduced by almost half. Table <ref type="table" target="#tab_6">7</ref> presents the results of the algorithm FFT with using a KD-tree for each class of attack. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.3">Classification with Algorithm K-means</head><p>During experiments with algorithm K-means we tried to reveal the influence of the number of generated clusters on training time and success rate of the network traffic classification. The measure that was used by this algorithm was cosine measure. In Tables 8, 9 and 10 are shown results for each experiment. Of these it is possible to deduce that the time of training is increasing with the number of generated clusters. We tried to optimize this algorithm by using data structure KD-tree. Training time of this algorithm with and without using of KD-tree is shown in Tables <ref type="table" target="#tab_8">8 and 9</ref>. As you can see in the Tables <ref type="table" target="#tab_8">8 and 9</ref>, training time of this algorithm with using KD-tree not declined as significantly as at algorithm FFT. For certain number of generated clusters was training time even worse than at algorithm without using KD-tree. This is due overhead of creating KD-tree in each iteration of the algorithm and for a small number of generated clusters is more effective search cluster, where object fall, sequentially than by using KD-tree. Table <ref type="table" target="#tab_9">10</ref> presents the results of algorithm K-means using a KD-tree for each class of attack.  </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5">Conclusion</head><p>In this paper we have described the method for the illustrated prediction accuracy by using clustering algorithms and SVM in the IDS. In Table <ref type="table" target="#tab_12">13</ref> for each used algorithm is shown success rate for each class of attack. The best average success rate has SVM algorithm, more than 99% (best of all is algorithm SVM that is using the RBF kernel, it has a success rate 99.722%). The average success rate of other algorithms was between 91.228% and 98.998%. It will be useful to compare these two methods on other document collections. In our future work we will investigate other kernel functions to search for better attacks prediction in the IDS, SVM paralelization and optimalization clustering algorithms.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head>Fig. 1 .</head><label>1</label><figDesc>Fig. 1. A taxonomy of clustering approaches.</figDesc><graphic coords="2,45.94,271.41,346.42,101.51" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_1"><head>3. 1</head><label>1</label><figDesc>Support Vector Machines Classifier Support Vector Machine (SVM) is a preferably technique for linear binary data classification. In [10] authors state that a classification task usually involves separating data into training and testing sets. Each instance in the training set contains one target value (i.e. the class labels) and several attributes (i.e. the features or observed variables). The goal of SVM is to produce a model (based on the training data) which predicts the target values of the test data given only the test data attributes.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_2"><head>Fig. 2 .</head><label>2</label><figDesc>Fig. 2. General linear binary classification case.</figDesc><graphic coords="4,105.57,219.81,227.17,195.08" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1 .</head><label>1</label><figDesc>Classification using linear kernel.</figDesc><table><row><cell cols="3">Attack type Training time C</cell><cell>γ</cell><cell>Accuracy</cell></row><row><cell></cell><cell>(s)</cell><cell></cell><cell></cell><cell>(%)</cell></row><row><cell>Normal</cell><cell>0.71</cell><cell cols="2">2 −1 2 −1</cell><cell>99.55</cell></row><row><cell>Probe</cell><cell>0.25</cell><cell cols="2">2 3 2 −1</cell><cell>99.81</cell></row><row><cell>DOS</cell><cell>0.35</cell><cell cols="2">2 7 2 −3</cell><cell>99.81</cell></row><row><cell>U2R</cell><cell>0.17</cell><cell cols="2">2 3 2 −3</cell><cell>99.80</cell></row><row><cell>R2L</cell><cell>0.35</cell><cell cols="2">2 5 2 −5</cell><cell>99.64</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_1"><head>Table 2 .</head><label>2</label><figDesc>Classification using polynomial kernel.</figDesc><table><row><cell cols="3">Attack type Training time C</cell><cell>γ</cell><cell>Accuracy</cell></row><row><cell></cell><cell>(s)</cell><cell></cell><cell></cell><cell>(%)</cell></row><row><cell>Normal</cell><cell>0.78</cell><cell cols="2">2 13 2 −7</cell><cell>99.83</cell></row><row><cell>Probe</cell><cell>0.24</cell><cell cols="2">2 −3 2 −1</cell><cell>99.81</cell></row><row><cell>DOS</cell><cell>0.47</cell><cell cols="2">2 9 2 −5</cell><cell>97.18</cell></row><row><cell>U2R</cell><cell>0.16</cell><cell cols="2">2 15 2 −5</cell><cell>99.80</cell></row><row><cell>R2L</cell><cell>0.24</cell><cell cols="2">2 15 2 −5</cell><cell>99.71</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_2"><head>Table 3 .</head><label>3</label><figDesc>Classification using RBF kernel.</figDesc><table><row><cell cols="3">Attack type Training time C</cell><cell>γ</cell><cell>Accuracy</cell></row><row><cell></cell><cell>(s)</cell><cell></cell><cell></cell><cell>(%)</cell></row><row><cell>Normal</cell><cell>0.88</cell><cell cols="2">2 1 2 −3</cell><cell>99.87</cell></row><row><cell>Probe</cell><cell>0.26</cell><cell cols="2">2 5 2 −5</cell><cell>99.90</cell></row><row><cell>DOS</cell><cell>0.29</cell><cell cols="2">2 15 2 −7</cell><cell>99.88</cell></row><row><cell>U2R</cell><cell>0.18</cell><cell cols="2">2 9 2 −3</cell><cell>99.83</cell></row><row><cell>R2L</cell><cell>0.37</cell><cell cols="2">2 13 2 −7</cell><cell>99.75</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_3"><head>Table 4 .</head><label>4</label><figDesc>Classification using sigmoid kernel.</figDesc><table><row><cell cols="3">Attack type Training time C</cell><cell>γ</cell><cell>Accuracy</cell></row><row><cell></cell><cell>(s)</cell><cell></cell><cell></cell><cell>(%)</cell></row><row><cell>Normal</cell><cell>0.95</cell><cell cols="2">2 5 2 −5</cell><cell>99.58</cell></row><row><cell>Probe</cell><cell>0.38</cell><cell cols="2">2 7 2 −5</cell><cell>99.88</cell></row><row><cell>DOS</cell><cell>0.43</cell><cell cols="2">2 15 2 −9</cell><cell>99.83</cell></row><row><cell>U2R</cell><cell>0.20</cell><cell cols="2">2 5 2 −3</cell><cell>99.83</cell></row><row><cell>R2L</cell><cell>0.42</cell><cell cols="2">2 11 2 −7</cell><cell>99.65</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_4"><head>Table 5 .</head><label>5</label><figDesc>Results of algorithm FFT for class of attack Normal without using KD-Tree.</figDesc><table><row><cell cols="3">Number of clusters Training time Accuracy</cell></row><row><cell></cell><cell>(s)</cell><cell>(%)</cell></row><row><cell>10</cell><cell>2.99</cell><cell>74.82</cell></row><row><cell>20</cell><cell>6.89</cell><cell>74.73</cell></row><row><cell>30</cell><cell>8.42</cell><cell>81.86</cell></row><row><cell>40</cell><cell>12.72</cell><cell>77.90</cell></row><row><cell>50</cell><cell>15.21</cell><cell>77.29</cell></row><row><cell>100</cell><cell>25.24</cell><cell>82.03</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_5"><head>Table 6 .</head><label>6</label><figDesc>Results of algorithm FFT for class of attack Normal with using KD-Tree.</figDesc><table><row><cell cols="3">Number of clusters Training time Accuracy</cell></row><row><cell></cell><cell>(s)</cell><cell>(%)</cell></row><row><cell>10</cell><cell>1.64</cell><cell>74.82</cell></row><row><cell>20</cell><cell>5.38</cell><cell>74.73</cell></row><row><cell>30</cell><cell>4.54</cell><cell>81.86</cell></row><row><cell>40</cell><cell>5.96</cell><cell>77.90</cell></row><row><cell>50</cell><cell>7.51</cell><cell>77.29</cell></row><row><cell>100</cell><cell>16.31</cell><cell>82.03</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_6"><head>Table 7 .</head><label>7</label><figDesc>Results of algorithm FFT for each class of attack with using KD-Tree.</figDesc><table><row><cell cols="3">Attack type Training time Accuracy</cell></row><row><cell></cell><cell>(s)</cell><cell>(%)</cell></row><row><cell>Normal</cell><cell>5.96</cell><cell>84.92</cell></row><row><cell>Probe</cell><cell>5.94</cell><cell>98.77</cell></row><row><cell>DOS</cell><cell>6.18</cell><cell>82.64</cell></row><row><cell>U2R</cell><cell>5.85</cell><cell>95.04</cell></row><row><cell>R2L</cell><cell>5.99</cell><cell>99.27</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_7"><head>Table 8 .</head><label>8</label><figDesc>Results of algorithm K-means for class of attack Normal without using KD-Tree.</figDesc><table><row><cell cols="3">Number of clusters Training time Accuracy</cell></row><row><cell></cell><cell>(s)</cell><cell>(%)</cell></row><row><cell>10</cell><cell>29.53</cell><cell>94.71</cell></row><row><cell>20</cell><cell>46.69</cell><cell>99.93</cell></row><row><cell>30</cell><cell>60.89</cell><cell>98.64</cell></row><row><cell>40</cell><cell>74.88</cell><cell>99.62</cell></row><row><cell>50</cell><cell>82.24</cell><cell>99.46</cell></row><row><cell>100</cell><cell>147.70</cell><cell>98.27</cell></row><row><cell cols="3">4.4 Classification with Algorithm COBWEB/CLASSIT</cell></row><row><cell cols="3">To achieve the best success rate is necessary to determine values of parameters Acuity and Cutoff. These parameters must be selected manually and is not known method how select the best combination. Based on experiments with the values of these parameters, when the values for the parameter Acuity were changed in the interval 0.225 to 0.01 with step 0.025 with the constant value of parameter Cutoff 0.1 and experiments when parameter Acuity had constant value 0.</cell></row></table><note>1 and values of parameter Cutoff were changed in the interval 0.1 − 1 with step 0.1. We have chosen values for parameter Acuity 0.1 and for parameter Cutoff 0.6. Table11shown the results of the algorithm COBWEB/CLASSIT for each class of attack.</note></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_8"><head>Table 9 .</head><label>9</label><figDesc>Results of algorithm K-means for class of attack Normal with using KD-Tree.</figDesc><table><row><cell cols="3">Number of clusters Training time Accuracy</cell></row><row><cell></cell><cell>(s)</cell><cell>(%)</cell></row><row><cell>10</cell><cell>36.21</cell><cell>94.71</cell></row><row><cell>20</cell><cell>49.83</cell><cell>99.93</cell></row><row><cell>30</cell><cell>56.92</cell><cell>98.64</cell></row><row><cell>40</cell><cell>67.88</cell><cell>99.62</cell></row><row><cell>50</cell><cell>71.20</cell><cell>99.46</cell></row><row><cell>100</cell><cell>107.68</cell><cell>98.27</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_9"><head>Table 10 .</head><label>10</label><figDesc>Results of algorithm K-means for each class of attack with using KD-Tree.</figDesc><table><row><cell cols="3">Attack type Training time Accuracy</cell></row><row><cell></cell><cell>(s)</cell><cell>(%)</cell></row><row><cell>Normal</cell><cell>71.80</cell><cell>99.46</cell></row><row><cell>Probe</cell><cell>79.14</cell><cell>98.19</cell></row><row><cell>DOS</cell><cell>98.59</cell><cell>99.91</cell></row><row><cell>U2R</cell><cell>95.04</cell><cell>99.97</cell></row><row><cell>R2L</cell><cell>101.11</cell><cell>97.46</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_10"><head>Table 11 .</head><label>11</label><figDesc>Results of algorithm COBWEB/CLASSIT for each class of attack.</figDesc><table><row><cell cols="3">Attack type Training time Accuracy</cell></row><row><cell></cell><cell>(s)</cell><cell>(%)</cell></row><row><cell>Normal</cell><cell>284.72</cell><cell>83.73</cell></row><row><cell>Probe</cell><cell>356.98</cell><cell>97.79</cell></row><row><cell>DOS</cell><cell>260.07</cell><cell>83.12</cell></row><row><cell>U2R</cell><cell>265.33</cell><cell>93.58</cell></row><row><cell>R2l</cell><cell>216.78</cell><cell>97.92</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_11"><head>Table 12 .</head><label>12</label><figDesc>Classification using SVM.</figDesc><table><row><cell>Attack type</cell><cell>SVM kernel</cell></row><row><cell></cell><cell>linear polynomial RBF sigmoid</cell></row><row><cell>Normal</cell><cell>99.550 99.830 99.870 99.580</cell></row><row><cell>Probe</cell><cell>99.810 99.810 99.900 99.880</cell></row><row><cell>DOS</cell><cell>99.810 97.180 99.880 99.830</cell></row><row><cell>U2R</cell><cell>99.800 99.800 99.830 99.830</cell></row><row><cell>R2L</cell><cell>99.640 99.710 99.750 99.650</cell></row><row><cell>Average</cell><cell>99.722 99.266 99.846 99.754</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_12"><head>Table 13 .</head><label>13</label><figDesc>Classification using clustering algorithm.</figDesc><table><row><cell cols="3">Attack type FFT K-means COBWEB/CLASSIT</cell></row><row><cell>Normal</cell><cell>84.92 99.46</cell><cell>83.73</cell></row><row><cell>Probe</cell><cell>98.77 98.19</cell><cell>97.79</cell></row><row><cell>DOS</cell><cell>82.64 99.91</cell><cell>83.12</cell></row><row><cell>U2R</cell><cell>95.04 99.97</cell><cell>93.58</cell></row><row><cell>R2L</cell><cell>99.27 97.46</cell><cell>97.92</cell></row><row><cell>Average</cell><cell>92.128 98.998</cell><cell>91.228</cell></row></table></figure>
		</body>
		<back>

			<div type="acknowledgement">
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Acknowledgment</head><p>This work is partially supported by Grant of Grant Agency of Czech Republic No. 205/09/1079, and SGS, VSB -Technical University of Ostrava, Czech Republic, under the grant No. SP2011/172.</p></div>
			</div>

			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<monogr>
		<title level="m" type="main">Support Vector Machines for pattern classification</title>
		<author>
			<persName><forename type="first">S</forename><surname>Abe</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2005">2005</date>
			<publisher>Springer</publisher>
			<pubPlace>London</pubPlace>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">Soft Computing Models for Network Intrusion Detection Systems</title>
		<author>
			<persName><forename type="first">A</forename><surname>Abraham</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Jain</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Classification and Clustering for Knowledge Discovery Studies in Computational Intelligence</title>
				<imprint>
			<date type="published" when="2005">2005</date>
			<biblScope unit="page" from="191" to="207" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<monogr>
		<title level="m" type="main">Initializing k-means using genetic algorithms</title>
		<author>
			<persName><forename type="first">B</forename><surname>Al-Shboul</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S.-H</forename><surname>Myaeng</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2009">2009</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">A Survey of Clustering Data Mining Techniques</title>
		<author>
			<persName><forename type="first">P</forename><surname>Berkhin</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Grouping Multidimensional Data</title>
				<imprint>
			<date type="published" when="2002">2002</date>
			<biblScope unit="page" from="25" to="71" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<monogr>
		<title level="m" type="main">LIBSVM: a library for support vector machines</title>
		<author>
			<persName><forename type="first">Chih-Chung</forename><surname>Chang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Chih-Jen</forename><surname>Lin</surname></persName>
		</author>
		<ptr target="http://www.csie.ntu.edu.tw/~cjlin/libsvm" />
		<imprint>
			<date type="published" when="2001">2001</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<monogr>
		<title level="m" type="main">An Introduction to Support Vector Machines and other kernel-based learning methods</title>
		<author>
			<persName><forename type="first">N</forename><surname>Cristiani</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Shawe-Taylor</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2000">2000</date>
			<publisher>Cambridge University Press</publisher>
			<pubPlace>Cambridge</pubPlace>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<monogr>
		<title level="m" type="main">Pattern Classification and Scene Analysis</title>
		<author>
			<persName><forename type="first">R</forename><surname>Duda</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Hart</surname></persName>
		</author>
		<imprint>
			<date type="published" when="1973">1973</date>
			<publisher>John Wiley &amp; Sons</publisher>
			<pubPlace>New York</pubPlace>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<monogr>
		<title level="m" type="main">Knowledge Acquisition Via Incremental Conceptual Clustering</title>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">H</forename><surname>Fisher</surname></persName>
		</author>
		<imprint>
			<date type="published" when="1987">1987</date>
			<publisher>Kluwer Academic Publisher</publisher>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">Data Clustering Theory</title>
		<author>
			<persName><forename type="first">G</forename><surname>Gan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Ma</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Wu</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Algorithms and Applications</title>
				<imprint>
			<publisher>ASA-SIAM</publisher>
			<date type="published" when="2007">2007</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">A Practical Guide to Support Vector Classification</title>
		<author>
			<persName><forename type="first">C</forename><surname>Hsu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Chang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Lin</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">journal Bioinformatics</title>
		<imprint>
			<date type="published" when="2003">2003</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">Adaptive Neuro-Fuzzy Intrusion Detection Systems</title>
		<author>
			<persName><forename type="first">S</forename><surname>Chavan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Shah</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Dave</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Mukherjee</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Abraham</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Sanyal</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">International Conference on Information Technology: Coding and Computing (ITCC&apos;04)</title>
				<imprint>
			<date type="published" when="2004">2004</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title level="a" type="main">Image segmentation using clustering</title>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">K</forename><surname>Jain</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><forename type="middle">J</forename><surname>Flynn</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Advances in Image Understanding: A Festschrift for Azriel Rosenfeld</title>
				<imprint>
			<publisher>IEEE Press</publisher>
			<date type="published" when="1996">1996</date>
			<biblScope unit="page" from="65" to="83" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<analytic>
		<title level="a" type="main">Survey: Using Genetic Algorithm Approach in Intrusion Detection Systems Techniques</title>
		<author>
			<persName><forename type="first">S</forename><surname>Owais</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Snasel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Kromer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Abraham</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Computer Information Systems and Industrial Management Applications</title>
				<imprint>
			<date type="published" when="2008">2008</date>
			<biblScope unit="page" from="300" to="307" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b13">
	<monogr>
		<title level="m" type="main">Incremental hierarchical clustering of text documents</title>
		<author>
			<persName><forename type="first">N</forename><surname>Sahoo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Callan</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2006">2006</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b14">
	<analytic>
		<title level="a" type="main">Matrix Factorization Approach for Feature Deduction and Design of Intrusion Detection Systems</title>
		<author>
			<persName><forename type="first">V</forename><surname>Snasel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Platos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Kromer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Abraham</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">The Fourth International Conference on Information Assurance and Security</title>
				<imprint>
			<date type="published" when="2008">2008</date>
			<biblScope unit="page" from="172" to="179" />
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
