<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">Diagnosing Advanced Persistent Threats: A Position Paper</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Rui</forename><surname>Abreu</surname></persName>
							<affiliation key="aff0">
								<orgName type="institution">Palo Alto Research Center</orgName>
								<address>
									<addrLine>3333 Coyote Hill Rd</addrLine>
									<postCode>94304</postCode>
									<settlement>Palo Alto</settlement>
									<region>CA</region>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Danny</forename><surname>Bobrow</surname></persName>
							<email>bobrow@parc.com</email>
							<affiliation key="aff0">
								<orgName type="institution">Palo Alto Research Center</orgName>
								<address>
									<addrLine>3333 Coyote Hill Rd</addrLine>
									<postCode>94304</postCode>
									<settlement>Palo Alto</settlement>
									<region>CA</region>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Hoda</forename><surname>Eldardiry</surname></persName>
							<email>hoda.eldardiry@parc.com</email>
							<affiliation key="aff0">
								<orgName type="institution">Palo Alto Research Center</orgName>
								<address>
									<addrLine>3333 Coyote Hill Rd</addrLine>
									<postCode>94304</postCode>
									<settlement>Palo Alto</settlement>
									<region>CA</region>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Alexander</forename><surname>Feldman</surname></persName>
							<email>afeldman@parc.com</email>
							<affiliation key="aff0">
								<orgName type="institution">Palo Alto Research Center</orgName>
								<address>
									<addrLine>3333 Coyote Hill Rd</addrLine>
									<postCode>94304</postCode>
									<settlement>Palo Alto</settlement>
									<region>CA</region>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">John</forename><surname>Hanley</surname></persName>
							<email>john.hanley@parc.com</email>
							<affiliation key="aff0">
								<orgName type="institution">Palo Alto Research Center</orgName>
								<address>
									<addrLine>3333 Coyote Hill Rd</addrLine>
									<postCode>94304</postCode>
									<settlement>Palo Alto</settlement>
									<region>CA</region>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Tomonori</forename><surname>Honda</surname></persName>
							<email>tomo.honda@parc.com</email>
							<affiliation key="aff0">
								<orgName type="institution">Palo Alto Research Center</orgName>
								<address>
									<addrLine>3333 Coyote Hill Rd</addrLine>
									<postCode>94304</postCode>
									<settlement>Palo Alto</settlement>
									<region>CA</region>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Johan</forename><surname>De Kleer</surname></persName>
							<email>dekleer@parc.com</email>
							<affiliation key="aff0">
								<orgName type="institution">Palo Alto Research Center</orgName>
								<address>
									<addrLine>3333 Coyote Hill Rd</addrLine>
									<postCode>94304</postCode>
									<settlement>Palo Alto</settlement>
									<region>CA</region>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Alexandre</forename><surname>Perez</surname></persName>
							<email>aperez@parc.com</email>
							<affiliation key="aff0">
								<orgName type="institution">Palo Alto Research Center</orgName>
								<address>
									<addrLine>3333 Coyote Hill Rd</addrLine>
									<postCode>94304</postCode>
									<settlement>Palo Alto</settlement>
									<region>CA</region>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Dave</forename><surname>Archer</surname></persName>
							<affiliation key="aff1">
								<orgName type="institution">Galois, Inc.</orgName>
								<address>
									<addrLine>421 SW 6th Avenue, Suite 300</addrLine>
									<postCode>97204</postCode>
									<settlement>Portland</settlement>
									<region>OR</region>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">David</forename><surname>Burke</surname></persName>
							<email>davidb@galois.com</email>
							<affiliation key="aff1">
								<orgName type="institution">Galois, Inc.</orgName>
								<address>
									<addrLine>421 SW 6th Avenue, Suite 300</addrLine>
									<postCode>97204</postCode>
									<settlement>Portland</settlement>
									<region>OR</region>
									<country key="US">USA</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">Diagnosing Advanced Persistent Threats: A Position Paper</title>
					</analytic>
					<monogr>
						<imprint>
							<date/>
						</imprint>
					</monogr>
					<idno type="MD5">D783B7717448E3768AC0604E4F2B3B37</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2023-03-19T16:00+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>When a computer system is hacked, analyzing the root-cause (for example entry-point of penetration) is a diagnostic process. An audit trail, as defined in the National Information Assurance Glossary, is a security-relevant chronological (set of) record(s), and/or destination and source of records that provide evidence of the sequence of activities that have affected, at any time, a specific operation, procedure, or event. After detecting an intrusion, system administrators manually analyze audit trails to both isolate the root-cause and perform damage impact assessment of the attack. Due to the sheer volume of information and low-level activities in the audit trails, this task is rather cumbersome and time intensive. In this position paper, we discuss our ideas to automate the analysis of audit trails using machine learning and model-based reasoning techniques. Our approach classifies audit trails into the high-level activities they represent, and then reasons about those activities and their threat potential in real-time and forensically. We argue that, by using the outcome of this reasoning to explain complex evidence of malicious behavior, we are equipping system administrators with the proper tools to promptly react to, stop, and mitigate attacks.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1">Introduction</head><p>Today, enterprise system and network behaviors are typically "opaque": stakeholders lack the ability to assert causal linkages in running code, except in very simple cases. At best, event logs and audit trails can offer some partial information on temporally and spatially localized events as seen from the viewpoint of individual applications. Thus current techniques give operators little system-wide situational awareness, nor any viewpoint informed by a long-term perspective. Adversaries have taken advantage of this opacity by adopting a strategy of persistent, low-observability operation from inside the system, hiding effectively through the use of long causal chains of system and application code. We call such adversaries advanced persistent threats, or APTs.</p><p>To address current limitations, this position paper discusses a technique that aims to track causality across the enterprise and over extended periods of time, identify subtle causal chains that represent malicious behavior, localize the code at the roots of such behavior, trace the effects of other malicious actions descended from those roots, and make recommendations on how to mitigate those effects. By doing so, the proposed approach aims to enable stakeholders to understand and manage the activities going on in their networks. The technique exploits both current and novel forms of local causality to construct higher-level observations, long-term causality in system information flow. We propose to use a machine learning approach to classify segments of low-level events by the activities they represent, and to reason over these activities, prioritizing candidate activities for investigation. The diagnostic engine investigates these candidates looking for patterns that may represent the presence of APTs. 
Using pre-defined security policies and related mitigations, the approach explains discovered APTs and recommends appropriate mitigations to operators. We plan to leverage models of APT and normal business logic behavior to diagnose such threats. Note that the technique is not constrained by availability of human analysts, but can benefit from human-on-the-loop assistance.</p><p>The approach discussed in the paper will offer unprecedented capability for observation of long-term, subtle system-wide activity by automatically constructing such global, long-term causality observations. The ability to automatically classify causal chains of events in terms of abstractions such as activities, will provide operators with a unique capability to orient to long-term, system-wide evidence of possible threats. The diagnostic engine will provide a unique capability to identify whether groups of such activities likely represent active threats, making it easier for operators to decide whether long-term threats are active, and where they originate, even before those threats are identified by other means. Thus, the approach will pave the way for the first automated, long-horizon, continuously operating system-wide support for an effective defender Observe, Orient, Decide, and Act (OODA) loop.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2">Running Example</head><p>The methods proposed in this article are illustrated on a realistic running example. The attackers in this example use sophisticated and recently discovered exploits to gain access to the victim's resources. The attack is remote and does not require social engineering or opening a malicious email attachment. The methods that we propose, however, are not limited to this class of attacks. What follows is a detailed chronology of the events:</p><p>1. The attacker uses the APACHE httpd server, a cgi-bin script, and the SHELLSHOCK vulnerability (GNU bash exploit registered in the Common Vulnerabilities and Exposures database as CVE 2014-6271; see https://nvd.nist.gov/) to gain remote shell access to the victim's front-end. It is now possible for the attacker to execute processes on the front-end as the non-privileged user www-data.</p><p>2. The attacker notices that the front-end is running an unpatched UBUNTU LINUX OS version 13.1. The attacker uses the nc Linux utility to copy an exploit for obtaining root privileges. The particular exploit that the attacker uses utilizes the x32 recvmmsg() kernel vulnerability registered in the Common Vulnerabilities and Exposures (CVE) database as CVE 2014-0038. After running the copied binary for a few minutes the attacker gains root access to the front-end host.</p><p>3. The attacker installs a root-kit utility that intercepts all input to ssh;</p><p>4. A system administrator uses the compromised ssh to connect to the back-end revealing his back-end password to the attacker;</p><p>5. The attacker uses the compromised front-end to bypass firewalls and uses the newly acquired back-end administrator's password to access the back-end;</p><p>6. The attacker uses a file-tree traversing utility on the back-end that collects sensitive data and consolidates it in an archive file;</p><p>7. 
The attacker sends the archive file to a third-party hijacked computer for analysis.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3">Auditing and Instrumentation</head><p>Almost all computing systems of sufficiently high level (with the exception of some embedded systems) leave detailed logs of all system and application activities. Many UNIX variants such as LINUX log via the syslog daemon, while WINDOWS™ uses the event log service. In addition to the usual logging mechanisms, there is a multitude of projects related to secure and detailed auditing. An audit log is a more detailed trail of any security or computation-related activity such as file or RAM access, system calls, etc. Depending on the level of security we would like to provide, there are several methods for collecting input security-related information. On one extreme, it is possible to use the existing log files. On the other extreme there are applications for collecting detailed information about the application execution. One such approach <ref type="bibr" target="#b0">[1]</ref> runs the processes of interest through a debugger and logs every memory read and write access.</p><p>It is also possible to automatically inject logging calls in the source files before compiling them, allowing us to have static or dynamic logging or a combination of the two. Log and audit information can be signed, encrypted and sent in real-time to a remote server to make system tampering and activity-hiding more difficult. All these configuration decisions impose different trade-offs in security versus computational and RAM load <ref type="bibr" target="#b1">[2]</ref> and depend on the organizational context. Figure <ref type="figure" target="#fig_2">2</ref> shows part of the logs collected for our running example. The first entry is when the attacker exploits the SHELLSHOCK vulnerability through a CGI script of the web server. The second entry shows a syslog strace-like message resulting from the kernel escalation. 
Finally, the attacker uses the grep command on the back-end server to search for sensitive information and the call is recorded by the audit system.</p><p>It is often the case that the raw system and security log files are preprocessed and initial causal links are computed. If we trace the exec, fork, and join POSIX system calls, for example, it is possible to add graph-like structure to the log files computing provenance graphs. Another method for computing local causal links is to consider shared resources, e.g., two threads reading and writing the same memory address <ref type="bibr" target="#b0">[1]</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4">Activity Classification</head><p>The Activity Classifier continuously annotates audit trails with semantic tags describing the higher-order activity they represent. For example, 'remote shell access', 'remote file overwrite', and 'intra-network data query' are possible activity tags. These tags are used by the APT Diagnostics Engine to enable higher-order reasoning about related activities, and to prioritize activities for possible investigation.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1">Hierarchical semantic annotation of audit trails</head><p>A key challenge in abstracting low-level events into higher-order activity patterns that can be reasoned about efficiently is that such patterns can be described at multiple levels of semantic abstraction, all of which may be useful in threat analysis. Indeed, higher-order abstractions may be composed of lower-order abstractions that are in turn abstractions of low-level events. For example, a sequential set of logged events such as 'browser forking bash', 'bash initiating Netcat', and 'Netcat listening to new port', might be abstracted as the activity 'remote shell access'. The set of activities, 'remote shell access', and 'escalation of privilege' can be abstracted as the activity 'remote root shell access'. We approach activity annotation as a supervised learning problem that uses classification techniques to generate activity tags for audit trails. Table <ref type="table" target="#tab_0">1</ref> shows multiple levels of activity classifications for the above APT example.</p><p>Table <ref type="table" target="#tab_0">1</ref> represents one possible classification-enriched audit trail for such an APT. There can be many relatively small variations. For example, obscuring the password file could be done using other programs. A single classifier only allows for a single level of abstraction, and a single leap from low-level events to very abstract activities (for example, from 'bash execute perl' level to 'extracting modified file' level) will have higher error caused by these additional variations.</p><p>To obtain several layers of abstraction for reasoning over, and thus reduce overall error in classification, we use a multi-level learning strategy that models information at multiple levels of semantic abstraction using multiple classifiers. 
Each classifier solves the problem at one abstraction level, by mapping from a lower-level (fine) feature space to the next higher-level conceptual (coarse) feature space.</p><p>The activity classifier relies on both a vocabulary of activities and a library of patterns describing these activities that will be initially defined manually. This vocabulary and pattern set reside in a Knowledge Base.</p><p>In our training approach, results from training lower level classifiers are used as training data for higher level classifiers. In this way, we coherently train all classifiers by preventing higher-level classifiers from being trained with patterns that will never be generated by their lower-level precursors. We use an ensemble learning approach to achieve accurate classification. This involves stacking together both bagged and boosted models to reduce both variance and bias error components <ref type="bibr" target="#b2">[3]</ref>. The classification algorithm will be trained using an online-learning technique and integrated within an Active Learning Framework to improve classification of atypical behaviors.</p><p>Generating Training Data for Classification. To build the initial classifier, training data is generated using two methods. First, an actual deployed system is used to collect normal behavior data, and a Subject Matter Expert manually labels it. Second, a testing platform is used to generate data in a controlled environment, particularly platform-dependent vulnerability-related behavior. In addition, to generate new training data of previously unknown behavior, we use an Active Learning framework as described in Section 5.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5">Prioritizer</head><p>As the Activity Classifier annotates audit trails with activity descriptors, the two (parallel) next steps in our workflow are to 1) prioritize potential threats to be referred to the Diagnostic Engine (see Section 6) for investigation, and 2) prioritize emergent activities that (after suitable review and labeling) are added to the activity classifier training data. This module prioritizes activities by threat severity and confidence level. This prioritization process presents three key challenges.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.1">Threat-based rank-annotation of activities</head><p>One challenge in ranking activities according to their threat potential is the complex (and dynamic) notion of what constitutes a threat. Ranking based on matching to known prior threats is necessary, but not sufficient. An ideal ranking approach should take known threats into account, while also proactively considering the unknown threat potential of new kinds of activities.</p><p>Another such challenge is that risk may be assessed at various levels of activity abstraction, requiring that overall ranking must be computed by aggregating risk assessments at multiple abstraction levels. We implement two ranking approaches: a supervised ranker based on previously known threats and an unsupervised ranker that considers unknown potential threats.</p><p>Supervised ranking using APT classification to catch known threats. The goal of APT classification is to provide the diagnostic engine with critical APT related information such as APT Phase, severity of attack, and confidence level associated with APT tagging for threat prioritization. Since the audit trails are annotated hierarchically into different granularity of actions, multiple classifiers will be built to consider each hierarchical level separately. APT classifiers are used to identify entities that are likely to be instances of known threats or phases of an APT attack. Two types of classifiers are used. The first classifier is hand-coded and the second classifier is learned from training data.</p><p>The hand-coded classifier is designed to have high precision, using hand-coded rules, mirroring SIEM and IDS systems. Entities tagged by this classifier are given the highest priority for investigation. The second classifier, which is learned from training data, will provide higher recall at the cost of precision. 
Activities are ranked according to their threat level by aggregating a severity measure (determined by classified threat type) and a confidence measure. We complement the initial set of training data to calibrate our classifiers by using an Active Learning Framework, which focuses on improving the classification algorithm through occasional manual labeling of the most critical activities in the audit trails.</p><p>Unsupervised ranking using normalcy characterization to catch unknown threats. The second component of the prioritizer is a set of unsupervised normalcy rankers, which rank entities based on their statistical "normalcy". Activities identified as unusual will be fed to the Active Learning framework to check if any of them are "unknown" APT activities. This provides a mechanism for detecting "unknown" threats while also providing feedback to improve the APT classifier.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.2">Combining Multiple Rankings</head><p>One of the key issues with combining the outputs of multiple risk rankers is dealing with two-dimensional risk (severity, confidence) scores that may be on very different scales. A diverse set of score normalization techniques have been proposed <ref type="bibr">[4; 5; 6]</ref> to deal with this issue, but no single technique has been found to be superior over all the others. An alternative to combining scores is to combine rankings <ref type="bibr" target="#b6">[7]</ref>. Although converting scores to rankings does lose information, it remains an open question if the loss in information is compensated for by the convenience of working with the common scale of rankings.</p><p>We will develop combination techniques for weighted risk rankings based on probabilistic rank aggregation methods. This approach builds on our own work <ref type="bibr" target="#b7">[8]</ref> that shows the robustness of the weighted ranking approach. We also build on principled methods for combining ranking data found in the statistics and information retrieval literature.</p><p>Traditionally, the goal of rank aggregation [9; 10] is to combine a set of rankings of the same candidates into a single consensus ranking that is "better" than the individual rankings. We extend the traditional approach to accommodate the specific context of weighted risk ranking. First, unreliable rankers will be identified and either ignored or down-weighted, lest their rankings decrease the quality of the overall consensus <ref type="bibr">[7; 10]</ref>. Second, we will discount excessive correlation among rankers, so that a set of highly redundant rankers does not completely outweigh the contribution of other alternative rankings. To address these two issues, we will associate a probabilistic latent variable $Z_i$ with the $i$-th entity of interest, which indicates whether the entity is anomalous or normal. 
Then, we will build a probabilistic model that allows us to infer the posterior distribution over the $Z_i$ based on the observed rankings produced by each of the input weighted risk rankings. This posterior probability of $Z_i$ being normal will then be used as the weighted risk rank. Our model will make the following assumptions to account for both unreliable and correlated rankers: 1) Anomalies are ranked lower than all normal instances and these ranks tend to be concentrated near the lower rankings of the provided weighted risk rankings, and 2) Normal data instances tend to be uniformly distributed near the higher rankings of the weighted risk rankings.</p><p>There are various ways to build a probabilistic model that reflects the above assumptions and allows for the inference of the $Z_i$ variables through Expectation-Maximization <ref type="bibr" target="#b10">[11]</ref>. In addition to these assumptions, we will explore allowing other factors to influence the latent $Z_i$ variables, such as features of the entities as well as feedback provided by expert analysts.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="6">Diagnosis</head><p>We view the problem of detecting, isolating, and explaining complex APT campaign behavior from rich activity data as a diagnostic problem. We will use AI-based diagnostic reasoning to guide the global search for possible vulnerabilities that enabled the breach. Model-based diagnosis (MBD) <ref type="bibr" target="#b11">[12]</ref> is a particularly compelling approach as it supports reasoning over complex causal networks (for example, having multiple conjunctions, disjunctions, and negations) and identifies often subtle combinations of root causes of the symptoms (the breach).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="6.1">An MBD approach for APT detection and isolation: Motivation</head><p>Attack detection and isolation are two distinct challenges. Often diagnostic approaches use separate models for detection and isolation <ref type="bibr" target="#b12">[13]</ref>. MBD, however, uses a single model to combine these two forms of reasoning. The security model contains both part of the security policy (that communicating with certain blacklisted hosts may indicate an information leak) and information about the possible locations and consequence of a vulnerability (a privilege escalation may lead to an information leak). The security model also contains abstract security constraints such as if a process requires authentication, a password must be read and compared against.</p><p>The diagnostic approach takes into consideration the bootstrapping of an APT which we consider the root-cause of the attack. What enables a successful APT is either a combination of software component vulnerabilities or the combined use of social engineering and insufficiency of the organizational security policies. We use MBD for computing the set of simultaneously exploited vulnerabilities that allowed the deployment of the APT. Computing such explanations is possible because MBD reasons in terms of multiple faults <ref type="bibr" target="#b13">[14]</ref>. In our running example this set would include both the fact that the web server has been exploited due to the Shellshock vulnerability and that the attacker gained privileged access on the front-end due to the use of the X64_32 escalation vulnerability.</p><p>The abstract security model is used to gather information about types of attacks the system is vulnerable to, and to aid in deciding the set of actions required to stop an APT campaign (policy enforcement). Various heuristics exist to find the set of meaningful diagnosis candidates. 
As an example, one might be interested in the minimal set of actions to stop the attack <ref type="bibr">[15; 16]</ref> or select those candidates that capture significant probability mass <ref type="bibr" target="#b16">[17]</ref>. In the rest of this section, for illustration purposes, we use minimality as the heuristic of interest. MBD is the right tool for dealing with computation of diagnosis candidates as it offers several ways to address the modeling and computational complexity <ref type="bibr">[18; 19]</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="6.2">Detection and Isolation of Attacks from Abstract Security Model and Sensor Data</head><p>The abstract security model provides an abstraction mechanism that is originally missing in the audit trails.</p><p>More precisely what is not in the audit trails and what is in the security model is how to connect (possibly disconnected) activities for the purpose of global reasoning. The abstract security model and the sensor data collected from the audit trails are provided as inputs to an MBD algorithm that performs the high-level reasoning about possible vulnerabilities and attacks similar to what a human security officer would do.</p><p>The information in the "raw" audit trails is of too high fidelity <ref type="bibr" target="#b1">[2]</ref> and low abstraction to be used by a "crude" security model. That is the reason the diagnostic engine needs the machine learning module to temporally and spatially group nodes in the audit trails and to provide semantically rich variable/value sensor data about actions, suitable for MBD. Notice that in this process, the audit trail structure is translated to semantic categories, i.e., the diagnostic engine receives as observations time-series of sensed actions.</p><p>The listing that follows next shows an abstract security model for the running example in the LYDIA language <ref type="bibr" target="#b19">[20]</ref>. This bears some resemblance to PROLOG, except that LYDIA is a language for model-based diagnosis of logical circuits while PROLOG is for Horn-style reasoning. The use of LYDIA is for illustration purposes only; in reality computer systems can be much more easily modeled as state machines. There is a significant body of literature dealing with diagnosis of discrete-event systems <ref type="bibr">[21; 22; 23]</ref>, to name just a few. LYDIA translates the model to an internal propositional logic formula. 
Part of this internal representation is shown in Figure <ref type="figure" target="#fig_4">3</ref>, which uses the standard VLSI <ref type="bibr" target="#b23">[24]</ref> notation to denote AND-gates, OR-gates, and NOT-gates. Wires are labeled with variable names. Boolean circuits (matching propositional logic), however, have limited expressiveness and modeling security constraints in them is notoriously difficult, hence we plan to create or use specialized modal logic similar to the one proposed in <ref type="bibr" target="#b24">[25]</ref>.</p><p>Notice that the format of the Boolean circuit shown in Figure <ref type="figure" target="#fig_4">3</ref> is very close to the one used in Truth Maintenance System (TMS) <ref type="bibr" target="#b25">[26]</ref>. The only assumable variable in Figure <ref type="figure" target="#fig_4">3</ref> is buffer_overflow_vuln and its default value is false (i.e., there is no buffer overflow vulnerability in the web server process).</p><p>We next show how a reasoning engine can discover a conflict through forward and backward propagation. Looking at Figure <ref type="figure" target="#fig_4">3</ref>, it is clear that r must be true because it is an input to an AND-gate whose output is set to true. Therefore either p or q (or both) must be true. This means that either buffer_overflow_vuln or leak_pw must be false. If we say that leak_pw is assumed to be true (measured or otherwise inferred), then leak_pw and buffer_overflow_vuln are together part of a conflict. 
It means that the reasoning engine has to change one of them to resolve the contradiction.</p><p>Based on the observation from our running example and a TMS constructed from the security model shown in figure <ref type="figure" target="#fig_4">3</ref>, the hitting set algorithm computes two possible diagnostic hypotheses: (1) the attacker gained shell access through a web-server vulnerability and the attacker performed privilege escalation or (2) the attacker injected binary code through a buffer overflow and the attacker performed privilege escalation.</p><p>If we use LYDIA to compute the set of diagnoses for the running example, we get the following two (ambiguous) diagnoses for the root-cause of the penetration:</p><p>$ lydia example.lm example.obs d1 = { fe.escalation_vuln, fe.httpd_shell_vuln } d2 = { fe.buffer_overflow_vuln, fe.escalation_vuln }</p><p>MBD uses probabilities to compute a sequence of possible diagnoses ordered by likelihood. This probability can be used for many purposes: deciding which diagnosis is more likely to be the true fault explanation, determining whether there is a need to consider further evidence from the logs, or limiting the number of diagnoses that need to be identified. Many policies exist to compute these probabilities <ref type="bibr">[27; 28]</ref>.</p><p>For illustration purposes we consider that the diagnoses for the running example are ambiguous. Before we discuss methods for dealing with this ambiguity, we address the major research challenge of model generation.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="6.3">Model Generation</head><p>The abstract vulnerability model can either be constructed manually or semi-automatically. The challenge with modeling is that an APT campaign generally exploits unknown vulnerabilities. Therefore, our approach to address this issue is to construct the model which captures expected behavior (known goods) of the system. Starting from generic parameterized vulnerability models and security objectives, the abstract vulnerability model can be extended with information related to known vulnerabilities (known bads).</p><p>Generating the model can be done either manually or semi-automatically. We will explore avenues to generate this model manually, which requires significant knowledge about potential security vulnerabilities, while being error-prone and not detailed enough. Amongst company-specific requirements, we envisage the abstract model to capture the most common attacks that target software systems, as described in the Common Attack Pattern Enumeration and Classification (CAPEC 1 ). The comprehensive list of known attacks has been designed to better understand the perspective of an attacker exploiting the vulnerabilities and, from this knowledge, devise appropriate defenses.</p><p>As modeling is challenging, we propose to explore semi-automatic approaches to construct models. The semi-automatic method is suitable for addressing the modeling challenge because in security, similarly to diagnosis, there are (1) component models and (2) structure. While it is difficult to automate the building of component models (this may even require natural language parsing of databases such as CAPEC), it is feasible to capture diagnosis-oriented information from structure (physical networking or network communication).</p><p>Yet another approach to semi-automatically generate the model is to learn it from executions of the system (e.g., during regression testing, just before deployment). 
This approach to system modeling is inspired by work in automatic software debugging <ref type="bibr" target="#b28">[29]</ref>, where modeling of program behavior is done in terms of abstraction of program traces - known as spectra <ref type="bibr" target="#b29">[30]</ref> - abstracting from modeling specific components and data dependencies.</p><p>The outlined approaches to construct the abstract vulnerability model entail different costs and diagnostic accuracies. As expected, manually building the model is the most expensive one. Note that building the model is a time-consuming and error-prone task. The two semi-automatic ways also entail different costs: one exploits the available, static information and the other requires the system to be executed to compute a meaningful set of executions. We will investigate the trade-offs between modeling approaches and their diagnostic accuracy in the context of transparent computing.<note place="foot" n="1">http://capec.mitre.org/</note></p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="7">Conclusions</head><p>Identifying the root cause and performing damage impact assessment of advanced persistent threats can be framed as a diagnostic problem. In this paper, we discuss an approach that leverages machine learning and model-based diagnosis techniques to reason about potential attacks.</p><p>Our approach classifies audit trails into high-level activities, and then reasons about those activities and their threat potential in real time and forensically. By using the outcome of this reasoning to explain complex evidence of malicious behavior, the system administrator is provided with the proper tools to promptly react to, stop, and mitigate attacks.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head>Figure 1 :</head><label>1</label><figDesc>Figure 1: Network topology for the attack. The network topology used for our running example is shown in figure 1. The attack is executed over several days. It starts by (1) compromising the web server front-end, followed by (2) a reconnaissance phase and (3) compromising the data storage back end and ultimately extracting and modifying sensitive information belonging to the victim. Both the front-end and the back end in this example run unpatched UBUNTU 13.1 LINUX OS on an INTEL® SANDY BRIDGE™ architecture. What follows is a detailed chronology of the events:</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_1"><head></head><label></label><figDesc>:11.239.64.213 -[22/Apr/2014 06:30:24 +0200] "GET /cgi-bin/test.cgi HTTP/1.1" 401 381 . . . front_end.rsyslogd.log:recvmsg(3, msg_name(0) = NULL, msg_iov(1) = ["29/Apr/2014 22:15:49 ...", 8096], msg_controllen = 0, msg_flags = MSG_CTRUNC, MSG_DONTWAIT) = 29 . . . back_end:auditctl:type = SYSCALL msg = audit(1310392408.506:36): arch = c000003e syscall = 2 success = yes exit = 3 a0 = 7fff2ce9471d a1 = 0 a2 = 61f768 a3 = 7fff2ce92a20 items = 1 ppid = 20478 pid = 21013 auid = 1000 uid = 0 gid = 0 euid = 0 suid = 0 fsuid = 0 egid = 0 sgid = 0 fsgid = 0 ses = 1 comm = "grep" exe = "/bin/grep" . . .</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_2"><head>Figure 2 :</head><label>2</label><figDesc>Figure 2: Part of log files related to the attack from the running example</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_4"><head>Figure 3 :</head><label>3</label><figDesc>Figure 3: Part of the abstract security model for the running example</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1 :</head><label>1</label><figDesc>Sample classification problem for running example</figDesc><table><row><cell>Activity 1 Remote Shell Access Shell Shock</cell><cell>Activity 2 Remote File Overwrite Trojan Installation</cell><cell>Activity 3 Modified File Download Password Exfiltration</cell></row><row><cell>Browser (Port 80) fork bash bash fork Netcat Netcat listen to port 8080</cell><cell>Netcat listen to Port 8443 Port 8443 receive binary file binary file overwrites libns.so</cell><cell>Netcat listen to Port 8443 Port 8443 fork bash bash execute perl Perl overwrite /tmp/stolen_pw Port 8443 send /tmp/stolen_pw</cell></row></table></figure>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" xml:id="foot_0">Proceedings of the 26th International Workshop on Principles of Diagnosis</note>
		</body>
		<back>
			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">High accuracy attack provenance via binary-based execution partition</title>
		<author>
			<persName><forename type="first">Kyu</forename><forename type="middle">Hyung</forename><surname>Lee</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Xiangyu</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Dongyan</forename><surname>Xu</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 20th Annual Network and Distributed System Security Symposium</title>
				<meeting>the 20th Annual Network and Distributed System Security Symposium<address><addrLine>San Diego, CA</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2013-02">February 2013</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">LogGC: Garbage collecting audit log</title>
		<author>
			<persName><forename type="first">Kyu</forename><forename type="middle">Hyung</forename><surname>Lee</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Xiangyu</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Dongyan</forename><surname>Xu</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2013 ACM SIGSAC Conference on Computer and Communications Security</title>
				<meeting>the 2013 ACM SIGSAC Conference on Computer and Communications Security<address><addrLine>Berlin, Germany</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2013">2013</date>
			<biblScope unit="page" from="1005" to="1016" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">A review on ensembles for the class imbalance problem: Bagging-, boosting-, and hybrid-based approaches</title>
		<author>
			<persName><forename type="first">M</forename><surname>Galar</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Fernández</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Barrenechea</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Bustince</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Herrera</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">IEEE Transactions on Systems, Man, and Cybernetics, Part C: Applications and Reviews</title>
		<imprint>
			<biblScope unit="volume">42</biblScope>
			<biblScope unit="issue">4</biblScope>
			<biblScope unit="page" from="463" to="484" />
			<date type="published" when="2012-07">July 2012</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">Outlier ensembles: Position paper</title>
		<author>
			<persName><forename type="first">Charu</forename><forename type="middle">C</forename><surname>Aggarwal</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">ACM SIGKDD Explorations Newsletter</title>
		<imprint>
			<biblScope unit="volume">14</biblScope>
			<biblScope unit="issue">2</biblScope>
			<biblScope unit="page" from="49" to="58" />
			<date type="published" when="2013">2013</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">Converting output scores from outlier detection algorithms into probability estimates</title>
		<author>
			<persName><forename type="first">Jing</forename><surname>Gao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Pang-Ning</forename><surname>Tan</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Sixth International Conference on Data Mining</title>
				<meeting>the Sixth International Conference on Data Mining</meeting>
		<imprint>
			<publisher>IEEE</publisher>
			<date type="published" when="2006-12">December 2006</date>
			<biblScope unit="page" from="212" to="221" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">Interpreting and unifying outlier scores</title>
		<author>
			<persName><forename type="first">Hans-Peter</forename><surname>Kriegel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Peer</forename><surname>Kröger</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Erich</forename><surname>Schubert</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Arthur</forename><surname>Zimek</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Eleventh SIAM International Conference on Data Mining</title>
				<meeting>the Eleventh SIAM International Conference on Data Mining</meeting>
		<imprint>
			<date type="published" when="2011-04">April 2011</date>
			<biblScope unit="page" from="13" to="24" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">On evaluation of outlier rankings and outlier scores</title>
		<author>
			<persName><forename type="first">Erich</forename><surname>Schubert</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Remigius</forename><surname>Wojdanowski</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Arthur</forename><surname>Zimek</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Hans-Peter</forename><surname>Kriegel</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Twelfth SIAM International Conference on Data Mining</title>
				<meeting>the Twelfth SIAM International Conference on Data Mining</meeting>
		<imprint>
			<date type="published" when="2012-04">April 2012</date>
			<biblScope unit="page" from="1047" to="1058" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">Multi-source fusion for anomaly detection: using across-domain and across-time peer-group consistency checks</title>
		<author>
			<persName><forename type="first">Hoda</forename><surname>Eldardiry</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Kumar</forename><surname>Sricharan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Juan</forename><surname>Liu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">John</forename><surname>Hanley</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Bob</forename><surname>Price</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Oliver</forename><surname>Brdiczka</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Eugene</forename><surname>Bart</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Wireless Mobile Networks, Ubiquitous Computing, and Dependable Applications (JoWUA)</title>
		<imprint>
			<biblScope unit="volume">5</biblScope>
			<biblScope unit="issue">2</biblScope>
			<biblScope unit="page" from="39" to="58" />
			<date type="published" when="2014">2014</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">An efficient boosting algorithm for combining preferences</title>
		<author>
			<persName><forename type="first">Yoav</forename><surname>Freund</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Raj</forename><forename type="middle">D</forename><surname>Iyer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Robert</forename><forename type="middle">E</forename><surname>Schapire</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Yoram</forename><surname>Singer</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Machine Learning Research</title>
		<imprint>
			<biblScope unit="volume">4</biblScope>
			<biblScope unit="page" from="933" to="969" />
			<date type="published" when="2003-11">Nov. 2003</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">Bayesian aggregation of order-based rank data</title>
		<author>
			<persName><forename type="first">Ke</forename><surname>Deng</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Simeng</forename><surname>Han</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Kate</forename><forename type="middle">J</forename><surname>Li</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Jun</forename><forename type="middle">S</forename><surname>Liu</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of the American Statistical Association</title>
		<imprint>
			<biblScope unit="volume">109</biblScope>
			<biblScope unit="issue">507</biblScope>
			<biblScope unit="page" from="1023" to="1039" />
			<date type="published" when="2014">2014</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">Maximum likelihood from incomplete data via the EM algorithm</title>
		<author>
			<persName><forename type="first">Arthur</forename><forename type="middle">P</forename><surname>Dempster</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Nan</forename><forename type="middle">M</forename><surname>Laird</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Donald</forename><forename type="middle">B</forename><surname>Rubin</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of the Royal Statistical Society. Series B</title>
		<imprint>
			<biblScope unit="volume">39</biblScope>
			<biblScope unit="issue">1</biblScope>
			<biblScope unit="page" from="1" to="38" />
			<date type="published" when="1977">1977</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title level="a" type="main">One step lookahead is pretty good</title>
		<author>
			<persName><forename type="first">Johan</forename><surname>De Kleer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Olivier</forename><surname>Raiman</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Mark</forename><surname>Shirley</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Readings in Model-Based Diagnosis</title>
				<meeting><address><addrLine>San Francisco, CA</addrLine></address></meeting>
		<imprint>
			<publisher>Morgan Kaufmann Publishers</publisher>
			<date type="published" when="1992">1992</date>
			<biblScope unit="page" from="138" to="142" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<analytic>
		<title level="a" type="main">Empirical evaluation of diagnostic algorithm performance using a generic framework</title>
		<author>
			<persName><forename type="first">Alexander</forename><surname>Feldman</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Tolga</forename><surname>Kurtoglu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Sriram</forename><surname>Narasimhan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Scott</forename><surname>Poll</surname></persName>
		</author>
		<author>
			<persName><forename type="first">David</forename><surname>Garcia</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Johan</forename><surname>De Kleer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Lukas</forename><surname>Kuhn</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Arjan</forename><surname>Van Gemund</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">International Journal of Prognostics and Health Management</title>
		<imprint>
			<biblScope unit="page" from="1" to="28" />
			<date type="published" when="2010">2010</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b13">
	<analytic>
		<title level="a" type="main">Diagnosing multiple faults</title>
		<author>
			<persName><forename type="first">Johan</forename><surname>De Kleer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Brian</forename><surname>Williams</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Artificial Intelligence</title>
		<imprint>
			<biblScope unit="volume">32</biblScope>
			<biblScope unit="issue">1</biblScope>
			<biblScope unit="page" from="97" to="130" />
			<date type="published" when="1987">1987</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b14">
	<analytic>
		<title level="a" type="main">Automated generation and analysis of attack graphs</title>
		<author>
			<persName><forename type="first">Oleg</forename><surname>Sheyner</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Joshua</forename><surname>Haines</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Somesh</forename><surname>Jha</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Richard</forename><surname>Lippmann</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Jeannette</forename><forename type="middle">M</forename><surname>Wing</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceeding of the 2002 IEEE Symposium on Security and Privacy</title>
				<meeting>the 2002 IEEE Symposium on Security and Privacy</meeting>
		<imprint>
			<publisher>IEEE</publisher>
			<date type="published" when="2002-05">May 2002</date>
			<biblScope unit="page" from="273" to="284" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b15">
	<analytic>
		<title level="a" type="main">Modeling and detection of complex attacks</title>
		<author>
			<persName><forename type="first">Seyit</forename><forename type="middle">Ahmet</forename><surname>Camtepe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Bülent</forename><surname>Yener</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceeding of the Third International Conference on Security and Privacy in Communications Networks</title>
				<meeting>the Third International Conference on Security and Privacy in Communications Networks</meeting>
		<imprint>
			<date type="published" when="2007-09">September 2007</date>
			<biblScope unit="page" from="234" to="243" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b16">
	<analytic>
		<title level="a" type="main">A low-cost approximate minimal hitting set algorithm and its application to model-based diagnosis</title>
		<author>
			<persName><forename type="first">Rui</forename><surname>Abreu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Arjan</forename><forename type="middle">J C</forename><surname>Van Gemund</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Eighth Symposium on Abstraction, Reformulation and Approximation</title>
				<meeting>the Eighth Symposium on Abstraction, Reformulation and Approximation</meeting>
		<imprint>
			<date type="published" when="2009-07">July 2009</date>
			<biblScope unit="page" from="2" to="9" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b17">
	<analytic>
		<title level="a" type="main">Approximate model-based diagnosis using greedy stochastic search</title>
		<author>
			<persName><forename type="first">Alexander</forename><surname>Feldman</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Gregory</forename><surname>Provan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Arjan</forename><surname>Van Gemund</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Artificial Intelligence Research</title>
		<imprint>
			<biblScope unit="volume">38</biblScope>
			<biblScope unit="page" from="371" to="413" />
			<date type="published" when="2010">2010</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b18">
	<analytic>
		<title level="a" type="main">A distributed approach to diagnosis candidate generation</title>
		<author>
			<persName><forename type="first">Nuno</forename><surname>Cardoso</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Rui</forename><surname>Abreu</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Progress in Artificial Intelligence</title>
				<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2013">2013</date>
			<biblScope unit="page" from="175" to="186" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b19">
	<analytic>
		<title level="a" type="main">All roads lead to fault diagnosis: Model-based reasoning with LYDIA</title>
		<author>
			<persName><forename type="first">Alexander</forename><surname>Feldman</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Jurryt</forename><surname>Pietersma</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Arjan</forename><surname>Van Gemund</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Eighteenth Belgium-Netherlands Conference on Artificial Intelligence (BNAIC&apos;06)</title>
				<meeting>the Eighteenth Belgium-Netherlands Conference on Artificial Intelligence (BNAIC&apos;06)<address><addrLine>Namur, Belgium</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2006-10">October 2006</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b20">
	<analytic>
		<title level="a" type="main">Failure diagnosis using discrete-event models</title>
		<author>
			<persName><forename type="first">Meera</forename><surname>Sampath</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Raja</forename><surname>Sengupta</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Stephane</forename><surname>Lafortune</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Kasim</forename><surname>Sinnamohideen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Demosthenis</forename><forename type="middle">C</forename><surname>Teneketzis</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">IEEE Transactions on Control Systems Technology</title>
		<imprint>
			<biblScope unit="volume">4</biblScope>
			<biblScope unit="issue">2</biblScope>
			<biblScope unit="page" from="105" to="124" />
			<date type="published" when="1996">1996</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b21">
	<analytic>
		<title level="a" type="main">Incremental diagnosis of discrete-event systems</title>
		<author>
			<persName><forename type="first">Alban</forename><surname>Grastien</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Marie-Odile</forename><surname>Cordier</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Christine</forename><surname>Largouët</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">DX</title>
		<imprint>
			<date type="published" when="2005">2005</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b22">
	<monogr>
		<title level="m" type="main">Conflict-based diagnosis of discrete event systems: theory and practice</title>
		<author>
			<persName><forename type="first">Alban</forename><surname>Grastien</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Patrik</forename><surname>Haslum</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Sylvie</forename><surname>Thiébaux</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2012">2012</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b23">
	<monogr>
		<author>
			<persName><forename type="first">Behrooz</forename><surname>Parhami</surname></persName>
		</author>
		<title level="m">Computer Arithmetic: Algorithms and Hardware Designs</title>
				<meeting><address><addrLine>New York, NY, USA</addrLine></address></meeting>
		<imprint>
			<publisher>Oxford University Press, Inc</publisher>
			<date type="published" when="2009">2009</date>
		</imprint>
	</monogr>
	<note>2nd edition</note>
</biblStruct>

<biblStruct xml:id="b24">
	<analytic>
		<title level="a" type="main">A logic for reasoning about security</title>
		<author>
			<persName><forename type="first">Janice</forename><surname>Glasgow</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Glenn</forename><surname>Macewen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Prakash</forename><surname>Panangaden</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">ACM Transactions on Computer Systems</title>
		<imprint>
			<biblScope unit="volume">10</biblScope>
			<biblScope unit="issue">3</biblScope>
			<biblScope unit="page" from="226" to="264" />
			<date type="published" when="1992-08">August 1992</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b25">
	<monogr>
		<title level="m" type="main">Building Problem Solvers</title>
		<author>
			<persName><forename type="first">Kenneth</forename><surname>Forbus</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Johan</forename><surname>De Kleer</surname></persName>
		</author>
		<imprint>
			<date type="published" when="1993">1993</date>
			<publisher>MIT Press</publisher>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b26">
	<analytic>
		<title level="a" type="main">Diagnosing multiple persistent and intermittent faults</title>
		<author>
			<persName><forename type="first">Johan</forename><surname>De Kleer</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceeding of the 2009 International Joint Conference on Artificial Intelligence</title>
				<meeting>the 2009 International Joint Conference on Artificial Intelligence</meeting>
		<imprint>
			<date type="published" when="2009-07">July 2009</date>
			<biblScope unit="page" from="733" to="738" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b27">
	<analytic>
		<title level="a" type="main">A new bayesian approach to multiple intermittent fault diagnosis</title>
		<author>
			<persName><forename type="first">Rui</forename><surname>Abreu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Peter</forename><surname>Zoeteweij</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Arjan</forename><forename type="middle">J C</forename><surname>Van Gemund</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceeding of the 2009 International Joint Conference on Artificial Intelligence</title>
				<meeting>the 2009 International Joint Conference on Artificial Intelligence</meeting>
		<imprint>
			<date type="published" when="2009-07">July 2009</date>
			<biblScope unit="page" from="653" to="658" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b28">
	<analytic>
		<title level="a" type="main">Spectrum-based multiple fault localization</title>
		<author>
			<persName><forename type="first">Rui</forename><surname>Abreu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Peter</forename><surname>Zoeteweij</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Arjan</forename><forename type="middle">J C</forename><surname>Van Gemund</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 24th IEEE/ACM International Conference on Automated Software Engineering</title>
				<meeting>the 24th IEEE/ACM International Conference on Automated Software Engineering</meeting>
		<imprint>
			<date type="published" when="2009-11">November 2009</date>
			<biblScope unit="page" from="88" to="99" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b29">
	<analytic>
		<title level="a" type="main">An empirical investigation of the relationship between spectra differences and regression faults</title>
		<author>
			<persName><forename type="first">Mary</forename><forename type="middle">Jean</forename><surname>Harrold</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Gregg</forename><surname>Rothermel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Kent</forename><surname>Sayre</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Rui</forename><surname>Wu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Liu</forename><surname>Yi</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Software Testing Verification and Reliability</title>
		<imprint>
			<biblScope unit="volume">10</biblScope>
			<biblScope unit="issue">3</biblScope>
			<biblScope unit="page" from="171" to="194" />
			<date type="published" when="2000">2000</date>
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
