<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.0 20120330//EN" "JATS-archivearticle1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-title-group>
        <journal-title>August</journal-title>
      </journal-title-group>
    </journal-meta>
    <article-meta>
      <title-group>
        <article-title>Methods for Explainable Malware Analysis</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author">
          <string-name>Martin Homola</string-name>
          <email>homola@fmph.uniba.sk</email>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Peter Anthony</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Iveta Bečková</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Ján Kľuka</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Ján Mojžiš</string-name>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Peter Švec</string-name>
          <xref ref-type="aff" rid="aff7">7</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Štefan Balogh</string-name>
          <email>stefan.balogh@stuba.sk</email>
          <xref ref-type="aff" rid="aff7">7</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Franco Alberto Cardillo</string-name>
          <email>francoalberto.cardillo@ilc.cnr.it</email>
          <xref ref-type="aff" rid="aff5">5</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Franca Debole</string-name>
          <email>franca.debole@isti.cnr.it</email>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Umberto Straccia</string-name>
          <email>umberto.straccia@isti.cnr.it</email>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Martin Kenyeres</string-name>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Francesco Giannini</string-name>
          <email>francesco.giannini@sns.it</email>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Michelangelo Diligenti</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Marco Gori</string-name>
          <email>marco.gori@unisi.it</email>
          <xref ref-type="aff" rid="aff2">2</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Tomáš Bisták</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Daniel Trizna</string-name>
          <email>daniel.trizna@csirt.sk</email>
          <xref ref-type="aff" rid="aff0">0</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Zekeri Adams</string-name>
          <email>zekeri.adams@fmph.uniba.sk</email>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <aff id="aff0">
          <label>0</label>
          <institution>CSIRT.SK, Ministry of Investment, Regional Development and Informatization</institution>
          ,
          <addr-line>Pribinova 25, 81109 Bratislava</addr-line>
          ,
          <country country="SK">Slovakia</country>
        </aff>
        <aff id="aff1">
          <label>1</label>
          <institution>Comenius University in Bratislava</institution>
          ,
          <addr-line>Mlynská dolina, 84248 Bratislava</addr-line>
          ,
          <country country="SK">Slovakia</country>
        </aff>
        <aff id="aff2">
          <label>2</label>
          <institution>Department of Information Engineering and Mathematics, University of Siena</institution>
          ,
          <addr-line>Via Roma 56, 53100, Siena</addr-line>
          ,
          <country country="IT">Italy</country>
        </aff>
        <aff id="aff3">
          <label>3</label>
          <institution>Faculty of Sciences</institution>
          ,
          <addr-line>Scuola Normale Superiore, Piazza dei Cavalieri 7, 56126 Pisa</addr-line>
          ,
          <country country="IT">Italy</country>
        </aff>
        <aff id="aff4">
          <label>4</label>
          <institution>Institute of Informatics, Slovak Academy of Sciences</institution>
          ,
          <addr-line>Dúbravská cesta 9, 84507 Bratislava</addr-line>
          ,
          <country country="SK">Slovakia</country>
        </aff>
        <aff id="aff5">
          <label>5</label>
          <institution>Istituto di Linguistica Computazionale, CNR</institution>
          ,
          <addr-line>Via Giuseppe Moruzzi 1, 56127 Pisa</addr-line>
          ,
          <country country="IT">Italy</country>
        </aff>
        <aff id="aff6">
          <label>6</label>
          <institution>Istituto di Scienza e Tecnologie dell'Informazione, CNR</institution>
          ,
          <addr-line>Via Giuseppe Moruzzi 1, 56127 Pisa</addr-line>
          ,
          <country country="IT">Italy</country>
        </aff>
        <aff id="aff7">
          <label>7</label>
          <institution>Slovak Technical University</institution>
          ,
          <addr-line>Ilkovičova 3, 84104 Bratislava</addr-line>
          ,
          <country country="SK">Slovakia</country>
        </aff>
      </contrib-group>
      <pub-date>
        <year>2019</year>
      </pub-date>
      <volume>5</volume>
      <issue>2022</issue>
      <fpage>0000</fpage>
      <lpage>0003</lpage>
      <abstract>
        <p>The inevitable rise of machine learning in malware analysis puts forward the need for human-understandable explanations of the learned results. We point out how the ontological representation of malware data provides a suitable language for the construction of such explanations. We then focus on possible methods that enable producing such explanations and we reflect on our experience with them in the context of the EMBER dataset. Proceedings of the Joint Ontology Workshops (JOWO) - Episode XI: The Sicilian Summer under the Etna, co-located with the 15th ∗Corresponding author.</p>
      </abstract>
      <kwd-group>
        <kwd>Malware analysis</kwd>
        <kwd>explainable AI</kwd>
        <kwd>ontology</kwd>
        <kwd>EMBER dataset</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec id="sec-1">
      <title>1. Introduction</title>
      <p>The essential role of machine learning (ML) in malware analysis has now become unquestionable
[1, 2, 3, 4, 5, 6, 7, 8, 9]. But it has also become apparent [10, 11, 12] that such an instrument must be
implemented in a trustworthy and interpretable fashion – so as to empower malware analysts not
only with an efficient tool to classify samples into
malware and benign but also to provide suitable
justifications for such a classification. A key asset in this endeavour is the availability of suitable
datasets collecting sufficient amounts of preclassified samples of quality data. In the malware domain,
this role has been predominantly filled by EMBER [13] and SoReL-20M [14] datasets collecting samples
extracted by static analysis of malware and benign PE files (i.e. Windows executable files). Building
on the assumption that good justifications require a well-established, shared language to be expressed
in, we have developed the PE Malware Ontology [15], which provides a suitable vocabulary for their
construction and captures essential domain knowledge that may potentially aid both classification and
justification tasks.
nEvelop-O</p>
      <p>CEUR
Workshop</p>
      <p>ISSN1613-0073</p>
      <p>The explanations/justifications themselves may be obtained in diverse ways, and they may have
different forms. We will focus predominantly on methods loosely falling under the umbrella of structured
machine learning (SML) [16], which enables to find justifications in the form of symbolic expressions
with two important properties – (i) they are, to a large extent, human readable and interpretable and
(ii) they may be used as symbolic classifiers, i.e. they may be evaluated as true or false w.r.t. each data
sample and possibly some background knowledge base 𝒦. In a general SML setting, we assume a formal
symbolic language ℒ where we abstract from the particular syntax and semantics – we only assume
that one can express equivalence (≡), a predicate being true for an individual (𝐶(𝑎)), and entailment: an
expression being true in a knowledge base (𝒦 ⊧ 𝜙).</p>
      <p>Given a knowledge base 𝒦, the task of SML is to generate expressions 𝐶, which characterize the
input samples 𝐸 = 𝐸⁺ ∪ 𝐸⁻, where 𝐸⁺ are positive examples and 𝐸⁻ are negative examples. Formally,
the goal is to find a target expression 𝐶, so that 𝒦 ∪ { TARGET ≡ 𝐶 } ⊧ TARGET(𝑒) for all 𝑒 ∈ 𝐸⁺ and
𝒦 ∪ { TARGET ≡ 𝐶 } ⊭ TARGET(𝑒) for all 𝑒 ∈ 𝐸⁻. Of course, such precise characterization across the
whole sample is only possible in theory. In practice we will expect it to “hold” to a certain extent
(hopefully as high as possible), and we will rely on standard information retrieval methodology to
characterize the success in terms of accuracy, F1 measure, and false-positive rate.</p>
      <p>
          In this note, we summarize our experience with five different approaches that we have applied on
the EMBER dataset with the aim of obtaining suitable justifications for the malware classification task:
(
        <xref ref-type="bibr" rid="ref1">1</xref>
        ) post-hoc methods such as LIME, SHAP and Anchor, (
        <xref ref-type="bibr" rid="ref2">2</xref>
        ) decision tree learning, (
        <xref ref-type="bibr" rid="ref3">3</xref>
        ) description logics
concept learning, (
        <xref ref-type="bibr" rid="ref4">4</xref>
        ) knowledge graph embedding, and (
        <xref ref-type="bibr" rid="ref5">5</xref>
        ) logic explained networks. They may be
roughly classified into post-hoc (
        <xref ref-type="bibr" rid="ref1">1</xref>
        ) and intrinsic (
        <xref ref-type="bibr" rid="ref2 ref3 ref4 ref5">2–5</xref>
        ), while the latter all loosely fall under SML and
may be further divided into purely symbolic (
        <xref ref-type="bibr" rid="ref2 ref3">2–3</xref>
        ) and neuro-symbolic (
        <xref ref-type="bibr" rid="ref4 ref5">4–5</xref>
        ) methods.
      </p>
      <p>These methods vary largely in their design, character, computational effectiveness, and potential
types of output. It is not our goal in this note to conduct a rigorous comparison; instead, we summarize
our experience, highlight their strengths and limitations and focus on understanding and comparing
especially the types of output these methods may potentially provide. In the consecutive discussion
we focus on further questions open in light of our research, potential other methods to investigate in
this context, the question of the quality of the data and how it can be improved by dynamic malware
analysis, and finally also on the issues related to presentation of the obtained explanations to human
users.</p>
    </sec>
    <sec id="sec-2">
      <title>2. Data and Ontology</title>
      <p>The experiments described in the following sections have been performed on datasets derived from
EMBER [13], a benchmark dataset for training static malware detection models. EMBER contains
structured entries describing 1.1 million Windows Portable Executable files (PE files). We have only
used samples labelled as malware or benign (400,000 each) ignoring unlabelled samples.</p>
      <p>Each EMBER sample is described by a JSON object in terms of properties of the respective PE
file determined by static analysis (i.e., without actually running the sample). These include general
information (file size, presence of a digital signature, the numbers of imported and exported functions,
etc.), header information (the target architecture, linker version, various timestamps), descriptions of
the PE file’s sections (name, content type, access rights, entropy), lists of imported functions per DLL,
and the list of exported functions. There are multiple numerical properties, such as a byte histogram,
byte-entropy histogram, or simple statistics for various kinds of strings found in the file.</p>
      <sec id="sec-2-1">
        <title>2.1. PE Malware Ontology and Ontological Datasets</title>
        <p>We have designed the PE Malware Ontology [15] to semantically describe EMBER and similar data
sources on static malware analysis of PE file samples with focus on interpretability. This OWL 2 ontology
comprises 195 classes, 6 object properties, and 10 data properties. It is expressible in the light-weight
DL-Lite<sub>core</sub> logic underlying the OWL 2 QL profile. The core classes and properties are depicted in Fig. 1.</p>
        <p>FileFeature</p>
        <p>Action
has_file_
feature
has_action
ExecutableFile</p>
        <p>DynamicLink</p>
        <p>Library</p>
        <p>PEFile
exports_count: integer
imports_count: integer
mz_count: integer
symbols_count: integer
path_strings_count: integer
registry_strings_count: integer
url_strings_count: integer
has_
section</p>
        <p>Section
section_name: string
section_entropy: double
has_section_
feature
has_section_
flag</p>
        <p>SectionFeature</p>
        <p>SectionFlag
CodeSection</p>
        <p>InitializedDataSection</p>
        <p>UninitializedDataSection</p>
        <p>A sample is described as an instance of the central PEFile class and related to its constituent sections,
instances of the Section class. Files and sections are further classified by their type and described
by relationships to instances of the FileFeature, SectionFeature, and SectionFlags classes. The
former two subsume, respectively, 15 and 3 feature classes relevant to malware detection from the
domain experts’ point of view (e.g., entry point located in a non-executable section, presence of often
exploited kinds of sections, high section content entropy). The SectionFlags class subsumes classes of
access-control flags whose unusual combination may also indicate malware. The richest characterization
of the sample comes from relating it to Action instances, representing the actions that the sample can
perform. These abstract and standardize the lists of functions imported by the sample from system
DLLs. Each action is classified in one of 139 classes derived from the standard MAEC vocabulary of
malware actions [17]. Further abstraction is provided by higher-level action classes that partition the
lower-level classes into 17 categories (networking, access management, system manipulation, …).</p>
        <p>We have produced and published a collection of 31 ontological datasets combining the PE malware
ontology with transformed EMBER data, each with an equal proportion of malware and benign samples.
Aiming towards computationally intensive symbolic methods and 𝑘-fold cross-validation, the collection
contains datasets of 1,000, 10,000, and 100,000 samples (10 datasets per size), and the full dataset
describing all 800,000 labelled EMBER samples.</p>
      </sec>
      <sec id="sec-2-2">
        <title>2.2. Vectorized Dataset</title>
        <p>For experiments with ML methods requiring traditional datasets with numerical feature vectors, we
have created a dataset reflecting the binary nature of class membership and feature relationships
in the ontological dataset, as well as the inability of many ontology-based methods to work with
numeric values. The samples in this dataset are thus mapped to a 0/1-valued feature space. Its
dimensions correspond to selected concept expressions being true of the sample. These expressions
are either classes (e.g., the feature is_dll corresponds to DynamicLinkLibrary) or (chains of)
existential restrictions expressing the presence of particular features in the sample or one of its
sections (e.g., has_nonstandard_mz stands for ∃has_feature.NonstandardMZ; act_lf_execute_file for
∃has_action.ExecuteFile; and sect_rdata_has_CNT_INITIALIZED_DATA for ∃has_section.(∃name.
{"rdata"} ⊓ InitializedDataSection)). As the targeted methods can handle large amounts of data, a
single vectorized dataset based on all 800,000 labelled EMBER samples was produced.</p>
      </sec>
    </sec>
    <sec id="sec-3">
      <title>3. Explainable Malware Classification Methods</title>
      <sec id="sec-3-1">
        <title>3.1. Post-Hoc Methods</title>
        <p>Post-hoc explanation methods try to extract and unravel the rationale behind black-box models’ decisions
after training, hence the term post-hoc explainers. To apply a post-hoc explainer, a black-box model
is first trained to perform the classification. Then the post-hoc explainer is applied to extract the
explanations for their decisions, providing insights and enhancing trust in the system. Popular in
literature is the use of LIME (Local Interpretable Model-Agnostic Explanations) [18], SHAP (SHapley
Additive exPlanations) [19], and Anchor [20] explainers, due to their model-agnostic nature.</p>
        <p>LIME explains individual predictions by approximating the black-box model locally with a simple
interpretable model (e.g., linear regression). It perturbs a given instance to obtain modified inputs and
observe changes in model’s output to determine which features contribute most to the prediction.</p>
        <p>SHAP assigns each feature an importance value for a specific prediction using principles from
cooperative game theory. It considers all possible feature combinations to compute how much each
feature contributes on average, making it well-suited for capturing both local and global feature
importance.</p>
        <p>Anchor produces high-precision rule-based explanations by identifying conditions (anchors) that
are sufficient to “lock in” a model’s prediction. These rules are instance-specific but are designed to be
precise and easy to understand, typically in the form of IF–THEN logic.</p>
        <p>
          We explored the effectiveness of these explainers in the malware domain. We first employed the entire
800,000 labelled samples of the vectorized version of the EMBER dataset described in Section 2.2, split
into 60:20:20 as training, validation and test set. Training set was used to train a multi-layer perceptron
(MLP) with two hidden layers (512 and 256 units) using ReLU activation and dropout rates of 0.4 and
0.2 for regularization. The model was optimized using the Adam optimizer (learning rate = 0.001) with
binary cross-entropy as the loss function, and batch size of 100. Validation set was used to compute the
validation loss, which served for the early-stopping criterion. The model achieved F1-score of 91% and
false positive rate of 7% on the test set. Using the test set, we then extracted the explanations using SHAP
and Anchor. Figure 2 shows the resulting feature importance scores produced by SHAP, highlighting
has_debug, is_dll, and has_section_high_entropy as the top three influential features. Formula (
          <xref ref-type="bibr" rid="ref1">1</xref>
          ),
a sample rule obtained from Anchor, rewritten in DL syntax, shows that the simultaneous presence of
high-entropy sections, a write-execute memory region, and a file-writing behaviour indicates a strong
likelihood of malware. Similarly, Formula (
          <xref ref-type="bibr" rid="ref2">2</xref>
          ) shows that the absence of DLL characteristics, relocation
information, and high entropy in the UPX1 section—combined with the presence of a write-execute
section—indicates a potential malware instance.
        </p>
        <p>SHAP Feature Importance
has_debug</p>
        <p>is_dl
has_section_high_entropy
act_ph_al ocate_process_virtual_memory</p>
        <p>has_signature
act_ph_free_process_virtual_memory
has_urls_strings
act_lh_free_library
act_sph_release_critical_section</p>
        <p>has_nonstandard_mz
has_multiple_executable_sections</p>
        <p>act_rh_close_registry_key
act_sm_get_elapsed_system_up_time
act_ph_modify_process_virtual_memory_protection
act_adbg_check_for_remote_debugger
act_sm_get_windows_directory
has_write_execute_section</p>
        <p>has_paths_strings
act_sph_create_critical_section
sect_rdata_has_CNT_INITIALIZED_DATA</p>
        <p>0.00 0.02 0.04 0.06
mean(|SHAP value|) (average impact on model output magnitude)
(a)
has_debug</p>
        <p>is_dl
has_section_high_entropy
act_ph_al ocate_process_virtual_memory</p>
        <p>has_signature
act_ph_free_process_virtual_memory
has_urls_strings
act_lh_free_library
act_sph_release_critical_section</p>
        <p>has_nonstandard_mz
has_multiple_executable_sections</p>
        <p>act_rh_close_registry_key
act_sm_get_elapsed_system_up_time
act_ph_modify_process_virtual_memory_protection
act_adbg_check_for_remote_debugger
act_sm_get_windows_directory
has_write_execute_section</p>
        <p>has_paths_strings
act_sph_create_critical_section
sect_rdata_has_CNT_INITIALIZED_DATA</p>
        <p>SHAP Feature ImportanceHigh
trFvaaeeeuu
l
(b)</p>
        <p>0.2 0.0 0.2 Low
SHAP value (impact on model output)</p>
        <p>
          ∃has_section.∃has_section_feature.HighEntropy ⊓ ∃has_file_feature.WriteExecuteSection
⊓ ∃has_action.WriteToFile
¬DynamicLinkLibrary ⊓ ∃has_file_feature.Relocations
⊓ ∃has_section.(∃name.{"upx1"} ⊓ ∃has_section_feature.HighEntropy)
(
          <xref ref-type="bibr" rid="ref1">1</xref>
          )
(
          <xref ref-type="bibr" rid="ref2">2</xref>
          )
        </p>
        <p>Furthermore, we propose a hybrid explainability framework that extends SHAP and Anchor. While
SHAP and LIME provide first-order or instance-specific explanations, our method enhances SHAP to
provide second-order pairwise feature interactions and extends Anchor to extract symbolic decision rules
characterizing malware behaviour across samples. These global rules support not only interpretability
but also classification of previously unseen data. Figure 3 shows the result of the extended SHAP
framework on the trained MLP model. The plot shows the average change in the predicted probability
for a number of possible feature combinations. Positive values indicate a shift toward the model
predicting malware, while negative values indicate a shift toward benign classification.
has_section_high_entropy &amp; sect_text_write_execute_section = 11 (n=6)</p>
        <p>has_nonstandard_imports_count &amp; is_dll = 11 (n=22)
act_ph_modify_process_virtual_memory_protection &amp; has_nonstandard_section_name = 10 (n=22)</p>
        <p>has_debug &amp; is_dll = 11 (n=60)
has_paths_strings &amp; has_signature = 11 (n=24)
act_ph_allocate_process_virtual_memory &amp; act_ph_modify_process_virtual_memory_protection = 11 (n=43)
act_ph_modify_process_virtual_memory_protection &amp; act_sm_get_windows_directory = 11 (n=42)
act_ph_modify_process_virtual_memory_protection &amp; sect_idata_writable = 10 (n=22)
has_debug &amp; has_signature = 11 (n=60)
has_nonstandard_mz &amp; is_dll = 01 (n=21)
act_ph_modify_process_virtual_memory_protection &amp; sect_tls_writable = 10 (n=22)</p>
        <p>has_signature &amp; sect_rsrc_has_high_entropy = 11 (n=21)
act_ph_modify_process_virtual_memory_protection &amp; act_sph_create_critical_section = 10 (n=21)</p>
        <p>has_multiple_executable_sections &amp; sect_text_write_execute_section = 11 (n=21)
act_ph_modify_process_virtual_memory_protection &amp; act_sm_get_elapsed_system_up_time = 10 (n=21)</p>
        <p>Top 15 Strongest Interactions</p>
        <p>Towards Malware</p>
        <p>Towards Benign
0.2 Av0e.r1age Eff0e.c0t on Pre0d.1iction 0.2 0.3</p>
        <p>We applied the framework to the vectorized EMBER dataset, the extracted rules by the extended
Anchor retained 80% F1-score with 12% false positive rate. The resulting rules offered actionable insights
and contributed a transparent reasoning layer over high-performing black-box models.</p>
        <p>In conclusion, while the application of the post-hoc methods enhances interpretability, it is important
to note that the fidelity of the generated explanations has been questioned in the literature [21, 22].</p>
      </sec>
      <sec id="sec-3-2">
        <title>3.2. Decision-Tree Learning</title>
        <p>
          Decision trees originating in classical machine learning can be used to construct a hierarchical tree
model of conditions composed of the root, nodes, and leaves. The most decisive condition of such a
tree is the condition located in the root of the tree. In our study [23], we trained a decision tree model
C4.5 on the vectorized dataset (cf. Sect. 2.2) in order to construct a hierarchy tree. The tree can be
used to extract rules as conjunctions of the conditions on the paths from the root to the leaves. The
leaves contain information on the number of samples classified by the respective path. The rules can be
extracted based on 1) the leaves with the highest proportion of successfully classified records or 2) the
shortest paths from the root to the leaves, resulting in the most compact rules. In [23], we demonstrated
the extracted rules based on the shortest paths for benign and malware samples. With the C4.5 model,
our own proprietary ESFS feature selection method and 200 features, we created a tree model which
classifies malware samples with accuracy of 91.61%, true positive rate of 92.3%, false positive rate of
9.1%, and macro F1 of 91.65%. Formulas (
          <xref ref-type="bibr" rid="ref3">3</xref>
          ) and (
          <xref ref-type="bibr" rid="ref4">4</xref>
          ) show examples of rules, rewritten in DL syntax, for
benign and malware samples respectively, which correspond to subtrees shown in Fig. 4.
¬DynamicLinkLibrary ⊓ ¬∃has_section.(∃name.{"pdata"} ⊓ InitializedDataSection)
⊓ ∃has_section.(∃name.{"didat"} ⊓ ∃has_section_flag.Writable)
⊓ ¬∃has_section_feature.WriteExecuteSection
¬DynamicLinkLibrary ⊓ ¬∃has_section.(∃name.{"pdata"} ⊓ InitializedDataSection)
⊓ ¬∃has_section.(∃name.{"didat"} ⊓ ∃has_section_flag.Writable)
⊓ ∃has_section.(∃name.{"coderpub"} ⊓ ∃has_section_flag.Readable)
(
          <xref ref-type="bibr" rid="ref3">3</xref>
          )
(
          <xref ref-type="bibr" rid="ref4">4</xref>
          )
(a)
(b)
        </p>
      </sec>
      <sec id="sec-3-3">
        <title>3.3. Concept Learning</title>
        <p>Concept learning [24] is a method that enables the learning of a class expression that matches a set
of examples in a knowledge base. That is, it is an SML method in which the learned expression is a
complex concept expression in a description logic [25]. Generally, concept learning algorithms search
through a space of all possible concepts using refinement operator [26], which for a given concept
returns a set of refined concepts and heuristics, that controls how the search space is traversed. In our
work, we investigated four main concept learning algorithms that are available in DL-Learner [27]
framework: OCEL [26], CELOE [28], PARCEL [29] and SPACEL [30].</p>
        <p>
          We performed multiple experiments using previously mentioned concept learning algorithms and
PE Malware ontology [15, 31]. In these experiments, we used datasets containing 1000 samples (of
which 500 were positive examples and 500 negative examples). For evaluation, we used k-fold cross
validation, with 𝑘 = 5. We achieved an F1 score of 74% and an FP rate of 22% for the OCEL algorithm.
This algorithm produces a single class expression that characterizes the whole dataset. The expression
of the class for OCEL can be seen in Formula (
          <xref ref-type="bibr" rid="ref5">5</xref>
          ). For PARCEL algorithm, which on the other hand
produces partial definitions (that are combined together using disjunction), we achieved F1 score of 77%
and FP rate of 15%. An example of a partial definition can be seen in Formula (
          <xref ref-type="bibr" rid="ref6">6</xref>
          ). Parallel algorithms
such as PARCEL or SPACEL usually produce significant amounts of shorter class expressions.
        </p>
        <p>ExecutableFile ⊓ ∃has_file_feature.(MultipleExecutableSections ⊔ NonstandardMZ)
⊓ ∃has_section.∃has_section_flag.Writable
⊓ ⩽1has_action.(AcceptSocketConnection ⊔ DirectoryHandling</p>
        <p>⊔ EnumerateThreads ⊔ GetProcessCurrentDirectory ⊔ OpenMutex)
⩾3has_section.(∃has_section_feature.NonstandardSectionName ⊓ ∃has_section_flag.Writable)</p>
        <p>
          In [32], we explored the possibility of improving the efficiency of concept learning by focusing on
individual malware families. Previous results showed that concept learning lags significantly behind
standard machine learning algorithms. Through these experiments, we concluded that concept learning
struggles to detect malware in general, as it involves a large search space. For this reason, we decided
to train models for individual malware families, since a set of malware samples from a specific family
should contain multiple common features (i.e., a smaller search space) compared to all malware samples
in the dataset. As part of these experiments, we trained the classifiers separately for five families and
combined the results using disjunction. We were able to obtain models with an F1 score of 91% and FP
(
          <xref ref-type="bibr" rid="ref5">5</xref>
          )
(
          <xref ref-type="bibr" rid="ref6">6</xref>
          )
rate of 0.24% for the OCEL algorithm, and an F1 score of 93% and an FP rate of 11% for the PARCEL
algorithm.
        </p>
      </sec>
      <sec id="sec-3-4">
        <title>3.4. Fuzzy Concept Learning</title>
        <p>One disadvantage of regular DLs is their inability to work on top of numeric data values. For example,
in the original dataset the imports count ≥ 0 is given for each sample and the entropy value 0–1 is given
for each section. To be able to include these important features in the concept learning process, we had
to pre-process the input data and generate a “crisp” (i.e. binary) derived file feature low_imports_count
and section feature high_entropy based on a suitable threshold. Fuzzy DL-learner [33, 34] is able to
learn fuzzy concept descriptions in ℰℒ (D), which also supports automatic learning of “fuzzy datatypes”
such as imports_count_veryLow or entropy_high based on actual numeric values in the dataset [35].
The final learned expressions are fuzzy classification rules 𝐶 ⊑<sub>𝛼</sub> TARGET with the meaning that the
learned fuzzy concept 𝐶 classifies under the target concept TARGET with confidence 0 ≤ 𝛼 ≤ 1. The
Fuzzy DL-Learner system implements various learning algorithms including fuzzy DL-FOIL [36, 37],
pFOIL-DL [38], Fuzzy OWL Boost [33] and fuzzy PN-OWL [34]. The latter, currently the most effective
one, uses a novel learning strategy, alternating a P-stage in which the algorithm tries to learn positive
fuzzy classification rules covering as much of the sample as possible with high precision, and a N-stage
in which the algorithm tries to learn fuzzy classification rules that rule out as many false positives
as possible that are covered by the rules learned in the P-stage. The rules are then combined via an
aggregation function.</p>
        <p>
          In the experiments both algorithms were compared on one of the EMBER_1k datasets (which was one
of the two largest datasets included in the study). PN-OWL outperformed fuzzy DL-FOIL by achieving
the F1 of 73.4% vs. 70.4%. One of the learned fuzzy DL classification rules can be seen in Formula (
          <xref ref-type="bibr" rid="ref7">7</xref>
          ).
        </p>
        <p>
          ExecutableFile ⊓ ∃has_action.CreateWindow ⊓ ∃imports_count.imports_count_veryLow
⊓ ∃urls_strings_count.urls_strings_count_low ⊑0.986 Malware
(
          <xref ref-type="bibr" rid="ref7">7</xref>
          )
Malware experts particularly evaluated the expressions featuring the learned fuzzy datatypes as useful
and insightful.
        </p>
      </sec>
      <sec id="sec-3-5">
        <title>3.5. KG Embedding</title>
        <p>Knowledge base embedding (KBE), particularly knowledge graph embedding (KGE), enables embedding
of structured semantic information from malware ontologies into continuous vector spaces, preserving
relationships among malware attributes and behaviours. In our study [39], we used the RotatE
embedding model [40], which maps entities and relations into vector space using rotational transformations,
allowing it to model complex relational patterns with high fidelity. We utilized one of the 1000-sample
datasets of the ontology built on the EMBER dataset as described in Section 2.1 for our experiment.
Triples were extracted from the ontology via SPARQL queries, and these were then embedded using the
PyKEEN library.</p>
        <p>We evaluated three KGE methods – TransE, BoxE, and RotatE – with RotatE model, trained with
128-dimensional embeddings, performing best, with classification accuracy of 80% and 16% false positive
rate. Hence, it was selected for explainable rule generation. To extract interpretable rules from the
RotatE model, we first retrieved top-scoring triples associated with the ‘Malware’ class. A recursive
algorithm then generated conjunctions of these triples that showed high classification performance.
Finally, a disjunction of the top conjunctions—limited to six for complexity control—was selected as the
final rule. This rule was expressed in DL syntax as shown in Formula 8, and evaluated on the test set
achieving accuracy of 76% with 22% false positive rate. The explanation generation approach employed
here is model-specific, as opposed to methods such as the fuzzy concept learning or decision trees
which are intrinsically explainable, or post-hoc methods such as LIME, SHAP and Anchor which are
model-agnostic.</p>
        <p>
          Malware ⊑ (¬∃has_file_feature.Debug ⊓ ∃has_action.ProcessHandling)
⊔ (ExecutableFile ⊓ ∃has_file_feature.MultipleExecutableSections)
(
          <xref ref-type="bibr" rid="ref8">8</xref>
          )
        </p>
      </sec>
      <sec id="sec-3-6">
        <title>3.6. Logic Explained Networks</title>
        <p>Motivated by the need to retain the high performance of deep learning models while ensuring
interpretability and explanation fidelity, we employ Logic Explained Networks (LENs) [21], which offer
transparent decision-making without compromising predictive accuracy. Logic Explained Networks
(LENs) are interpretable-by-design neural architectures that provide human-understandable First-Order
Logic (FOL) explanations for their decisions, aiming to combine the high performance of black-box
deep learning models with interpretability. Unlike traditional interpretable models such as decision
trees, linear regression, or 𝑘-Nearest Neighbours, which often struggle with performance or scalability,
Logic Explained Networks (LENs) achieve high predictive performance while producing explanations in
the form of logical rules. In contrast to post-hoc explanation methods typically applied to deep learning
models, LENs are intrinsically interpretable, ensuring that the generated explanations are inherently
faithful to the model’s decision-making process.</p>
        <p>LENs require human-understandable predicates as inputs, such as tabular data or concepts extracted
from raw data. They determine the relevant subset of the input concepts that account for their decisions
through an ad-hoc pruning and regularization technique. Formally, a LEN f is a function, mapping
input concepts represented in [0, 1] to one or more output classes (r ≥ 1), simultaneously supporting
classification and explanations framed as first-order logic (FOL) rules built upon these concepts.</p>
        <p>
          In our study [41], we employed a tailored variant called Tailored-LENs, which introduces a
threshold-based optimization strategy to refine the global explanations, thereby reducing false positives and
improving explanation fidelity. This method enables LENs to scale to large datasets while maintaining
readable and compact explanations, distinguishing them from both standard LENs and other
interpretable models. We applied Tailored-LENs to the vectorized EMBER dataset. The dataset was used with
varying numbers of selected features, with experiments run on 600,000 samples for training and 200,000
for testing using our proprietary feature selection pipeline. The LENs model achieved an accuracy of
92.32%, precision of 93.35%, and false positive rate of 6.52%. Generated local rules, such as an example
in DL syntax in Formula (
          <xref ref-type="bibr" rid="ref9">9</xref>
          ), were validated by domain experts as indicative of packed or unsigned
malware. Compared to other interpretable approaches, Tailored-LENs outperformed both concept
learning methods and decision trees, while achieving performance on par with state-of-the-art deep
learning models.
        </p>
        <p>
          ∃has_section.∃has_section_feature.HighEntropy ⊓ ∃has_feature.Signature
(
          <xref ref-type="bibr" rid="ref9">9</xref>
          )
        </p>
      </sec>
    </sec>
    <sec id="sec-4">
      <title>4. Comparison</title>
      <p>While the majority of machine learning research is focused on achieving the best possible performance
regarding the task being solved, in explainable AI it is necessary to also factor in the desirable properties
of explanation methods themselves, such as their applicability, efficiency or expressivity of the produced
explanations. Therefore, classifiers and explainers need to be evaluated with respect to each other. In
this section, we discuss our experience with applying mentioned methods on the EMBER dataset. A
concise summary of their properties can be found in Table 1.</p>
      <p>Malware Classification Accuracy. As we can observe, many of the methods achieved both accuracy
and F1 91-92% on par with what was shown earlier in the literature [42]. Less performing methods
included fuzzy concept learning and KG embedding, however here the experiments were limited to the
smallest 1k datasets, limiting the potential generalization power. The best FP rates were around 6–7%
which is not low but may be expected when working with real-world datasets. All in all a variety of
methods achieved good enough performance and the most suitable method may be selected among
these based on other criteria.
Data Processing Efficiency. From the point of view of data efficiency, concept learning methods
were performing the worst; fuzzy concept learning further added complexity, only being able to process
thousands of inputs in a reasonable time. It may be useful to investigate what target expressivity of
DL concepts is necessary for the task and consecutively to explore reducing the expressivity in the
configuration of DL-Learner or even trying out specific systems designed for tractable DLs, such as
SPELL [43].</p>
      <p>Required Data Representation. We have developed an ontological representation for the datasets,
and thus it is natural to apply methods that can directly take knowledge-graph data in RDF format as
inputs. This has additional advantages in no data loss on the input (by required feature selection), potential
exploitation of ontological domain knowledge, and rich direct representation for the explanations. Not
all methods however support this and for some of them the inputs need to be vectorized. As the authors
note [21], LENs require vectorized data, however, individual features must correspond to meaningful,
interpretable concepts. To a certain extent, this is also true for all other methods with vectorized inputs –
to be able to construct the explanations. In our use case, this meaningful interpretation is derived from
the ontology.</p>
      <p>Expressivity of Explanations. From the expressivity point of view, post-hoc methods are the least
powerful. LIME and SHAP explain their predictions via importance scores. Anchor is better thanks to
explanations being rule-based, however, the “vanilla” version provides only local explanations. Our
extended version alleviates this problem by generating global rules. The downside of these three
methods is the fact that, as they are post-hoc, they may suffer from fidelity issues.</p>
      <p>Other used methods all provide rule-based explanations. The highest expressivity is offered by
concept learning, depending on the system and particular algorithm used. The target expressivity of
the searched expressions may also be configured by the user of concept learning systems. One relevant
factor appears to be whether disjunctions are supported or allowed. For example, the fuzzy DL learner does
not support it, which may be one of the reasons for the relatively low achieved classification performance.
The issue needs to be further investigated. However, its capability of learning fuzzy datatypes and
subsequently generating fuzzy rules is beneficial for human users. KG embedding, in turn, is capable of
generating rules with disjunctions. Rules from individual branches of decision trees are represented
as conjunctions, but it is possible to combine rules from multiple branches using disjunctions. LENs
generate local rules in the form of conjunctions and these are then combined into disjunctions, which
represent the global rules. However, the resulting global rules are often too complex. We tried to tackle
this problem in [41].</p>
      <p>Fidelity. The fidelity of an explanation is the measure of how faithful it is to the model that it is
explaining, in the sense that if the explanations were to be used for classification, how well would the
outputs match the original model’s outputs. Methods such as decision trees, LENs, and symbolic
approaches like concept learning are intrinsically interpretable, so their explanations should, theoretically,
have 100% fidelity. While for some methods (e.g., concept learning) it is true, some level of fidelity
can be lost in other methods due to the process of simplifying the explanations (for example in LENs),
or due to explanations being only partial (individual branches of decision trees). On the other hand,
post-hoc methods, especially the model-agnostic methods like SHAP, LIME and Anchor have lower
fidelity as they use approximations of black-box models and, therefore, are not able to fully capture
their rationale.</p>
    </sec>
    <sec id="sec-5">
      <title>5. Discussion and Conclusions</title>
      <sec id="sec-5-1">
        <title>5.1. Summary</title>
        <p>We have focused on the problem of classification-based malware characterization under the assumption
that it must be complemented by some form of suitable justification. We have argued in favour of a
symbolic treatment of such justifications, rooted in a suitable ontological vocabulary for the domain.
To this end, we have reported about the proposed PE Malware Ontology and on our experience with
applying multiple approaches mainly on semantically treated EMBER data to achieve such
semantically justified classification that could improve the overall utility and especially the trust of malware experts
in the classification results.</p>
        <p>While this note is not intended as a rigorous formal comparison, we were able to observe and point
out distinct properties of each approach. In summary, on the one hand, symbolic methods, most notably
concept learning, yield very expressive justifications with high fidelity, but they are computationally
very demanding, limiting the overall amount of data feasible to be processed. We were able to partially
mitigate this problem by breaking down the data based on malware families. In the future we plan
to try exploiting methods such as clustering to decrease the datasets with the goal to decrease the
necessary size of the input datasets. We also plan to apply newer, potentially more efficient concept
learning tools [43, 44] based on SAT solvers. Other approaches are computationally more efficient,
but the yielded justifications are less expressive and/or practical, or even somewhat experimental in
nature as in the case of KB embedding. Neural symbolic methods such as LENs are promising w.r.t. the
handled amounts of data – on the other hand, the expression extraction (as much as in the case of e.g.
decision-tree learning) does not take the ontology into account.</p>
      </sec>
      <sec id="sec-5-2">
        <title>5.2. Other Potential Methods</title>
        <p>Neural networks and deep learning have now gained undisputed prominence in machine learning
applications with many novel research streams that are worth exploring. Notably, a large effort is
currently dedicated to alleviating their black-box nature and enabling their applications even in
mission-critical areas such as malware analysis. This is already underscored by the result we were able to obtain
by LEN.</p>
        <p>Graph Neural Networks (GNNs) [45] are a promising method to apply directly over knowledge-graph
inputs as they eliminate the need for feature extraction and vectorization. Different approaches to
achieve explainability of GNNs were studied [46]. Among the most promising appears to be XGNNs
[47], which enable the identification of sub-graph patterns relevant for certain decisions. Similarly, MGNNs [48]
enable the extraction of explanations in the form of datalog rules.</p>
        <p>While capable of learning expressive ontology-based characterizations of malware samples, one of the
main drawbacks of symbolic methods, such as concept learning, lies in their inherently high computational
demand. This greatly limits the amount of data that these methods may possibly process. Here, a
promising approach could be in ontology-based concept recognition from neural networks’ activation
patterns [49]. This method would enable the use of one of the highly effective neural classifiers for
the malware classification task and recognition of the ontology concepts that are relevant for a given
decision. Such an approach would not reach the expressivity of complex concept descriptions outputted
by concept learners that can be used in classification rules; to some extent this can be addressed by
integration of concept learning into the process [50].</p>
        <p>A completely different type of explanations is provided by counterfactual examples [51, 52].
Counterfactual examples are local explanations in the form of what minimal change would need to be applied
to a certain input in order for the model’s output to be changed. This method offers good flexibility; it
has been applied in intrinsic, hybrid, and even model-agnostic scenarios. Moreover, the contrastive
nature of the explanations could be beneficial for human users [ 53]. One of the most promising tools to
obtain counterfactual, contrastive, or even more general argumentative explanations are argumentation
frameworks [54].</p>
      </sec>
      <sec id="sec-5-3">
        <title>5.3. Quality of Data</title>
        <p>All the works above focused on EMBER dataset and thus on data derived from static malware analysis.
Powerful black-box classifiers are known to achieve accuracies as high as 92 % on this data [ 42]. This is
partly due to their high efficiency but partly also due to the inclusion of features with low interpretability,
e.g. byte histograms, which are unsuitable for inclusion in justifications and therefore we have excluded
them from the semantically-treated data [15].</p>
        <p>Also, the feedback we obtained from malware experts suggests that many of our learned justifications
are still rather simple and less insightful than they would hope for. Given that these concept expressions
do approximate best the available data, we conjecture that more detailed data could potentially lead to
more informative concept expressions.</p>
        <p>It is well known that dynamic malware analysis [55], which requires running the evaluated samples
in a protected environment and observing their behaviour, allows for more eficient malware detection,
exactly for this reason. Unfortunately, suitable datasets based on dynamic malware analysis are not yet
as readily available. A suitable ontological representation is part of our ongoing work.</p>
      </sec>
      <sec id="sec-5-4">
        <title>5.4. Usefulness to Human Users</title>
        <p>Throughout this work we have presented a number of examples of different types of explanations
we were able to obtain by the diverse methods. Some are feature importance values, but most of
them have the form of logical expressions (formulas) in some formal language. Their expressivity and
complexity vary, and the question is apparent – what type and size of explanations is actually the most
useful to the human users. Higher expressivity and complexity of these expressions may offer better
classification power over the sample, and to some extent they may be more informative than overly
simple expressions. However, they may also be more expensive to compute and some of the expressive
description logic constructs may be harder to comprehend. We were able to obtain some preliminary
feedback from domain experts as discussed above, but it is apparent that wider and more rigorous
studies are required to better understand these issues in order to steer the research on explanation
extraction methods and also to answer how the explanations are to be presented to the human users
(e.g. if they should be translated into natural language or otherwise accommodated).</p>
      </sec>
    </sec>
    <sec id="sec-6">
      <title>Acknowledgments</title>
      <p>Z. Adams, P. Anthony, Š. Balogh, I. Bečková, M. Homola, J. Kľuka, and P. Švec are funded by the
EU NextGenerationEU through the Recovery and Resilience Plan for Slovakia under the project No.
09I05-03-V02-00064. F. Giannini is funded by the Partnership Extended PE00000013 – “FAIR - Future
Artificial Intelligence Research” – Spoke 1 “Human-centered AI”, and ERC-2018-ADG G.A. 834756 “XAI:
Science and technology for the eXplanation of AI decision making”.</p>
    </sec>
    <sec id="sec-7">
      <title>Declaration on Generative AI</title>
      <p>The author(s) have not employed any Generative AI tools.
[18] M. T. Ribeiro, S. Singh, C. Guestrin, “Why should I trust you?”: Explaining the predictions of any
classifier, 2016.
[19] S. M. Lundberg, S.-I. Lee, A unified approach to interpreting model predictions, Advances in</p>
      <p>Neural Information Processing Systems 30 (2017) 4765–4774.
[20] M. T. Ribeiro, S. Singh, C. Guestrin, Anchors: High-precision model-agnostic explanations,</p>
      <p>Proceedings of the AAAI Conference on Artificial Intelligence 32 (2018).
[21] G. Ciravegna, P. Barbiero, F. Giannini, M. Gori, P. Liò, M. Maggini, S. Melacci, Logic explained
networks, Artificial Intelligence 314 (2023) 103822.
[22] C. Rudin, Stop explaining black box machine learning models for high stakes decisions and use
interpretable models instead, Nature Machine Intelligence 1 (2019) 206–215.
[23] J. Mojžiš, M. Kenyeres, Interpretable rules with a simplified data representation-a case study
with the ember dataset, in: Proceedings of the Computational Methods in Systems and Software,
Springer, 2023, pp. 1–10.
[24] J. Lehmann, P. Hitzler, Concept learning in description logics using refinement operators, Machine</p>
      <p>Learning 78 (2010) 203–250.
[25] F. Baader, I. Horrocks, C. Lutz, U. Sattler, An Introduction to Description Logic, Cambridge</p>
      <p>University Press, 2017.
[26] J. Lehmann, Learning OWL class expressions, volume 22, IOS Press, 2010.
[27] J. Lehmann, Dl-learner: learning concepts in description logics, The Journal of Machine Learning</p>
      <p>Research 10 (2009) 2639–2642.
[28] L. Bühmann, J. Lehmann, P. Westphal, S. Bin, Dl-learner structured machine learning on semantic
web data, in: Companion Proceedings of the The Web Conference 2018, 2018, pp. 467–471.
[29] A. C. Tran, J. Dietrich, H. W. Guesgen, S. Marsland, An approach to parallel class expression
learning, in: Rules on the Web: Research and Applications: 6th International Symposium, RuleML
2012, Montpellier, France, August 27-29, 2012. Proceedings 6, Springer, 2012, pp. 302–316.
[30] A. C. Tran, J. Dietrich, H. W. Guesgen, S. Marsl, Parallel symmetric class expression learning,</p>
      <p>Journal of Machine Learning Research 18 (2017) 1–34.
[31] T. Bisták, P. Švec, J. Kľuka, A. Šimko, Š. Balogh, M. Homola, Improving DL-Learner on a malware
detection use case., in: Description Logics, 2023.
[32] P. Švec, Ontologická reprezentácia pre bezpečnosť informačných systémov [Ontological
representation for security of information systems], Phd thesis, Slovak University of Technology in
Bratislava, Faculty of Electrical Engineering and Informatics, Bratislava, Slovakia, 2024. URL:
https://uim.fei.stuba.sk/wp-content/uploads/2024/10/2024.svec_.dizp_.pdf.
[33] F. A. Cardillo, U. Straccia, Fuzzy owl-boost: Learning fuzzy concept inclusions via real-valued
boosting, Fuzzy Sets Syst. 438 (2022) 164–186.
[34] F. A. Cardillo, F. Debole, U. Straccia, PN-OWL: A two-stage algorithm to learn fuzzy concept
inclusions from OWL 2 ontologies, Fuzzy Sets Syst. 490 (2024) 109048.
[35] I. Huitzil, U. Straccia, N. Díaz-Rodríguez, F. Bobillo, Datil: Learning fuzzy ontology datatypes, in:
Proceedings of the 17th International Conference on Information Processing and Management of
Uncertainty in Knowledge-Based Systems (IPMU 2018), Part II, volume 854 of Communications in
Computer and Information Science, Springer, 2018, pp. 100–112.
[36] F. A. Lisi, U. Straccia, A FOIL-like method for learning under incompleteness and vagueness, in:
23rd International Conference on Inductive Logic Programming, volume 8812 of LNAI, Springer
Verlag, Berlin, 2014, pp. 123–139.
[37] F. A. Lisi, U. Straccia, Learning in description logics with fuzzy concrete domains, Fundamenta</p>
      <p>Informaticae 140 (2015) 373–391.
[38] U. Straccia, M. Mucci, pFOIL-DL: Learning (fuzzy) ℰℒ concept descriptions from crisp owl data
using a probabilistic ensemble estimation, in: Proceedings of the 30th Annual ACM Symposium
on Applied Computing (SAC-15), ACM, Salamanca, Spain, 2015, pp. 345–352.
[39] D. Trizna, P. Anthony, M. Homola, Z. Adams, Š. Balogh, Learning explainable malware
characterization using knowledge base embedding, in: 2024 IEEE 5th International Conference on
Electro-Computing Technologies for Humanity (NIGERCON), IEEE, 2024, pp. 1–8.</p>
    </sec>
  </body>
  <back>
    <ref-list>
      <ref id="ref1">
        <mixed-citation>
          [1]
          <string-name>
            <given-names>D.</given-names>
            <surname>Ucci</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L.</given-names>
            <surname>Aniello</surname>
          </string-name>
          ,
          <string-name>
            <given-names>R.</given-names>
            <surname>Baldoni</surname>
          </string-name>
          ,
          <article-title>Survey of machine learning techniques for malware analysis</article-title>
          ,
          <source>Computers &amp; Security</source>
          <volume>81</volume>
          (
          <year>2019</year>
          )
          <fpage>123</fpage>
          -
          <lpage>147</lpage>
          . doi:
          <volume>10</volume>
          .1016/j.cose.
          <year>2018</year>
          .
          <volume>11</volume>
          .001.
        </mixed-citation>
      </ref>
      <ref id="ref2">
        <mixed-citation>
          [2]
          <string-name>
            <given-names>S.</given-names>
            <surname>Pramanik</surname>
          </string-name>
          ,
          <string-name>
            <given-names>H.</given-names>
            <surname>Teja</surname>
          </string-name>
          , EMBER
          <article-title>- Analysis of malware dataset using convolutional neural networks</article-title>
          ,
          <source>in: 2019 Third International Conference on Inventive Systems and Control (ICISC)</source>
          ,
          <year>2019</year>
          , pp.
          <fpage>286</fpage>
          -
          <lpage>291</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref3">
        <mixed-citation>
          [3]
          <string-name>
            <given-names>D.</given-names>
            <surname>Gibert</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C.</given-names>
            <surname>Mateu</surname>
          </string-name>
          ,
          <string-name>
            <surname>J. Planes,</surname>
          </string-name>
          <article-title>The rise of machine learning for detection and classification of malware: Research developments, trends and challenges</article-title>
          ,
          <source>Journal of Network and Computer Applications</source>
          <volume>153</volume>
          (
          <year>2020</year>
          )
          <fpage>102526</fpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref4">
        <mixed-citation>
          [4]
          <string-name>
            <given-names>K.</given-names>
            <surname>Shaukat</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Luo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>V.</given-names>
            <surname>Varadharajan</surname>
          </string-name>
          ,
          <string-name>
            <given-names>I. A.</given-names>
            <surname>Hameed</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Xu</surname>
          </string-name>
          ,
          <article-title>A survey on machine learning techniques for cyber security in the last decade</article-title>
          ,
          <source>IEEE Access 8</source>
          (
          <year>2020</year>
          )
          <fpage>222310</fpage>
          -
          <lpage>222354</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref5">
        <mixed-citation>
          [5]
          <string-name>
            <given-names>Y.</given-names>
            <surname>Supriya</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Kumar</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Sowjanya</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Yadav</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D. L.</given-names>
            <surname>Kameshwari</surname>
          </string-name>
          ,
          <article-title>Malware detection techniques: A survey</article-title>
          ,
          <source>in: 2020 Sixth International Conference on Parallel, Distributed and Grid Computing (PDGC)</source>
          ,
          <year>2020</year>
          , pp.
          <fpage>25</fpage>
          -
          <lpage>30</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref6">
        <mixed-citation>
          [6]
          <string-name>
            <given-names>J.</given-names>
            <surname>Singh</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J.</given-names>
            <surname>Singh</surname>
          </string-name>
          ,
          <string-name>
            <surname>A survey</surname>
          </string-name>
          <article-title>on machine learning-based malware detection in executable files</article-title>
          ,
          <source>J. Syst. Archit</source>
          .
          <volume>112</volume>
          (
          <year>2021</year>
          )
          <fpage>101861</fpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref7">
        <mixed-citation>
          [7]
          <string-name>
            <given-names>E.</given-names>
            <surname>Raf</surname>
          </string-name>
          ,
          <string-name>
            <given-names>W.</given-names>
            <surname>Fleshman</surname>
          </string-name>
          ,
          <string-name>
            <given-names>R.</given-names>
            <surname>Zak</surname>
          </string-name>
          ,
          <string-name>
            <given-names>H. S.</given-names>
            <surname>Anderson</surname>
          </string-name>
          ,
          <string-name>
            <given-names>B.</given-names>
            <surname>Filar</surname>
          </string-name>
          ,
          <string-name>
            <surname>M.</surname>
          </string-name>
          <article-title>McLean, Classifying sequences of extreme length with constant memory applied to malware detection</article-title>
          ,
          <source>in: Thirty-Fifth AAAI Conference on Artificial Intelligence</source>
          ,
          <source>AAAI 2021, Thirty-Third Conference on Innovative Applications of Artificial Intelligence, IAAI</source>
          <year>2021</year>
          ,
          <source>The Eleventh Symposium on Educational Advances in Artificial Intelligence, EAAI</source>
          <year>2021</year>
          ,
          <string-name>
            <given-names>Virtual</given-names>
            <surname>Event</surname>
          </string-name>
          ,
          <source>February 2-9</source>
          ,
          <year>2021</year>
          , AAAI Press,
          <year>2021</year>
          , pp.
          <fpage>9386</fpage>
          -
          <lpage>9394</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref8">
        <mixed-citation>
          [8]
          <string-name>
            <given-names>P.</given-names>
            <surname>Aggarwal</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S. F.</given-names>
            <surname>Ahamed</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Shetty</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L. J.</given-names>
            <surname>Freeman</surname>
          </string-name>
          ,
          <article-title>Selective targeted transfer learning for malware classification</article-title>
          ,
          <source>in: 2021 Third IEEE International Conference on Trust, Privacy and Security in Intelligent Systems and Applications (TPS-ISA)</source>
          ,
          <year>2021</year>
          , pp.
          <fpage>114</fpage>
          -
          <lpage>120</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref9">
        <mixed-citation>
          [9]
          <string-name>
            <given-names>U.</given-names>
            <surname>Tayyab</surname>
          </string-name>
          ,
          <string-name>
            <given-names>F. B.</given-names>
            <surname>Khan</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M. H.</given-names>
            <surname>Durad</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Khan</surname>
          </string-name>
          ,
          <string-name>
            <given-names>Y. S.</given-names>
            <surname>Lee</surname>
          </string-name>
          ,
          <article-title>A survey of the recent trends in deep learning based malware detection</article-title>
          ,
          <source>J. Cybersecur. Priv</source>
          .
          <volume>2</volume>
          (
          <year>2022</year>
          )
          <fpage>800</fpage>
          -
          <lpage>829</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref10">
        <mixed-citation>
          [10]
          <string-name>
            <given-names>A.</given-names>
            <surname>Mills</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T.</given-names>
            <surname>Spyridopoulos</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Legg</surname>
          </string-name>
          ,
          <article-title>Eficient and interpretable real-time malware detection using random-forest, in: 2019 International conference on cyber situational awareness, data analytics and assessment (</article-title>
          <source>Cyber SA)</source>
          ,
          <year>2019</year>
          , pp.
          <fpage>1</fpage>
          -
          <lpage>8</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref11">
        <mixed-citation>
          [11]
          <string-name>
            <given-names>B.</given-names>
            <surname>Marais</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T.</given-names>
            <surname>Quertier</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C.</given-names>
            <surname>Chesneau</surname>
          </string-name>
          ,
          <article-title>Malware analysis with artificial intelligence and a particular attention on results interpretability</article-title>
          ,
          <source>in: Distributed Computing and Artificial Intelligence</source>
          , Volume
          <volume>1</volume>
          : 18th International Conference, DCAI 2021, Salamanca, Spain,
          <fpage>6</fpage>
          -8
          <source>October</source>
          <year>2021</year>
          , volume
          <volume>327</volume>
          <source>of LNNS</source>
          , Springer,
          <year>2021</year>
          , pp.
          <fpage>43</fpage>
          -
          <lpage>55</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref12">
        <mixed-citation>
          [12]
          <string-name>
            <given-names>J.</given-names>
            <surname>Dolejš</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Jureček</surname>
          </string-name>
          ,
          <article-title>Interpretability of machine learning-based results of malware detection using a set of rules</article-title>
          ,
          <source>in: Artificial Intelligence for Cybersecurity</source>
          , Springer International Publishing,
          <year>2022</year>
          , pp.
          <fpage>107</fpage>
          -
          <lpage>136</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref13">
        <mixed-citation>
          [13]
          <string-name>
            <given-names>H. S.</given-names>
            <surname>Anderson</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Roth</surname>
          </string-name>
          ,
          <article-title>EMBER: an open dataset for training static PE malware machine learning models</article-title>
          ,
          <year>2018</year>
          . arXiv:1804.04637.
        </mixed-citation>
      </ref>
      <ref id="ref14">
        <mixed-citation>
          [14]
          <string-name>
            <given-names>R.</given-names>
            <surname>Harang</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E. M.</given-names>
            <surname>Rudd</surname>
          </string-name>
          , SOREL-20M:
          <article-title>A large scale benchmark dataset for malicious PE detection</article-title>
          , arXiv preprint arXiv:2012.07634
          (
          <year>2020</year>
          ).
        </mixed-citation>
      </ref>
      <ref id="ref15">
        <mixed-citation>
          [15]
          <string-name>
            <given-names>P.</given-names>
            <surname>Švec</surname>
          </string-name>
          ,
          <string-name>
            <given-names>Š.</given-names>
            <surname>Balogh</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Homola</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J.</given-names>
            <surname>Kľuka</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T.</given-names>
            <surname>Bisták</surname>
          </string-name>
          ,
          <article-title>Semantic data representation for explainable Windows malware detection models</article-title>
          ,
          <source>arXiv preprint arXiv:2403.11669</source>
          (
          <year>2024</year>
          ).
        </mixed-citation>
      </ref>
      <ref id="ref16">
        <mixed-citation>
          [16]
          <string-name>
            <given-names>P.</given-names>
            <surname>Westphal</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L.</given-names>
            <surname>Bühmann</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Bin</surname>
          </string-name>
          ,
          <string-name>
            <given-names>H.</given-names>
            <surname>Jabeen</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J.</given-names>
            <surname>Lehmann</surname>
          </string-name>
          ,
          <article-title>SML-Bench - A benchmarking framework for structured machine learning</article-title>
          ,
          <source>Semantic Web</source>
          <volume>10</volume>
          (
          <year>2019</year>
          )
          <fpage>231</fpage>
          -
          <lpage>245</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref17">
        <mixed-citation>
          [17]
          <string-name>MITRE Corp.</string-name>
          ,
          <source>Malware attribute enumeration and characterization</source>
          ,
          <year>2020</year>
          . URL: https://maecproject.github.io/, [Online; accessed 2022-05-15].
        </mixed-citation>
      </ref>
    </ref-list>
  </back>
</article>