<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.0 20120330//EN" "JATS-archivearticle1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta />
    <article-meta>
      <title-group>
        <article-title>Globally local and fast explanations of t-SNE-like nonlinear</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author">
          <string-name>Cyril de Bodt</string-name>
          <email>cyril.debodt@uclouvain.be</email>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff2">2</xref>
          <xref ref-type="aff" rid="aff3">3</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
          <xref ref-type="aff" rid="aff5">5</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
          <xref ref-type="aff" rid="aff7">7</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Pierre Lambert</string-name>
          <email>pierre.h.lambert@uclouvain.be</email>
          <xref ref-type="aff" rid="aff2">2</xref>
          <xref ref-type="aff" rid="aff3">3</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
          <xref ref-type="aff" rid="aff5">5</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
          <xref ref-type="aff" rid="aff7">7</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Rebecca Marion</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
          <xref ref-type="aff" rid="aff5">5</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
          <xref ref-type="aff" rid="aff7">7</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Julien Albert</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
          <xref ref-type="aff" rid="aff5">5</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
          <xref ref-type="aff" rid="aff7">7</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Emmanuel Jean</string-name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff2">2</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
          <xref ref-type="aff" rid="aff5">5</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
          <xref ref-type="aff" rid="aff7">7</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Sacha Corbugy</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
          <xref ref-type="aff" rid="aff5">5</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
          <xref ref-type="aff" rid="aff7">7</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>UNamur - NaDI/PReCISE</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
          <xref ref-type="aff" rid="aff5">5</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
          <xref ref-type="aff" rid="aff7">7</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>TRAIL</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
          <xref ref-type="aff" rid="aff5">5</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
          <xref ref-type="aff" rid="aff7">7</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Namur</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
          <xref ref-type="aff" rid="aff5">5</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
          <xref ref-type="aff" rid="aff7">7</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Belgium</string-name>
          <xref ref-type="aff" rid="aff2">2</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
          <xref ref-type="aff" rid="aff5">5</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
          <xref ref-type="aff" rid="aff7">7</xref>
        </contrib>
        <aff id="aff0">
          <label>0</label>
          <institution>MIT Media Lab</institution>
          ,
          <addr-line>Cambridge [MA]</addr-line>
          ,
          <country country="US">USA</country>
        </aff>
        <aff id="aff1">
          <label>1</label>
          <institution>Multitel &amp; TRAIL</institution>
          ,
          <addr-line>Mons</addr-line>
          ,
          <country country="BE">Belgium</country>
        </aff>
        <aff id="aff2">
          <label>2</label>
          <institution>Stochastic Neighbor Embedding (SNE) [5] mitigates</institution>
        </aff>
        <aff id="aff3">
          <label>3</label>
          <institution>UCLouvain - ICTEAM &amp; TRAIL</institution>
          ,
          <addr-line>Louvain-la-Neuve</addr-line>
          ,
          <country country="BE">Belgium</country>
        </aff>
        <aff id="aff4">
          <label>4</label>
          <institution>Workshop Proceedings</institution>
        </aff>
        <aff id="aff5">
          <label>5</label>
          <institution>[24] L. Pagliosa</institution>
          ,
          <addr-line>P. Pagliosa, L. G. Nonato, Understand-</addr-line>
        </aff>
        <aff id="aff6">
          <label>6</label>
          <institution>[32] R. Marion, A. Bibal</institution>
          ,
          <addr-line>B. Frénay, Bir: A method for</addr-line>
        </aff>
        <aff id="aff7">
          <label>7</label>
          <institution>[33] B. Kang</institution>
          ,
          <addr-line>D. García García, J. Lijffijt, R. Santos-</addr-line>
        </aff>
      </contrib-group>
      <pub-date>
        <year>2002</year>
      </pub-date>
      <abstract>
        <p>Nonlinear dimensionality reduction (NLDR) algorithms such as t-SNE are often employed to visually analyze high-dimensional (HD) data sets in the form of low-dimensional (LD) embeddings. Unfortunately, the nonlinearity of the NLDR process prohibits the interpretation of the resulting embeddings in terms of the HD features. State-of-the-art studies propose post-hoc explanation approaches to locally explain the embeddings. However, such tools are typically slow and do not automatically cover the entire LD embedding, instead providing local explanations around one selected data point at a time. This prevents users from quickly gaining insights about the general explainability landscape of the embedding. This paper presents a globally local and fast explanation framework for NLDR embeddings. This framework is fast because it only requires the computation of sparse linear regression models on subsets of the data, without ever reapplying the NLDR algorithm itself. In addition, the framework is globally local in the sense that the entire LD embedding is automatically covered by multiple local explanations. The different interpretable structures in the embedding are directly characterized, making it possible to quantify the importance of the HD features in various regions of the LD embedding. An example use-case is examined, emphasizing the value of the presented framework. Public codes and a software are available at https://github.com/PierreLambert3/glocally_explained. dimensionality reduction, data visualization, interactivity, interpretability, explainability, t-SNE, data exploration Advances in Interpretable Machine Learning and Artificial Intelligence, ∗Corresponding author.</p>
      </abstract>
      <kwd-group>
        <kwd>Numerous other</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec id="sec-1">
      <title>1. Introduction</title>
      <p>Dimensionality
reduction
(DR)
computes
dimensional (LD) representations of high-dimensional
(HD) data, e.g., to visually explore them or to curb the
curse of dimensionality [1].</p>
      <p>
        The relevance of a DR
method for a given visualization task typically depends
on its preservation of the HD neighborhoods in the
resulting LD embedding [2]. Two major frameworks
have been proposed for projecting from HD to LD
coordinates [1]: one is based on preserving distances [3],
while the other is based on reproducing neighborhoods
[
        <xref ref-type="bibr" rid="ref9">4, 5</xref>
        ]. For instance, distance-preserving methods like
principal component analysis (PCA) [6] and classical
metric multidimensional scaling (MDS) [3] project HD
samples linearly; nonlinear variants of these methods
      </p>
      <p>https://github.com/PierreLambert3 (P. Lambert);
https://github.com/cdebodt (C. de Bodt)</p>
      <p>0000-0003-2347-1756 (C. de Bodt)
as the popular t-SNE [15], UMAP [16], multi-scale
perplexity-free approaches [17, 18, 19], etc.</p>
      <sec id="sec-1-1">
        <title>While these nonlinear DR (NLDR) algorithms deliver</title>
      </sec>
      <sec id="sec-1-2">
        <title>HD data, their intrinsic nonlinearity greatly affects the</title>
        <p>interpretability of the LD representations. Indeed, the
obtained LD dimensions are hardly or most often not
interpretable in terms of the HD features [20]. Since NLDR
methods are not interpretable by design, previous studies
have developed techniques to analyze and interpret the</p>
      </sec>
      <sec id="sec-1-3">
        <title>LD embeddings, which is known as post-hoc explanation</title>
        <p>
          or interpretability [21]. One can for instance cite [
          <xref ref-type="bibr" rid="ref15">22</xref>
          ],
(e.g., [7, 8]) aim to preserve weighted Euclidean or impressively faithful LD embeddings with respect to the
decision trees. On the other hand, [21] locally explain fast. The globally local nature of our approach refers to
t-SNE embeddings by adapting LIME; the authors argue the fact that multiple local explanations are automatically
that explaining the entire embedding at once would be computed over the entire LD embedding (i.e., globally).
difficult, as t-SNE usually does not preserve large HD Such an automatic processing enables the user to directly
distances well [20]. However, the local nature of t-SNE glimpse the overall explainability landscape of the
emmotivates the computation of local explanations in the LD bedding, as well as a structured overview of the impact of
embedding; LIME can then be revisited and performed the HD features in the various parts of the LD embedding.
locally around a user-selected data point. Nevertheless, The regions for which local explanations are learned in
such an approach has two main limitations: (1) it is slow, the LD embedding can be determined in different ways
and (2) it does not cover the entire LD embedding auto- [24]: using a clustering algorithm such as K-means, as
matically, as local explanations are only provided around in this work, thanks to a manual selection performed by
data points that have been selected, one at a time. This the user, or by recursively splitting the embedding into
approach is slow because, in order to explain a given data subcells along the LD dimensions based on a model error
point’s position in the embedding, t-SNE must be reap- criterion.
plied to many artificially simulated points around that Our fast and globally local explanation framework can
data point; the non-parametric nature of t-SNE, combined be viewed as taking the best of both linear and
nonlinwith its significant computational cost, greatly increases ear projection worlds: the LD embedding can indeed be
computation time, which decreases the potential for in- generated by a nonlinear DR algorithm, achieving much
teractivity. The second limitation of the method is that better DR quality in terms of data visualization thanks to
the user only receives a local explanation around the increased flexibility and adaptability [ 12, 15, 2]. On the
selected point in the embedding; she must thus explore other hand, the computed local explanations are linear
the various regions of the embedding manually. This is and sparse, which promotes interpretability. Moreover,
not realistic in practice, especially when working with the globally local explanations make it possible to readily
large databases, and even more so since the approach is depict the importance of the HD features in the different
not fast. regions of the LD embedding. As an experiment, an
exam
        </p>
        <p>This paper aims to address these limitations by devel- ple use-case on a public data set is presented,
highlightoping a fast and globally local explanation framework ing the usefulness of the proposed approach. Free code
for NLDR embeddings. Based on the BIOT explanation and software are publicly available online ( https://github.
approach [23], this framework learns sparse linear re- com/PierreLambert3/glocally_explained), enabling the
gression models for subsets of the data set and does not easy use of the proposed framework.
require a reapplication of the NLDR algorithm, making it This paper is organized as follows: Section 2 first
reviews some related works. Section 3 then presents our in this paper addresses the limitations of [21] by (1)
diproposed approach, while Section 4 discusses an example rectly providing local explanations everywhere in the LD
use-case. Section 5 draws final conclusions. embedding (i.e., globally local explanations), (2)
avoiding the need to sample new artificial data points, and (3)
relying only on the calculation of linear regression
mod2. Related works els, which ensures fast processing and hence facilitates
interactivity.</p>
        <p>
          Interpreting NLDR techniques is a challenging task. To
tackle this challenge, various approaches have been
proposed. Some papers (e.g., [
          <xref ref-type="bibr" rid="ref16">25, 26, 27</xref>
          ]) have proposed 3. Proposed approach
methods for explaining the LD embedding dimensions
with respect to the HD features. Since local NLDR algo- This section introduces our proposed approach for
globrithms such as t-SNE do not effectively preserve large dis- ally local and fast explanations of NLDR embeddings.
tances, explaining the resulting embedding dimensions Section 3.1 first summarizes our notations. Section 3.2
with these methods may be misleading. Other methods then details our methodology, and Section 3.3 finally
attempt to interpret NLDR results by explaining visual presents an optional fine-tuning strategy.
clusters [
          <xref ref-type="bibr" rid="ref15">22, 28, 29</xref>
          ]. For example, in [
          <xref ref-type="bibr" rid="ref15">22</xref>
          ], the authors
propose an interactive pipeline for explaining clusters in the 3.1. Notations
LD embedding using decision trees; this pipeline enables
the user to manually select LD clusters, which are then Matrices are denoted with bold-faced capital letters (e.g.,
explained in terms of the HD features with a decision  ⃗), vectors with bold-faced lower-case letters (e.g.,  ⃗) and
tree, an interpretable model. The resulting model can scalars with lower-case letters (e.g.,  ). A single element
be used to explain why certain data points are clustered from a matrix is denoted with a lower-case letter with two
together and to identify the HD features that distinguish subscripts (e.g.,   ), the first indicating the row and the
the different clusters. In contrast, our proposed approach second indicating the column. Instances are indexed by
aims to understand intra-cluster positions, i.e., the HD the letter  ∈ {1, ..., } , features by the letter  ∈ {1, ..., } ,
features that make two points from the same cluster lie embedding dimensions by the letter  ∈ {1, ..., } and
at different corners of this cluster. Moreover, our frame- regions or subcells of the embedding by the letter ℓ ∈
work makes it possible to not only explain LD clusters, {1, ..., } .
but more generally interpret the overall positions of the
points in the embedding. 3.2. General methodology
        </p>
        <p>
          Other existing methods aim to locally and linearly
explain the position of a specific instance in the LD space. In [23], the Best Interpretable Orthogonal
TransformaIn particular, [21] adapts LIME [30] to locally explain  - tion (BIOT) method was proposed to explain the
dimenSNE embeddings. The original version of LIME involves sions of multidimensional scaling (MDS) embeddings. In
three steps. First, it samples instances around a point of the case of t-SNE, such an explanation strategy is not
interest. Then, it queries the model for these instances. directly applicable because t-SNE only preserves local
Finally, it fits an interpretable model with the result of the structure from the high-dimensional data. However, as
queries. In [21], the authors use a SMOTE oversampling proposed in [21],  -SNE embeddings may be explained
technique [31] to create new artificial neighbors for the locally. Instead of learning a BIOT explanation model for
point of interest. To query  -SNE, the entire DR process the entire embedding (i.e., a single global explanation),
is re-applied for each sampled instance, since the t-SNE we propose learning different BIOT models for different
mapping function is unknown. Finally, BIR [32] —which regions (or subcells) of the embedding (i.e. local
explais the predecessor of BIOT [23], a method employed in nation). For a given region, the BIOT model identifies
our work —is used to produce local explanations; BIR the features that best explain the positioning of points
finds the rotation of the queried sampled data that results within that region of the embedding, independently of all
in the best explanation model (in terms of model sparsity other regions. This approach can be applied to any
nonand error). While the approach presented in [21] pro- linear 2-D embedding, including embeddings generated
vides nice intuitions about the LD embedding structure, by t-SNE and its extensions (e.g., [33, 19]) or by other
it has several limitations. First, it can only compute one NLDR algorithms (e.g., [
          <xref ref-type="bibr" rid="ref13">16, 17, 9, 34</xref>
          ]).
local explanation at a time, for one selected point. Sec- Let  (⃗×) be the matrix of  features used to generate
ond, the obtained explanation is highly dependent on the the embedding  ⃗ ( × 2 ). Furthermore, let  ⃗ ( × 2 ) and  ⃗0
artificial sampling. Finally, running the entire NLDR pro- (2 × 1) contain the weights and intercepts for the linear
cess for all sampled instances is (very) time consuming, models relating the features in  ⃗ to each dimension of the
and thus prohibits interactivity. The approach presented embedding  ⃗, where there is one model per dimension.
        </p>
        <p>1</p>
        <p>∑
2 =1 =1</p>
        <p>2
 0( ,⃗ ⃗
0, )⃗=</p>
        <p>⊤
which is minimized w.r.t  ⃗ ,  ⃗0 and  ⃗ under the constraint</p>
        <p>Clearly, this objective function can be extended to the
case where different model parameters  ⃗ (ℓ),  ⃗0
are optimized for different regions ℓ of the embedding,
where the set of instances in region ℓ is denoted  ℓ. In
practice, the best segmentation of the embedding into
re</p>
        <p>(ℓ) and  ⃗(ℓ)
gions is unknown. In this paper, we propose segmenting
the embedding automatically by performing K-means on
the embedding data. The choice of the hyperparameter 
depends on the topology apparent in the embedding and
of the granularity of details desired by the user. Other
strategies are possible, for instance by recursively
dividing the LD dimensions along their medians.</p>
        <p>2
=1
⃗2).</p>
        <p>Finally,  ⃗ (22 ) is an orthogonal transformation matrix
that is applied to  ⃗ to promote model sparsity and
pre</p>
        <p>The interface displayed in Fig. 1 shows an embedding
with multiple local linear explanations: each explanation
diction quality, and  &gt; 0 is a hyperparameter to control is composed of a green and a burgundy axis. Explanation
model sparsity. For 2-D embeddings, the BIOT objective ⃝ A has been selected by the user; the color transparency
function for global explanation is
ding. An example use-case demonstrates that the method
can effectively reveal zones in the embedding where
points are organized according to specific HD features.</p>
      </sec>
      <sec id="sec-1-4">
        <title>Finally, some accompanying software is provided ( https:</title>
        <p>//github.com/PierreLambert3/glocally_explained),
targeting both DR researchers and experts seeking to analyse
their data with nonlinear dimensionality reduction
visualization tools.</p>
        <p>Further works will include testing our framework with
of the points increases linearly with the absolute
difference between their position in the embedding and the
position predicted by the selected linear model (i.e., the
greater the error, the more transparent). This enables the
user to visualize the portion of the embedding for which
(1) the selected linear model is faithful. The right panel
depicts the relative importance of the HD features for each
axis of the selected explanation (i.e., ⃝ A in this case), as
quantified by the local linear model weights; the
horizontal bar under each feature name represents the feature’s
signed linear projection weight (LPW) on the considered
axis, highlighting the importance of the feature in the
local explanation. For visual clarity, only the 5 features
with the greatest LPW magnitudes are depicted for each
local explanation axis. The feature total sulfur dioxide has
been selected by the user (mark ⃝ B ). When selecting a
feature in the right panel, thick indicators appear on both
axes of all local explanations, with lengths proportional
to the LPW magnitudes of the corresponding feature on
all axes; mark ⃝ C shows two such indicators. This makes
it possible to grasp the influence of an HD feature in the
various regions of the entire embedding.</p>
      </sec>
      <sec id="sec-1-5">
        <title>Each view in Fig. 2 shows the importance of a particu</title>
        <p>indicated at the bottom of the panel. The left view
highlights that free sulfur dioxide is particularly important
when explaining the top portion of the embedding along
a vertical direction, whereas the horizontal direction can
be partly explained by the concentration of citric acid.</p>
      </sec>
      <sec id="sec-1-6">
        <title>We observe that the structures apparent in the bottomleft part of the embedding are not very dependent on the three analyzed features.</title>
      </sec>
    </sec>
    <sec id="sec-2">
      <title>5. Conclusion</title>
      <sec id="sec-2-1">
        <title>This work proposes a globally local and fast explanation framework that provides multiple local linear explanations for 2-D data embeddings, enabling the user to assess, at a glance, the importance of different HD</title>
        <p>3.3. Fine-tuning
In Section 3.2, the proposed strategy for automatic seg- lar feature in the embedding, with the respective feature
mentation (K-means) depends on the coordinates of the
instances in the embedding. However, the shape and size
of the zone that can be explained may not directly depend
on the spatial coordinates of the embedding. This means
that the regions identified using K-means may not be the
most optimal with respect to the quality of the resulting
explanations. In some cases, it is hence useful to
finetune the final regions by directly considering explanation
quality. To do so, we propose a method called Clustered</p>
      </sec>
      <sec id="sec-2-2">
        <title>BIOT, which reassigns instances  to explanation regions</title>
        <p>
          Clustered BIOT can be found in Appendix A.
 ℓ based on a modification of BIOT. Further details on
4. Experiments and discussion
posed method using an interactive user interface. This
user interface is available on the public repository
indicated in the abstract. All of the featured embeddings
are representations of the winequality-red dataset,
available in the UCI machine learning repository [35]. This
data set contains 11 physico-chemical variables
describing various red wines. The embeddings are produced by
a recent NE algorithm that mixes t-SNE gradients with
those of a fast stochastic approximation of MDS, which
This section presents an example use-case for the pro- features, both locally and across the whole LD
embedpreserves HD data structures across multiple scales [
          <xref ref-type="bibr" rid="ref13">34</xref>
          ]. actual end-users in the context of a real use case; their
        </p>
        <p>An interactive pipeline for explaining visual clus- [35] D. Dua, C. Graf, UCI machine learning repository,
ters in dimensionality reduction visualizations with</p>
      </sec>
      <sec id="sec-2-3">
        <title>Explaining multidimensional nonlinear mds embed</title>
        <p>dings using the best interpretable orthogonal
transing attribute variability in multidimensional
projections, in: 2016 29th SIBGRAPI Conference on</p>
      </sec>
      <sec id="sec-2-4">
        <title>Graphics, Patterns and Images (SIBGRAPI), IEEE,</title>
        <p>2016, pp. 297–304.</p>
        <p>Telea, F. V. Paulovich, Explaining three-dimensional
dimensionality reduction plots, Information
Visuwork for dimensionality reduction based data
exploration, in: Proceedings of the 2018 CHI Conference
on Human Factors in Computing Systems, 2018, pp.
1–13.
projection matrix/tree: Interactive subspace visual
exploration and analysis of high dimensional data,</p>
      </sec>
      <sec id="sec-2-5">
        <title>IEEE Transactions on Visualization and Computer</title>
        <p>Graphics 19 (2013) 2625–2633.
analysis of dimensionality reduction results with</p>
      </sec>
    </sec>
    <sec id="sec-3">
      <title>A. Clustered BIOT</title>
      <sec id="sec-3-1">
        <title>As mentioned in Section 3.3, the main method proposed</title>
        <p>in this paper can be fine-tuned with a method we call
Clustered BIOT. Let  ℓ = 1 if instance  is in region ℓ
and 0 otherwise. The matrix  ⃗ containing all elements
 ℓ respects the general conventions of hard clustering
(each instance belongs to exactly one cluster and each
cluster contains at least one instance). Then, the objective
function for Clustered BIOT is
 1( ,⃗{  ⃗ (ℓ),  ⃗0</p>
        <p>, ⃗(ℓ)}|ℓ=1)
=1
⊤
ization and computer graphics 26 (2019) 45–55.
contrastive learning, IEEE transactions on visual- where  ℓ ∶= { |  ℓ = 1}. For fixed  ⃗ (ℓ),  ⃗0
a given instance  , the solution for  ⃗ is the vector that
(ℓ) and  ⃗(ℓ) and
international conference on knowledge discovery  can belong to only one cluster), the optimal cluster for
minimizes
ality reduction results using shapley values, Expert</p>
      </sec>
      <sec id="sec-3-2">
        <title>Systems with Applications 178 (2021) 115020.</title>
        <p>trust you?” explaining the predictions of any
classifier, in: Proceedings of the 22nd ACM SIGKDD
and data mining, 2016, pp. 1135–1144.
selecting the best interpretable multidimensional
scaling rotation using external variables,
Neuro</p>
        <p>2
∑  ℓ
ℓ=1
Since only one element of  ⃗ can be equal to one (instance
instance  is whichever model ℓ minimizes the prediction
arg minℓ ( ⃗
⊤</p>
        <p>Thus, Clustered BIOT can be optimized by alternating
between clustering instances according to prediction
error and fitting BIOT models to the clusters. An instance
 is assigned to cluster ℓ if BIOT model ℓ has the lowest
prediction error for that instance compared to the other
which is minimized w.r.t  ⃗ and { ⃗ (ℓ),  ⃗0
,  ⃗(ℓ)}|ℓ=1 under
the constraints that (i)  ⃗(ℓ) is an orthogonal matrix ∀ℓ
and (ii)  ⃗ respects the clustering conventions above.</p>
        <p>For fixed  ⃗, the solution for { ⃗ (ℓ),  ⃗0
,  ⃗(ℓ)}|ℓ=1 can be
found by training BIOT on each subset of instances  ℓ,
(2)
(3)
(4)</p>
      </sec>
    </sec>
  </body>
  <back>
    <ref-list>
      <ref id="ref1">
        <mixed-citation>
          <article-title>feedback will enable the improvement of the various sionality reduction</article-title>
          ,
          <source>Science</source>
          <volume>290</volume>
          (
          <year>2000</year>
          )
          <fpage>2319</fpage>
          -
          <lpage>2323</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref2">
        <mixed-citation>
          <article-title>design choices of our interface</article-title>
          .
          <source>In addition, a qualitative DOI: 10.1126/science.290.5500</source>
          .2319.
        </mixed-citation>
      </ref>
      <ref id="ref3">
        <mixed-citation>
          <article-title>comparison with other explainability methods such as [9</article-title>
          ]
          <string-name>
            <given-names>S. T.</given-names>
            <surname>Roweis</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L. K.</given-names>
            <surname>Saul</surname>
          </string-name>
          , Nonlinear dimensionality
        </mixed-citation>
      </ref>
      <ref id="ref4">
        <mixed-citation>
          <article-title>LIME will enable a more comprehensive evaluation of reduction by locally linear embedding</article-title>
          ,
          <source>science 290</source>
        </mixed-citation>
      </ref>
      <ref id="ref5">
        <mixed-citation>
          <article-title>the proposed method</article-title>
          . (
          <year>2000</year>
          )
          <fpage>2323</fpage>
          -
          <lpage>2326</lpage>
          . [10]
          <string-name>
            <given-names>J.</given-names>
            <surname>Suykens</surname>
          </string-name>
          ,
          <article-title>Data visualization and dimensionality reduction using kernel maps with a reference point,</article-title>
        </mixed-citation>
      </ref>
      <ref id="ref6">
        <mixed-citation>
          <source>Acknowledgments IEEE Trans. Neural Netw</source>
          .
          <volume>19</volume>
          (
          <year>2008</year>
          )
          <fpage>1501</fpage>
          -
          <lpage>1517</lpage>
          . [11]
          <string-name>
            <given-names>D.</given-names>
            <surname>Francois</surname>
          </string-name>
          ,
          <string-name>
            <given-names>V.</given-names>
            <surname>Wertz</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Verleysen</surname>
          </string-name>
          , The concentra-
        </mixed-citation>
      </ref>
      <ref id="ref7">
        <mixed-citation>
          <article-title>This work was supported by Service Public de Wallonie tion of fractional distances 19 (</article-title>
          <year>2007</year>
          )
          <fpage>873</fpage>
          -
          <lpage>886</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref8">
        <mixed-citation>
          <article-title>Recherche under grant n° 2010235-ARIAC by DIGITAL</article-title>
          - [12]
          <string-name>
            <given-names>J. A.</given-names>
            <surname>Lee</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Verleysen</surname>
          </string-name>
          , Quality assessment of di-
        </mixed-citation>
      </ref>
      <ref id="ref9">
        <mixed-citation>
          WALLONIA4.
          <article-title>AI</article-title>
          .
          <article-title>SC is supported by a FRIA grant (</article-title>
          <string-name>
            <surname>F.R.S</surname>
          </string-name>
          .
          <article-title>- mensionality reduction: Rank-based criteria</article-title>
          , Neu-
        </mixed-citation>
      </ref>
      <ref id="ref10">
        <mixed-citation>
          <string-name>
            <surname>FNRS</surname>
          </string-name>
          ).
          <source>rocomputing 72</source>
          (
          <year>2009</year>
          )
          <fpage>1431</fpage>
          -
          <lpage>1443</lpage>
          . [13]
          <string-name>
            <given-names>B.</given-names>
            <surname>Schölkopf</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Smola</surname>
          </string-name>
          ,
          <string-name>
            <given-names>K.-R.</given-names>
            <surname>Müller</surname>
          </string-name>
          , Nonlinear
        </mixed-citation>
      </ref>
      <ref id="ref11">
        <mixed-citation>
          <article-title>t-sne effectively</article-title>
          ,
          <source>Distill</source>
          <volume>1</volume>
          (
          <year>2016</year>
          )
          <article-title>e2</article-title>
          . [21]
          <string-name>
            <given-names>A.</given-names>
            <surname>Bibal</surname>
          </string-name>
          ,
          <string-name>
            <given-names>V. M.</given-names>
            <surname>Vu</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Nanfack</surname>
          </string-name>
          ,
          <string-name>
            <given-names>B.</given-names>
            <surname>Frénay</surname>
          </string-name>
          , Explain-
        </mixed-citation>
      </ref>
      <ref id="ref12">
        <mixed-citation>
          <string-name>
            <surname>ESANN</surname>
          </string-name>
          ,
          <year>2020</year>
          , pp.
          <fpage>393</fpage>
          -
          <lpage>398</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref13">
        <mixed-citation>
          [34]
          <string-name>
            <given-names>P.</given-names>
            <surname>Lambert</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C.</given-names>
            <surname>de Bodt</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Verleysen</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J. A.</given-names>
            <surname>Lee</surname>
          </string-name>
          ,
        </mixed-citation>
      </ref>
      <ref id="ref14">
        <mixed-citation>
          <article-title>bedding like t-sne and umap</article-title>
          ,
          <source>Neurocomputing 503</source>
        </mixed-citation>
      </ref>
      <ref id="ref15">
        <mixed-citation>
          [22]
          <string-name>
            <given-names>A.</given-names>
            <surname>Bibal</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Clarinval</surname>
          </string-name>
          ,
          <string-name>
            <given-names>B.</given-names>
            <surname>Dumas</surname>
          </string-name>
          ,
          <string-name>
            <given-names>B.</given-names>
            <surname>Frénay</surname>
          </string-name>
          , Ixvc:
        </mixed-citation>
      </ref>
      <ref id="ref16">
        <mixed-citation>
          [25]
          <string-name>
            <given-names>D. B.</given-names>
            <surname>Coimbra</surname>
          </string-name>
          ,
          <string-name>
            <given-names>R. M.</given-names>
            <surname>Martins</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T. T.</given-names>
            <surname>Neves</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A. C.</given-names>
          </string-name>
          [27]
          <string-name>
            <given-names>X.</given-names>
            <surname>Yuan</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Ren</surname>
          </string-name>
          ,
          <string-name>
            <given-names>Z.</given-names>
            <surname>Wang</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C.</given-names>
            <surname>Guo</surname>
          </string-name>
          , Dimension [28]
          <string-name>
            <given-names>T.</given-names>
            <surname>Fujiwara</surname>
          </string-name>
          ,
          <string-name>
            <given-names>O.-H.</given-names>
            <surname>Kwon</surname>
          </string-name>
          ,
          <string-name>
            <given-names>K.-L.</given-names>
            <surname>Ma</surname>
          </string-name>
          , Supporting
        </mixed-citation>
      </ref>
      <ref id="ref17">
        <mixed-citation>
          2017. URL: http://archive.ics.uci.edu/ml. [29]
          <string-name>
            <given-names>W. E.</given-names>
            <surname>Marcilio-Jr</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D. M.</given-names>
            <surname>Eler</surname>
          </string-name>
          , Explaining dimension[30]
          <string-name>
            <given-names>M. T.</given-names>
            <surname>Ribeiro</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Singh</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C.</given-names>
            <surname>Guestrin</surname>
          </string-name>
          , “why should i [31]
          <string-name>
            <given-names>N. V.</given-names>
            <surname>Chawla</surname>
          </string-name>
          ,
          <string-name>
            <given-names>K. W.</given-names>
            <surname>Bowyer</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L. O.</given-names>
            <surname>Hall</surname>
          </string-name>
          , W. P. error:
        </mixed-citation>
      </ref>
    </ref-list>
  </back>
</article>