Source code for pyreal.explainers.se.similar_examples
import faiss
import numpy as np
import pandas as pd
from sklearn.neighbors import KDTree
from sklearn.preprocessing import StandardScaler
from pyreal.explainers.se.base import SimilarExamplesBase
from pyreal.explanation_types import SimilarExampleExplanation
from pyreal.explanation_types.base import convert_columns_with_dict
# From:
# towardsdatascience.com/make-knn-300-times-faster-than-scikit-learns-in-20-lines-5e29d74e76bb
class FaissKNeighbors:
    """
    Nearest-neighbor search backed by a flat L2 faiss index.

    The ``query`` method takes the same arguments this module passes to
    ``sklearn.neighbors.KDTree.query``, so either object can serve as the explainer.

    Args:
        X (array-like of shape (n_instances, n_features)):
            Points to add to the index. Anything numpy can convert to a 2D numeric
            array is accepted (ndarray, DataFrame, ...).
    """

    def __init__(self, X):
        data = self._as_faiss_input(X)
        self.index = faiss.IndexFlatL2(data.shape[1])
        self.index.add(data)

    @staticmethod
    def _as_faiss_input(values):
        """
        Convert values to a C-contiguous float32 ndarray.

        ``astype`` alone is not enough: it keeps a DataFrame a DataFrame and preserves
        the memory layout of a non-C-ordered array, while faiss works on row-major
        float32 matrices.
        """
        return np.ascontiguousarray(values, dtype=np.float32)

    def query(self, x, k, return_distance=False):
        """
        Find the k indexed points closest to each row of x.

        Args:
            x (array-like of shape (n_queries, n_features)):
                Query points
            k (int):
                Number of neighbors to retrieve per query point
            return_distance (Boolean):
                If True, also return the distances reported by the index

        Returns:
            indices, or (distances, indices) if return_distance is True
        """
        distances, indices = self.index.search(self._as_faiss_input(x), k=k)
        if return_distance:
            return distances, indices
        return indices
class SimilarExamples(SimilarExamplesBase):
    """
    SimilarExamples object.

    A SimilarExamples object explains an input by retrieving the training examples closest
    to it, using a nearest-neighbors search. SimilarExamples explainers expect data to be
    entirely numeric.

    Args:
        model (string filepath or model object):
            Filepath to the pickled model to explain, or model object with .predict() function
        x_train_orig (DataFrame of size (n_instances, n_features)):
            Training set in original form.
        standardize (Boolean):
            If True, standardize the data when selecting similar examples
        fast (Boolean):
            If True, use a faster algorithm (faiss) to compute the neighbors. Set to False
            if having trouble with the faiss library; a KDTree is used instead
        **kwargs: see base Explainer args
    """

    def __init__(self, model, x_train_orig=None, standardize=False, fast=True, **kwargs):
        # These attributes are assigned before delegating to the base initializer.
        # NOTE(review): presumably the base class may call fit() during __init__ - confirm.
        self.explainer = None
        self.standardize = standardize
        self.standardizer = None
        self.x_train_interpret = None
        self.x_train_interpret_features = None
        self.fast = fast
        super().__init__(model, x_train_orig, **kwargs)

    def fit(self, x_train_orig=None, y_train=None):
        """
        Fit the explainer

        Args:
            x_train_orig (DataFrame of shape (n_instances, n_features)):
                Training set to fit on, required if not provided on initialization
            y_train (Series of shape (n_instances,)):
                Targets of training set, required if not provided on initialization

        Returns:
            SimilarExamples
                This object, fitted
        """
        x_train_orig, y_train = self._get_training_data(x_train_orig, y_train)

        dataset = self.transform_to_x_algorithm(x_train_orig)
        if self.standardize:
            # Keep the fitted scaler so inputs can be scaled the same way at produce time
            self.standardizer = StandardScaler()
            dataset = self.standardizer.fit_transform(dataset)

        if self.fast:
            self.explainer = FaissKNeighbors(dataset)
        else:
            self.explainer = KDTree(dataset)

        self.y_train = y_train
        # Neighbors are searched in the algorithm space but reported in the interpretable space
        self.x_train_interpret = self.transform_to_x_interpret(x_train_orig)
        return self

    def produce_explanation_interpret(
        self, x_orig, disable_feature_descriptions=False, num_examples=5
    ):
        """
        Get the n nearest neighbors to x_orig

        Args:
            x_orig (DataFrame of shape (n_instances, n_features)):
                The input to be explained
            disable_feature_descriptions (Boolean):
                If True, do not apply feature descriptions
            num_examples (int):
                Number of neighbors to return

        Returns:
            SimilarExampleExplanation
                Set of similar examples and their targets

        Raises:
            AttributeError:
                If the explainer has not been fitted
            ValueError:
                If the neighbor search could not find num_examples neighbors for every input
        """
        if self.explainer is None:
            raise AttributeError("Instance has no explainer. Must call fit() before produce()")

        if disable_feature_descriptions:
            x_train_interpret = self.x_train_interpret
        else:  # Running this here for optimization
            x_train_interpret = convert_columns_with_dict(
                self.x_train_interpret, self.feature_descriptions
            )

        x = self.transform_to_x_algorithm(x_orig)
        if self.standardize:
            x = self.standardizer.transform(x)

        inds = np.asarray(self.explainer.query(x, k=num_examples, return_distance=False))
        # faiss pads missing neighbors with -1 when fewer than k points are indexed. Passing
        # -1 to .iloc would silently select the last training row, so fail loudly instead.
        if (inds < 0).any():
            raise ValueError(
                f"num_examples ({num_examples}) is larger than the number of training "
                "examples available to the explainer"
            )

        raw_explanation_x = {}
        raw_explanation_y = {}
        # One entry per input row, keyed by that row's position in x_orig
        for i, neighbor_inds in enumerate(inds):
            raw_explanation_x[i] = x_train_interpret.iloc[neighbor_inds, :]
            raw_explanation_y[i] = pd.Series(self.y_train.iloc[neighbor_inds].squeeze())

        x_interpret = self.transform_to_x_interpret(x_orig)
        explanation = SimilarExampleExplanation(
            (raw_explanation_x, raw_explanation_y), x_interpret
        )
        return explanation

    def produce_explanation(self, x_orig, **kwargs):
        """
        Unused for similar examples explainers as explanations are directly produced in the
        interpretable feature space
        """
        return None