Source code for pyreal.explainers.base

import logging
from abc import ABC, abstractmethod

import numpy as np
import pandas as pd
from sklearn.base import is_classifier
from sklearn.metrics import get_scorer

from pyreal.explanation_types import Explanation
from pyreal.transformers import BreakingTransformError
from pyreal.transformers import fit_transformers as fit_transformers_func
from pyreal.transformers import run_transformers
from pyreal.utils import model_utils

log = logging.getLogger(__name__)


def _check_transformers(transformers):
    """
    Validate that all Transformers in `transformers` are legal. Converts single Transformer objects
    into lists. Checks for the existence of a `.transform()` function for all Transformers.

    Args:
        transformers (Transformer or list of Transformers):
            A list of Transformer objects to validate
    Returns:
        List of Transformers
            The original input list, or a single Transformer converted to a list

    Raises:
        TypeError
            If one or more objects in `transformers` does not have a `.transform()` function.
    """
    if transformers is None:
        return []
    if not isinstance(transformers, list):
        transformers = [transformers]
    else:
        transformers = transformers
    for transformer in transformers:
        transform_method = getattr(transformer, "transform", None)
        if not callable(transform_method):
            raise TypeError("Given transformer that does not have a .transform function")
    return transformers


def _get_transformers(transformers, algorithm=None, model=None, interpret=None):
    """
    Return Transformers in `transformers` that have all the requested flags.

    Args:
        transformers (list of Transformers):
            List from which to pick transformers
        algorithm (Boolean or None):
            If True or False, choose transformers with that value. If None, do not consider the
            value of this flag.
        model (Boolean or None):
            If True or False, choose transformers with that value. If None, do not consider the
            value of this flag.
        interpret (Boolean or None):
            If True or False, choose transformers with that value. If None, do not consider the
            value of this flag.

    Returns:
        List of Transformers
            A list of Transformers from `transformers` that have all requested flags.
    """
    select_transformers = []
    for t in transformers:
        if (
            (algorithm is None or t.algorithm == algorithm)
            and (model is None or t.model == model)
            and (interpret is None or t.interpret == interpret)
        ):
            select_transformers.append(t)
    return select_transformers


[docs]class ExplainerBase(ABC):
[docs]    def __init__(
        self,
        model,
        x_train_orig=None,
        y_train=None,
        e_algorithm=None,
        feature_descriptions=None,
        classes=None,
        class_descriptions=None,
        transformers=None,
        fit_on_init=False,
        training_size=None,
        return_original_explanation=False,
        fit_transformers=False,
        openai_api_key=None,
        llm=None,
    ):
        """
        Generic ExplainerBase object

        Args:
            model (string filepath or model object):
               Filepath to the pickled model to explain, or model object with .predict() function
               model.predict() should return a single value prediction for each input
               Classification models should return the index or class. If the latter, the `classes`
               parameter should be provided.
            x_train_orig (DataFrame of shape (n_instances, x_orig_feature_count)):
               The training set for the explainer. If none, must be provided separately
               when fitting
            y_train (Series of shape (n_instances,)):
               The y values for the dataset
            e_algorithm (string):
                Algorithm to use, if applicable
            feature_descriptions (dict):
               Interpretable descriptions of each feature
            classes (array):
                List of class names returned by the model, in the order that the internal model
                considers them if applicable.
                Can be automatically extracted if model is an sklearn classifier
                None if model is not a classifier
            class_descriptions (dict):
                Interpretable descriptions of each class
                None if model is not a classifier
            transformers (transformer object or list of transformer objects):
               Transformer(s) used by the Explainer.
            fit_on_init (Boolean):
               If True, fit the explainer on initiation.
               If False, self.fit() must be manually called before produce() is called
            training_size (Integer):
                If given this value, sample a training set with size of this value
                from x_train_orig and use it to train the explainer instead of the
                entire x_train_orig.
            return_original_explanation (Boolean):
                If True, return the explanation originally generated without any transformations
            fit_transformers (Boolean):
                If True, fit transformers on x_train_orig. Requires x_train_orig not be None
            openai_api_key (string):
                OpenAI API key. Required for GPT narrative explanations, unless openai client
                is provided
            llm (LLM model object): Local LLM object or LLM client object to use to generate \
                narratives. One of `llm` or `openai_api_key` must be provided.
        """
        if isinstance(model, str):
            self.model = model_utils.load_model_from_pickle(model)
        else:
            predict_method = getattr(model, "predict", None)
            if not callable(predict_method):
                raise TypeError("Given model that does not have a .predict function")
            self.model = model

        self.x_train_orig = x_train_orig
        self.y_train = y_train

        if x_train_orig is not None and not isinstance(x_train_orig, pd.DataFrame):
            raise TypeError("x_train_orig must be of type DataFrame")
        if y_train is not None and not (
            isinstance(y_train, pd.DataFrame) or isinstance(y_train, pd.Series)
        ):
            raise TypeError("y_train must be of type DataFrame or Series")

        self.e_algorithm = e_algorithm

        self.transformers = _check_transformers(transformers)

        self.feature_descriptions = feature_descriptions

        self.classes = classes
        if (
            classes is None
            and (
                str(self.model.__module__).startswith("sklearn")
                or str(self.model.__module__).startswith("sktime")
            )
            and is_classifier(model)
            and hasattr(model, "classes_")
        ):
            self.classes = model.classes_

        self.class_descriptions = class_descriptions
        self.return_original_explanation = return_original_explanation

        self.x_train_orig_subset = self.x_train_orig
        self.y_train_subset = self.y_train

        self.training_size = training_size
        if x_train_orig is not None and training_size is not None:
            self.x_train_orig_subset, self.y_train_subset = self._select_training_set(
                x_train_orig, y_train
            )

        if fit_transformers:
            if x_train_orig is None:
                raise ValueError("Cannot fit transformers unless x_train_orig is provided")
            a_transformers = _get_transformers(self.transformers, algorithm=True)
            i_transformers = _get_transformers(self.transformers, interpret=True)
            fit_transformers_func(a_transformers, self.x_train_orig)
            fit_transformers_func(i_transformers, self.x_train_orig)

        if fit_on_init:
            if x_train_orig is None:
                raise
            self.fit()

        self.llm = llm
        self.openai_api_key = openai_api_key

[docs]    def fit(self, x_train_orig=None, y_train=None):
        """
        Fit this explainer object.

        Args:
            x_train_orig (DataFrame of shape (n_instances, n_features):
                Training set to fit on, required if not provided on initialization
            y_train:
                Targets of training set, required if not provided on initialization
        """
        return self

[docs]    def produce(self, x_orig=None, disable_feature_descriptions=False, **kwargs):
        """
        Return the explanation, in the interpretable feature space with feature descriptions
        applied.

        Args:
            x_orig (DataFrame of shape (n_instances, n_features) or Series of length (n_features)):
                Input to explain
            disable_feature_descriptions (Boolean):
                If False, do not apply feature descriptions
            **kwargs:
                Additional arguments to be used by more specific explainers.

        Returns:
            Explanation
                A generated interpretable explanation object, including interpretable feature
                names and values.
        """
        series = False
        name = None
        if x_orig is not None and isinstance(x_orig, pd.Series):
            name = x_orig.name
            series = True
            x_orig = x_orig.to_frame().T
        explanation_interpret = self.produce_explanation_interpret(x_orig=x_orig, **kwargs)
        if not disable_feature_descriptions:
            explanation_interpret.apply_feature_descriptions(self.feature_descriptions)
        if series:
            x_interpret = explanation_interpret.get_values().squeeze()
            x_interpret.name = name
            explanation_interpret.update_values(x_interpret)
        return explanation_interpret

    def produce_explanation_interpret(self, x_orig, **kwargs):
        """
        Produce an interpretable explanation and corresponding values

        Args:
             x_orig (DataFrame of shape (n_instances, n_features)):
                Input to explain
            **kwargs:
                 Additional arguments to be used by more specific explainers.

        Returns:
            Explanation
                Generated explanation in the interpretable feature space, with values
        """
        explanation = self.produce_explanation(x_orig=x_orig, **kwargs)
        return self.transform_explanation(explanation, x_orig)

    @abstractmethod
    def produce_explanation(self, x_orig, **kwargs):
        """
        Run the explanation algorithm to produce an explanation

        Args:
            x_orig (DataFrame of shape (n_instances, n_features)):
                Input to explain
            **kwargs:
                Additional arguments to be used by more specific explainers.

        Returns:
            A generated explanation object in the algorithm feature space (without values)
        """
        pass

    def transform_to_x_algorithm(self, x_orig):
        """
        Transform x_orig to x_algorithm, using the algorithm transformers

        Args:
            x_orig (DataFrame of shape (n_instances, x_orig_feature_count)  or Series):
                Original input

        Returns:
             DataFrame or Series of shape (n_instances, x_algorithm_feature_count)
                x_orig converted to explainable form
        """
        a_transformers = _get_transformers(self.transformers, algorithm=True)
        return run_transformers(a_transformers, x_orig)

    def transform_to_x_model(self, x_orig):
        """
        Transform x_orig to x_model, using the model transformers

        Args:
            x_orig (DataFrame of shape (n_instances, x_orig_feature_count) or Series):
                Original input

        Returns:
             DataFrame or Series of shape (n_instances, x_model_feature_count)
                x_orig converted to model-ready form
        """
        m_transformers = _get_transformers(self.transformers, model=True)
        return run_transformers(m_transformers, x_orig)

    def transform_x_from_algorithm_to_model(self, x_algorithm):
        """
        Transform x_algorithm to x_model, using the model transformers

        Args:
            x_algorithm (DataFrame of shape (n_instances, x_orig_feature_count) or Series):
                Input in explain space

        Returns:
             DataFrame or Series of shape (n_instances, x_model_feature_count)
                x_algorithm converted to model-ready form
        """
        m_transformers = _get_transformers(self.transformers, algorithm=False, model=True)
        return run_transformers(m_transformers, x_algorithm)

    def transform_to_x_interpret(self, x_orig):
        """
        Transform x_orig to x_interpret, using the interpret transformers
        Args:
            x_orig (DataFrame of shape (n_instances, x_orig_feature_count) or Series):
                Original input

        Returns:
             DataFrame or Series of shape (n_instances, x_interpret_feature_count)
                x_orig converted to interpretable form
        """
        i_transformers = _get_transformers(self.transformers, interpret=True)
        return run_transformers(i_transformers, x_orig)

    def transform_explanation(self, explanation, x_orig=None):
        """
        Transform the explanation into its interpretable form, by running the algorithm
        transformer's "inverse_transform_explanation" and interpretable transformers
        "transform_explanation" functions.
        If an `x_orig` is provided, or the explanation has values, also convert `x_orig` with the
        same transformers. This function will result in `values` in the Explanation object
        in the same feature space as the final explanation

        Args:
            explanation (Explanation):
                The raw explanation to transform
            x_orig (DataFrame of shape (n_instances, n_features) or None)
                Data to transform to final space

        Returns:
            Explanation
                The interpretable form of the explanation
        """
        if not isinstance(explanation, Explanation):
            raise ValueError("explanation is not a valid Explanation object")

        if explanation.values is not None and x_orig is None:
            x_orig = explanation.values
            explanation.values = None

        convert_x = x_orig is not None
        if self.return_original_explanation:
            if convert_x:
                explanation = explanation.update_values(self.transform_to_x_algorithm(x_orig))
            return explanation

        x = None
        if convert_x:
            x = x_orig.copy()

        a_transformers = _get_transformers(self.transformers, algorithm=True, interpret=False)
        i_transformers = _get_transformers(self.transformers, interpret=True)

        # Iterate through algorithm transformers
        for i, t in enumerate(a_transformers[::-1]):
            if t.require_values:
                explanation.update_values(  # UNTESTED
                    run_transformers(a_transformers[0 : len(a_transformers) - i], x), inplace=True
                )
            try:
                explanation = t.inverse_transform_explanation(explanation)
            # If this is a breaking transformer, transform x to the current point and return
            except BreakingTransformError:
                log.warning(
                    "Transformer class %s does not have the required inverse explanation "
                    "transform and is set to break, stopping transform process"
                    % type(t).__name__
                )
                break_point = len(a_transformers) - i
                if convert_x:
                    x = run_transformers(a_transformers[0:break_point], x)
                    return explanation.update_values(x)
                else:
                    return explanation
        # Iterate through interpret transformers
        for t in i_transformers:
            if t.require_values and x is not None:
                explanation.update_values(x, inplace=True)
            if not t.algorithm:
                try:
                    explanation = t.transform_explanation(explanation)
                except BreakingTransformError:
                    log.warning(
                        "Transformer class %s does not have the required explanation "
                        "transform and is set to break, stopping transform process"
                        % type(t).__name__
                    )
                    if convert_x:
                        return explanation.update_values(x)
                    return explanation
            if convert_x:
                x = t.transform(x)
        if convert_x:
            return explanation.update_values(x)
        return explanation

[docs]    def model_predict(self, x_orig):
        """
        Predict on x_orig using the model and return the result

        Args:
            x_orig (DataFrame of shape (n_instances, x_orig_feature_count)):
                Data to predict on

        Returns:
            DataFrame of shape (n_instances,)
                Model prediction on x_orig
        """
        if x_orig.ndim == 1:
            x_orig = x_orig.to_frame().T
        x_model = self.transform_to_x_model(x_orig)
        return self.model.predict(x_model)

    def model_predict_proba(self, x_orig):
        """
        Return the output probabilities of each class for x_orig

        Args:
            x_orig (DataFrame of shape (n_instances, x_orig_feature_count)):
                Data to predict on

        Returns:
            DataFrame of shape (n_instances, n_classes)
                Model output probabilities on x_orig
        """
        if not hasattr(self.model, "predict_proba"):
            raise AttributeError("Model does not have a predict_proba method.")
        if x_orig.ndim == 1:
            x_orig = x_orig.to_frame().T
        x_model = self.transform_to_x_model(x_orig)
        return self.model.predict_proba(x_model)

    def model_predict_on_algorithm(self, x_algorithm):
        """
        Predict on x_algorithm using the model and return the result

        Args:
            x_algorithm (DataFrame of shape (n_instances, x_orig_feature_count)):
                Data to predict on

        Returns:
            DataFrame of shape (n_instances,)
                Model prediction on x_orig
        """
        if x_algorithm.ndim == 1:
            x_algorithm = x_algorithm.to_frame().T
        x_model = self.transform_x_from_algorithm_to_model(x_algorithm)
        return self.model.predict(x_model)

    def feature_description(self, feature_name):
        """
        Returns the interpretable description associated with a feature

        Args:
            feature_name (string)

        Returns:
            string
                 Description of feature
        """
        return self.feature_descriptions[feature_name]

    def evaluate_model(self, scorer, x_orig=None, y=None):
        """
        Evaluate the model using a chosen scorer algorithm.

        Args:
            scorer (string):
                Type of scorer to use. See sklearn's scoring parameter options here:
                https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter
            x_orig (DataFrame of shape (n_instances, n_features)):
                Dataset to score on. Required if x_train_orig was not provided at initialization.
                If None, use self.x_train_orig
            y (DataFrame of shape (n_instances, n_features)):
                Dataset to score on. Required if y_train was not provided at initialization


        Returns:
            float
                A score for the model
        """
        x_orig, y = self._get_training_data(x_orig, y)

        scorer = get_scorer(scorer)
        x_model = self.transform_to_x_model(x_orig)
        score = scorer(self.model, x_model, y)
        return score

    @abstractmethod
    def evaluate_variation(self, with_fit=False, explanations=None, n_iterations=20, n_rows=10):
        """
        Evaluate the variation of the explanations generated by this Explainer.
        A variation of 0 means this explainer is expected to generate the exact same explanation
        given the same model and input. Variation is always non-negative, and can be arbitrarily
        high.

        Args:
            with_fit (Boolean):
                If True, evaluate the variation in explanations including the fit (fit each time
                before running). If False, evaluate the variation in explanations of a pre-fit
                Explainer.
            explanations (None or List of Explanation Objects):
                If provided, run the variation check on the precomputed list of explanations
                instead of generating
            n_iterations (int):
                Number of explanations to generate to evaluation variation
            n_rows (int):
                Number of rows of dataset to generate explanations on

        Returns:
            float
                The variation of this Explainer's explanations
        """

    def _get_x_train_orig(self, x_train_orig):
        """
        Helper function to get the appropriate x_orig or raise errors if something goes wrong
        Args:
            x_train_orig (DataFrame or None):
                Provided DataFrame
        Returns:
            The dataframe to use (x_orig or self.x_train_orig)

        Raises:
            ValueError if no valid dataframe
        """
        if x_train_orig is not None:
            return self._select_training_set(x_train_orig)[0]
        if self.x_train_orig_subset is not None:
            return self.x_train_orig_subset
        else:
            raise ValueError("Must provide x_train_orig at initialization or fitting time!")

    def _get_training_data(self, x_train_orig, y_train):
        if x_train_orig is None and self.x_train_orig is None:
            raise ValueError("Must provide x_train_orig at initialization or fitting time")
        if y_train is None and self.y_train is None:
            raise ValueError("Must provide y_train at initialization or fitting time")
        if x_train_orig is not None and y_train is None:
            raise ValueError("Must provide y_train if providing x_train_orig")

        if x_train_orig is None:
            return self.x_train_orig_subset, self.y_train_subset
        else:
            return self._select_training_set(x_train_orig, y_train)

    def _select_training_set(self, x_train, y_train=None):
        if self.training_size is None:
            return x_train, y_train
        if self.training_size < len(x_train.index):
            if self.classes is not None and self.training_size < len(self.classes):
                raise ValueError("training_size must be larger than the number of classes")
            else:
                data_sample_indices = pd.Index(
                    np.random.choice(x_train.index, self.training_size, replace=False)
                )

            # use x_train_orig_subset for fitting explainer
            x_train_subset = x_train.loc[data_sample_indices]
            if y_train is not None:
                y_train_subset = y_train.loc[data_sample_indices]
                return x_train_subset, y_train_subset
            return x_train_subset, None
        else:
            return x_train, y_train