From 3365ccee00664263d3be903f7a34fa49676a8f42 Mon Sep 17 00:00:00 2001
From: ldingemans <69890037+ldingemans@users.noreply.github.com>
Date: Mon, 25 Jul 2022 14:28:24 +0200
Subject: [PATCH] Add files via upload

---
 lime/lime_image.py   | 38 ++++++++++++++-----------------
 lime/lime_tabular.py | 53 +++++++++++---------------------------------
 2 files changed, 30 insertions(+), 61 deletions(-)

diff --git a/lime/lime_image.py b/lime/lime_image.py
index ea3940e2a..347dd3d0b 100644
--- a/lime/lime_image.py
+++ b/lime/lime_image.py
@@ -6,9 +6,10 @@
 
 import numpy as np
 import sklearn
+import sklearn.preprocessing
 from sklearn.utils import check_random_state
 from skimage.color import gray2rgb
-from tqdm.auto import tqdm
+from tqdm import tqdm
 
 from . import lime_base
 
@@ -27,8 +28,7 @@ def __init__(self, image, segments):
         self.segments = segments
         self.intercept = {}
         self.local_exp = {}
-        self.local_pred = {}
-        self.score = {}
+        self.local_pred = None
 
     def get_image_and_mask(self, label, positive_only=True, negative_only=False,
                            hide_rest=False, num_features=5, min_weight=0.):
@@ -134,7 +134,7 @@ def explain_instance(self, image, classifier_fn, labels=(1,),
                          distance_metric='cosine',
                          model_regressor=None,
                          random_seed=None,
-                         progress_bar=True):
+                         classifier_args=None):
        """Generates explanations for a prediction.
 
        First, we generate neighborhood data by randomly perturbing features
@@ -149,14 +149,13 @@ def explain_instance(self, image, classifier_fn, labels=(1,),
                takes a numpy array and outputs prediction probabilities.  For
                ScikitClassifiers , this is classifier.predict_proba.
            labels: iterable with labels to be explained.
-            hide_color: If not None, will hide superpixels with this color.
-                Otherwise, use the mean pixel color of the image.
+            hide_color: TODO
            top_labels: if not None, ignore labels and produce explanations for
                the K labels with highest prediction probabilities, where K is
                this parameter.
            num_features: maximum number of features present in explanation
            num_samples: size of the neighborhood to learn the linear model
-            batch_size: batch size for model predictions
+            batch_size: TODO
            distance_metric: the distance metric to use for weights.
            model_regressor: sklearn regressor to use in explanation. Defaults
                to Ridge regression in LimeBase. Must have model_regressor.coef_
@@ -166,7 +165,6 @@ def explain_instance(self, image, classifier_fn, labels=(1,),
            random_seed: integer used as random seed for the segmentation
                algorithm. If None, a random integer, between 0 and 1000,
                will be generated using the internal random number generator.
-            progress_bar: if True, show tqdm progress bar.
 
        Returns:
            An ImageExplanation object (see lime_image.py) with the corresponding
@@ -181,7 +179,10 @@ def explain_instance(self, image, classifier_fn, labels=(1,),
             segmentation_fn = SegmentationAlgorithm('quickshift', kernel_size=4,
                                                     max_dist=200, ratio=0.2,
                                                     random_seed=random_seed)
-        segments = segmentation_fn(image)
+        try:
+            segments = segmentation_fn(image)
+        except ValueError as e:
+            raise e
 
         fudged_image = image.copy()
         if hide_color is None:
@@ -194,12 +195,10 @@ def explain_instance(self, image, classifier_fn, labels=(1,),
             fudged_image[:] = hide_color
 
         top = labels
-
+
         data, labels = self.data_labels(image, fudged_image, segments,
                                         classifier_fn, num_samples,
-                                        batch_size=batch_size,
-                                        progress_bar=progress_bar)
-
+                                        batch_size=batch_size, classifier_args=classifier_args)
         distances = sklearn.metrics.pairwise_distances(
             data,
             data[0].reshape(1, -1),
             metric=distance_metric
         ).ravel()
@@ -214,8 +213,7 @@ def explain_instance(self, image, classifier_fn, labels=(1,),
         for label in top:
             (ret_exp.intercept[label],
              ret_exp.local_exp[label],
-             ret_exp.score[label],
-             ret_exp.local_pred[label]) = self.base.explain_instance_with_data(
+             ret_exp.score, ret_exp.local_pred) = self.base.explain_instance_with_data(
                 data, labels, distances, label, num_features,
                 model_regressor=model_regressor,
                 feature_selection=self.feature_selection)
@@ -228,7 +226,7 @@ def data_labels(self,
                    classifier_fn,
                    num_samples,
                    batch_size=10,
-                    progress_bar=True):
+                    classifier_args=None):
        """Generates images and predictions in the neighborhood of this image.
 
        Args:
@@ -240,7 +238,6 @@ def data_labels(self,
                matrix of prediction probabilities
            num_samples: size of the neighborhood to learn the linear model
            batch_size: classifier_fn will be called on batches of this size.
-            progress_bar: if True, show tqdm progress bar.
 
        Returns:
            A tuple (data, labels), where:
@@ -253,8 +250,7 @@ def data_labels(self,
         labels = []
         data[0, :] = 1
         imgs = []
-        rows = tqdm(data) if progress_bar else data
-        for row in rows:
+        for row in data:
             temp = copy.deepcopy(image)
             zeros = np.where(row == 0)[0]
             mask = np.zeros(segments.shape).astype(bool)
@@ -263,10 +259,10 @@ def data_labels(self,
             temp[mask] = fudged_image[mask]
             imgs.append(temp)
             if len(imgs) == batch_size:
-                preds = classifier_fn(np.array(imgs))
+                preds = classifier_fn(np.array(imgs), classifier_args=classifier_args)
                 labels.extend(preds)
                 imgs = []
         if len(imgs) > 0:
-            preds = classifier_fn(np.array(imgs))
+            preds = classifier_fn(np.array(imgs), classifier_args=classifier_args)
             labels.extend(preds)
         return data, np.array(labels)

diff --git a/lime/lime_tabular.py b/lime/lime_tabular.py
index 880f3d391..4071da2c2 100644
--- a/lime/lime_tabular.py
+++ b/lime/lime_tabular.py
@@ -12,8 +12,6 @@
 import sklearn
 import sklearn.preprocessing
 from sklearn.utils import check_random_state
-from pyDOE2 import lhs
-from scipy.stats.distributions import norm
 
 from lime.discretize import QuartileDiscretizer
 from lime.discretize import DecileDiscretizer
@@ -139,7 +137,7 @@ def __init__(self,
                 discretizer='quartile',
                 sample_around_instance=False,
                 random_state=None,
-                 training_data_stats=None):
+                 training_data_stats=None,):
        """Init function.
 
        Args:
@@ -208,11 +206,10 @@ def __init__(self,
         if discretize_continuous and not sp.sparse.issparse(training_data):
             # Set the discretizer if training data stats are provided
             if self.training_data_stats:
-                discretizer = StatsDiscretizer(
-                    training_data, self.categorical_features,
-                    self.feature_names, labels=training_labels,
-                    data_stats=self.training_data_stats,
-                    random_state=self.random_state)
+                discretizer = StatsDiscretizer(training_data, self.categorical_features,
+                                               self.feature_names, labels=training_labels,
+                                               data_stats=self.training_data_stats,
+                                               random_state=self.random_state)
 
             if discretizer == 'quartile':
                 self.discretizer = QuartileDiscretizer(
@@ -305,7 +302,7 @@ def explain_instance(self,
                         num_samples=5000,
                         distance_metric='euclidean',
                         model_regressor=None,
-                         sampling_method='gaussian'):
+                         classifier_args=None):
        """Generates explanations for a prediction.
 
        First, we generate neighborhood data by randomly perturbing features
@@ -333,8 +330,6 @@ def explain_instance(self,
            model_regressor: sklearn regressor to use in explanation. Defaults
                to Ridge regression in LimeBase. Must have model_regressor.coef_
                and 'sample_weight' as a parameter to model_regressor.fit()
-            sampling_method: Method to sample synthetic data. Defaults to Gaussian
-                sampling. Can also use Latin Hypercube Sampling.
 
        Returns:
            An Explanation object (see explanation.py) with the corresponding
@@ -343,7 +338,7 @@ def explain_instance(self,
         if sp.sparse.issparse(data_row) and not sp.sparse.isspmatrix_csr(data_row):
             # Preventative code: if sparse, convert to csr format if not in csr format already
             data_row = data_row.tocsr()
-        data, inverse = self.__data_inverse(data_row, num_samples, sampling_method)
+        data, inverse = self.__data_inverse(data_row, num_samples)
         if sp.sparse.issparse(data):
             # Note in sparse case we don't subtract mean since data would become dense
             scaled_data = data.multiply(self.scaler.scale_)
@@ -358,7 +353,7 @@ def explain_instance(self,
             metric=distance_metric
         ).ravel()
 
-        yss = predict_fn(inverse)
+        yss = predict_fn(inverse, classifier_args)
 
         # for classification, the model needs to provide a list of tuples - classes
         # along with prediction probabilities
@@ -455,8 +450,7 @@ def explain_instance(self,
         for label in labels:
             (ret_exp.intercept[label],
              ret_exp.local_exp[label],
-             ret_exp.score[label],
-             ret_exp.local_pred[label]) = self.base.explain_instance_with_data(
+             ret_exp.score, ret_exp.local_pred) = self.base.explain_instance_with_data(
                 scaled_data, yss, distances, label, num_features,
                 model_regressor=model_regressor,
                 feature_selection=self.feature_selection)
@@ -474,8 +468,7 @@ def explain_instance(self,
 
     def __data_inverse(self,
                        data_row,
-                       num_samples,
-                       sampling_method):
+                       num_samples):
        """Generates a neighborhood around a prediction.
 
        For numerical features, perturb them by sampling from a Normal(0,1) and
@@ -488,7 +481,6 @@ def __data_inverse(self,
        Args:
            data_row: 1d numpy array, corresponding to a row
            num_samples: size of the neighborhood to learn the linear model
-            sampling_method: 'gaussian' or 'lhs'
 
        Returns:
            A tuple (data, inverse), where:
@@ -517,26 +509,9 @@ def __data_inverse(self,
                 instance_sample = data_row[:, non_zero_indexes]
                 scale = scale[non_zero_indexes]
                 mean = mean[non_zero_indexes]
-
-            if sampling_method == 'gaussian':
-                data = self.random_state.normal(0, 1, num_samples * num_cols
-                                                ).reshape(num_samples, num_cols)
-                data = np.array(data)
-            elif sampling_method == 'lhs':
-                data = lhs(num_cols, samples=num_samples
-                           ).reshape(num_samples, num_cols)
-                means = np.zeros(num_cols)
-                stdvs = np.array([1]*num_cols)
-                for i in range(num_cols):
-                    data[:, i] = norm(loc=means[i], scale=stdvs[i]).ppf(data[:, i])
-                data = np.array(data)
-            else:
-                warnings.warn('''Invalid input for sampling_method.
-                                 Defaulting to Gaussian sampling.''', UserWarning)
-                data = self.random_state.normal(0, 1, num_samples * num_cols
-                                                ).reshape(num_samples, num_cols)
-                data = np.array(data)
-
+            data = self.random_state.normal(
+                0, 1, num_samples * num_cols).reshape(
+                num_samples, num_cols)
             if self.sample_around_instance:
                 data = data * scale + instance_sample
             else:
@@ -643,8 +618,6 @@ def __init__(self, training_data, mode="classification",
             n_samples, n_timesteps * n_features)
         self.n_timesteps = n_timesteps
         self.n_features = n_features
-        if feature_names is None:
-            feature_names = ['feature%d' % i for i in range(n_features)]
 
         # Update the feature names
         feature_names = ['{}_t-{}'.format(n, n_timesteps - (i + 1))
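
Note on the changed calling convention: after this patch, LimeImageExplainer.data_labels always invokes the prediction function as classifier_fn(np.array(imgs), classifier_args=classifier_args), so an existing classifier_fn needs an extra parameter. A minimal sketch of a compatible wrapper follows; the adapter, the toy model, and the 'temperature' argument are hypothetical, chosen only to show the shape of the call:

    import numpy as np

    def make_classifier_fn(model):
        # Hypothetical adapter: wraps any batch predictor so it matches the
        # signature this patch calls from data_labels():
        #     classifier_fn(np.array(imgs), classifier_args=classifier_args)
        def classifier_fn(images, classifier_args=None):
            kwargs = classifier_args or {}  # None when the caller passed nothing
            return model(np.asarray(images), **kwargs)
        return classifier_fn

    # Toy stand-in for a real model; 'temperature' is an invented example of
    # an argument one might forward through classifier_args.
    def toy_model(batch, temperature=1.0):
        logits = batch.reshape(len(batch), -1).mean(axis=1, keepdims=True)
        logits = np.concatenate([logits, -logits], axis=1) / temperature
        e = np.exp(logits - logits.max(axis=1, keepdims=True))
        return e / e.sum(axis=1, keepdims=True)

    # explainer.explain_instance(image, make_classifier_fn(toy_model),
    #                            classifier_args={'temperature': 0.5})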
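On the tabular side the patch passes the extra argument positionally (yss = predict_fn(inverse, classifier_args)), so a predict_fn for LimeTabularExplainer must accept it as its second parameter even when it is None. A minimal sketch under the same assumption; the toy logistic model and its 'offset' argument are hypothetical:

    import numpy as np

    # Toy stand-in for a fitted binary classifier over tabular rows.
    def toy_proba(rows, offset=0.0):
        p = 1.0 / (1.0 + np.exp(-(rows.sum(axis=1) + offset)))
        return np.column_stack([1.0 - p, p])

    def predict_fn(data, classifier_args=None):
        # classifier_args arrives positionally from explain_instance and may
        # be None; normalise it to a kwargs dict before forwarding.
        kwargs = classifier_args or {}
        return toy_proba(np.asarray(data, dtype=float), **kwargs)

    # explainer.explain_instance(data_row, predict_fn,
    #                            classifier_args={'offset': 0.5})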