Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix incompatible types in assignment #344

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dice_ml/explainer_interfaces/dice_KD.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def vary_valid(self, KD_query_instance, total_CFs, features_to_vary, permitted_r

# TODO: this should be a user-specified parameter
num_queries = min(len(self.dataset_with_predictions), total_CFs * 10)
cfs = []
cfs = pd.DataFrame()

if self.KD_tree is not None and num_queries > 0:
KD_tree_output = self.KD_tree.query(KD_query_instance, num_queries)
Expand Down
13 changes: 9 additions & 4 deletions dice_ml/explainer_interfaces/dice_genetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import copy
import random
import timeit
from typing import Any

import numpy as np
import pandas as pd
Expand All @@ -27,7 +28,7 @@ def __init__(self, data_interface, model_interface):
self.num_output_nodes = None

# variables required to generate CFs - see generate_counterfactuals() for more info
self.cfs = []
self.cfs = pd.DataFrame()
self.features_to_vary = []
self.cf_init_weights = [] # total_CFs, algorithm, features_to_vary
self.loss_weights = [] # yloss_type, diversity_loss_type, feature_weights
Expand Down Expand Up @@ -343,12 +344,16 @@ def _predict_fn_custom(self, input_instance, desired_class):

def compute_yloss(self, cfs, desired_range, desired_class):
"""Computes the first part (y-loss) of the loss function."""
yloss = 0.0
yloss: Any = 0.0
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

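`yloss` is currently assigned values of more than one type: it starts as the float `0.0` and is later rebound to the NumPy array returned by `np.maximum(...)`. It is therefore annotated as `Any`.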

if self.model.model_type == ModelTypes.Classifier:
predicted_value = np.array(self.predict_fn_scores(cfs))
if self.yloss_type == 'hinge_loss':
maxvalue = np.full((len(predicted_value)), -np.inf)
for c in range(self.num_output_nodes):
if self.num_output_nodes is None:
num_output_nodes = 0
else:
num_output_nodes = self.num_output_nodes
for c in range(num_output_nodes):
if c != desired_class:
maxvalue = np.maximum(maxvalue, predicted_value[:, c])
yloss = np.maximum(0, maxvalue - predicted_value[:, int(desired_class)])
Expand Down Expand Up @@ -429,7 +434,7 @@ def mate(self, k1, k2, features_to_vary, query_instance):
def find_counterfactuals(self, query_instance, desired_range, desired_class,
features_to_vary, maxiterations, thresh, verbose):
"""Finds counterfactuals by generating cfs through the genetic algorithm"""
population = self.cfs.copy()
population: Any = self.cfs.copy()
iterations = 0
previous_best_loss = -np.inf
current_best_loss = np.inf
Expand Down
9 changes: 6 additions & 3 deletions dice_ml/explainer_interfaces/dice_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import copy
import random
import timeit
from typing import Any, Optional, Union

import numpy as np
import torch
Expand Down Expand Up @@ -223,14 +224,16 @@ def do_optimizer_initializations(self, optimizer, learning_rate):
opt_method = optimizer.split(':')[1]

# optimizer initialization
self.optimizer: Optional[Union[torch.optim.Adam, torch.optim.RMSprop]] = None
if opt_method == "adam":
self.optimizer = torch.optim.Adam(self.cfs, lr=learning_rate)
elif opt_method == "rmsprop":
self.optimizer = torch.optim.RMSprop(self.cfs, lr=learning_rate)

def compute_yloss(self):
"""Computes the first part (y-loss) of the loss function."""
yloss = 0.0
yloss: Any = 0.0
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`yloss` currently holds values of more than one type over its lifetime (it starts as the float `0.0`), so it is annotated as `Any`.

criterion: Optional[Union[torch.nn.BCEWithLogitsLoss, torch.nn.ReLU]] = None
for i in range(self.total_CFs):
if self.yloss_type == "l2_loss":
temp_loss = torch.pow((self.get_model_output(self.cfs[i]) - self.target_cf_class), 2)[0]
Expand Down Expand Up @@ -307,7 +310,7 @@ def compute_diversity_loss(self):
def compute_regularization_loss(self):
"""Adds a linear equality constraints to the loss functions -
to ensure all levels of a categorical variable sums to one"""
regularization_loss = 0.0
regularization_loss: Any = 0.0
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

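`regularization_loss` currently holds values of more than one type: it starts as the float `0.0` and then accumulates the results of `torch.pow(...)`. It is therefore annotated as `Any`.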

for i in range(self.total_CFs):
for v in self.encoded_categorical_feature_indexes:
regularization_loss += torch.pow((torch.sum(self.cfs[i][v[0]:v[-1]+1]) - 1.0), 2)
Expand Down Expand Up @@ -425,7 +428,7 @@ def find_counterfactuals(self, query_instance, desired_class, optimizer, learnin
test_pred = self.predict_fn(torch.tensor(query_instance).float())[0]
if desired_class == "opposite":
desired_class = 1.0 - np.round(test_pred)
self.target_cf_class = torch.tensor(desired_class).float()
self.target_cf_class: Any = torch.tensor(desired_class).float()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`self.target_cf_class` is currently assigned values of more than one type, so it is annotated as `Any`.


self.min_iter = min_iter
self.max_iter = max_iter
Expand Down
7 changes: 3 additions & 4 deletions dice_ml/explainer_interfaces/dice_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,9 @@ def __init__(self, data_interface, model_interface):
self.model.transformer.initialize_transform_func()

self.precisions = self.data_interface.get_decimal_precisions(output_type="dict")
if self.data_interface.outcome_name in self.precisions:
self.outcome_precision = [self.precisions[self.data_interface.outcome_name]]
else:
self.outcome_precision = 0
self.outcome_precision = [
self.precisions[self.data_interface.outcome_name]
] if self.data_interface.outcome_name in self.precisions else 0

def _generate_counterfactuals(self, query_instance, total_CFs, desired_range=None,
desired_class="opposite", permitted_range=None,
Expand Down
3 changes: 1 addition & 2 deletions dice_ml/explainer_interfaces/dice_tensorflow2.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,8 +341,7 @@ def initialize_CFs(self, query_instance, init_near_query_instance=False):
one_init.append(np.random.uniform(self.minx[0][i], self.maxx[0][i]))
else:
one_init.append(query_instance[0][i])
one_init = np.array([one_init], dtype=np.float32)
self.cfs[n].assign(one_init)
self.cfs[n].assign(np.array([one_init], dtype=np.float32))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pass the converted array directly to `assign(...)` instead of rebinding `one_init`. This keeps `one_init` a list throughout, so it never changes type to a NumPy array.


def round_off_cfs(self, assign=False):
"""function for intermediate projection of CFs."""
Expand Down
26 changes: 16 additions & 10 deletions dice_ml/explainer_interfaces/explainer_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pickle
from abc import ABC, abstractmethod
from collections.abc import Iterable
from typing import Any, Dict, Optional, Union

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -152,10 +153,9 @@ def generate_counterfactuals(self, query_instances, total_CFs,
cf_examples_arr = []
query_instances_list = []
if isinstance(query_instances, pd.DataFrame):
for ix in range(query_instances.shape[0]):
query_instances_list.append(query_instances[ix:(ix+1)])
query_instances_list = [query_instances[ix:(ix+1)] for ix in range(query_instances.shape[0])]
elif isinstance(query_instances, Iterable):
query_instances_list = query_instances
query_instances_list = [query_instance for query_instance in query_instances]
for query_instance in tqdm(query_instances_list):
self.data_interface.set_continuous_feature_indexes(query_instance)
res = self._generate_counterfactuals(
Expand Down Expand Up @@ -416,7 +416,7 @@ def feature_importance(self, query_instances, cf_examples_list=None,
posthoc_sparsity_algorithm=posthoc_sparsity_algorithm,
**kwargs).cf_examples_list
allcols = self.data_interface.categorical_feature_names + self.data_interface.continuous_feature_names
summary_importance = None
summary_importance: Optional[Union[Dict[int, float]]] = None
local_importances = None
if global_importance:
summary_importance = {}
Expand Down Expand Up @@ -731,13 +731,16 @@ def is_cf_valid(self, model_score):
model_score = model_score[0]
# Converting target_cf_class to a scalar (tf/torch have it as (1,1) shape)
if self.model.model_type == ModelTypes.Classifier:
target_cf_class = self.target_cf_class
if hasattr(self.target_cf_class, "shape"):
if len(self.target_cf_class.shape) == 1:
target_cf_class = self.target_cf_class[0]
temp_target_cf_class = self.target_cf_class[0]
elif len(self.target_cf_class.shape) == 2:
target_cf_class = self.target_cf_class[0][0]
target_cf_class = int(target_cf_class)
temp_target_cf_class = self.target_cf_class[0][0]
else:
temp_target_cf_class = int(self.target_cf_class)
else:
temp_target_cf_class = int(self.target_cf_class)
target_cf_class = temp_target_cf_class

if len(model_score) == 1: # for tensorflow/pytorch models
pred_1 = model_score[0]
Expand All @@ -757,6 +760,7 @@ def is_cf_valid(self, model_score):
return self.target_cf_range[0] <= model_score and model_score <= self.target_cf_range[1]

def get_model_output_from_scores(self, model_scores):
output_type: Any = None
if self.model.model_type == ModelTypes.Classifier:
output_type = np.int32
else:
Expand Down Expand Up @@ -806,17 +810,19 @@ def build_KD_tree(self, data_df_copy, desired_range, desired_class, predicted_ou
data_df_copy[predicted_outcome_name] = predictions

# segmenting the dataset according to outcome
dataset_with_predictions = None
if self.model.model_type == ModelTypes.Classifier:
dataset_with_predictions = data_df_copy.loc[[i == desired_class for i in predictions]].copy()

elif self.model.model_type == ModelTypes.Regressor:
dataset_with_predictions = data_df_copy.loc[
[desired_range[0] <= pred <= desired_range[1] for pred in predictions]].copy()

else:
dataset_with_predictions = None

KD_tree = None
# Prepares the KD trees for DiCE
if len(dataset_with_predictions) > 0:
if dataset_with_predictions is not None and len(dataset_with_predictions) > 0:
dummies = pd.get_dummies(dataset_with_predictions[self.data_interface.feature_names])
KD_tree = KDTree(dummies)

Expand Down
8 changes: 4 additions & 4 deletions dice_ml/explainer_interfaces/feasible_base_vae.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,9 @@ def train(self, pre_trained=False):
train_loss = 0.0
train_size = 0

train_dataset = torch.tensor(self.vae_train_feat).float()
train_dataset = torch.utils.data.DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
train_dataset = torch.utils.data.DataLoader(
torch.tensor(self.vae_train_feat).float(), # type: ignore
batch_size=self.batch_size, shuffle=True)
for train in enumerate(train_dataset):
self.cf_vae_optimizer.zero_grad()

Expand Down Expand Up @@ -178,8 +179,7 @@ def generate_counterfactuals(self, query_instance, total_CFs, desired_class="opp
final_cf_pred = []
final_test_pred = []
for i in range(len(query_instance)):
train_x = test_dataset[i]
train_x = torch.tensor(train_x).float()
train_x = torch.tensor(test_dataset[i]).float()
train_y = torch.argmax(self.pred_model(train_x), dim=1)

curr_gen_cf = []
Expand Down
5 changes: 3 additions & 2 deletions dice_ml/explainer_interfaces/feasible_model_approx.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,9 @@ def train(self, constraint_type, constraint_variables, constraint_direction, con
train_loss = 0.0
train_size = 0

train_dataset = torch.tensor(self.vae_train_feat).float()
train_dataset = torch.utils.data.DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
train_dataset = torch.utils.data.DataLoader(
torch.tensor(self.vae_train_feat).float(), # type: ignore
batch_size=self.batch_size, shuffle=True)
for train in enumerate(train_dataset):
self.cf_vae_optimizer.zero_grad()

Expand Down