PyTorch

Counterfactuals of a PyTorch model are computed with the ceml.torch.counterfactual.generate_counterfactual() function, which is also exposed as ceml.torch.generate_counterfactual (as used in the example below).

We must provide the PyTorch model within a class that is derived from both torch.nn.Module and ceml.model.model.ModelWithLoss. In this class, we must override the predict function as well as the get_loss function, which returns the loss we want to use - several differentiable loss functions are implemented in ceml.backend.torch.costfunctions.
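A minimal sketch of such a wrapper is given below. It assumes an already trained PyTorch module that outputs class scores; the class name is illustrative and the choice of NegLogLikelihoodCost is just one of the available losses - the complete example further down shows a concrete instance:

import torch

from ceml.model import ModelWithLoss
from ceml.backend.torch.costfunctions import NegLogLikelihoodCost


# Sketch only: wrap an existing (already trained) PyTorch module so that CEML
# can query predictions and a differentiable loss. The class name is illustrative.
class WrappedClassifier(torch.nn.Module, ModelWithLoss):
    def __init__(self, net):
        super().__init__()
        self.net = net  # any torch.nn.Module that outputs class scores
        self.softmax = torch.nn.Softmax(dim=0)

    def forward(self, x):
        return self.net(x)

    def predict_proba(self, x):
        return self.softmax(self.forward(x))

    def predict(self, x, dim=1):
        return torch.argmax(self.forward(x), dim=dim)

    def get_loss(self, y_target, pred=None):
        # Any differentiable loss from ceml.backend.torch.costfunctions can be used here
        return NegLogLikelihoodCost(input_to_output=self.predict_proba, y_target=y_target)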

Besides the model, we must specify the input whose prediction we want to explain and the desired target prediction (the prediction of the counterfactual). In addition, we can restrict the features that may be changed when computing the counterfactual, specify a regularization of the counterfactual, and choose the optimization algorithm used for computing it.
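Schematically, a call with all of these options set looks as follows (the argument values are illustrative; model and x_orig are assumed to be the wrapped model and the input to explain):

from ceml.torch import generate_counterfactual

# Illustrative call; `model` is an instance of the wrapper class and `x_orig`
# the input whose prediction we want to explain.
counterfactual = generate_counterfactual(
    model, x_orig,
    y_target=0,                 # desired prediction of the counterfactual
    features_whitelist=[0, 2],  # features that may be changed (None allows all features)
    regularization="l1",        # regularization of the counterfactual
    C=0.1,                      # regularization strength
    optimizer="nelder-mead"     # optimization algorithm
)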

A complete example of a softmax regression model using the negative log-likelihood as a loss is given below:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import torch
torch.manual_seed(424242)
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from ceml.torch import generate_counterfactual
from ceml.backend.torch.costfunctions import NegLogLikelihoodCost
from ceml.model import ModelWithLoss


# Neural network - Softmax regression
class Model(torch.nn.Module, ModelWithLoss):
    def __init__(self, input_size, num_classes):
        super(Model, self).__init__()

        self.linear = torch.nn.Linear(input_size, num_classes)
        self.softmax = torch.nn.Softmax(dim=0)

    def forward(self, x):
        return self.linear(x)   # NOTE: Softmax is built into CrossEntropyLoss

    def predict_proba(self, x):
        return self.softmax(self.forward(x))

    def predict(self, x, dim=1):
        return torch.argmax(self.forward(x), dim=dim)

    def get_loss(self, y_target, pred=None):
        return NegLogLikelihoodCost(input_to_output=self.predict_proba, y_target=y_target)


if __name__ == "__main__":
    # Load data
    X, y = load_iris(return_X_y=True)
    X = X.astype(np.dtype(np.float32))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)

    # numpy -> torch tensor
    x = torch.from_numpy(X_train)
    labels = torch.from_numpy(y_train)

    x_test = torch.from_numpy(X_test)
    y_test = torch.from_numpy(y_test)

    # Create and fit model
    model = Model(4, 3)

    learning_rate = 0.001
    momentum = 0.9
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

    num_epochs = 800
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = model(x)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # Evaluation
    y_pred = model.predict(x_test).detach().numpy()
    print("Accuracy: {0}".format(accuracy_score(y_test, y_pred)))

    # Select a data point whose prediction has to be explained
    x_orig = X_test[1,:]
    print("Prediction on x: {0}".format(model.predict(torch.from_numpy(np.array([x_orig])))))

    # Whitelist of features we can use/change when computing the counterfactual
    features_whitelist = [0, 2]  # Use the first and third feature only

    # Compute counterfactual
    print("\nCompute counterfactual ....")
    print(generate_counterfactual(model, x_orig, y_target=0, features_whitelist=features_whitelist, regularization="l1", C=0.1, optimizer="nelder-mead"))
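According to the CEML documentation, generate_counterfactual returns the computed counterfactual, its prediction, and the change to the original input - by default as a dictionary. A sketch of unpacking the result, assuming the default dictionary return format (the key names x_cf, y_cf and delta are taken from the documentation and should be verified against the installed CEML version):

# Sketch: unpack the result, assuming the default dictionary return format.
# The key names may differ between CEML versions.
result = generate_counterfactual(model, x_orig, y_target=0, features_whitelist=features_whitelist, regularization="l1", C=0.1, optimizer="nelder-mead")
print("Counterfactual: {0}".format(result["x_cf"]))
print("Prediction: {0}".format(result["y_cf"]))
print("Change to the original input: {0}".format(result["delta"]))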