# Source code for distil.active_learning_strategies.bayesian_active_learning_disagreement_dropout

import torch
from .strategy import Strategy

class BALDDropout(Strategy):
    """
    Implements Bayesian Active Learning by Disagreement (BALD) Strategy :footcite:`houlsby2011bayesian`,
    which assumes a Bayesian setting and selects points which maximise the mutual information
    between the predicted labels and model parameters. This implementation is an adaptation for a
    non-bayesian setting, with the assumption that there is a dropout layer in the model.

    Parameters
    ----------
    X: numpy array
        Present training/labeled data
    Y: numpy array
        Labels of present training data
    unlabeled_x: numpy array
        Data without labels
    net: class
        Pytorch Model class
    handler: class
        Data Handler, which can load data even without labels.
    nclasses: int
        Number of unique target variables
    args: dict, optional
        Specify optional parameters. The whole dict is forwarded to the base
        ``Strategy`` constructor.

        batch_size
        Batch size to be used inside strategy class (int, optional)

        n_drop
        Value passed as the second argument to ``predict_prob_dropout_split``
        (int, optional, default 10). Presumably the number of stochastic
        dropout forward passes -- confirm against ``Strategy``.
    """

    def __init__(self, X, Y, unlabeled_x, net, handler, nclasses, args=None):
        """
        Constructor method
        """
        # Build a fresh dict per call so that no mutable default is shared
        # between instances.
        if args is None:
            args = {}

        self.n_drop = args.get('n_drop', 10)

        # Forward the caller's options; passing a new empty dict here would
        # silently drop settings such as batch_size.
        super(BALDDropout, self).__init__(X, Y, unlabeled_x, net, handler, nclasses, args=args)

    def select(self, budget):
        """
        Select next set of points

        Parameters
        ----------
        budget: int
            Number of indexes to be returned for next set

        Returns
        ----------
        idxs: torch tensor
            Indexes of the selected data points with respect to unlabeled_x,
            as produced by ``Tensor.sort``. Fewer than ``budget`` indexes are
            returned when fewer scores are available.
        """
        # probs is reduced over dim 0 (mean) and dim 2 (sum) below, i.e. it is
        # treated as a 3-D tensor; presumably (n_drop, n_points, n_classes).
        probs = self.predict_prob_dropout_split(self.unlabeled_x, self.n_drop)
        mean_probs = probs.mean(0)

        # xlogy(p, p) equals p * log(p) but evaluates to 0 at p == 0, whereas
        # p * torch.log(p) gives 0 * -inf = NaN and would push such points to
        # the end of the sort, so they could never be selected.
        entropy_of_mean = -torch.xlogy(mean_probs, mean_probs).sum(1)
        mean_of_entropy = -torch.xlogy(probs, probs).sum(2).mean(0)

        # U is the negative of the mutual-information estimate, so the
        # ascending sort puts the highest-disagreement points first.
        U = mean_of_entropy - entropy_of_mean
        idxs = U.sort()[1][:budget]

        return idxs