Source code for distil.active_learning_strategies.badge

from .strategy import Strategy
import numpy as np
import pdb

import torch
from torch import nn
from scipy import stats

def init_centers(X, K, device):
    # k-means++ seeding over the gradient embeddings X: returns the indices of K selected points.
    pdist = nn.PairwiseDistance(p=2)
    # The first center is the embedding with the largest L2 norm.
    ind = np.argmax([np.linalg.norm(s, 2) for s in X])
    mu = [X[ind]]
    indsAll = [ind]
    centInds = [0.] * len(X)
    cent = 0
    #print('#Samps\tTotal Distance')
    while len(mu) < K:
        if len(mu) == 1:
            # Distances of every point to the single chosen center.
            D2 = pdist(torch.from_numpy(X).to(device), torch.from_numpy(mu[-1]).to(device))
            D2 = torch.flatten(D2)
            D2 = D2.cpu().numpy().astype(float)
        else:
            # Distances to the most recently added center; keep the minimum distance seen so far.
            newD = pdist(torch.from_numpy(X).to(device), torch.from_numpy(mu[-1]).to(device))
            newD = torch.flatten(newD)
            newD = newD.cpu().numpy().astype(float)
            for i in range(len(X)):
                if D2[i] > newD[i]:
                    centInds[i] = cent
                    D2[i] = newD[i]

        # Degenerate case: every remaining point coincides with an already chosen center.
        if sum(D2) == 0.0: pdb.set_trace()
        D2 = D2.ravel().astype(float)
        # Sample the next center with probability proportional to squared distance (the k-means++ rule).
        Ddist = (D2 ** 2) / sum(D2 ** 2)
        customDist = stats.rv_discrete(name='custm', values=(np.arange(len(D2)), Ddist))
        ind = customDist.rvs(size=1)[0]
        mu.append(X[ind])
        indsAll.append(ind)
        cent += 1
    #gram = np.matmul(X[indsAll], X[indsAll].T)
    #val, _ = np.linalg.eig(gram)
    #val = np.abs(val)
    #vgt = val[val > 1e-2]
    return indsAll
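
# A minimal sketch of calling init_centers directly (illustrative only; the array below is a
# hypothetical stand-in for the gradient embeddings that BADGE computes):
#
#   emb = np.random.randn(1000, 128).astype(np.float32)
#   seed_idxs = init_centers(emb, 10, 'cpu')   # indices of 10 points chosen by k-means++ seeding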

class BADGE(Strategy):

    """
    This method is based on the paper Deep Batch Active Learning by Diverse, Uncertain Gradient
    Lower Bounds :footcite:`DBLP-Badge`. According to the paper, this strategy, Batch Active
    learning by Diverse Gradient Embeddings (BADGE), samples groups of points that are disparate
    and high magnitude when represented in a hallucinated gradient space, a strategy designed to
    incorporate both predictive uncertainty and sample diversity into every selected batch.
    Crucially, BADGE trades off between uncertainty and diversity without requiring any hand-tuned
    hyperparameters. At each round of selection, loss gradients are computed using the hypothesised
    labels. The points to be labeled are then selected by applying k-means++ to these loss gradients.

    Parameters
    ----------
    labeled_dataset: torch.utils.data.Dataset
        The labeled training dataset
    unlabeled_dataset: torch.utils.data.Dataset
        The unlabeled pool dataset
    net: torch.nn.Module
        The deep model to use
    nclasses: int
        Number of unique values for the target
    args: dict
        Specify additional parameters

        - **batch_size**: The batch size used internally for torch.utils.data.DataLoader objects. (int, optional)
        - **device**: The device to be used for computation. PyTorch constructs are transferred to this device. Usually is one of 'cuda' or 'cpu'. (string, optional)
        - **loss**: The loss function to be used in computations. (typing.Callable[[torch.Tensor, torch.Tensor], torch.Tensor], optional)
    """

    def __init__(self, labeled_dataset, unlabeled_dataset, net, nclasses, args={}):

        super(BADGE, self).__init__(labeled_dataset, unlabeled_dataset, net, nclasses, args)
    def select(self, budget):
        """
        Selects next set of points

        Parameters
        ----------
        budget: int
            Number of data points to select for labeling

        Returns
        ----------
        idxs: list
            List of selected data point indices with respect to unlabeled_dataset
        """

        self.model.eval()
        gradEmbedding = self.get_grad_embedding(self.unlabeled_dataset, True, "linear")
        chosen = init_centers(gradEmbedding.cpu().numpy(), budget, self.device)
        return chosen
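
# A minimal usage sketch of the BADGE strategy (assumed setup: `train_set`, `pool_set`, and
# `model` are user-provided datasets and a torch.nn.Module; the names here are hypothetical):
#
#   strategy = BADGE(train_set, pool_set, model, nclasses=10, args={'batch_size': 32, 'device': 'cpu'})
#   idxs = strategy.select(budget=100)   # indices into pool_set chosen for labeling
#   # ...label those points, move them from pool_set to train_set, retrain, and repeat.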