Source code for sklvq.objectives._generalized_learning_objective

from typing import TYPE_CHECKING
from typing import Tuple
from typing import Union

import numpy as np

from ..objectives._base import ObjectiveBaseClass
from .. import activations, discriminants
from .._utils import init_class

if TYPE_CHECKING:
    from ..models import LVQBaseClass


# String identifiers accepted for ``activation_type``. The objective's constructor hands this
# list to ``init_class`` together with the ``activations`` package when resolving the class.
ACTIVATION_FUNCTIONS = [
    "identity",
    "sigmoid",
    "soft-plus",
    "swish",
]

# String identifiers accepted for ``discriminant_type``. The objective's constructor hands this
# list to ``init_class`` together with the ``discriminants`` package when resolving the class.
DISCRIMINANT_FUNCTIONS = [
    "relative-distance",
]


class GeneralizedLearningObjective(ObjectiveBaseClass):
    """Generalized learning objective

    Class that holds the generalized learning objective function and its gradient as described
    in `[1]`_.

    Parameters
    ----------
    activation_type : {"identity", "sigmoid", "soft-plus", "swish"} or type
        If string needs to be one of the indicated options. If not a string needs to be a custom
        activation class. See :class:`sklvq.activations.ActivationBaseClass`.

    activation_params : dict or None
        The dictionary with the parameters for the activation function or None if it doesn't
        require any parameters.

    discriminant_type: {"relative-distance"} or type
        Can only be the relative distance. If not a string it can be a custom class. See
        :class:`sklvq.discriminants.DiscriminantBaseClass`.

    discriminant_params : dict or None
        The dictionary with the parameters for the discriminant function or None if it doesn't
        require any parameters.

    Notes
    -----
    Compatible and used within the following models: :class:`.GLVQ`, :class:`.GMLVQ`,
    and :class:`.LGMLVQ`.

    References
    ----------
    _`[1]` Sato, A., and Yamada, K. (1996) "Generalized Learning Vector Quantization."
    Advances in Neural Network Information Processing Systems, 423–429, 1996.
    """

    def __init__(
        self,
        activation_type: Union[str, type],
        activation_params: Union[dict, None],
        discriminant_type: Union[str, type],
        discriminant_params: Union[dict, None],
    ):
        # ``None`` is shorthand for "no extra keyword arguments".
        if activation_params is None:
            activation_params = {}

        activation_class = init_class(activations, activation_type, ACTIVATION_FUNCTIONS)
        self.activation = activation_class(**activation_params)

        if discriminant_params is None:
            discriminant_params = {}

        discriminant_class = init_class(discriminants, discriminant_type, DISCRIMINANT_FUNCTIONS)
        self.discriminant = discriminant_class(**discriminant_params)

    def __call__(
        self,
        model: "LVQBaseClass",
        data: np.ndarray,
        labels: np.ndarray,
    ) -> float:
        r"""Computes the generalized learning objective:

        .. math::
            E = \sum_{i=1}^{N} f(\mu(d_0(\mathbf{x}_i), d_1(\mathbf{x}_i)))

        with :math:`\mu(\cdot)` the discriminative function, :math:`f(\cdot)` the activation
        function, and :math:`d_0(\mathbf{x}_i)` and :math:`d_1(\mathbf{x}_i)` the shortest
        distance to a prototype with a different and the same label respectively.

        Parameters
        ----------
        model : LVQBaseClass
            The model which can be any LVQBaseClass compatible with this objective function.
        data: ndarray with shape (n_samples, n_features)
            The data.
        labels: ndarray with shape (n_samples)
            The labels of the samples in the data.

        Returns
        -------
        float:
            The cost
        """
        dist_same, dist_diff, _, _ = _compute_distance(data, labels, model)

        # Summing over all samples collapses the per-sample costs into one scalar.
        return np.sum(self.activation(self.discriminant(dist_same, dist_diff)))

    def gradient(
        self,
        model: "LVQBaseClass",
        data: np.ndarray,
        labels: np.ndarray,
    ) -> np.ndarray:
        r"""Computes the generalized learning objective's gradient with respect to the
        prototype with a different label:

        .. math::
            \frac{\partial E}{\partial \mathbf{w}_0} = \frac{\partial f}{\partial \mu}
            \frac{\partial \mu}{\partial d_0} \frac{\partial d_0}{\partial \mathbf{w}_0}

        with :math:`\mathbf{w}_0` the prototype with a different label than the data and
        :math:`d_0` the distance to that prototype.

        .. math::
            \frac{\partial E}{\partial \mathbf{w}_1} = \frac{\partial f}{\partial \mu}
            \frac{\partial \mu}{\partial d_1} \frac{\partial d_1}{\partial \mathbf{w}_1}

        with :math:`\mathbf{w}_1` the prototype with the same label as the data and
        :math:`d_1` the distance to that prototype.

        Parameters
        ----------
        model : LVQBaseClass
            The model which can be any LVQBaseClass compatible with this objective function.
        data: ndarray with shape (n_samples, n_features)
            The data.
        labels: ndarray with shape (n_samples)
            The labels of the samples in the data.

        Returns
        -------
        ndarray with the same shape as the model variables array (depending on the model)
            The generalized learning objective function's gradient
        """
        dist_same, dist_diff, i_dist_same, i_dist_diff = _compute_distance(data, labels, model)

        discriminant_score = self.discriminant(dist_same, dist_diff)

        # Pre-allocation, needs to be zero because partial gradients are added into it.
        gradient_buffer = np.zeros(model.get_variables().size)

        # For each prototype
        for i_prototype in range(model.prototypes_labels_.size):
            # Samples for which this prototype is the closest one WITH the same label.
            ii_winner_same = i_prototype == i_dist_same
            # Only if such samples exist we can/should compute an update.
            if np.any(ii_winner_same):
                self._partial_gradient(
                    gradient_buffer,
                    discriminant_score[ii_winner_same],
                    dist_same[ii_winner_same],
                    dist_diff[ii_winner_same],
                    True,  # Indicating same label
                    data[ii_winner_same, :],
                    model,
                    i_prototype,
                )

            # Samples for which this prototype is the closest one with a DIFFERENT label.
            ii_winner_diff = i_prototype == i_dist_diff
            if np.any(ii_winner_diff):
                self._partial_gradient(
                    gradient_buffer,
                    discriminant_score[ii_winner_diff],
                    dist_same[ii_winner_diff],
                    dist_diff[ii_winner_diff],
                    False,  # Indicating diff label
                    data[ii_winner_diff, :],
                    model,
                    i_prototype,
                )

        return gradient_buffer

    def _partial_gradient(
        self,
        gradient_buffer,
        discriminant_score,
        dist_same,
        dist_diff,
        winner_same,
        data,
        model,
        i_prototype,
    ):
        """Adds the contribution of one prototype (for the given samples) to the buffer.

        The three chain-rule factors are evaluated separately and then combined; the result is
        handed to ``model.add_partial_gradient`` which writes it into ``gradient_buffer``.
        """
        # Partial derivative of the activation w.r.t. the discriminant score: df/du
        activation_gradient = self.activation.gradient(discriminant_score)

        # Partial derivative of the discriminant w.r.t. a distance: du/dd. The ``winner_same``
        # flag is forwarded so the discriminant can pick the same-label or different-label case.
        discriminant_gradient = self.discriminant.gradient(dist_same, dist_diff, winner_same)

        # Partial derivative of the distance w.r.t. the prototype ``i_prototype``: dd/dw
        distance_gradient = model._distance.gradient(data, model, i_prototype)

        # The distance vectors weighted by the activation and discriminant partial
        # derivatives.
        model.add_partial_gradient(
            gradient_buffer,
            (activation_gradient * discriminant_gradient).dot(distance_gradient),
            i_prototype,
        )
def _find_min(indices: np.ndarray, distances: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Helper function to find the minimum distance and the index of this distance.

    Parameters
    ----------
    indices : boolean ndarray, same shape as ``distances``
        Marks per row which columns (prototypes) are allowed to win.
    distances : ndarray
        Distances with one row per sample and one column per prototype.

    Returns
    -------
    (ndarray, ndarray)
        Per row the smallest allowed distance and the column index where it was found. A row
        without any allowed column yields ``inf`` and index 0 (the ``argmin`` of an all-``inf``
        row).
    """
    # Set the irrelevant distances to infinity.
    dist_temp = np.where(indices, distances, np.inf)

    # Find the indices of the closest prototype (column)
    i_dist_min = dist_temp.argmin(axis=1)

    # Return the shortest distances and the indices of the prototypes.
    return dist_temp[np.arange(i_dist_min.size), i_dist_min], i_dist_min


def _compute_distance(
    data: np.ndarray, labels: np.ndarray, model: "LVQBaseClass"
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Computes the distances between each prototype and each observation and finds all indices
    where the shortest distance is that of the prototype with the same label and with a
    different label.

    Returns
    -------
    (dist_same, dist_diff, i_dist_same, i_dist_diff)
        Per sample: the distance to the closest prototype with the same label, the distance to
        the closest prototype with a different label, and the prototype indices of both.
    """
    prototypes_labels = model.prototypes_labels_

    # Step 1: Compute distances between X and the model (how is depending on model and coupled
    # distance function)
    distances = model._distance(data, model)

    # Step 2: Find for all samples the distance between closest prototype with same label (d1)
    # and different label (d2). ii_same marks for all samples the prototype with the same label.
    # A single broadcast comparison of a label column against a prototype-label row yields the
    # (n_samples, n_prototypes) mask for every sample count, including zero samples.
    ii_same = np.reshape(labels, (-1, 1)) == np.reshape(prototypes_labels, (1, -1))

    # For each prototype mark the samples that have a different label
    ii_diff = ~ii_same

    # For each sample find the closest prototype with the same label. Returns distance and index
    # of prototype
    dist_same, i_dist_same = _find_min(ii_same, distances)

    # For each sample find the closest prototype with a different label. Returns distance and
    # index of prototype
    dist_diff, i_dist_diff = _find_min(ii_diff, distances)

    return dist_same, dist_diff, i_dist_same, i_dist_diff