# Source code for sklvq.objectives._generalized_learning_objective

import numpy as np

from ..objectives._base import ObjectiveBaseClass
from .. import activations, discriminants

from typing import Union
from typing import TYPE_CHECKING

from .._utils import init_class

if TYPE_CHECKING:
    from ..models import LVQBaseClass


# Names of the built-in activation functions that may be selected by string. The list is
# handed to ``init_class`` together with the ``activations`` package when the objective is
# constructed.
ACTIVATION_FUNCTIONS = ["identity", "sigmoid", "soft-plus", "swish"]

# Names of the built-in discriminant functions that may be selected by string. The list is
# handed to ``init_class`` together with the ``discriminants`` package when the objective is
# constructed.
DISCRIMINANT_FUNCTIONS = ["relative-distance"]


class GeneralizedLearningObjective(ObjectiveBaseClass):
    """Generalized learning objective

    Class that holds the generalized learning objective function and its gradient as described
    in `[1]`_.

    Parameters
    ----------
    activation_type : {"identity", "sigmoid", "soft-plus", "swish"} or type
        If a string, it needs to be one of the indicated options. If not a string, it needs to
        be a custom activation class. See :class:`sklvq.activations.ActivationBaseClass`.
    activation_params : dict or None
        The dictionary with the parameters for the activation function or None if it doesn't
        require any parameters.
    discriminant_type : {"relative-distance"} or type
        Can only be the relative distance. If not a string it can be a custom class.
        See :class:`sklvq.discriminants.DiscriminantBaseClass`.
    discriminant_params : dict or None
        The dictionary with the parameters for the discriminant function or None if it doesn't
        require any parameters.

    Attributes
    ----------
    activation
        Instance of the resolved activation class, built with ``activation_params``.
    discriminant
        Instance of the resolved discriminant class, built with ``discriminant_params``.

    Notes
    -----
    Compatible and used within the following models: :class:`.GLVQ`, :class:`.GMLVQ`,
    and :class:`.LGMLVQ`.

    References
    ----------
    _`[1]` Sato, A., and Yamada, K. (1996) "Generalized Learning Vector Quantization."
    Advances in Neural Network Information Processing Systems, 423–429, 1996."""

    def __init__(
        self,
        activation_type: Union[str, type],
        activation_params: Union[dict, None],
        discriminant_type: Union[str, type],
        discriminant_params: Union[dict, None],
    ):
        # None means "no parameters": instantiate the class without keyword arguments.
        if activation_params is None:
            activation_params = {}

        activation_class = init_class(
            activations, activation_type, ACTIVATION_FUNCTIONS
        )
        self.activation = activation_class(**activation_params)

        if discriminant_params is None:
            discriminant_params = {}

        discriminant_class = init_class(
            discriminants, discriminant_type, DISCRIMINANT_FUNCTIONS
        )
        self.discriminant = discriminant_class(**discriminant_params)

    def __call__(
        self,
        model: "LVQBaseClass",
        data: np.ndarray,
        labels: np.ndarray,
    ) -> float:
        r"""Computes the generalized learning objective:

            .. math::

                E = \sum_{i=1}^{N} f(\mu(d_0(\mathbf{x}_i), d_1(\mathbf{x}_i)))

        with :math:`\mu(\cdot)` the discriminative function, :math:`f(\cdot)` the activation
        function, and :math:`d_0(\mathbf{x}_i)` and :math:`d_1(\mathbf{x}_i)` the shortest
        distance to a prototype with a different and the same label respectively.

        Parameters
        ----------
        model : LVQBaseClass
            The model which can be any LVQBaseClass compatible with this objective function.

        data : ndarray with shape (n_samples, n_features)
            The data.

        labels : ndarray with shape (n_samples)
            The labels of the samples in the data.

        Returns
        -------
        float
            The cost, i.e. the activations of the discriminant scores summed over all samples.
        """
        # Only the distances are needed for the cost; the winner indices are discarded.
        dist_same, dist_diff, _, _ = _compute_distance(data, labels, model)

        return np.sum(self.activation(self.discriminant(dist_same, dist_diff)))

    def gradient(
        self,
        model: "LVQBaseClass",
        data: np.ndarray,
        labels: np.ndarray,
    ) -> np.ndarray:
        r"""Computes the generalized learning objective's gradient with respect to the
        prototype with a different label:

            .. math::
                \frac{\partial E}{\partial \mathbf{w}_0} = \frac{\partial f}{\partial \mu}
                \frac{\partial \mu}{\partial d_0} \frac{\partial d_0}{\partial \mathbf{w}_0}

        with :math:`\mathbf{w}_0` the prototype with a different label than the data and :math:`d_0`
        the distance to that prototype.

            .. math::
                 \frac{\partial E}{\partial \mathbf{w}_1} = \frac{\partial f}{\partial \mu}
                 \frac{\partial \mu}{\partial d_1} \frac{\partial d_1}{\partial \mathbf{w}_1}

        with :math:`\mathbf{w}_1` the prototype with the same label as the data and :math:`d_1`
        the distance to that prototype.

        Parameters
        ----------
        model : LVQBaseClass
            The model which can be any LVQBaseClass compatible with this objective function.

        data : ndarray with shape (n_samples, n_features)
            The data.

        labels : ndarray with shape (n_samples)
            The labels of the samples in the data.

        Returns
        -------
        ndarray with the same shape as the model variables array (depending on the model)
            The generalized learning objective function's gradient

        """
        dist_same, dist_diff, i_dist_same, i_dist_diff = _compute_distance(
            data, labels, model
        )
        discriminant_score = self.discriminant(dist_same, dist_diff)

        # Pre-allocation, needs to be zero: prototypes that never win contribute nothing.
        gradient_buffer = np.zeros(model.get_variables().size)

        # For each prototype, first handle the samples for which it is the closest prototype
        # with the same label, then those for which it is the closest prototype with a
        # different label.
        for i_prototype in range(model.prototypes_labels_.size):
            for winner_indices, winner_same in (
                (i_dist_same, True),
                (i_dist_diff, False),
            ):
                # Boolean mask over the samples; computed once and reused both for the
                # existence check and for the selection below.
                ii_winner = winner_indices == i_prototype

                # Only if such samples exist we can/should compute an update.
                if not ii_winner.any():
                    continue

                self._partial_gradient(
                    gradient_buffer,
                    discriminant_score[ii_winner],
                    dist_same[ii_winner],
                    dist_diff[ii_winner],
                    winner_same,
                    data[ii_winner, :],
                    model,
                    i_prototype,
                )

        return gradient_buffer

    def _partial_gradient(
        self,
        gradient_buffer,
        discriminant_score,
        dist_same,
        dist_diff,
        winner_same,
        data,
        model,
        i_prototype,
    ):
        """Adds the gradient contribution of a single prototype to ``gradient_buffer``.

        Parameters
        ----------
        gradient_buffer : ndarray
            The buffer handed to ``model.add_partial_gradient``; presumably updated in place,
            as ``gradient`` returns this same buffer afterwards.
        discriminant_score : ndarray
            Discriminant values of the samples for which prototype ``i_prototype`` is the winner.
        dist_same : ndarray
            For those samples, the distance to the closest prototype with the same label.
        dist_diff : ndarray
            For those samples, the distance to the closest prototype with a different label.
        winner_same : bool
            True when ``i_prototype`` is the winner with the same label as the samples, False
            when it is the winner with a different label.
        data : ndarray
            The samples (rows of the data) for which ``i_prototype`` is the winner.
        model : LVQBaseClass
            The model that provides the distance function and ``add_partial_gradient``.
        i_prototype : int
            Index of the prototype the contribution belongs to.
        """
        # Computes the following partial derivative: df/du
        activation_gradient = self.activation.gradient(discriminant_score)

        # Computes the partial derivative of the discriminant with respect to the distance to
        # the winning prototype; winner_same selects the same-label or different-label case.
        discriminant_gradient = self.discriminant.gradient(
            dist_same, dist_diff, winner_same
        )

        # Computes the partial derivative of that distance with respect to the model
        # parameters associated with prototype i_prototype.
        distance_gradient = model._distance.gradient(data, model, i_prototype)

        # The distance gradients weighted by the activation and discriminant partial
        # derivatives.
        model.add_partial_gradient(
            gradient_buffer,
            (activation_gradient * discriminant_gradient).dot(distance_gradient),
            i_prototype,
        )


def _find_min(indices: np.ndarray, distances: np.ndarray) -> (np.ndarray, np.ndarray):
    """Return, per row, the smallest selected distance and the column where it occurs.

    Parameters
    ----------
    indices : ndarray of bool
        Mask marking which entries of ``distances`` take part in the search.
    distances : ndarray
        The distances to search through, one row per sample.

    Returns
    -------
    tuple of (ndarray, ndarray)
        The minimum selected distance of every row and the column index of that minimum.
        A row in which nothing is selected yields ``inf`` and column 0.
    """
    # Entries that are not selected can never win once they are replaced by infinity.
    masked = np.where(indices, distances, np.inf)
    # Column of the smallest remaining value in every row.
    winners = masked.argmin(axis=1)
    # Pair every row number with its winning column to read out the minimum itself.
    rows = np.arange(winners.size)
    return masked[rows, winners], winners


def _compute_distance(data: np.ndarray, labels: np.ndarray, model: "LVQBaseClass"):
    """Computes the distances between each prototype and each observation and finds, for every
    observation, the closest prototype with the same label and the closest prototype with a
    different label.

    Parameters
    ----------
    data : ndarray
        The samples, passed on unchanged to the model's distance function.
    labels : ndarray
        The label of every sample.
    model : LVQBaseClass
        The model; provides ``prototypes_labels_`` and the distance function ``_distance``.

    Returns
    -------
    dist_same : ndarray
        Per sample, the distance to the closest prototype with the same label.
    dist_diff : ndarray
        Per sample, the distance to the closest prototype with a different label.
    i_dist_same : ndarray
        Per sample, the index of the closest prototype with the same label.
    i_dist_diff : ndarray
        Per sample, the index of the closest prototype with a different label.
    """
    prototypes_labels = model.prototypes_labels_

    # Step 1: Compute distances between X and the model (how is depending on model and coupled
    # distance function). Expected to hold one row per sample and one column per prototype.
    distances = model._distance(data, model)

    # Step 2: Mark for every (sample, prototype) pair whether the labels agree. Comparing a
    # column of sample labels with a row of prototype labels broadcasts to a boolean matrix of
    # shape (n_samples, n_prototypes) in one vectorized operation, for any number of samples
    # (including none).
    sample_column = np.asarray(labels).reshape(-1, 1)
    prototype_row = np.asarray(prototypes_labels).reshape(1, -1)
    ii_same = sample_column == prototype_row

    # For each sample mark the prototypes that have a different label.
    ii_diff = ~ii_same

    # For each sample find the closest prototype with the same label. Returns distance and index
    # of prototype
    dist_same, i_dist_same = _find_min(ii_same, distances)

    # For each sample find the closest prototype with a different label. Returns distance and
    # index of prototype
    dist_diff, i_dist_diff = _find_min(ii_diff, distances)

    return dist_same, dist_diff, i_dist_same, i_dist_diff