Source code for sklvq.distances._euclidean

import numpy as np

from scipy.spatial.distance import cdist

from ._base import DistanceBaseClass

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from ..models import GLVQ


class Euclidean(DistanceBaseClass):
    """Euclidean distance function

    Class that holds the euclidean distance function and its gradient.

    Parameters
    ----------
    force_all_finite : {True, False, "allow-nan"}
        Parameter to indicate that the NaNLVQ distance variant should be used. Within this
        class the value is read from the model passed to :meth:`__call__` and
        :meth:`gradient`, and only the value ``"allow-nan"`` switches on the NaN-ignoring
        code paths; any other value uses the regular euclidean distance.

    See also
    --------
    SquaredEuclidean, AdaptiveSquaredEuclidean, LocalAdaptiveSquaredEuclidean

    Notes
    -----
    Compatible with the :class:`.GLVQ` algorithm (only).
    """

    __slots__ = ()

    def __call__(self, data: np.ndarray, model: "GLVQ") -> np.ndarray:
        r"""Computes the Euclidean distance:

        .. math::
            d(\mathbf{w}, \mathbf{x}) = \sqrt{(\mathbf{x} - \mathbf{w})^{\top} (\mathbf{x} - \mathbf{w})},

        with :math:`\mathbf{w}` a prototype and :math:`\mathbf{x}` a sample.

        Parameters
        ----------
        data : ndarray with shape (n_samples, n_features)
            The data for which the distances to the prototypes of the model need to be computed.
        model : GLVQ
            A GLVQ model instance, containing the prototypes (``model.prototypes_``) and the
            ``force_all_finite`` setting.

        Returns
        -------
        ndarray with shape (n_samples, n_prototypes)
            Evaluation of the distance between each sample and prototype of the model.
        """
        # cdist accepts either the name of a built-in metric or a callable that is evaluated
        # for every (sample, prototype) pair. The callable is only needed for the NaN-aware
        # variant.
        if model.force_all_finite == "allow-nan":
            distance_function = _nan_euclidean
        else:
            distance_function = "euclidean"

        return cdist(data, model.prototypes_, distance_function)

    def gradient(self, data: np.ndarray, model: "GLVQ", i_prototype: int) -> np.ndarray:
        r"""Computes the gradient of the euclidean distance with respect to a single prototype:

        .. math::
            \frac{\partial d}{\partial \mathbf{w}_i} =
            -\frac{\mathbf{x} - \mathbf{w}_i}
            {\sqrt{(\mathbf{x} - \mathbf{w}_i)^{\top} (\mathbf{x} - \mathbf{w}_i)}}

        Where the distance between a sample and the prototype is zero, the division is
        skipped (the denominator is replaced by one), which yields a zero gradient row.

        Parameters
        ----------
        data : ndarray with shape (n_samples, n_features)
            The data for which the distance gradient to the prototypes of the model need to be
            computed.
        model : GLVQ
            A GLVQ model instance.
        i_prototype : int
            Index of the prototype to compute the gradient for.

        Returns
        -------
        ndarray with shape (n_samples, n_features)
            The gradient of the prototype with respect to every sample in the data.
        """
        prototype = model.get_model_params()[i_prototype, :]

        # New array, so zeroing entries below does not modify the caller's data.
        difference = data - prototype

        if model.force_all_finite == "allow-nan":
            # NaN differences are zeroed so they contribute neither to the norm nor to the
            # gradient.
            difference[np.isnan(difference)] = 0.0

        # Euclidean distance of every sample to the single prototype.
        # Equal to: np.sqrt(np.sum(difference ** 2, axis=1))
        denominator = np.sqrt(np.einsum("ij, ij -> i", difference, difference))

        # Might happen if a sample is exactly on a prototype, mostly caused by nans in the
        # data. Dividing by one instead of zero keeps those rows at zero rather than NaN.
        denominator[denominator == 0.0] = 1.0

        return -1 * difference / denominator[:, np.newaxis]
def _nan_euclidean(u: np.ndarray, v: np.ndarray) -> float:
    """Euclidean distance between two vectors ``u`` and ``v``, ignoring nans.

    Parameters
    ----------
    u, v : ndarray with shape (n_features,)
        The two vectors to compare. Features that are NaN in either vector are left out
        of the sum.

    Returns
    -------
    float
        Square root of the sum of squared differences over the features that are not NaN.
        If no such feature exists the result is 0.0, because ``np.nansum`` treats NaN
        entries as zero.
    """
    # The square must be applied element-wise *before* summing; squaring the summed
    # differences would let positive and negative differences cancel each other out.
    return np.sqrt(np.nansum((u - v) ** 2))