Source code for viqa.utils.misc

"""Module for miscellaneous utility functions."""

# Authors
# -------
# Author: Lukas Behammer
# Research Center Wels
# University of Applied Sciences Upper Austria, 2023
# CT Research Group
#
# Modifications
# -------------
# Original code, 2024, Lukas Behammer
# Add _get_binary and find_largest_region, 2024, Michael Stidi
# Update _get_binary and find_largest_region, 2024, Lukas Behammer
#
# License
# -------
# BSD-3-Clause License

import math

import numpy as np
import scipy.fft as fft
import scipy.ndimage as ndi
import skimage as ski
import torch
from scipy.ndimage import distance_transform_edt

from .visualization import visualize_2d, visualize_3d


def _to_grayscale(img):
    """Convert an image to grayscale."""
    img = _to_float(img)
    img_gray = ski.color.rgb2gray(img)
    img_gray[img_gray > 255] = 255
    return img_gray


def _rgb_to_yuv(img):
    """Convert an RGB image to YUV."""
    weights = np.array(
        [
            [0.2126, 0.7152, 0.0722],
            [-0.09991, -0.33609, 0.436],
            [0.615, -0.55861, -0.05639],
        ]
    )
    img_yuv = img @ weights
    return img_yuv


def _to_float(img, dtype=np.float64):
    """Convert a numpy array to float."""
    match img.dtype:
        case np.float32 | np.float64:
            return img
        case _:
            return img.astype(dtype)



[docs]
def correlate_convolve_abs(
    img, kernel, mode="correlate", border_mode="constant", value=0
):
    """Correlates or convolves a numpy array with a kernel.

    Parameters
    ----------
    img : np.ndarray
        Input image
    kernel : np.ndarray
        Kernel
    mode : str, default='correlate'
        'correlate' or 'convolve'
    border_mode : str, default='constant'
        'constant', 'reflect', 'nearest', 'mirror' or 'wrap'

        .. seealso::
            See NumPy documentation for :py:func:`numpy.pad`.

    value : int, optional, default=0
        Value for constant border mode

    Returns
    -------
    res : np.ndarray
        Convolved result as numpy array

    Raises
    ------
    ValueError
        If ``border_mode`` is not supported \n
        If number of dimensions is not supported

    See Also
    --------
    :py:func:`scipy.signal.correlate`
    :py:func:`scipy.signal.convolve`

    Notes
    -----
    Correlates or convolves a numpy array with a kernel in the form
    :math:`\\operatorname{mean}(\\lvert \\pmb{I} \\cdot \\mathcal{K} \\rvert)` with
    :math:`\\pmb{I}` denoting the image and :math:`\\mathcal{K}` denoting the Kernel.
    Works in 2D and 3D.

    Examples
    --------
    >>> import numpy as np
    >>> from viqa import kernels
    >>> img = np.random.rand(128, 128)
    >>> kernel = kernels.sobel_kernel_2d_x()
    >>> res = correlate_convolve_abs(
    ...         img,
    ...         kernel,
    ...         mode="correlate",
    ...         border_mode="constant",
    ...         value=0
    ... )
    """
    if mode == "convolve":  # If mode is convolve
        kernel = np.flip(kernel)  # Flip kernel

    kernel_size = kernel.shape[0]  # Get kernel size
    ndim = len(img.shape)  # Get number of dimensions

    # Pad image
    match border_mode:
        case "constant":
            origin = np.pad(img, kernel_size, mode="constant", constant_values=value)
        case "reflect":
            origin = np.pad(img, kernel_size, mode="reflect")
        case "nearest":
            origin = np.pad(img, kernel_size, mode="edge")
        case "mirror":
            origin = np.pad(img, kernel_size, mode="symmetric")
        case "wrap":
            origin = np.pad(img, kernel_size, mode="wrap")
        case _:
            raise ValueError("Border mode not supported")

    # Correlate or convolve
    res = np.zeros(img.shape)  # Initialize result array
    for k in range(0, img.shape[0]):
        for m in range(0, img.shape[1]):
            # Check if 2D or 3D
            if ndim == 3:
                for n in range(0, img.shape[2]):
                    res[k, m, n] = np.mean(
                        abs(
                            kernel
                            * origin[
                                k : k + kernel_size,
                                m : m + kernel_size,
                                n : n + kernel_size,
                            ]
                        )
                    )
            elif ndim == 2:
                res[k, m] = np.mean(
                    abs(kernel * origin[k : k + kernel_size, m : m + kernel_size])
                )
            else:
                raise ValueError("Number of dimensions not supported")

    return res



def _extract_blocks(img, block_size, stride):
    """Extract blocks from an image.

    Parameters
    ----------
    img : np.ndarray
        Input image
    block_size : int
        Size of the block
    stride : int
        Stride

    Returns
    -------
    np.ndarray
        Numpy array of blocks
    """
    boxes = []
    m, n = img.shape
    for i in range(0, m - (block_size - 1), stride):
        for j in range(0, n - (block_size - 1), stride):
            boxes.append(img[i : i + block_size, j : j + block_size])
            # yield(img[i:i+block_size, j:j+block_size])  # TODO: change to generator
    return np.array(boxes)


def _fft(img):
    """Wrap scipy fft."""
    return fft.fftshift(fft.fftn(img))


def _ifft(fourier_img):
    """Wrap scipy ifft."""
    return fft.ifftn(fft.ifftshift(fourier_img))


def _is_even(num):
    """Check if a number is even."""
    return num % 2 == 0



[docs]
def gabor_convolve(
    img,
    scales_num: int,
    orientations_num: int,
    min_wavelength=3,
    wavelength_scaling=3,
    bandwidth_param=0.55,
    d_theta_on_sigma=1.5,
):
    """Compute Log Gabor filter responses.

    Parameters
    ----------
    img : np.ndarray
        Image to be filtered
    scales_num : int
        Number of wavelet scales
    orientations_num : int
        Number of filter orientations
    min_wavelength : int, default=3
        Wavelength of smallest scale filter, maximum frequency is set by this value,
        should be >= 3
    wavelength_scaling : int, default=3
        Scaling factor between successive filters
    bandwidth_param : float, default=0.55
        Ratio of standard deviation of the Gaussian describing log Gabor filter's
        transfer function in the frequency domain to the filter's center frequency
        (0.74 for 1 octave bandwidth, 0.55 for 2 octave bandwidth, 0.41 for 3 octave
        bandwidth)
    d_theta_on_sigma : float, default=1.5
        Ratio of angular interval between filter orientations and standard deviation of
        angular Gaussian spreading function, a value of 1.5 results in approximately
        the minimum overlap needed to get even spectral coverage

    Returns
    -------
    np.ndarray
        Log Gabor filtered image

    Notes
    -----
    Even spectral coverage and independence of filter output are dependent on
    bandwidth_param vs wavelength_scaling.
    Some experimental values: \n
    0.85 <--> 1.3 \n
    0.74 <--> 1.6 (1 octave bandwidth) \n
    0.65 <--> 2.1 \n
    0.55 <--> 3.0 (2 octave bandwidth) \n
    Additionally d_theta_on_sigma should be set to 1.5 for approximately the minimum
    overlap needed to get even spectral coverage.

    For more information see [1]_. This code was originally written in Matlab by Peter
    Kovesi and adapted by Eric Larson. The adaption by Eric Larson is available under
    [2]_.

    References
    ----------
    .. [1] Kovesi, Peter.
        https://www.peterkovesi.com/matlabfns/PhaseCongruency/Docs/convexpl.html
    .. [2] Larson, E. C. (2008). http://vision.eng.shizuoka.ac.jp/mad
        (version 2011_10_07)
    .. [3] Field, D. J. (1987). Relations between the statistics of natural images and
        the response properties of cortical cells. Journal of The Optical Society of
        America A, 4(12), 2379–2394. https://doi.org/10.1364/JOSAA.4.002379

    Examples
    --------
    >>> import numpy as np
    >>> from viqa.utils import gabor_convolve
    >>> img = np.random.rand(128, 128)
    >>> res = gabor_convolve(img, scales_num=3, orientations_num=4)
    """
    # Authors
    # -------
    # Author: Peter Kovesi
    # Department of Computer Science & Software Engineering
    # The University of Western Australia
    # pk@cs.uwa.edu.au  https://peterkovesi.com/projects/
    #
    # Adaption: Eric Larson
    # Department of Electrical and Computer Engineering
    # Oklahoma State University, 2008
    # University Of Washington Seattle, 2009
    # Image Coding and Analysis lab
    #
    # Translation: Lukas Behammer
    # Research Center Wels
    # University of Applied Sciences Upper Austria
    # CT Research Group
    #
    # MODIFICATIONS
    # -------------
    # Original code, May 2001, Peter Kovesi
    # Altered, 2008, Eric Larson
    # Altered precomputations, 2011, Eric Larson
    # Translated to Python, 2024, Lukas Behammer
    #
    # LICENSE
    # -------
    # Copyright (c) 2001-2010 Peter Kovesi
    # www.peterkovesi.com
    #
    # Permission is hereby granted, free of charge, to any person obtaining a copy
    # of this software and associated documentation files (the "Software"), to deal
    # in the Software without restriction, subject to the following conditions:
    #
    # The above copyright notice and this permission notice shall be included in
    # all copies or substantial portions of the Software.
    #
    # The Software is provided "as is", without warranty of any kind.

    # Precomputing and assigning variables
    scales = np.arange(0, scales_num)
    orientations = np.arange(0, orientations_num)
    rows, cols = img.shape  # image dimensions
    # center of image
    col_c = math.floor(cols / 2)
    row_c = math.floor(rows / 2)

    # set up filter wavelengths from scales
    wavelengths = [
        min_wavelength * wavelength_scaling**scale_n for scale_n in range(0, scales_num)
    ]

    # convert image to frequency domain
    im_fft = fft.fftn(img)

    # compute matrices of same size as im with values ranging from -0.5 to 0.5 (-1.0 to
    # 1.0) for horizontal and vertical
    #   directions each
    def _get_range(cols_rows):
        if _is_even(cols_rows):
            range_ = np.linspace(-cols_rows / 2, (cols_rows - 2) / 2, cols_rows) / (
                cols_rows / 2
            )
        else:
            range_ = np.linspace(-cols_rows / 2, cols_rows / 2, cols_rows) / (
                cols_rows / 2
            )
        return range_

    x_range = _get_range(cols)
    y_range = _get_range(rows)
    x, y = np.meshgrid(x_range, y_range)

    # filters have radial component (frequency band) and an angular component
    # (orientation), those are multiplied to get the final filter

    # compute radial distance from center of matrix
    radius = np.sqrt(x**2 + y**2)
    radius[radius == 0] = 1  # avoid logarithm of zero

    # compute polar angle and its sine and cosine
    theta = np.arctan2(-y, x)
    sin_theta = np.sin(theta)
    cos_theta = np.cos(theta)

    # compute standard deviation of angular Gaussian function
    theta_sigma = np.pi / orientations_num / d_theta_on_sigma

    # compute radial component
    radial_components = []
    for scale_n, _scale in enumerate(scales):  # for each scale
        center_freq = 1.0 / wavelengths[scale_n]  # center frequency of filter
        normalised_center_freq = center_freq / 0.5
        # log Gabor response for each frequency band (scale)
        log_gabor = np.exp(
            (np.log(radius) - np.log(normalised_center_freq)) ** 2
            / -(2 * np.log(bandwidth_param) ** 2)
        )
        log_gabor[row_c, col_c] = 0
        radial_components.append(log_gabor)

    # angular component and final filtering
    res = np.empty(
        (scales_num, orientations_num), dtype=object
    )  # precompute result array
    for orientation_n, _orientation in enumerate(orientations):  # for each orientation
        # compute angular component
        # Pre-compute filter data specific to this orientation
        # For each point in the filter matrix calculate the angular distance from the
        # specified filter orientation. To overcome the angular wrap-around problem
        # sine difference and cosine difference values are first computed and then
        # the atan2 function is used to determine angular distance.
        angle = orientation_n * np.pi / orientations_num  # filter angle
        diff_sin = sin_theta * np.cos(angle) - cos_theta * np.sin(
            angle
        )  # difference of sin
        diff_cos = cos_theta * np.cos(angle) + sin_theta * np.sin(
            angle
        )  # difference of cos
        angular_distance = abs(
            np.arctan2(diff_sin, diff_cos)
        )  # absolute angular distance
        spread = np.exp(
            (-(angular_distance**2)) / (2 * theta_sigma**2)
        )  # angular filter component

        # filtering
        for scale_n, _scale in enumerate(scales):  # for each scale
            # compute final filter
            filter_ = fft.fftshift(radial_components[scale_n] * spread)
            filter_[0, 0] = 0

            # apply filter
            res[scale_n, orientation_n] = fft.ifftn(im_fft * filter_)

    return res



def _check_chromatic(img_r, img_m, chromatic):
    """Permute image based on dimensions and chromaticity."""
    img_r = _to_float(img_r, np.float32)
    img_m = _to_float(img_m, np.float32)
    # check if chromatic
    if chromatic is False:
        if img_r.ndim == 3:
            # 3D images
            img_r_tensor = torch.tensor(img_r).unsqueeze(0).permute(3, 0, 1, 2)
            img_m_tensor = torch.tensor(img_m).unsqueeze(0).permute(3, 0, 1, 2)
        elif img_r.ndim == 2:
            # 2D images
            img_r_tensor = torch.tensor(img_r).unsqueeze(0).unsqueeze(0)
            img_m_tensor = torch.tensor(img_m).unsqueeze(0).unsqueeze(0)
        else:
            raise ValueError("Image format not supported.")
    else:
        img_r_tensor = torch.tensor(img_r).permute(2, 0, 1).unsqueeze(0)
        img_m_tensor = torch.tensor(img_m).permute(2, 0, 1).unsqueeze(0)
    return img_r_tensor, img_m_tensor


def _check_border_too_close(center, radius):
    for center_coordinate in center:
        if not isinstance(center_coordinate, int):
            raise TypeError("Center has to be a tuple of integers.")
        if abs(center_coordinate) - radius < 0:
            raise ValueError(
                "Center has to be at least the radius away from the border."
            )


def _get_binary(img, lower_threshold, upper_threshold, show=False):
    """Get the binary of an image.

    Parameters
    ----------
    img : np.ndarray
        Input image
    lower_threshold : int
        Lower threshold as percentile
    upper_threshold : int
        Upper threshold as percentile
    show : bool, optional
        If True, the binary image is visualized. Default is False.

    Returns
    -------
    np.ndarray
        Binary image
    """
    if img.ndim == 3 and img.shape[-1] == 3:  # 2D color image
        img = _to_grayscale(img)

    # Get the lower and upper threshold and convert to binary
    lower_threshold_perc = np.percentile(img, lower_threshold)
    upper_threshold_perc = np.percentile(img, upper_threshold)
    binary_image = np.logical_and(
        img > lower_threshold_perc, img <= upper_threshold_perc
    )

    # Visualize the binary image
    if show:
        if img.ndim == 2:  # 2D image
            visualize_2d(binary_image)
        elif img.ndim == 3 and img.shape[-1] > 3:  # 3D image
            visualize_3d(binary_image, [img.shape[dim] // 2 for dim in range(3)])
        else:
            raise ValueError("Image must be 2D or 3D to visualize.")

    return binary_image



[docs]
def find_largest_region(img, iterations=5, region_type="cubic"):
    """Find the largest region in a binary image.

    The function finds the largest region in a binary region by calculating the exact
    euclidean distance transform. The center and radius of the largest region are
    returned, as well as the region itself based on the given region type.

    Parameters
    ----------
    img : np.ndarray
        Binary image
    iterations : int, optional
        Number of iterations for dilation and erosion. Default is 5.
    region_type : {'cubic', 'spherical', 'full', 'original'}, optional
        Type of region to be found. Default is 'cubic'.
        If 'original' the original image is returned.
        If 'full' the full region is returned (eroded twice after cleaning with dilation
        and erosion).
        If 'cubic' the region is returned as a cube. Alias for 'cubic' are 'cube' and
        'square'.
        If 'spherical' the region is returned as a sphere. Alias for 'spherical' are
        'sphere' and 'circle'.
        If other values are passed, the region is returned after dilation and erosion.

        .. note::

                This only influences the returned array, not the calculation of the
                largest region.

    Returns
    -------
    tuple
        Coordinates of the largest region
    int
        Radius of the largest region
    np.ndarray
        Largest region as masked array
    """
    # Check if image is binary
    minimum, maximum = img.min(), img.max()
    if img.dtype != "bool" or not (minimum == 0 and maximum == 1):
        raise ValueError("Image must be binary.")

    struct_3d = np.array(
        [
            [[0, 0, 0], [0, 1, 0], [0, 0, 0]],
            [[0, 1, 0], [1, 1, 1], [0, 1, 0]],
            [[0, 0, 0], [0, 1, 0], [0, 0, 0]],
        ],
        dtype="uint8",
    )

    struct_2d = np.array(
        [[0, 1, 0], [1, 1, 1], [0, 1, 0]],
        dtype="uint8",
    )

    if img.ndim == 2:
        struct = struct_2d
    elif img.ndim == 3 and img.shape[-1] == 3:  # 2D color image
        struct = struct_2d
    elif img.ndim == 3 and img.shape[-1] > 3:  # 3D image
        struct = struct_3d
    else:
        raise ValueError("Image must be 2D or 3D.")

    # Clean the image
    img_dilated = ndi.binary_dilation(img, structure=struct, iterations=iterations)
    img_cleaned = ndi.binary_erosion(
        img_dilated, structure=struct, iterations=iterations + 1
    )

    # Calculate the distance transform
    distance = distance_transform_edt(img_cleaned)
    center = np.unravel_index(np.argmax(distance), distance.shape)
    center = tuple(int(coord) for coord in center)
    radius = int(distance[*center])

    # Create the region
    if region_type in {"cubic", "cube", "square"}:
        region_masked = _to_cubic(img_cleaned, center, radius)
    elif region_type in {"spherical", "sphere", "circle"}:
        region_masked = _to_spherical(img_cleaned, center, radius)
    elif region_type == "full":
        img_eroded = ndi.binary_erosion(img_cleaned, structure=struct, iterations=2)
        region_masked = np.ma.array(img_cleaned, mask=~img_eroded, copy=True)
    elif region_type == "original":
        region_masked = img
    else:
        region_masked = img_cleaned

    return center, radius, region_masked



def _to_spherical(img, center, radius):
    """Create a sphere by masking an image."""
    if img.ndim == 2 or (img.ndim == 3 and img.shape[-1] == 3):
        if len(center) != 2:
            raise ValueError("Center must be 2D.")
        x, y = np.ogrid[
            -center[0] : img.shape[0] - center[0],
            -center[1] : img.shape[1] - center[1],
        ]
        mask = x**2 + y**2 <= radius**2

        if img.ndim == 3:  # 2D color image
            mask = np.repeat(mask[:, :, np.newaxis], 3, axis=2)
    elif img.ndim == 3 and img.shape[-1] > 3:
        if len(center) != 3:
            raise ValueError("Center must be 3D.")
        x, y, z = np.ogrid[
            -center[0] : img.shape[0] - center[0],
            -center[1] : img.shape[1] - center[1],
            -center[2] : img.shape[2] - center[2],
        ]
        mask = x**2 + y**2 + z**2 <= radius**2
    else:
        raise ValueError("Center must be 2D or 3D.")
    region = np.ma.array(img, mask=~mask, copy=True)
    return region


def _to_cubic(img, center, radius):
    """Create a cube by masking an image."""
    mask = np.zeros_like(img, dtype=bool)
    if img.ndim == 2 or (img.ndim == 3 and img.shape[-1] == 3):
        if len(center) != 2:
            raise ValueError("Center must be 2D.")
        mask[
            center[0] - radius : center[0] + radius,
            center[1] - radius : center[1] + radius,
        ] = 1

        if img.ndim == 3:  # 2D color image
            mask = np.repeat(mask[:, :, np.newaxis], 3, axis=2)
    elif img.ndim == 3 and img.shape[-1] > 3:
        if len(center) != 3:
            raise ValueError("Center must be 3D.")
        mask[
            center[0] - radius : center[0] + radius,
            center[1] - radius : center[1] + radius,
            center[2] - radius : center[2] + radius,
        ] = 1
    else:
        raise ValueError("Image must be 2D or 3D.")
    region = np.ma.array(img, mask=~mask, copy=True)
    return region