Source code for viqa.utils.misc

"""Module for miscellaneous utility functions."""

# Authors
# -------
# Author: Lukas Behammer
# Research Center Wels
# University of Applied Sciences Upper Austria, 2023
# CT Research Group
#
# Modifications
# -------------
# Original code, 2024, Lukas Behammer
# Add _get_binary and find_largest_region, 2024, Michael Stidi
# Update _get_binary and find_largest_region, 2024, Lukas Behammer
#
# License
# -------
# BSD-3-Clause License

import math

import numpy as np
import scipy.fft as fft
import scipy.ndimage as ndi
import skimage as ski
import torch
from scipy.ndimage import distance_transform_edt

from .visualization import visualize_2d, visualize_3d


def _to_grayscale(img):
    """Convert an image to grayscale."""
    img = _to_float(img)
    img_gray = ski.color.rgb2gray(img)
    img_gray[img_gray > 255] = 255
    return img_gray


def _rgb_to_yuv(img):
    """Convert an RGB image to YUV."""
    weights = np.array(
        [
            [0.2126, 0.7152, 0.0722],
            [-0.09991, -0.33609, 0.436],
            [0.615, -0.55861, -0.05639],
        ]
    )
    img_yuv = img @ weights
    return img_yuv


def _to_float(img, dtype=np.float64):
    """Convert a numpy array to float."""
    match img.dtype:
        case np.float32 | np.float64:
            return img
        case _:
            return img.astype(dtype)


[docs] def correlate_convolve_abs( img, kernel, mode="correlate", border_mode="constant", value=0 ): """Correlates or convolves a numpy array with a kernel. Parameters ---------- img : np.ndarray Input image kernel : np.ndarray Kernel mode : str, default='correlate' 'correlate' or 'convolve' border_mode : str, default='constant' 'constant', 'reflect', 'nearest', 'mirror' or 'wrap' .. seealso:: See NumPy documentation for :py:func:`numpy.pad`. value : int, optional, default=0 Value for constant border mode Returns ------- res : np.ndarray Convolved result as numpy array Raises ------ ValueError If ``border_mode`` is not supported \n If number of dimensions is not supported See Also -------- :py:func:`scipy.signal.correlate` :py:func:`scipy.signal.convolve` Notes ----- Correlates or convolves a numpy array with a kernel in the form :math:`\\operatorname{mean}(\\lvert \\pmb{I} \\cdot \\mathcal{K} \\rvert)` with :math:`\\pmb{I}` denoting the image and :math:`\\mathcal{K}` denoting the Kernel. Works in 2D and 3D. Examples -------- >>> import numpy as np >>> from viqa import kernels >>> img = np.random.rand(128, 128) >>> kernel = kernels.sobel_kernel_2d_x() >>> res = correlate_convolve_abs( ... img, ... kernel, ... mode="correlate", ... border_mode="constant", ... value=0 ... ) """ if mode == "convolve": # If mode is convolve kernel = np.flip(kernel) # Flip kernel kernel_size = kernel.shape[0] # Get kernel size ndim = len(img.shape) # Get number of dimensions # Pad image match border_mode: case "constant": origin = np.pad(img, kernel_size, mode="constant", constant_values=value) case "reflect": origin = np.pad(img, kernel_size, mode="reflect") case "nearest": origin = np.pad(img, kernel_size, mode="edge") case "mirror": origin = np.pad(img, kernel_size, mode="symmetric") case "wrap": origin = np.pad(img, kernel_size, mode="wrap") case _: raise ValueError("Border mode not supported") # Correlate or convolve res = np.zeros(img.shape) # Initialize result array for k in range(0, img.shape[0]): for m in range(0, img.shape[1]): # Check if 2D or 3D if ndim == 3: for n in range(0, img.shape[2]): res[k, m, n] = np.mean( abs( kernel * origin[ k : k + kernel_size, m : m + kernel_size, n : n + kernel_size, ] ) ) elif ndim == 2: res[k, m] = np.mean( abs(kernel * origin[k : k + kernel_size, m : m + kernel_size]) ) else: raise ValueError("Number of dimensions not supported") return res
def _extract_blocks(img, block_size, stride): """Extract blocks from an image. Parameters ---------- img : np.ndarray Input image block_size : int Size of the block stride : int Stride Returns ------- np.ndarray Numpy array of blocks """ boxes = [] m, n = img.shape for i in range(0, m - (block_size - 1), stride): for j in range(0, n - (block_size - 1), stride): boxes.append(img[i : i + block_size, j : j + block_size]) # yield(img[i:i+block_size, j:j+block_size]) # TODO: change to generator return np.array(boxes) def _fft(img): """Wrap scipy fft.""" return fft.fftshift(fft.fftn(img)) def _ifft(fourier_img): """Wrap scipy ifft.""" return fft.ifftn(fft.ifftshift(fourier_img)) def _is_even(num): """Check if a number is even.""" return num % 2 == 0
[docs] def gabor_convolve( img, scales_num: int, orientations_num: int, min_wavelength=3, wavelength_scaling=3, bandwidth_param=0.55, d_theta_on_sigma=1.5, ): """Compute Log Gabor filter responses. Parameters ---------- img : np.ndarray Image to be filtered scales_num : int Number of wavelet scales orientations_num : int Number of filter orientations min_wavelength : int, default=3 Wavelength of smallest scale filter, maximum frequency is set by this value, should be >= 3 wavelength_scaling : int, default=3 Scaling factor between successive filters bandwidth_param : float, default=0.55 Ratio of standard deviation of the Gaussian describing log Gabor filter's transfer function in the frequency domain to the filter's center frequency (0.74 for 1 octave bandwidth, 0.55 for 2 octave bandwidth, 0.41 for 3 octave bandwidth) d_theta_on_sigma : float, default=1.5 Ratio of angular interval between filter orientations and standard deviation of angular Gaussian spreading function, a value of 1.5 results in approximately the minimum overlap needed to get even spectral coverage Returns ------- np.ndarray Log Gabor filtered image Notes ----- Even spectral coverage and independence of filter output are dependent on bandwidth_param vs wavelength_scaling. Some experimental values: \n 0.85 <--> 1.3 \n 0.74 <--> 1.6 (1 octave bandwidth) \n 0.65 <--> 2.1 \n 0.55 <--> 3.0 (2 octave bandwidth) \n Additionally d_theta_on_sigma should be set to 1.5 for approximately the minimum overlap needed to get even spectral coverage. For more information see [1]_. This code was originally written in Matlab by Peter Kovesi and adapted by Eric Larson. The adaption by Eric Larson is available under [2]_. References ---------- .. [1] Kovesi, Peter. https://www.peterkovesi.com/matlabfns/PhaseCongruency/Docs/convexpl.html .. [2] Larson, E. C. (2008). http://vision.eng.shizuoka.ac.jp/mad (version 2011_10_07) .. [3] Field, D. J. (1987). Relations between the statistics of natural images and the response properties of cortical cells. Journal of The Optical Society of America A, 4(12), 2379–2394. https://doi.org/10.1364/JOSAA.4.002379 Examples -------- >>> import numpy as np >>> from viqa.utils import gabor_convolve >>> img = np.random.rand(128, 128) >>> res = gabor_convolve(img, scales_num=3, orientations_num=4) """ # Authors # ------- # Author: Peter Kovesi # Department of Computer Science & Software Engineering # The University of Western Australia # pk@cs.uwa.edu.au https://peterkovesi.com/projects/ # # Adaption: Eric Larson # Department of Electrical and Computer Engineering # Oklahoma State University, 2008 # University Of Washington Seattle, 2009 # Image Coding and Analysis lab # # Translation: Lukas Behammer # Research Center Wels # University of Applied Sciences Upper Austria # CT Research Group # # MODIFICATIONS # ------------- # Original code, May 2001, Peter Kovesi # Altered, 2008, Eric Larson # Altered precomputations, 2011, Eric Larson # Translated to Python, 2024, Lukas Behammer # # LICENSE # ------- # Copyright (c) 2001-2010 Peter Kovesi # www.peterkovesi.com # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # The Software is provided "as is", without warranty of any kind. # Precomputing and assigning variables scales = np.arange(0, scales_num) orientations = np.arange(0, orientations_num) rows, cols = img.shape # image dimensions # center of image col_c = math.floor(cols / 2) row_c = math.floor(rows / 2) # set up filter wavelengths from scales wavelengths = [ min_wavelength * wavelength_scaling**scale_n for scale_n in range(0, scales_num) ] # convert image to frequency domain im_fft = fft.fftn(img) # compute matrices of same size as im with values ranging from -0.5 to 0.5 (-1.0 to # 1.0) for horizontal and vertical # directions each def _get_range(cols_rows): if _is_even(cols_rows): range_ = np.linspace(-cols_rows / 2, (cols_rows - 2) / 2, cols_rows) / ( cols_rows / 2 ) else: range_ = np.linspace(-cols_rows / 2, cols_rows / 2, cols_rows) / ( cols_rows / 2 ) return range_ x_range = _get_range(cols) y_range = _get_range(rows) x, y = np.meshgrid(x_range, y_range) # filters have radial component (frequency band) and an angular component # (orientation), those are multiplied to get the final filter # compute radial distance from center of matrix radius = np.sqrt(x**2 + y**2) radius[radius == 0] = 1 # avoid logarithm of zero # compute polar angle and its sine and cosine theta = np.arctan2(-y, x) sin_theta = np.sin(theta) cos_theta = np.cos(theta) # compute standard deviation of angular Gaussian function theta_sigma = np.pi / orientations_num / d_theta_on_sigma # compute radial component radial_components = [] for scale_n, _scale in enumerate(scales): # for each scale center_freq = 1.0 / wavelengths[scale_n] # center frequency of filter normalised_center_freq = center_freq / 0.5 # log Gabor response for each frequency band (scale) log_gabor = np.exp( (np.log(radius) - np.log(normalised_center_freq)) ** 2 / -(2 * np.log(bandwidth_param) ** 2) ) log_gabor[row_c, col_c] = 0 radial_components.append(log_gabor) # angular component and final filtering res = np.empty( (scales_num, orientations_num), dtype=object ) # precompute result array for orientation_n, _orientation in enumerate(orientations): # for each orientation # compute angular component # Pre-compute filter data specific to this orientation # For each point in the filter matrix calculate the angular distance from the # specified filter orientation. To overcome the angular wrap-around problem # sine difference and cosine difference values are first computed and then # the atan2 function is used to determine angular distance. angle = orientation_n * np.pi / orientations_num # filter angle diff_sin = sin_theta * np.cos(angle) - cos_theta * np.sin( angle ) # difference of sin diff_cos = cos_theta * np.cos(angle) + sin_theta * np.sin( angle ) # difference of cos angular_distance = abs( np.arctan2(diff_sin, diff_cos) ) # absolute angular distance spread = np.exp( (-(angular_distance**2)) / (2 * theta_sigma**2) ) # angular filter component # filtering for scale_n, _scale in enumerate(scales): # for each scale # compute final filter filter_ = fft.fftshift(radial_components[scale_n] * spread) filter_[0, 0] = 0 # apply filter res[scale_n, orientation_n] = fft.ifftn(im_fft * filter_) return res
def _check_chromatic(img_r, img_m, chromatic): """Permute image based on dimensions and chromaticity.""" img_r = _to_float(img_r, np.float32) img_m = _to_float(img_m, np.float32) # check if chromatic if chromatic is False: if img_r.ndim == 3: # 3D images img_r_tensor = torch.tensor(img_r).unsqueeze(0).permute(3, 0, 1, 2) img_m_tensor = torch.tensor(img_m).unsqueeze(0).permute(3, 0, 1, 2) elif img_r.ndim == 2: # 2D images img_r_tensor = torch.tensor(img_r).unsqueeze(0).unsqueeze(0) img_m_tensor = torch.tensor(img_m).unsqueeze(0).unsqueeze(0) else: raise ValueError("Image format not supported.") else: img_r_tensor = torch.tensor(img_r).permute(2, 0, 1).unsqueeze(0) img_m_tensor = torch.tensor(img_m).permute(2, 0, 1).unsqueeze(0) return img_r_tensor, img_m_tensor def _check_border_too_close(center, radius): for center_coordinate in center: if not isinstance(center_coordinate, int): raise TypeError("Center has to be a tuple of integers.") if abs(center_coordinate) - radius < 0: raise ValueError( "Center has to be at least the radius away from the border." ) def _get_binary(img, lower_threshold, upper_threshold, show=False): """Get the binary of an image. Parameters ---------- img : np.ndarray Input image lower_threshold : int Lower threshold as percentile upper_threshold : int Upper threshold as percentile show : bool, optional If True, the binary image is visualized. Default is False. Returns ------- np.ndarray Binary image """ if img.ndim == 3 and img.shape[-1] == 3: # 2D color image img = _to_grayscale(img) # Get the lower and upper threshold and convert to binary lower_threshold_perc = np.percentile(img, lower_threshold) upper_threshold_perc = np.percentile(img, upper_threshold) binary_image = np.logical_and( img > lower_threshold_perc, img <= upper_threshold_perc ) # Visualize the binary image if show: if img.ndim == 2: # 2D image visualize_2d(binary_image) elif img.ndim == 3 and img.shape[-1] > 3: # 3D image visualize_3d(binary_image, [img.shape[dim] // 2 for dim in range(3)]) else: raise ValueError("Image must be 2D or 3D to visualize.") return binary_image
[docs] def find_largest_region(img, iterations=5, region_type="cubic"): """Find the largest region in a binary image. The function finds the largest region in a binary region by calculating the exact euclidean distance transform. The center and radius of the largest region are returned, as well as the region itself based on the given region type. Parameters ---------- img : np.ndarray Binary image iterations : int, optional Number of iterations for dilation and erosion. Default is 5. region_type : {'cubic', 'spherical', 'full', 'original'}, optional Type of region to be found. Default is 'cubic'. If 'original' the original image is returned. If 'full' the full region is returned (eroded twice after cleaning with dilation and erosion). If 'cubic' the region is returned as a cube. Alias for 'cubic' are 'cube' and 'square'. If 'spherical' the region is returned as a sphere. Alias for 'spherical' are 'sphere' and 'circle'. If other values are passed, the region is returned after dilation and erosion. .. note:: This only influences the returned array, not the calculation of the largest region. Returns ------- tuple Coordinates of the largest region int Radius of the largest region np.ndarray Largest region as masked array """ # Check if image is binary minimum, maximum = img.min(), img.max() if img.dtype != "bool" or not (minimum == 0 and maximum == 1): raise ValueError("Image must be binary.") struct_3d = np.array( [ [[0, 0, 0], [0, 1, 0], [0, 0, 0]], [[0, 1, 0], [1, 1, 1], [0, 1, 0]], [[0, 0, 0], [0, 1, 0], [0, 0, 0]], ], dtype="uint8", ) struct_2d = np.array( [[0, 1, 0], [1, 1, 1], [0, 1, 0]], dtype="uint8", ) if img.ndim == 2: struct = struct_2d elif img.ndim == 3 and img.shape[-1] == 3: # 2D color image struct = struct_2d elif img.ndim == 3 and img.shape[-1] > 3: # 3D image struct = struct_3d else: raise ValueError("Image must be 2D or 3D.") # Clean the image img_dilated = ndi.binary_dilation(img, structure=struct, iterations=iterations) img_cleaned = ndi.binary_erosion( img_dilated, structure=struct, iterations=iterations + 1 ) # Calculate the distance transform distance = distance_transform_edt(img_cleaned) center = np.unravel_index(np.argmax(distance), distance.shape) center = tuple(int(coord) for coord in center) radius = int(distance[*center]) # Create the region if region_type in {"cubic", "cube", "square"}: region_masked = _to_cubic(img_cleaned, center, radius) elif region_type in {"spherical", "sphere", "circle"}: region_masked = _to_spherical(img_cleaned, center, radius) elif region_type == "full": img_eroded = ndi.binary_erosion(img_cleaned, structure=struct, iterations=2) region_masked = np.ma.array(img_cleaned, mask=~img_eroded, copy=True) elif region_type == "original": region_masked = img else: region_masked = img_cleaned return center, radius, region_masked
def _to_spherical(img, center, radius): """Create a sphere by masking an image.""" if img.ndim == 2 or (img.ndim == 3 and img.shape[-1] == 3): if len(center) != 2: raise ValueError("Center must be 2D.") x, y = np.ogrid[ -center[0] : img.shape[0] - center[0], -center[1] : img.shape[1] - center[1], ] mask = x**2 + y**2 <= radius**2 if img.ndim == 3: # 2D color image mask = np.repeat(mask[:, :, np.newaxis], 3, axis=2) elif img.ndim == 3 and img.shape[-1] > 3: if len(center) != 3: raise ValueError("Center must be 3D.") x, y, z = np.ogrid[ -center[0] : img.shape[0] - center[0], -center[1] : img.shape[1] - center[1], -center[2] : img.shape[2] - center[2], ] mask = x**2 + y**2 + z**2 <= radius**2 else: raise ValueError("Center must be 2D or 3D.") region = np.ma.array(img, mask=~mask, copy=True) return region def _to_cubic(img, center, radius): """Create a cube by masking an image.""" mask = np.zeros_like(img, dtype=bool) if img.ndim == 2 or (img.ndim == 3 and img.shape[-1] == 3): if len(center) != 2: raise ValueError("Center must be 2D.") mask[ center[0] - radius : center[0] + radius, center[1] - radius : center[1] + radius, ] = 1 if img.ndim == 3: # 2D color image mask = np.repeat(mask[:, :, np.newaxis], 3, axis=2) elif img.ndim == 3 and img.shape[-1] > 3: if len(center) != 3: raise ValueError("Center must be 3D.") mask[ center[0] - radius : center[0] + radius, center[1] - radius : center[1] + radius, center[2] - radius : center[2] + radius, ] = 1 else: raise ValueError("Image must be 2D or 3D.") region = np.ma.array(img, mask=~mask, copy=True) return region