Source code for histomicstk.features.compute_fsd_features

import numpy as np


[docs] def compute_fsd_features(im_label, K=128, Fs=6, Delta=8, rprops=None): """ Calculates `Fourier shape descriptors` for each objects. Parameters ---------- im_label : array_like A labeled mask image wherein intensity of a pixel is the ID of the object it belongs to. Non-zero values are considered to be foreground objects. K : int, optional Number of points for boundary resampling to calculate fourier descriptors. Default value = 128. Fs : int, optional Number of frequency bins for calculating FSDs. Default value = 6. Delta : int, optional Used to dilate nuclei and define cytoplasm region. Default value = 8. rprops : output of skimage.measure.regionprops, optional rprops = skimage.measure.regionprops( im_label ). If rprops is not passed then it will be computed inside which will increase the computation time. Returns ------- fdata: Pandas data frame containing the FSD features for each object/label. References ---------- .. [#] D. Zhang et al. "A comparative study on shape retrieval using Fourier descriptors with different shape signatures," In Proc. ICIMADE01, 2001. """ import pandas as pd from skimage.measure import regionprops from skimage.segmentation import find_boundaries # List of feature names feature_list = [] for i in range(Fs): feature_list = np.append(feature_list, 'Shape.FSD' + str(i + 1)) # get Label size x sizex = im_label.shape[0] sizey = im_label.shape[1] # get the number of objects in Label if rprops is None: rprops = regionprops(im_label) # create pandas data frame containing the features for each object numFeatures = len(feature_list) numLabels = len(rprops) # pre-compute Interval outside the loop Interval = np.round( np.power(2, np.linspace(0, np.log2(K) - 1, Fs + 1, endpoint=True)), ).astype(np.uint8) # initialize an empty list to collect data data_list = [] for i in range(numLabels): # get bounds of dilated nucleus min_row, max_row, min_col, max_col = _GetBounds( rprops[i].bbox, Delta, sizex, sizey, ) # grab label mask lmask = (im_label[min_row:max_row, min_col:max_col] == rprops[i].label).astype(bool) # find boundaries Bounds = np.argwhere( find_boundaries(lmask, mode='inner').astype(np.uint8) == 1, ) # check length of boundaries if len(Bounds) < 2: data_list.append(np.zeros(numFeatures)) else: # compute fourier descriptors and collect data data_list.append(_FSDs(Bounds[:, 0], Bounds[:, 1], K, Interval)) # create DataFrame after the loop fdata = pd.DataFrame(data_list, columns=feature_list) return fdata
def _InterpolateArcLength(X, Y, K): """ Resamples boundary points [X, Y] at L total equal arc-length locations. Parameters ---------- X : array_like x points of boundaries Y : array_like y points of boundaries K : int Number of points for boundary resampling to calculate fourier descriptors. Default value = 128. Returns ------- iX : array_like L-length vector of horizontal interpolated coordinates with equal arc-length spacing. iY : array_like L-length vector of vertical interpolated coordinates with equal arc-length spacing. """ # generate spaced points 0, 1/k, 1 interval = np.linspace(0, 1, K + 1) # get segment lengths slens = np.sqrt(np.diff(X)**2 + np.diff(Y)**2) # normalize to unit length slens = np.true_divide(slens, slens.sum()) # calculate cumulative length along boundary cumulative = np.zeros(len(slens) + 1) cumulative[1:] = np.cumsum(slens) # place points in 'Interval' along boundary locations = np.digitize(interval, cumulative) # clip to ends locations[locations > len(slens)] = len(slens) # linear interpolation Lie = (interval - cumulative[locations - 1]) / slens[locations - 1] iX = X[locations - 1] + (X[locations] - X[locations - 1]) * Lie iY = Y[locations - 1] + (Y[locations] - Y[locations - 1]) * Lie return iX, iY def _FSDs(X, Y, K, Intervals): """ Calculated FSDs from boundary points X,Y. Boundaries are resampled to have K equally spaced points (arclength) around the shape. The curvature is calculated using the cumulative angular function, measuring the displacement of the tangent angle from the starting point of the boundary. The K-length fft of the cumulative angular function is calculated, and then the elements of 'F' are summed as the spectral energy over 'Intervals'. Parameters ---------- X : array_like x points of boundaries Y : array_like y points of boundaries K : int Number of points for boundary resampling to calculate fourier descriptors. Default value = 128. Intervals : array_like Intervals spaced evenly over 1:K/2. Returns ------- F : array_like length(Intervals) vector containing spectral energy of cumulative angular function, summed over defined 'Intervals'. """ # check input 'Intervals' if Intervals[0] != 1.: Intervals = np.hstack((1., Intervals)) if Intervals[-1] != (K / 2): Intervals = np.hstack((Intervals, float(K))) # get length of intervals L = len(Intervals) # initialize F F = np.zeros((L - 1, )).astype(float) # interpolate boundaries iX, iY = _InterpolateArcLength(X, Y, K) # check if iXY.iX is not empty if iX.size: # calculate curvature Curvature = np.arctan2( (iY[1:] - iY[:-1]), (iX[1:] - iX[:-1]), ) # make curvature cumulative Curvature = Curvature - Curvature[0] # calculate FFT fX = np.fft.fft(Curvature).T # spectral energy fX = fX * fX.conj() fX = (fX / fX.sum()) if fX.sum() else fX # calculate 'F' values for i in range(L - 1): F[i] = np.round( fX[Intervals[i] - 1:Intervals[i + 1]].sum(), L, ).real.astype(float) return F def _GetBounds(bbox, delta, M, N): """ Returns bounds of object in global label image. Parameters ---------- bbox : tuple Bounding box (min_row, min_col, max_row, max_col). delta : int Used to dilate nuclei and define cytoplasm region. Default value = 8. M : int X size of label image. N : int Y size of label image. Returns ------- min_row : int Minimum row of the region bounds. max_row : int Maximum row of the region bounds. min_col : int Minimum column of the region bounds. max_col : int Maximum column of the region bounds. """ min_row, min_col, max_row, max_col = bbox min_row_out = max(0, (min_row - delta)) max_row_out = min(M - 1, (max_row + delta)) min_col_out = max(0, (min_col - delta)) max_col_out = min(N - 1, (max_col + delta)) return min_row_out, max_row_out, min_col_out, max_col_out