# Source code for histomicstk.features.compute_nuclei_features

from histomicstk.cli import utils as cli_utils
from histomicstk.segmentation import label as htk_label

from .compute_fsd_features import compute_fsd_features
from .compute_gradient_features import compute_gradient_features
from .compute_haralick_features import compute_haralick_features
from .compute_intensity_features import compute_intensity_features
from .compute_morphometry_features import compute_morphometry_features


def compute_nuclei_features(im_label, im_nuclei=None, im_cytoplasm=None,  # noqa
                            fsd_bnd_pts=128, fsd_freq_bins=6, cyto_width=8,
                            num_glcm_levels=32,
                            morphometry_features_flag=True,
                            fsd_features_flag=True,
                            intensity_features_flag=True,
                            gradient_features_flag=True,
                            haralick_features_flag=True,
                            tile_info=None,
                            im_nuclei_seg_mask=None,
                            format=None,
                            return_nuclei_annotation=False,
                            ):
    """
    Calculates features for nuclei classification

    Parameters
    ----------
    im_label : array_like
        A labeled mask image wherein intensity of a pixel is the ID of the
        object it belongs to. Non-zero values are considered to be foreground
        objects.

    im_nuclei : array_like
        Nucleus channel intensity image. Required whenever any of
        `intensity_features_flag`, `gradient_features_flag` or
        `haralick_features_flag` is true (all three default to True).

    im_cytoplasm : array_like
        Cytoplasm channel intensity image. When it is None, no cytoplasm
        mask is built and no *Cytoplasm.* features are computed.

    fsd_bnd_pts : int, optional
        Number of points for boundary resampling to calculate fourier
        descriptors. Default value = 128.

    fsd_freq_bins : int, optional
        Number of frequency bins for calculating FSDs. Default value = 6.

    cyto_width : float, optional
        Estimated width of the ring-like neighborhood region around each
        nucleus to be considered as its cytoplasm. Default value = 8.

    num_glcm_levels: int, optional
        An integer specifying the number of gray levels. For example, if
        `NumLevels` is 32, the intensity values of the input image are
        scaled so they are integers between 0 and 31. The number of gray
        levels determines the size of the gray-level co-occurrence matrix.

        Default: 32

    morphometry_features_flag : bool, optional
        A flag that can be used to specify whether or not to compute
        morphometry (size and shape) features.
        See histomicstk.features.compute_morphometry_features for more
        details.

    fsd_features_flag : bool, optional
        A flag that can be used to specify whether or not to compute
        Fourier shape descriptor (FSD) features.
        See `histomicstk.features.compute_fsd_features` for more details.

    intensity_features_flag : bool, optional
        A flag that can be used to specify whether or not to compute
        intensity features from the nucleus and cytoplasm channels.
        See `histomicstk.features.compute_intensity_features` for more
        details.

    gradient_features_flag : bool, optional
        A flag that can be used to specify whether or not to compute
        gradient/edge features from intensity and cytoplasm channels.
        See `histomicstk.features.compute_gradient_features` for more
        details.

    haralick_features_flag : bool, optional
        A flag that can be used to specify whether or not to compute
        haralick features from intensity and cytoplasm channels.
        See `histomicstk.features.compute_haralick_features` for more
        details.

    tile_info : dict, optional
        If it holds a ``'tile'`` entry that is a 3-D array with more than
        one band along its last axis, nucleus intensity features are
        additionally computed for every band (cast to float), subject to
        `intensity_features_flag`. It is also forwarded to
        `histomicstk.cli.utils.create_tile_nuclei_annotations` when
        `return_nuclei_annotation` is True.

    im_nuclei_seg_mask : optional
        Only used when `return_nuclei_annotation` is True, in which case it
        is forwarded unchanged to
        `histomicstk.cli.utils.create_tile_nuclei_annotations`.
        Presumably the nuclei segmentation mask of the tile - confirm
        against that helper.

    format : optional
        Only used when `return_nuclei_annotation` is True, in which case it
        is forwarded unchanged to
        `histomicstk.cli.utils.create_tile_nuclei_annotations`.
        Presumably the annotation output format - confirm against that
        helper.

    return_nuclei_annotation : bool, optional
        Returns the nuclei annotation if kept True

    Returns
    -------
    fdata : pandas.DataFrame
        A pandas data frame containing the features listed below for each
        object/label. When `return_nuclei_annotation` is True, only the rows
        whose index is among the rows selected by
        `create_tile_nuclei_annotations` are kept.

    nuclei_annot_list : List
        List containing the boundaries of segmented nuclei in the input
        image. Only returned (as the second element of a tuple) when
        `return_nuclei_annotation` is True.

    Raises
    ------
    AssertionError
        If intensity, gradient or haralick features are requested but
        `im_nuclei` is None.

    Notes
    -----
    List of features computed by this function

    Identifier
        Location of the nucleus and its code in the input labeled mask.
        Apart from *Label*, columns are prefixed by *Identifier.*. These
        include ...

        Label (int) - nucleus label in the input labeled mask

        Identifier.Xmin (int) - Left bound

        Identifier.Ymin (int) - Upper bound

        Identifier.Xmax (int) - Right bound

        Identifier.Ymax (int) - Lower bound

        Identifier.CentroidX (float) - X centroid (columns)

        Identifier.CentroidY (float) - Y centroid (rows)

        Identifier.WeightedCentroidX (float) - intensity-weighted X
        centroid (only present when `im_nuclei` is provided)

        Identifier.WeightedCentroidY (float) - intensity-weighted Y
        centroid (only present when `im_nuclei` is provided)

    Morphometry (size, shape, and orientation) features of the nuclei
        See histomicstk.features.compute_morphometry_features for more
        details. Feature names prefixed by *Size.*, *Shape.*, or
        *Orientation.*.

    Fourier shape descriptor features
        See `histomicstk.features.compute_fsd_features` for more details.
        Feature names are prefixed by *FSD*.

    Intensity features for the nucleus and cytoplasm channels
        See `histomicstk.features.compute_intensity_features` for more
        details. Feature names are prefixed by *Nucleus.Intensity.* for
        nucleus features and *Cytoplasm.Intensity.* for cytoplasm features.
        Per-band features derived from ``tile_info['tile']`` have the
        additional column prefix *Nucleus.Band<i>.*.

    Gradient/edge features for the nucleus and cytoplasm channels
        See `histomicstk.features.compute_gradient_features` for more
        details. Feature names are prefixed by *Nucleus.Gradient.* for
        nucleus features and *Cytoplasm.Gradient.* for cytoplasm features.

    Haralick features for the nucleus and cytoplasm channels
        See `histomicstk.features.compute_haralick_features` for more
        details. Feature names are prefixed by *Nucleus.Haralick.* for
        nucleus features and *Cytoplasm.Haralick.* for cytoplasm features.

    See Also
    --------
    histomicstk.features.compute_morphometry_features,
    histomicstk.features.compute_fsd_features,
    histomicstk.features.compute_intensity_features,
    histomicstk.features.compute_gradient_features,
    histomicstk.features.compute_haralick_features

    """
    import pandas as pd
    from skimage.measure import regionprops

    # sanity checks
    # Raised explicitly (rather than via an ``assert`` statement) so the
    # check is not stripped when Python runs with -O; the exception type and
    # message are the same as before.
    needs_nuclei_intensity = (
        intensity_features_flag
        or gradient_features_flag
        or haralick_features_flag
    )
    if needs_nuclei_intensity and im_nuclei is None:
        raise AssertionError('You must provide nuclei intensity!')

    # TODO: this pipeline uses loops a lot. For each set of features it
    #  iterates over all nuclei, which may become an issue when one needs to
    #  do this for lots and lots of slides and 10^6+ nuclei. Consider
    #  improving efficiency in the future somehow (cython? reuse? etc)

    feature_list = []

    # get the objects in im_label
    nuclei_props = regionprops(im_label, intensity_image=im_nuclei)

    has_nuclei_intensity = im_nuclei is not None

    def identifier_row(nprop):
        """Build the identifier/location record of a single nucleus."""
        bbox = nprop.bbox
        centroid = nprop.centroid
        row = {
            'Label': nprop.label,
            'Identifier.Xmin': bbox[1],
            'Identifier.Ymin': bbox[0],
            'Identifier.Xmax': bbox[3],
            'Identifier.Ymax': bbox[2],
            'Identifier.CentroidX': centroid[1],
            'Identifier.CentroidY': centroid[0],
        }
        # the intensity-weighted centroid needs an intensity image
        if has_nuclei_intensity:
            wcy, wcx = nprop.weighted_centroid
            row['Identifier.WeightedCentroidX'] = wcx
            row['Identifier.WeightedCentroidY'] = wcy
        return row

    # create the identifier DataFrame in one step
    feature_list.append(
        pd.DataFrame([identifier_row(nprop) for nprop in nuclei_props]))

    def conditional(flag, func, args, kwargs, prefix=None):
        """Run ``func(*args, **kwargs)`` when ``flag`` is set.

        Column names of the result are prepended with ``prefix`` if one is
        given. An empty DataFrame is returned when ``flag`` is not set so
        the result can always be concatenated.
        """
        if not flag:
            return pd.DataFrame()
        output = func(*args, **kwargs)
        if prefix:
            output.columns = [prefix + col for col in output.columns]
        return output

    # compute cytoplasm mask
    if im_cytoplasm is not None:
        cyto_mask = htk_label.dilate_xor(im_label, neigh_width=cyto_width)
        cyto_by_label = {
            cprop.label: cprop
            for cprop in regionprops(cyto_mask, intensity_image=im_cytoplasm)
        }
        # ensure that cytoplasm props order corresponds to the nuclei; a
        # nucleus without a matching cytoplasm region gets None
        cyto_props = [cyto_by_label.get(nprop.label) for nprop in nuclei_props]

    # compute morphometry features
    feature_list.append(conditional(
        morphometry_features_flag, compute_morphometry_features,
        [im_label],
        {'rprops': nuclei_props},
    ))

    # compute FSD features
    # NOTE(review): cyto_width is forwarded as the fourth positional
    # argument of compute_fsd_features - confirm this is intended.
    feature_list.append(conditional(
        fsd_features_flag, compute_fsd_features,
        [im_label, fsd_bnd_pts, fsd_freq_bins, cyto_width],
        {'rprops': nuclei_props},
    ))

    # compute nuclei intensity features
    feature_list.append(conditional(
        intensity_features_flag, compute_intensity_features,
        [im_label, im_nuclei],
        {'rprops': nuclei_props},
        prefix='Nucleus.',
    ))

    # compute per-band nuclei intensity features for multi-band tiles
    tile = tile_info.get('tile') if tile_info else None
    if tile is not None and len(tile.shape) == 3 and tile.shape[-1] > 1:
        for band in range(tile.shape[-1]):
            feature_list.append(conditional(
                intensity_features_flag, compute_intensity_features,
                [im_label, tile[:, :, band].astype(float)],
                {'rprops': nuclei_props},
                prefix=f'Nucleus.Band{band}.',
            ))

    # compute cytoplasm intensity features
    if im_cytoplasm is not None:
        feature_list.append(conditional(
            intensity_features_flag, compute_intensity_features,
            [cyto_mask, im_cytoplasm],
            {'rprops': cyto_props},
            prefix='Cytoplasm.',
        ))

    # compute nuclei gradient features
    feature_list.append(conditional(
        gradient_features_flag, compute_gradient_features,
        [im_label, im_nuclei],
        {'rprops': nuclei_props},
        prefix='Nucleus.',
    ))

    # compute cytoplasm gradient features
    if im_cytoplasm is not None:
        feature_list.append(conditional(
            gradient_features_flag, compute_gradient_features,
            [cyto_mask, im_cytoplasm],
            {'rprops': cyto_props},
            prefix='Cytoplasm.',
        ))

    # compute nuclei haralick features
    feature_list.append(conditional(
        haralick_features_flag, compute_haralick_features,
        [im_label, im_nuclei],
        {'num_levels': num_glcm_levels, 'rprops': nuclei_props},
        prefix='Nucleus.',
    ))

    # compute cytoplasm haralick features
    if im_cytoplasm is not None:
        feature_list.append(conditional(
            haralick_features_flag, compute_haralick_features,
            [cyto_mask, im_cytoplasm],
            {'num_levels': num_glcm_levels, 'rprops': cyto_props},
            prefix='Cytoplasm.',
        ))

    # Merge all features
    fdata = pd.concat(feature_list, axis=1)

    if not return_nuclei_annotation:
        return fdata

    # Create nuclei segmentation with the generated regionprops
    nuclei_annot_list, selected_rows = (
        cli_utils.create_tile_nuclei_annotations(
            im_nuclei_seg_mask, tile_info, format, nuclei_props)
    )

    # Drop all rows which are not found in nuclei detection
    fdata = fdata[fdata.index.isin(selected_rows)]

    return fdata, nuclei_annot_list