from histomicstk.cli import utils as cli_utils
from histomicstk.segmentation import label as htk_label
from .compute_fsd_features import compute_fsd_features
from .compute_gradient_features import compute_gradient_features
from .compute_haralick_features import compute_haralick_features
from .compute_intensity_features import compute_intensity_features
from .compute_morphometry_features import compute_morphometry_features
# NOTE: a stray "[docs]" marker (Sphinx viewcode link residue, not code) was removed here.
def compute_nuclei_features(im_label, im_nuclei=None, im_cytoplasm=None,  # noqa
                            fsd_bnd_pts=128, fsd_freq_bins=6, cyto_width=8,
                            num_glcm_levels=32,
                            morphometry_features_flag=True,
                            fsd_features_flag=True,
                            intensity_features_flag=True,
                            gradient_features_flag=True,
                            haralick_features_flag=True,
                            tile_info=None,
                            im_nuclei_seg_mask=None,
                            format=None,
                            return_nuclei_annotation=False,
                            ):
    """
    Calculates features for nuclei classification

    Parameters
    ----------
    im_label : array_like
        A labeled mask image wherein intensity of a pixel is the ID of the
        object it belongs to. Non-zero values are considered to be foreground
        objects.
    im_nuclei : array_like
        Nucleus channel intensity image. Required whenever any of the
        intensity, gradient or haralick feature flags is set.
    im_cytoplasm : array_like
        Cytoplasm channel intensity image. When omitted, no cytoplasm
        features are computed.
    fsd_bnd_pts : int, optional
        Number of points for boundary resampling to calculate fourier
        descriptors. Default value = 128.
    fsd_freq_bins : int, optional
        Number of frequency bins for calculating FSDs. Default value = 6.
    cyto_width : float, optional
        Estimated width of the ring-like neighborhood region around each
        nucleus to be considered as its cytoplasm. Default value = 8.
    num_glcm_levels: int, optional
        An integer specifying the number of gray levels For example, if
        `NumLevels` is 32, the intensity values of the input image are
        scaled so they are integers between 0 and 31. The number of gray
        levels determines the size of the gray-level co-occurrence matrix.
        Default: 32
    morphometry_features_flag : bool, optional
        A flag that can be used to specify whether or not to compute
        morphometry (size and shape) features.
        See `histomicstk.features.compute_morphometry_features` for more
        details.
    fsd_features_flag : bool, optional
        A flag that can be used to specify whether or not to compute
        Fourier shape descriptor (FSD) features.
        See `histomicstk.features.compute_fsd_features` for more details.
    intensity_features_flag : bool, optional
        A flag that can be used to specify whether or not to compute
        intensity features from the nucleus and cytoplasm channels.
        See `histomicstk.features.compute_intensity_features` for more
        details.
    gradient_features_flag : bool, optional
        A flag that can be used to specify whether or not to compute
        gradient/edge features from intensity and cytoplasm channels.
        See `histomicstk.features.compute_gradient_features` for more details.
    haralick_features_flag : bool, optional
        A flag that can be used to specify whether or not to compute
        haralick features from intensity and cytoplasm channels.
        See `histomicstk.features.compute_haralick_features` for more details.
    tile_info : dict, optional
        Tile description. If it holds a ``'tile'`` entry that is a 3-D array
        with more than one band in its last axis, intensity features are
        additionally computed per band (prefixed *Nucleus.Band<i>.*). It is
        also forwarded to the annotation helper when
        `return_nuclei_annotation` is True.
    im_nuclei_seg_mask : array_like, optional
        Only used when `return_nuclei_annotation` is True, in which case it
        is forwarded unchanged to
        `histomicstk.cli.utils.create_tile_nuclei_annotations`.
    format : optional
        Only used when `return_nuclei_annotation` is True, in which case it
        is forwarded unchanged to
        `histomicstk.cli.utils.create_tile_nuclei_annotations`.
    return_nuclei_annotation : bool, optional
        Returns the nuclei annotation if kept True

    Returns
    -------
    fdata : pandas.DataFrame
        A pandas data frame containing the features listed below for each
        object/label. When `return_nuclei_annotation` is True, only the rows
        selected by the annotation helper are kept.
    nuclei_annot_list : List
        List containing the boundaries of segmented nuclei in the input image.
        Only returned (as the second element of a tuple) when
        `return_nuclei_annotation` is True.

    Raises
    ------
    AssertionError
        If intensity, gradient or haralick features are requested but
        `im_nuclei` is None.

    Notes
    -----
    List of features computed by this function

    Identifier
        Location of the nucleus and its code in the input labeled mask.
        Apart from *Label*, columns are prefixed by *Identifier.*.
        These include ...

        Label (int) - nucleus label in the input labeled mask

        Identifier.Xmin (int) - Left bound

        Identifier.Ymin (int) - Upper bound

        Identifier.Xmax (int) - Right bound

        Identifier.Ymax (int) - Lower bound

        Identifier.CentroidX (float) - X centroid (columns)

        Identifier.CentroidY (float) - Y centroid (rows)

        Identifier.WeightedCentroidX (float) - intensity-weighted X centroid
        (only when `im_nuclei` is given)

        Identifier.WeightedCentroidY (float) - intensity-weighted Y centroid
        (only when `im_nuclei` is given)

    Morphometry (size, shape, and orientation) features of the nuclei
        See `histomicstk.features.compute_morphometry_features` for more
        details.
        Feature names prefixed by *Size.*, *Shape.*, or *Orientation.*.

    Fourier shape descriptor features
        See `histomicstk.features.compute_fsd_features` for more details.
        Feature names are prefixed by *FSD*.

    Intensity features for the nucleus and cytoplasm channels
        See `histomicstk.features.compute_intensity_features` for more
        details.
        Feature names are prefixed by *Nucleus.Intensity.* for nucleus features
        and *Cytoplasm.Intensity.* for cytoplasm features.

    Gradient/edge features for the nucleus and cytoplasm channels
        See `histomicstk.features.compute_gradient_features` for more details.
        Feature names are prefixed by *Nucleus.Gradient.* for nucleus features
        and *Cytoplasm.Gradient.* for cytoplasm features.

    Haralick features for the nucleus and cytoplasm channels
        See `histomicstk.features.compute_haralick_features` for more details.
        Feature names are prefixed by *Nucleus.Haralick.* for nucleus features
        and *Cytoplasm.Haralick.* for cytoplasm features.

    See Also
    --------
    histomicstk.features.compute_morphometry_features,
    histomicstk.features.compute_fsd_features,
    histomicstk.features.compute_intensity_features,
    histomicstk.features.compute_gradient_features,
    histomicstk.features.compute_haralick_features

    """
    # sanity checks -- performed before the heavy imports so that an invalid
    # call fails fast.  The exception is raised explicitly (rather than via an
    # ``assert`` statement) so the check is not stripped under ``python -O``;
    # the exception type is kept as AssertionError for backward compatibility.
    needs_nuclei_intensity = (
        intensity_features_flag or
        gradient_features_flag or
        haralick_features_flag
    )
    if needs_nuclei_intensity and im_nuclei is None:
        raise AssertionError('You must provide nuclei intensity!')

    import pandas as pd
    from skimage.measure import regionprops

    # TODO: this pipeline uses loops a lot. For each set of features it
    #  iterates over all nuclei, which may become an issue when one needs to
    #  do this for lots and lots of slides and 10^6+ nuclei. Consider
    #  improving efficiency in the future somehow (cython? reuse? etc)

    def identifier_features(props, has_intensity):
        """Build the identifier/location table, one row per region prop."""
        rows = []
        for prop in props:
            # bbox is (min_row, min_col, max_row, max_col); centroid is
            # (row, col) -- hence the index swaps for X/Y below.
            row = {
                'Label': prop.label,
                'Identifier.Xmin': prop.bbox[1],
                'Identifier.Ymin': prop.bbox[0],
                'Identifier.Xmax': prop.bbox[3],
                'Identifier.Ymax': prop.bbox[2],
                'Identifier.CentroidX': prop.centroid[1],
                'Identifier.CentroidY': prop.centroid[0],
            }
            if has_intensity:
                # weighted centroids need the intensity image
                wcy, wcx = prop.weighted_centroid
                row['Identifier.WeightedCentroidX'] = wcx
                row['Identifier.WeightedCentroidY'] = wcy
            rows.append(row)
        # create the DataFrame in one step
        return pd.DataFrame(rows)

    def conditional(flag, func, args, kwargs, prefix=None):
        """Run ``func`` if ``flag`` is set, else return an empty DataFrame."""
        if not flag:
            # an empty frame is a no-op placeholder in the final concat
            return pd.DataFrame()
        output = func(*args, **kwargs)
        if prefix:
            output.columns = [prefix + col for col in output.columns]
        return output

    # get the objects in im_label
    nuclei_props = regionprops(im_label, intensity_image=im_nuclei)

    # NOTE: the order in which frames are appended fixes the column order of
    # the returned DataFrame, so it must not be changed.
    feature_list = [identifier_features(nuclei_props, im_nuclei is not None)]

    # compute cytoplasm mask
    has_cytoplasm = im_cytoplasm is not None
    if has_cytoplasm:
        cyto_mask = htk_label.dilate_xor(im_label, neigh_width=cyto_width)
        ring_props = regionprops(cyto_mask, intensity_image=im_cytoplasm)
        # ensure that cytoplasm props order corresponds to the nuclei; a
        # nucleus without a matching cytoplasm region gets None
        ring_by_label = {ring.label: ring for ring in ring_props}
        cyto_props = [ring_by_label.get(nprop.label) for nprop in nuclei_props]

    # compute morphometry features
    feature_list.append(conditional(
        morphometry_features_flag,
        compute_morphometry_features,
        [im_label], {'rprops': nuclei_props},
    ))

    # compute FSD features
    # NOTE(review): cyto_width is forwarded as the fourth positional argument
    # of compute_fsd_features -- confirm this is the intended value.
    feature_list.append(conditional(
        fsd_features_flag,
        compute_fsd_features,
        [im_label, fsd_bnd_pts, fsd_freq_bins, cyto_width],
        {'rprops': nuclei_props},
    ))

    # compute nuclei intensity features
    feature_list.append(conditional(
        intensity_features_flag,
        compute_intensity_features,
        [im_label, im_nuclei], {'rprops': nuclei_props}, prefix='Nucleus.',
    ))

    # compute per-band nuclei intensity features for multi-band tiles
    tile = tile_info.get('tile') if tile_info else None
    if tile is not None and len(tile.shape) == 3 and tile.shape[-1] > 1:
        for band in range(tile.shape[-1]):
            feature_list.append(conditional(
                intensity_features_flag,
                compute_intensity_features,
                [im_label, tile[:, :, band].astype(float)],
                {'rprops': nuclei_props}, prefix=f'Nucleus.Band{band}.',
            ))

    # compute cytoplasm intensity features
    if has_cytoplasm:
        feature_list.append(conditional(
            intensity_features_flag,
            compute_intensity_features,
            [cyto_mask, im_cytoplasm], {'rprops': cyto_props},
            prefix='Cytoplasm.',
        ))

    # compute nuclei gradient features
    feature_list.append(conditional(
        gradient_features_flag,
        compute_gradient_features,
        [im_label, im_nuclei], {'rprops': nuclei_props}, prefix='Nucleus.',
    ))

    # compute cytoplasm gradient features
    if has_cytoplasm:
        feature_list.append(conditional(
            gradient_features_flag,
            compute_gradient_features,
            [cyto_mask, im_cytoplasm], {'rprops': cyto_props},
            prefix='Cytoplasm.',
        ))

    # compute nuclei haralick features
    feature_list.append(conditional(
        haralick_features_flag,
        compute_haralick_features,
        [im_label, im_nuclei],
        {'num_levels': num_glcm_levels, 'rprops': nuclei_props},
        prefix='Nucleus.',
    ))

    # compute cytoplasm haralick features
    if has_cytoplasm:
        feature_list.append(conditional(
            haralick_features_flag,
            compute_haralick_features,
            [cyto_mask, im_cytoplasm],
            {'num_levels': num_glcm_levels, 'rprops': cyto_props},
            prefix='Cytoplasm.',
        ))

    # Merge all features
    fdata = pd.concat(feature_list, axis=1)

    if return_nuclei_annotation:
        # Create nuclei segmentation with the generated regionprops
        nuclei_annot_list, selected_rows = cli_utils.create_tile_nuclei_annotations(
            im_nuclei_seg_mask, tile_info, format, nuclei_props)

        # Drop all rows which are not found in nuclei detection
        fdata = fdata[fdata.index.isin(selected_rows)]

        return fdata, nuclei_annot_list

    return fdata