# Source code for histomicstk.annotations_and_masks.annotations_to_masks_handler

"""
Created on Mon Aug 12 18:33:48 2019.

@author: tageldim

"""

import copy
import io
import os
from warnings import warn

import numpy as np
import pandas as pd
from imageio import imwrite
from matplotlib.patches import Polygon as mpPolygon
from PIL import Image
from shapely.geometry.polygon import Polygon

from histomicstk.annotations_and_masks.annotation_and_mask_utils import (
    _get_and_add_element_to_roi, _get_element_mask, _get_idxs_for_all_rois,
    get_bboxes_from_slide_annotations,
    get_idxs_for_annots_overlapping_roi_by_bbox, get_image_from_htk_response,
    get_scale_factor_and_appendStr, scale_slide_annotations)
from histomicstk.annotations_and_masks.masks_to_annotations_handler import \
    get_contours_from_mask



[docs]
def get_roi_mask(
        slide_annotations, element_infos, GTCodes_df,
        idx_for_roi, iou_thresh=0.0, roiinfo=None,
        crop_to_roi=True, use_shapely=True,
        verbose=False, monitorPrefix=''):
    """Parse annotations and get a ground truth mask for a single ROI.

    This will look at all slide annotations, keep the ones that overlap
    with the region of interest (ROI), and paint them into a mask in
    overlay order.

    Parameters
    ----------
    slide_annotations : list of dicts
        response from server request
    element_infos : pandas DataFrame.
        The columns annidx and elementidx
        encode the dict index of annotation document and element,
        respectively, in the original slide_annotations list of dictionaries.
        This can be obtained by get_bboxes_from_slide_annotations() method.
        The columns xmin, xmax, ymin, ymax, type, group and bbox_area are
        read here.
    GTCodes_df : pandas Dataframe
        the ground truth codes and information dataframe.
        WARNING: Modified inside this method so pass a copy.
        This is a dataframe that is indexed by the annotation group name and
        has the following columns:
        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
        mask. Larger values means this annotation group is overlaid
        last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask)
        Pixels of this value belong to corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
        fill value inside the ROI. For example, you may decide that
        any pixel inside the ROI is considered stroma.
    idx_for_roi : int
        index of ROI within the element_infos dataframe.
    iou_thresh : float
        how much bounding box overlap is enough to
        consider an annotation to belong to the region of interest
    roiinfo : pandas series or dict
        contains information about the roi. Keys will be added to this
        index containing info about the roi like bounding box
        location and size. A new dict is created when None.
    crop_to_roi : bool
        flag of whether to crop polygons to roi
        (prevent overflow beyond roi edge)
    use_shapely : bool
        flag of whether to precisely determine whether an element
        belongs to an ROI using shapely polygons. Slightly slower. If
        set to False, overlapping bounding box is used as a cheap but
        less precise indicator of inclusion.
    verbose : bool
        Print progress to screen?
    monitorPrefix : str
        text to prepend to printed statements

    Returns
    -------
    Np array
        (m x n) labeled mask, where pixel values encode class membership.
        IMPORTANT NOTE: Zero pixels have special meaning and do NOT
        encode specific ground truth class. Instead, they simply
        mean Outside ROI and should be IGNORED during model training
        or evaluation.
    Dict
        information about ROI (keys XMIN, YMIN, XMAX, YMAX, BBOX_WIDTH,
        BBOX_HEIGHT are set here)

    Raises
    ------
    Exception
        if the element at idx_for_roi is of type 'point'.

    """
    # This stores information about the ROI like bounds, slide_name, etc
    # Allows passing many parameters and good forward/backward compatibility
    if roiinfo is None:
        roiinfo = {}

    # isolate annotations that potentially overlap (belong to) mask (incl. ROI)
    overlaps = get_idxs_for_annots_overlapping_roi_by_bbox(
        element_infos, idx_for_roi=idx_for_roi, iou_thresh=iou_thresh)
    idxs_for_all_rois = _get_idxs_for_all_rois(
        GTCodes=GTCodes_df, element_infos=element_infos)
    # other ROI elements are never painted into this ROI's mask
    overlaps = list(set(overlaps) - set(idxs_for_all_rois))
    elinfos_roi = element_infos.loc[[idx_for_roi] + overlaps, :]

    # Add roiinfo: the working canvas spans the ROI plus every overlapping
    # element's bounding box (an over-estimate that is tightened below)
    roiinfo['XMIN'] = int(np.min(elinfos_roi.xmin))
    roiinfo['YMIN'] = int(np.min(elinfos_roi.ymin))
    roiinfo['XMAX'] = int(np.max(elinfos_roi.xmax))
    roiinfo['YMAX'] = int(np.max(elinfos_roi.ymax))
    roiinfo['BBOX_WIDTH'] = roiinfo['XMAX'] - roiinfo['XMIN']
    roiinfo['BBOX_HEIGHT'] = roiinfo['YMAX'] - roiinfo['YMIN']

    # get roi polygon
    if use_shapely:
        coords, _ = _get_element_mask(
            elinfo=elinfos_roi.loc[idx_for_roi],
            slide_annotations=slide_annotations)
        roi_polygon = Polygon(coords)
    else:
        # Always bind the name: it is passed to the helper below regardless
        # of use_shapely, and leaving it unset raised a NameError.
        # Presumably the helper ignores it when use_shapely is False.
        roi_polygon = None

    # Init mask
    ROI = np.zeros(
        (roiinfo['BBOX_HEIGHT'], roiinfo['BBOX_WIDTH']), dtype=np.uint8)

    # only parse if roi is polygonal or rectangular
    if elinfos_roi.loc[idx_for_roi, 'type'] == 'point':
        msg = 'roi cannot be a point!'
        raise Exception(msg)

    # make sure ROI is overlaid first & assigned background class if relevant
    roi_group = elinfos_roi.loc[idx_for_roi, 'group']
    GTCodes_df.loc[roi_group, 'overlay_order'] = np.min(
        GTCodes_df.loc[:, 'overlay_order']) - 1
    bck_classes = GTCodes_df.loc[
        GTCodes_df.loc[:, 'is_background_class'] == 1, :]
    if bck_classes.shape[0] > 0:
        GTCodes_df.loc[
            roi_group, 'GT_code'] = bck_classes.iloc[0, :]['GT_code']

    # Add annotations in overlay order
    overlay_orders = sorted(set(GTCodes_df.loc[:, 'overlay_order']))
    N_elements = elinfos_roi.shape[0]
    elNo = 0
    for overlay_level in overlay_orders:

        # get indices of relevant groups
        relevant_groups = list(GTCodes_df.loc[
            GTCodes_df.loc[:, 'overlay_order'] == overlay_level, 'group'])
        relIdxs = []
        for group_name in relevant_groups:
            relIdxs.extend(list(elinfos_roi.loc[
                elinfos_roi.group == group_name, :].index))

        # get relevant infos and sort from largest to smallest (by bbox area)
        # so that the smaller elements are layered last. This helps partially
        # address issues described in:
        # https://github.com/DigitalSlideArchive/HistomicsTK/issues/675
        elinfos_relevant = elinfos_roi.loc[relIdxs, :].copy()
        elinfos_relevant.sort_values(
            'bbox_area', axis=0, ascending=False, inplace=True)

        # Go through elements and add to ROI mask
        for _elId, elinfo in elinfos_relevant.iterrows():

            elNo += 1
            elcountStr = '%s: Overlay level %d: Element %d of %d: %s' % (
                monitorPrefix, overlay_level, elNo, N_elements,
                elinfo['group'])
            if verbose:
                print(elcountStr)

            # now add element to ROI
            ROI = _get_and_add_element_to_roi(
                elinfo=elinfo, slide_annotations=slide_annotations, ROI=ROI,
                roiinfo=roiinfo, roi_polygon=roi_polygon,
                GT_code=GTCodes_df.loc[elinfo['group'], 'GT_code'],
                use_shapely=use_shapely, verbose=verbose,
                monitorPrefix=elcountStr)

            # save a copy of ROI-only mask to crop to it later if needed
            if crop_to_roi and (overlay_level == GTCodes_df.loc[
                    roi_group, 'overlay_order']):
                roi_only_mask = ROI.copy()

    # Crop polygons to roi if needed (prevent 'overflow' beyond roi edge)
    if crop_to_roi:
        ROI[roi_only_mask == 0] = 0

    # tighten boundary --remember, so far we've used element bboxes to
    # make an over-estimated margin around ROI boundary.
    # NOTE(review): the slice end is exclusive, so the last nonzero
    # row/column is dropped -- confirm whether this is intended.
    nz = np.nonzero(ROI)
    ymin, xmin = (np.min(arr) for arr in nz)
    ymax, xmax = (np.max(arr) for arr in nz)
    ROI = ROI[ymin:ymax, xmin:xmax]

    # update roi offset
    # NOTE(review): XMAX/YMAX are shifted by the same offset as XMIN/YMIN,
    # so BBOX_WIDTH/BBOX_HEIGHT keep their pre-tightening values rather
    # than the shape of the cropped mask -- confirm this is intended.
    roiinfo['XMIN'] += xmin
    roiinfo['YMIN'] += ymin
    roiinfo['XMAX'] += xmin
    roiinfo['YMAX'] += ymin
    roiinfo['BBOX_WIDTH'] = roiinfo['XMAX'] - roiinfo['XMIN']
    roiinfo['BBOX_HEIGHT'] = roiinfo['YMAX'] - roiinfo['YMIN']

    return ROI, roiinfo




[docs]
def get_mask_from_slide(
        GTCodes_dict, roiinfo, slide_annotations,
        element_infos, sf=1.0, get_roi_mask_kwargs=None):
    """Parse region from the slide and get its corresponding labeled mask.

    This is a wrapper around get_roi_mask() which should be referred to for
    implementation details. A synthetic rectangular 'super_roi' element
    covering the requested bounds is added to the annotations and used as
    the ROI; any group flagged as ROI in GTCodes_dict is treated as an
    ordinary annotation and its pixels are replaced by the background code.

    Parameters
    ----------
    GTCodes_dict : dict
        the ground truth codes and information dict.
        This is a dict that is indexed by the annotation group name and
        each entry is in turn a dict with the following keys:
        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
        mask. Larger values means this annotation group is overlaid
        last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask)
        Pixels of this value belong to corresponding group (class).
        Must be > 0 and < 255 (255 is reserved for the super_roi).
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
        fill value inside the ROI. For example, you may decide that
        any pixel inside the ROI is considered stroma.
        - color: also required by the schema check below.

    roiinfo : dict
        has keys 'XMIN', 'XMAX', 'YMIN', 'YMAX' for slide
        region coordinates (AT BASE MAGNIFICATION) to get labeled image
        (mask) for. Required: the bounds are read unconditionally.

    sf : float
        scale factor to multiply coordinates (eg 0.5 would halve size)

    slide_annotations : list
        Make sure you have used
        scale_slide_annotations() to scale them up/down by sf BEFOREHAND.
        This list is NOT modified; the super_roi is added to a copy.

    element_infos : pandas DataFrame.
        The columns annidx and elementidx
        encode the dict index of annotation document and element,
        respectively, in the original slide_annotations list of dictionaries.
        This can be obtained by get_bboxes_from_slide_annotations() method.
        Make sure you have used scale_slide_annotations().

    get_roi_mask_kwargs : dict
        extra kwargs for get_roi_mask()

    Returns
    -------
    Np array
        (m x n) labeled mask, where pixel values encode class membership.
        IMPORTANT NOTE: Zero pixels have special meaning and do NOT
        encode specific ground truth class. Instead, they simply
        mean Outside mask and should be IGNORED during model training
        or evaluation.

    Dict
        information about mask, as returned by get_roi_mask() with every
        value divided by sf and truncated to int

    """
    if get_roi_mask_kwargs is None:
        get_roi_mask_kwargs = {}

    # convert from dict to required dataframe
    GTCodes = pd.DataFrame.from_dict(GTCodes_dict, orient='index')

    # some sanity checks
    assert all(j in GTCodes.columns for j in [
        'group', 'overlay_order', 'GT_code', 'is_roi', 'is_background_class',
        'color']), 'GTCodes_dict does not follow schema'
    assert all(GTCodes.loc[:, 'GT_code'] > 0), 'All GT_code must be > 0'
    assert sf > 0, 'sf must be positive.'
    assert (roiinfo['XMAX'] > roiinfo['XMIN'])
    assert (
        roiinfo['YMAX'] > roiinfo['YMIN'])

    # use given ROI bounds, after scaling
    XMIN = int(roiinfo['XMIN'] * sf)
    YMIN = int(roiinfo['YMIN'] * sf)
    XMAX = int(roiinfo['XMAX'] * sf)
    YMAX = int(roiinfo['YMAX'] * sf)
    WIDTH = XMAX - XMIN
    HEIGHT = YMAX - YMIN

    # Build the synthetic bounding ROI and add it to a COPY of the
    # annotations list. Appending in place would leave one extra
    # 'superROI' document in the caller's list after every call.
    super_roi_doc = {'annotation': {
        'description': '',
        'elements': [
            {'center': [int(XMIN + WIDTH / 2), int(YMIN + HEIGHT / 2), 0],
             'width': WIDTH,
             'height': HEIGHT,
             'normal': [0, 0, 1],
             'rotation': 0,
             'group': 'super_roi',
             'label': {'value': 'super_roi'},
             'lineColor': 'rgb(0, 0, 0)',
             'fillColor': 'rgba(0, 0, 0, 0)',
             'lineWidth': 4.6,
             'type': 'rectangle'},
        ],
        'name': 'superROI'},
    }
    slide_annotations = list(slide_annotations) + [super_roi_doc]

    # add to bounding boxes dataframe (concat returns a new frame, so the
    # caller's element_infos is untouched as well)
    element_infos = pd.concat([element_infos, pd.DataFrame([{
        'annidx': len(slide_annotations) - 1,
        'elementidx': 0,
        'type': 'rectangle',
        'group': 'super_roi',
        'xmin': XMIN,
        'xmax': XMAX,
        'ymin': YMIN,
        'ymax': YMAX,
        'bbox_area': WIDTH * HEIGHT,
    }])], ignore_index=True)

    # find roi and background codes to use later
    roi_codes = list(GTCodes.loc[GTCodes.loc[:, 'is_roi'] == 1, 'GT_code'])
    bck_code = GTCodes.loc[
        GTCodes.loc[:, 'is_background_class'] == 1, 'GT_code']
    if bck_code.shape[0] > 0:
        bck_code = int(bck_code.iloc[0])
    else:
        bck_code = 0

    # add to gtcodes dataframe; 255 is reserved for the super_roi
    assert np.max(GTCodes.loc[:, 'GT_code']) < 255
    GTCodes.loc[:, 'is_roi'] = 0  # treat other ROIs as ordinary annotations
    GTCodes.loc[:, 'is_background_class'] = 0  # we'll adjust later
    GTCodes = pd.concat([GTCodes, pd.DataFrame([{
        'GT_code': 255,
        'overlay_order': 0,
        'color': 'rgb(0,0,0)',
        'group': 'super_roi',
        'is_background_class': 0,
        'is_roi': 1,
    }])], ignore_index=True)
    GTCodes.index = GTCodes.loc[:, 'group']

    # now get mask
    ROI, roiinfo = get_roi_mask(
        slide_annotations=slide_annotations, element_infos=element_infos,
        GTCodes_df=GTCodes.copy(),
        idx_for_roi=element_infos.index[-1],  # <- bounding roi
        **get_roi_mask_kwargs)
    # pixels that only belong to the super_roi carry no class
    ROI[ROI == 255] = 0

    # replace roi codes with background code
    for roi_code in roi_codes:
        ROI[ROI == roi_code] = bck_code

    # scale back coords
    roiinfo = {k: int(v / sf) for k, v in roiinfo.items()}

    return ROI, roiinfo



def _visualize_annotations_on_rgb(
        rgb, contours_list, linewidth=0.2, x_offset=0, y_offset=0,
        text=False):
    """Draw contour outlines over an RGB image and return the rendering.

    Parameters
    ----------
    rgb : np array
        image to draw on.
    contours_list : list of dicts
        each entry has the keys 'coords_x' and 'coords_y' (comma-separated
        integer strings), 'color' (a string of the form 'rgb(r,g,b)') and,
        when text is True, 'group'.
    linewidth : float
        outline width passed to the matplotlib polygon patch.
    x_offset, y_offset : number
        subtracted from the contour coordinates before drawing.
    text : bool
        if True, the first five characters of each contour's group are
        written next to it.

    Returns
    -------
    np array
        uint8 array holding the first three channels of the rendered PNG.

    """
    import matplotlib.pyplot as plt

    # later on flipped by matplotlib for weird reason
    rgb = np.flipud(rgb)

    # figsize is shape / 1000 inches and the figure is saved at dpi=1000
    fig = plt.figure(
        figsize=(rgb.shape[1] / 1000, rgb.shape[0] / 1000), dpi=100)
    try:
        ax = plt.subplot(111)
        ax.imshow(rgb)

        plt.axis('off')
        ax = plt.gca()
        ax.set_xlim(0.0, rgb.shape[1])
        ax.set_ylim(0.0, rgb.shape[0])

        for ann in contours_list:
            xy = np.array([
                [int(j) for j in ann[k].split(',')]
                for k in ('coords_x', 'coords_y')]).T
            xy[:, 0] = xy[:, 0] - x_offset
            # y runs upward on the axes, so mirror around the image height
            xy[:, 1] = rgb.shape[0] - (xy[:, 1] - y_offset) + 1
            polygon = mpPolygon(
                xy=xy,
                color=[int(j) / 255 for j in ann['color'].split(
                    'rgb(')[1][:-1].split(',')],
                closed=True, fill=False,
                linewidth=linewidth,
            )
            ax.add_patch(polygon)

            # add label text
            if text:
                size = 1e-4 * rgb.shape[1]
                ax.text(
                    int(np.min(xy[:, 0])),
                    int(np.max(xy[:, 1])),
                    ann['group'][:5],
                    color='w', fontsize=size, backgroundcolor='none',
                )

        ax.axis('off')
        fig.subplots_adjust(bottom=0, top=1, left=0, right=1)

        buf = io.BytesIO()
        fig.savefig(buf, format='png', pad_inches=0, dpi=1000)
        buf.seek(0)
        rgb_vis = np.uint8(Image.open(buf))[..., :3]
    finally:
        # always release the figure, even if a contour is malformed or
        # saving fails; otherwise figures pile up across calls
        plt.close(fig)

    return rgb_vis


def _sanity_checks(
        MPP, MAG, mode, bounds, idx_for_roi, get_roi_mask_kwargs,
        get_rgb, get_contours, get_visualization):
    """Validate and normalize the run parameters of the slide parser.

    The following adjustments are made before the values are returned:

    - MPP takes precedence: if both MPP and MAG are given, MAG becomes None.
    - For the 'wsi' and 'min_bounding_box' modes, bounds and idx_for_roi
      are discarded.
    - Otherwise a non-None idx_for_roi forces mode 'polygonal_bounds', and
      failing that a non-None bounds forces mode 'manual_bounds'.
    - Requesting a visualization also turns on contour extraction.

    Returns
    -------
    tuple
        (MPP, MAG, mode, bounds, idx_for_roi, get_roi_mask_kwargs,
        get_rgb, get_contours, get_visualization) after normalization.
        get_roi_mask_kwargs is returned as given.

    Raises
    ------
    AssertionError
        if MPP/MAG is not positive, the mode is unknown, a visualization
        is requested without rgb, or crop_to_roi is disabled while rgb or
        a visualization is requested.

    """
    # MPP precedes MAG
    if MPP is not None and MAG is not None:
        MAG = None

    # some sanity checks

    for mf in (MPP, MAG):
        if mf is not None:
            assert mf > 0, 'MPP or MAG must be positive.'

    if mode in ['wsi', 'min_bounding_box']:
        bounds = None
        idx_for_roi = None

    if idx_for_roi is not None:
        mode = 'polygonal_bounds'
    elif bounds is not None:
        mode = 'manual_bounds'

    assert mode in [
        'wsi', 'min_bounding_box', 'manual_bounds', 'polygonal_bounds'], \
        'mode %s not recognized' % mode

    if get_visualization:
        get_contours = True
        assert get_rgb, 'cannot get visualization without rgb.'

    # A missing key (or no kwargs at all) means get_roi_mask() will use its
    # own default, crop_to_roi=True, so treat it that way instead of
    # failing with a KeyError.
    crop_to_roi = (get_roi_mask_kwargs or {}).get('crop_to_roi', True)
    if not crop_to_roi:
        assert not get_rgb, \
            'Handling overflowing annotations while also getting RGB is not currently supported.'
        assert not get_visualization, \
            'Handling overflowing annotations while also getting RGB is not currently supported.'

    return (
        MPP, MAG, mode, bounds, idx_for_roi, get_roi_mask_kwargs,
        get_rgb, get_contours, get_visualization)


def _get_roi_bounds_by_run_mode(
        gc, slide_id, mode, bounds, element_infos, idx_for_roi, sf):
    """Return the region bounds that correspond to the given run mode.

    element_infos coordinates are divided by sf and truncated to int; the
    manually supplied bounds are only validated and returned unchanged.
    Any mode other than 'polygonal_bounds', 'manual_bounds' and
    'min_bounding_box' falls through to the whole-slide case, which asks
    the server for the slide size.

    Returns
    -------
    dict
        with keys 'XMIN', 'XMAX', 'YMIN', 'YMAX'.

    """
    if mode == 'polygonal_bounds':
        # bounding box of the specified polygonal/rotated roi element
        roi_row = element_infos.loc[idx_for_roi]
        return {
            side.upper(): int(roi_row[side] / sf)
            for side in ('xmin', 'xmax', 'ymin', 'ymax')
        }

    if mode == 'manual_bounds':
        assert bounds['XMAX'] > bounds['XMIN']
        assert bounds['YMAX'] > bounds['YMIN']
        return bounds

    if mode == 'min_bounding_box':
        # smallest box enclosing every annotation element in the slide
        left = np.min(element_infos.xmin)
        top = np.min(element_infos.ymin)
        right = np.max(element_infos.xmax)
        bottom = np.max(element_infos.ymax)
        return {
            'XMIN': int(left / sf),
            'YMIN': int(top / sf),
            'XMAX': int(right / sf),
            'YMAX': int(bottom / sf),
        }

    # whole slide: extent as reported by the tiles endpoint
    tiles_info = gc.get('/item/%s/tiles' % slide_id)
    return {
        'XMIN': 0,
        'XMAX': tiles_info['sizeX'],
        'YMIN': 0,
        'YMAX': tiles_info['sizeY'],
    }


def _get_rgb_and_pad_roi(gc, slide_id, bounds, appendStr, ROI, tau=10):
    """Fetch the RGB region for the bounds and fit the mask to its size.

    The region is requested from the server as PNG, with appendStr added
    to the query. The mask is then zero-padded or cropped at its
    bottom/right edges so that its first two dimensions equal those of the
    image. A size difference of tau pixels or more along either axis
    raises an AssertionError.

    Returns
    -------
    tuple
        (rgb, ROI) with matching height and width.

    """
    region_query = (
        '/item/%s/tiles/region?left=%d&right=%d&top=%d&bottom=%d&encoding=PNG'
        % (slide_id,
           bounds['XMIN'], bounds['XMAX'],
           bounds['YMIN'], bounds['YMAX'])
    ) + appendStr
    rgb = get_image_from_htk_response(gc.get(region_query, jsonResp=False))

    # sometimes there's a couple of pixel difference d.t. rounding
    extra_rows = rgb.shape[0] - ROI.shape[0]
    extra_cols = rgb.shape[1] - ROI.shape[1]
    assert all(np.abs(j) < tau for j in (extra_rows, extra_cols)), \
        'too much difference in size between image and mask.  Something is wrong!'

    # rows: image taller -> pad the mask; image shorter -> trim the mask
    if extra_rows > 0:
        ROI = np.pad(
            ROI, pad_width=((0, extra_rows), (0, 0)), mode='constant')
    elif extra_rows < 0:
        ROI = ROI[:extra_rows, :]

    # columns: same treatment
    if extra_cols > 0:
        ROI = np.pad(
            ROI, pad_width=((0, 0), (0, extra_cols)), mode='constant')
    elif extra_cols < 0:
        ROI = ROI[:, :extra_cols]

    return rgb, ROI



[docs]
def get_image_and_mask_from_slide(
        gc, slide_id, GTCodes_dict,
        MPP=5.0, MAG=None, mode='min_bounding_box',
        bounds=None, idx_for_roi=None,
        slide_annotations=None, element_infos=None,
        get_roi_mask_kwargs=None, get_contours_kwargs=None, linewidth=0.2,
        get_rgb=True, get_contours=True, get_visualization=True, tau=10):
    """Parse region from the slide and get its corresponding labeled mask.

    This is a wrapper around get_roi_mask() which should be referred to for
    implementation details.

    Parameters
    ----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)

    slide_id : str
        girder id for item (slide)

    GTCodes_dict : dict
        the ground truth codes and information dict.
        This is a dict that is indexed by the annotation group name and
        each entry is in turn a dict with the following keys:
        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
        mask. Larger values means this annotation group is overlaid
        last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask)
        Pixels of this value belong to corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
        fill value inside the ROI. For example, you may decide that
        any pixel inside the ROI is considered stroma.

    MPP : float or None
        Microns-per-pixel -- best use this as it's more well-defined than
        magnification which is more scanner/manufacturer specific.
        MPP of 0.25 often roughly translates to 40x

    MAG : float or None
        If you prefer to use whatever magnification is reported in slide.
        Ignored when MPP is also given. If neither MPP nor MAG is provided,
        everything is retrieved without scaling at base (scan)
        magnification.

    mode : str
        This specifies which part of the slide to get the mask from. Allowed
        modes include the following
        - wsi: get scaled up/down version of mask of whole slide
        - min_bounding_box: get minimum box for all annotations in slide
        - manual_bounds: use given ROI bounds provided by the 'bounds' param
        - polygonal_bounds: use the idx_for_roi param to get coordinates
        For modes other than wsi and min_bounding_box, a non-None
        idx_for_roi (or, failing that, bounds) overrides the mode.

    bounds : dict or None
        if not None, has keys 'XMIN', 'XMAX', 'YMIN', 'YMAX' for slide
        region coordinates (AT BASE MAGNIFICATION) to get labeled image
        (mask) for. Use this with the 'manual_bounds' run mode.

    idx_for_roi : int
        index of ROI within the element_infos dataframe.
        Use this with the 'polygonal_bounds' run mode.

    slide_annotations : list or None
        Give this parameter to avoid re-getting slide annotations. If you do
        provide the annotations, though, make sure you have used
        scale_slide_annotations() to scale them up/down by sf BEFOREHAND.
        element_infos must then be provided as well.

    element_infos : pandas DataFrame.
        The columns annidx and elementidx
        encode the dict index of annotation document and element,
        respectively, in the original slide_annotations list of dictionaries.
        This can be obtained by get_bboxes_from_slide_annotations() method.
        Make sure you have used scale_slide_annotations().

    get_roi_mask_kwargs : dict
        extra kwargs for get_roi_mask(). 'crop_to_roi' defaults to True
        when not given.

    get_contours_kwargs : dict
        extra kwargs for get_contours_from_mask()

    linewidth : float
        visualization line width

    get_rgb: bool
        get rgb image?

    get_contours : bool
        get annotation contours? (relative to final mask)

    get_visualization : bool
        get overlaid annotation bounds over RGB for visualization

    tau : int
        maximum difference (in pixels) between fetched image and mask allowed.
        Above this threshold, an error is raised indicating you may have some
        problem in your parameters or elsewhere. If the difference is less
        than tau, the mask is padded or cropped to match the rgb image
        before being returned

    Returns
    -------
    dict
        Results dict containing one or more of the following keys
        bounds: dict of bounds at scan magnification
        ROI - (mxn) labeled image (mask)
        rgb - (mxnx3 np array) corresponding rgb image
        contours - list, each entry is a dict version of a row from the output
        of masks_to_annotations_handler.get_contours_from_mask()
        visualization - (mxnx3 np array) visualization overlay

    """
    from pandas import DataFrame

    # Work on a fresh dict that always carries 'crop_to_roi': the sanity
    # checks index that key directly, so a call with default arguments
    # used to fail with a KeyError. True matches get_roi_mask()'s default.
    get_roi_mask_kwargs = {'crop_to_roi': True, **(get_roi_mask_kwargs or {})}
    get_contours_kwargs = get_contours_kwargs or {}

    # important sanity checks
    (MPP, MAG, mode, bounds, idx_for_roi, get_roi_mask_kwargs,
     get_rgb, get_contours, get_visualization) = _sanity_checks(
        MPP, MAG, mode, bounds, idx_for_roi, get_roi_mask_kwargs,
        get_rgb, get_contours, get_visualization)

    # calculate the scale factor
    sf, appendStr = get_scale_factor_and_appendStr(
        gc=gc, slide_id=slide_id, MPP=MPP, MAG=MAG)

    if slide_annotations is not None:
        assert element_infos is not None, 'must also provide element_infos'
    else:
        # get annotations for slide
        slide_annotations = gc.get('/annotation/item/' + slide_id)

        # scale up/down annotations by a factor
        slide_annotations = scale_slide_annotations(slide_annotations, sf=sf)

        # get bounding box information for all annotations -> scaled by sf
        element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # Determine get region based on run mode, keeping in mind that it
    # must be at BASE MAGNIFICATION coordinates before it is passed
    # on to get_mask_from_slide()
    bounds = _get_roi_bounds_by_run_mode(
        gc=gc, slide_id=slide_id, mode=mode, bounds=bounds,
        element_infos=element_infos, idx_for_roi=idx_for_roi, sf=sf)
    result = {'bounds': bounds}

    # get mask for specified area
    if mode == 'polygonal_bounds':
        # get roi mask and info; a fresh dataframe is built here because
        # get_roi_mask() modifies the one it is given
        ROI, _ = get_roi_mask(
            slide_annotations=slide_annotations, element_infos=element_infos,
            GTCodes_df=DataFrame.from_dict(GTCodes_dict, orient='index'),
            idx_for_roi=idx_for_roi, **get_roi_mask_kwargs)
    else:
        ROI, _ = get_mask_from_slide(
            GTCodes_dict=GTCodes_dict, roiinfo=copy.deepcopy(bounds),
            slide_annotations=slide_annotations, element_infos=element_infos,
            sf=sf, get_roi_mask_kwargs=get_roi_mask_kwargs)

    # get RGB
    if get_rgb:
        rgb, ROI = _get_rgb_and_pad_roi(
            gc=gc, slide_id=slide_id, bounds=bounds,
            appendStr=appendStr, ROI=ROI, tau=tau)
        result['rgb'] = rgb

    # pack result (we have to do it here in case of padding)
    result['ROI'] = ROI

    # get contours
    if get_contours:
        contours_list = get_contours_from_mask(
            MASK=ROI,
            GTCodes_df=DataFrame.from_dict(GTCodes_dict, orient='index'),
            **get_contours_kwargs)
        contours_list = contours_list.to_dict(orient='records')
        result['contours'] = contours_list

    # get visualization of annotations on RGB (the sanity checks guarantee
    # that rgb and contours were both produced in this case)
    if get_visualization:
        result['visualization'] = _visualize_annotations_on_rgb(
            rgb=rgb, contours_list=contours_list, linewidth=linewidth)

    return result



def _roi_getter_asis(
        gc, slide_id, GTCodes_dict, slide_annotations, element_infos,
        get_kwargs, monitor='', verbose=False):
    """Download special ROI regions as-is, even if they are very large.

    This is a generator: for every ROI element found in element_infos it
    yields the result of get_image_and_mask_from_slide() in
    'polygonal_bounds' mode. When that call raises, a warning carrying the
    exception's repr is issued and None is yielded instead.
    """
    from pandas import DataFrame

    # indices of all 'special' roi annotations
    roi_idxs = _get_idxs_for_all_rois(
        GTCodes=DataFrame.from_dict(GTCodes_dict, orient='index'),
        element_infos=element_infos)
    n_rois = len(roi_idxs)

    # go through rois and download as-is
    for position, idx_for_roi in enumerate(roi_idxs, start=1):

        roistr = '%s: roi %d of %d' % (monitor, position, n_rois)
        if verbose:
            print(roistr)

        # best effort: a failing roi must not stop the remaining ones
        roi_out = None
        try:
            roi_out = get_image_and_mask_from_slide(
                gc=gc, slide_id=slide_id, GTCodes_dict=GTCodes_dict,
                mode='polygonal_bounds', idx_for_roi=idx_for_roi,
                slide_annotations=slide_annotations,
                element_infos=element_infos, **get_kwargs)
        except Exception as e:
            warn('\n   ' + repr(e) + '\n')  # noqa B028

        yield roi_out


def _roi_getter_tiled(
        gc, slide_id, GTCodes_dict, slide_annotations, element_infos,
        sf, max_roiside,
        get_kwargs, monitor='', verbose=False):
    """Download special ROI regions in a tiled fashion.

    This is a generator. Every element of element_infos whose group is
    exactly 'roi' has its bounding box split into tiles of at most
    max_roiside per side (in element_infos coordinates), and each tile is
    fetched with get_image_and_mask_from_slide() in 'manual_bounds' mode,
    with the tile bounds divided by sf. When a fetch raises, a warning
    carrying the exception's repr is issued and None is yielded for that
    tile.

    get_kwargs is not modified; each tile gets its own copy with the
    'bounds' entry set.
    """
    # isolate rois
    rois = element_infos.loc[element_infos.loc[:, 'group'] == 'roi', :].copy()
    n_rois = rois.shape[0]

    # split ROIs into max_roiside tiled regions. The counter is the
    # position within `rois`; the dataframe index label is not used since
    # it refers to the row in element_infos, not to the roi number.
    for roino, (_, roi) in enumerate(rois.iterrows(), start=1):

        # bounds for tiled sub-rois (the last tile ends at the roi edge)
        xbounds = list(np.arange(roi['xmin'], roi['xmax'], max_roiside))
        xbounds.append(roi['xmax'])
        ybounds = list(np.arange(roi['ymin'], roi['ymax'], max_roiside))
        ybounds.append(roi['ymax'])

        roistr = f'{monitor}: roi {roino} of {n_rois}'
        if verbose:
            print(roistr)

        subroidx = 0
        nsubrois = (len(xbounds) - 1) * (len(ybounds) - 1)

        # go through tiled sub-rois
        for xi, xmin in enumerate(xbounds[:-1]):
            xmax = xbounds[xi + 1]
            for yi, ymin in enumerate(ybounds[:-1]):
                ymax = ybounds[yi + 1]

                subroidx += 1
                subroistr = f'{roistr}: sub-roi {subroidx} of {nsubrois}'
                if verbose:
                    print(subroistr)

                # get specified region; copy so the caller's kwargs dict
                # is left untouched
                tile_kwargs = dict(get_kwargs)
                tile_kwargs['bounds'] = {
                    'XMIN': xmin / sf,
                    'XMAX': xmax / sf,
                    'YMIN': ymin / sf,
                    'YMAX': ymax / sf,
                }
                try:
                    roi_out = get_image_and_mask_from_slide(
                        gc=gc, slide_id=slide_id, GTCodes_dict=GTCodes_dict,
                        mode='manual_bounds',
                        slide_annotations=slide_annotations,
                        element_infos=element_infos,
                        **tile_kwargs)
                except Exception as e:
                    # best effort: a failing tile must not stop the rest
                    problem = '\n'
                    problem += e.__repr__()
                    problem += '\n'
                    warn(problem)  # noqa B028
                    roi_out = None

                yield roi_out



[docs]
def get_all_rois_from_slide(  # noqa: C901
        gc, slide_id, GTCodes_dict, save_directories,
        get_image_and_mask_from_slide_kwargs=None, max_roiside=None,
        slide_name=None, verbose=True, monitorPrefix=''):
    """Parse annotations and save ground truth masks for ALL ROIs.

    Get all ROIs in a single slide. This mainly uses
    get_image_and_mask_from_slide(), which should be referred to
    for implementation details.

    Parameters
    ----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)

    slide_id : str
        girder id for item (slide)

    GTCodes_dict : dict
        the ground truth codes and information dict.
        This is a dict that is indexed by the annotation group name and
        each entry is in turn a dict with the following keys:
        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
        mask. Larger values means this annotation group is overlaid
        last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask)
        Pixels of this value belong to corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
        fill value inside the ROI. For example, you may decide that
        any pixel inside the ROI is considered stroma.

    save_directories : dict
        paths to directories to save data. Each entry is a string, and the
        following keys are allowed
        - ROI: path to save masks (labeled images)
        - rgb: path to save rgb images
        - contours: path to save annotation contours
        - visualization: path to save rgb visualization overlays

    get_image_and_mask_from_slide_kwargs : dict or None
        kwargs to pass to get_image_and_mask_from_slide()
        default values are assigned if specific (top-level) parameters
        are not given. The dict passed in is NOT modified; a shallow copy
        is used internally.

    max_roiside : int or None
        If int, this is the maximum allowed side for a downloaded region. If
        a region-of-interest is larger than this size, then it is tiled into
        non-overlapping regions whose maximal side is max_roiside.
        If None, the ROI is downloaded as-is, even if it was extremely large.
        If you know your slides have very large ROI annotations, the safer
        option is to set a max_roiside. A good value may be 5000-8000 pixels.

    slide_name : str or None
        If not given, it's inferred using a server request using girder
        client: the name of the first file attached to the item, with its
        last extension (if any) removed.

    verbose : bool
        Print progress to screen?

    monitorPrefix : str
        text to prepend to printed statements

    Returns
    -------
    list of dicts
        One entry per successfully fetched ROI (ROIs for which the getter
        yielded None are skipped). Each entry contains whichever of the
        following keys were present in the fetched ROI output:
        - ROI: path to saved mask (labeled image)
        - rgb: path to saved rgb image
        - contours: path to saved annotation contours
        - visualization: path to saved rgb visualization overlay

    Raises
    ------
    ValueError
        If any GT_code in GTCodes_dict is <= 0.

    """
    from pandas import DataFrame

    # assign defaults if nothing given
    default_keyvalues = {
        'MPP': 5.0,
        'MAG': None,
        'get_roi_mask_kwargs': {
            'iou_thresh': 0.0, 'crop_to_roi': True,
            'use_shapely': True, 'verbose': False},
        'get_contours_kwargs': {
            'groups_to_get': None,
            'roi_group': 'roi',
            'get_roi_contour': True,
            'discard_nonenclosed_background': True,
            'background_group': 'mostly_stroma',
            'MIN_SIZE': 10, 'MAX_SIZE': None,
            'verbose': False, 'monitorPrefix': '',
        },
        'get_rgb': True,
        'get_contours': True,
        'get_visualization': True,
    }

    # Work on a shallow copy so the caller's dict is never mutated: defaults
    # are filled in below and the tiled roi getter later writes a 'bounds'
    # key into this dict, neither of which should leak back to the caller.
    kvp = dict(get_image_and_mask_from_slide_kwargs or {})
    for key, default in default_keyvalues.items():
        kvp.setdefault(key, default)

    # convert to df and sanity check
    GTCodes_df = DataFrame.from_dict(GTCodes_dict, orient='index')
    if (GTCodes_df.loc[:, 'GT_code'] <= 0).any():
        msg = 'All GT_code must be > 0'
        raise ValueError(msg)

    # if not given, assign name of first file associated with girder item
    if slide_name is None:
        resp = gc.get(f'/item/{slide_id}/files')
        filename = resp[0]['name']
        # Strip the last extension only when there is one; slicing with
        # rfind('.') == -1 would silently drop the final character.
        stem, dot, _ = filename.rpartition('.')
        slide_name = stem if dot else filename

    # get annotations for slide
    slide_annotations = gc.get('/annotation/item/' + slide_id)

    # scale up/down annotations by a factor
    sf, _ = get_scale_factor_and_appendStr(
        gc=gc, slide_id=slide_id, MPP=kvp['MPP'], MAG=kvp['MAG'])
    slide_annotations = scale_slide_annotations(slide_annotations, sf=sf)

    # get bounding box information for all annotations
    element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # define roi_getter, which yields one roi at a time
    if max_roiside is None:
        roig = _roi_getter_asis(
            gc=gc, slide_id=slide_id, GTCodes_dict=GTCodes_dict,
            slide_annotations=slide_annotations, element_infos=element_infos,
            get_kwargs=kvp, monitor=monitorPrefix, verbose=verbose,
        )
    else:
        roig = _roi_getter_tiled(
            gc=gc, slide_id=slide_id, GTCodes_dict=GTCodes_dict,
            slide_annotations=slide_annotations, element_infos=element_infos,
            sf=sf, max_roiside=max_roiside,
            get_kwargs=kvp, monitor=monitorPrefix, verbose=verbose,
        )

    savenames = []

    for roi_out in roig:

        # if something went wrong, just move on
        if roi_out is None:
            continue

        # now save roi (mask, rgb, contours, vis)

        this_roi_savenames = {}
        bounds = roi_out['bounds']
        # %d is kept on purpose: it truncates non-integer bounds to ints
        ROINAMESTR = '%s_left-%d_top-%d_bottom-%d_right-%d' % (
            slide_name,
            bounds['XMIN'], bounds['YMIN'],
            bounds['YMAX'], bounds['XMAX'])

        # image-like outputs are written as PNG files
        for imtype in ['ROI', 'rgb', 'visualization']:
            if imtype in roi_out:
                savename = os.path.join(
                    save_directories[imtype], ROINAMESTR + '.png')
                if verbose:
                    print('   Saving %s\n' % savename)
                imwrite(im=roi_out[imtype], uri=savename)
                this_roi_savenames[imtype] = savename

        # contours are tabular and written as CSV
        if 'contours' in roi_out:
            savename = os.path.join(
                save_directories['contours'], ROINAMESTR + '.csv')
            if verbose:
                print('   Saving %s\n' % savename)
            contours_df = DataFrame(roi_out['contours'])
            contours_df.to_csv(savename)
            this_roi_savenames['contours'] = savename

        savenames.append(this_roi_savenames)

    return savenames