Source code for histomicstk.annotations_and_masks.annotations_to_masks_handler

"""
Created on Mon Aug 12 18:33:48 2019.

@author: tageldim

"""

import copy
import io
import os
from warnings import warn

import numpy as np
import pandas as pd
from imageio import imwrite
from matplotlib.patches import Polygon as mpPolygon
from PIL import Image
from shapely.geometry.polygon import Polygon

from histomicstk.annotations_and_masks.annotation_and_mask_utils import (
    _get_and_add_element_to_roi, _get_element_mask, _get_idxs_for_all_rois,
    get_bboxes_from_slide_annotations,
    get_idxs_for_annots_overlapping_roi_by_bbox, get_image_from_htk_response,
    get_scale_factor_and_appendStr, scale_slide_annotations)
from histomicstk.annotations_and_masks.masks_to_annotations_handler import \
    get_contours_from_mask


def get_roi_mask(
        slide_annotations, element_infos, GTCodes_df,
        idx_for_roi, iou_thresh=0.0, roiinfo=None,
        crop_to_roi=True, use_shapely=True,
        verbose=False, monitorPrefix=''):
    """Parse annotations and get a ground truth mask for a single ROI.

    This will look at all slide annotations and get ones that
    overlap with the region of interest (ROI) and assigns them to mask.

    Parameters
    ----------
    slide_annotations : list of dicts
        response from server request
    element_infos : pandas DataFrame.
        The columns annidx and elementidx
        encode the dict index of annotation document and element,
        respectively, in the original slide_annotations list of dictionaries.
        This can be obtained by get_bboxes_from_slide_annotations() method
    GTCodes_df : pandas Dataframe
        the ground truth codes and information dataframe.
        WARNING: Modified inside this method so pass a copy.
        This is a dataframe that is indexed by the annotation group name and
        has the following columns:
        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
        mask. Larger values means this annotation group is overlaid
        last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask)
        Pixels of this value belong to corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
        fill value inside the ROI. For example, you may decide that
        any pixel inside the ROI is considered stroma.
    idx_for_roi : int
        index of ROI within the element_infos dataframe.
    iou_thresh : float
        how much bounding box overlap is enough to
        consider an annotation to belong to the region of interest
    roiinfo : pandas series or dict
        contains information about the roi. Keys will be added to this
        index containing info about the roi like bounding box
        location and size. Modified in place when given.
    crop_to_roi : bool
        flag of whether to crop polygons to roi
        (prevent overflow beyond roi edge)
    use_shapely : bool
        flag of whether to precisely determine whether an element
        belongs to an ROI using shapely polygons. Slightly slower. If
        set to False, overlapping bounding box is used as a cheap but
        less precise indicator of inclusion.
    verbose : bool
        Print progress to screen?
    monitorPrefix : str
        text to prepend to printed statements

    Returns
    -------
    Np array
        2D labeled mask, where pixel values encode class membership.
        IMPORTANT NOTE: Zero pixels have special meaning and do NOT
        encode specific ground truth class. Instead, they simply
        mean Outside ROI and should be IGNORED during model training
        or evaluation.
    Dict
        information about ROI (XMIN, YMIN, XMAX, YMAX, BBOX_WIDTH,
        BBOX_HEIGHT of the returned mask)

    Raises
    ------
    Exception
        if the ROI element is a point.

    """
    # This stores information about the ROI like bounds, slide_name, etc
    # Allows passing many parameters and good forward/backward compatibility
    if roiinfo is None:
        roiinfo = {}

    # isolate annotations that potentially overlap (belong to) mask (incl. ROI)
    overlaps = get_idxs_for_annots_overlapping_roi_by_bbox(
        element_infos, idx_for_roi=idx_for_roi, iou_thresh=iou_thresh)
    idxs_for_all_rois = _get_idxs_for_all_rois(
        GTCodes=GTCodes_df, element_infos=element_infos)
    overlaps = list(set(overlaps) - set(idxs_for_all_rois))
    elinfos_roi = element_infos.loc[[idx_for_roi] + overlaps, :]

    # Add roiinfo
    roiinfo['XMIN'] = int(np.min(elinfos_roi.xmin))
    roiinfo['YMIN'] = int(np.min(elinfos_roi.ymin))
    roiinfo['XMAX'] = int(np.max(elinfos_roi.xmax))
    roiinfo['YMAX'] = int(np.max(elinfos_roi.ymax))
    roiinfo['BBOX_WIDTH'] = roiinfo['XMAX'] - roiinfo['XMIN']
    roiinfo['BBOX_HEIGHT'] = roiinfo['YMAX'] - roiinfo['YMIN']

    # get roi polygon. Always bind the name: it is forwarded to
    # _get_and_add_element_to_roi() below even when shapely is not used
    # (presumably ignored there when use_shapely is False -- verify).
    roi_polygon = None
    if use_shapely:
        coords, _ = _get_element_mask(
            elinfo=elinfos_roi.loc[idx_for_roi],
            slide_annotations=slide_annotations)
        roi_polygon = Polygon(coords)

    # Init mask
    ROI = np.zeros(
        (roiinfo['BBOX_HEIGHT'], roiinfo['BBOX_WIDTH']), dtype=np.uint8)

    # only parse if roi is polygonal or rectangular
    if elinfos_roi.loc[idx_for_roi, 'type'] == 'point':
        msg = 'roi cannot be a point!'
        raise Exception(msg)

    # make sure ROI is overlaid first & assigned background class if relevant
    roi_group = elinfos_roi.loc[idx_for_roi, 'group']
    GTCodes_df.loc[roi_group, 'overlay_order'] = np.min(
        GTCodes_df.loc[:, 'overlay_order']) - 1
    bck_classes = GTCodes_df.loc[
        GTCodes_df.loc[:, 'is_background_class'] == 1, :]
    if bck_classes.shape[0] > 0:
        GTCodes_df.loc[
            roi_group, 'GT_code'] = bck_classes.iloc[0, :]['GT_code']

    # Add annotations in overlay order
    overlay_orders = sorted(set(GTCodes_df.loc[:, 'overlay_order']))
    N_elements = elinfos_roi.shape[0]
    elNo = 0
    for overlay_level in overlay_orders:

        # get indices of relevant groups
        relevant_groups = list(GTCodes_df.loc[
            GTCodes_df.loc[:, 'overlay_order'] == overlay_level, 'group'])
        relIdxs = []
        for group_name in relevant_groups:
            relIdxs.extend(list(elinfos_roi.loc[
                elinfos_roi.group == group_name, :].index))

        # get relevant infos and sort from largest to smallest (by bbox area)
        # so that the smaller elements are layered last. This helps partially
        # address issues described in:
        # https://github.com/DigitalSlideArchive/HistomicsTK/issues/675
        elinfos_relevant = elinfos_roi.loc[relIdxs, :].copy()
        elinfos_relevant.sort_values(
            'bbox_area', axis=0, ascending=False, inplace=True)

        # Go through elements and add to ROI mask
        for _elId, elinfo in elinfos_relevant.iterrows():

            elNo += 1
            elcountStr = '%s: Overlay level %d: Element %d of %d: %s' % (
                monitorPrefix, overlay_level, elNo, N_elements,
                elinfo['group'])
            if verbose:
                print(elcountStr)

            # now add element to ROI
            ROI = _get_and_add_element_to_roi(
                elinfo=elinfo, slide_annotations=slide_annotations, ROI=ROI,
                roiinfo=roiinfo, roi_polygon=roi_polygon,
                GT_code=GTCodes_df.loc[elinfo['group'], 'GT_code'],
                use_shapely=use_shapely, verbose=verbose,
                monitorPrefix=elcountStr)

        # save a copy of ROI-only mask to crop to it later if needed
        if crop_to_roi and (overlay_level == GTCodes_df.loc[
                roi_group, 'overlay_order']):
            roi_only_mask = ROI.copy()

    # Crop polygons to roi if needed (prevent 'overflow' beyond roi edge)
    if crop_to_roi:
        ROI[roi_only_mask == 0] = 0

    # tighten boundary --remember, so far we've use element bboxes to
    # make an over-estimated margin around ROI boundary.
    nz = np.nonzero(ROI)
    ymin, xmin = (int(np.min(arr)) for arr in nz)
    ymax, xmax = (int(np.max(arr)) for arr in nz)
    # NOTE(review): the slice end is exclusive, so the last nonzero row and
    # column are dropped. Kept as-is to preserve the returned mask shape.
    ROI = ROI[ymin:ymax, xmin:xmax]

    # update roi offset. Both edges are measured from the ORIGINAL origin so
    # that the reported bbox matches the cropped mask: column c of the old
    # mask sits at slide coordinate old XMIN + c.
    old_xmin, old_ymin = roiinfo['XMIN'], roiinfo['YMIN']
    roiinfo['XMIN'] = old_xmin + xmin
    roiinfo['YMIN'] = old_ymin + ymin
    roiinfo['XMAX'] = old_xmin + xmax
    roiinfo['YMAX'] = old_ymin + ymax
    roiinfo['BBOX_WIDTH'] = roiinfo['XMAX'] - roiinfo['XMIN']
    roiinfo['BBOX_HEIGHT'] = roiinfo['YMAX'] - roiinfo['YMIN']

    return ROI, roiinfo
def get_mask_from_slide(
        GTCodes_dict, roiinfo, slide_annotations, element_infos,
        sf=1.0, get_roi_mask_kwargs=None):
    """Parse region from the slide and get its corresponding labeled mask.

    This is a wrapper around get_roi_mask() which should be referred to for
    implementation details. A synthetic rectangular "super_roi" element
    covering the requested region is added (to local copies of the inputs)
    and used as the ROI.

    If roiinfo is None, the bounding box of all elements in element_infos
    is used so that all annotations in the slide are parsed into labeled
    image (mask) form. Otherwise, the bounding box coordinates in roiinfo
    are used.

    Parameters
    ----------
    GTCodes_dict : dict
        the ground truth codes and information dict.
        This is a dict that is indexed by the annotation group name and
        each entry is in turn a dict with the following keys:
        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
        mask. Larger values means this annotation group is overlaid
        last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask)
        Pixels of this value belong to corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
        fill value inside the ROI. For example, you may decide that
        any pixel inside the ROI is considered stroma.
        - color: color string of the group (required by the schema check)
    roiinfo : dict or None
        if not None, has keys 'XMIN', 'XMAX', 'YMIN', 'YMAX' for slide
        region coordinates (AT BASE MAGNIFICATION) to get labeled image
        (mask) for.
    slide_annotations : list
        Make sure you have used scale_slide_annotations() to scale them
        up/down by sf BEFOREHAND. Not modified by this function.
    element_infos : pandas DataFrame.
        The columns annidx and elementidx
        encode the dict index of annotation document and element,
        respectively, in the original slide_annotations list of dictionaries.
        This can be obtained by get_bboxes_from_slide_annotations() method.
        Make sure you have used scale_slide_annotations().
    sf : float
        scale factor to multiply coordinates (eg 0.5 would halve size)
    get_roi_mask_kwargs : dict
        extra kwargs for get_roi_mask()

    Returns
    -------
    Np array
        2D labeled mask, where pixel values encode class membership.
        IMPORTANT NOTE: Zero pixels have special meaning and do NOT
        encode specific ground truth class. Instead, they simply
        mean Outside mask and should be IGNORED during model training
        or evaluation.
    Dict
        information about mask (scaled back by 1 / sf)

    Raises
    ------
    AssertionError
        if GTCodes_dict does not follow the schema, any GT_code is not in
        the range 1..254, sf is not positive, or roiinfo bounds are empty.

    """
    if get_roi_mask_kwargs is None:
        get_roi_mask_kwargs = {}

    # convert from dict to required dataframe
    GTCodes = pd.DataFrame.from_dict(GTCodes_dict, orient='index')

    # some sanity checks
    assert all(j in GTCodes.columns for j in [
        'group', 'overlay_order', 'GT_code', 'is_roi',
        'is_background_class', 'color']), \
        'GTCodes_dict does not follow schema'
    assert all(GTCodes.loc[:, 'GT_code'] > 0), 'All GT_code must be > 0'
    assert sf > 0, 'sf must be positive.'

    if roiinfo is None:
        # no region requested: cover every annotation element. element_infos
        # is already expressed in scaled coordinates, so no sf is applied.
        XMIN = int(np.min(element_infos.xmin))
        YMIN = int(np.min(element_infos.ymin))
        XMAX = int(np.max(element_infos.xmax))
        YMAX = int(np.max(element_infos.ymax))
    else:
        assert (roiinfo['XMAX'] > roiinfo['XMIN'])
        assert (roiinfo['YMAX'] > roiinfo['YMIN'])
        # use given ROI bounds, after scaling
        XMIN = int(roiinfo['XMIN'] * sf)
        YMIN = int(roiinfo['YMIN'] * sf)
        XMAX = int(roiinfo['XMAX'] * sf)
        YMAX = int(roiinfo['YMAX'] * sf)
    WIDTH = XMAX - XMIN
    HEIGHT = YMAX - YMIN

    # add super ROI to a NEW annotations list. Appending to the caller's
    # list would leave one extra synthetic document behind on every call
    # (e.g. once per tile when the same list is reused).
    slide_annotations = list(slide_annotations) + [{'annotation': {
        'description': '',
        'elements': [
            {'center': [int(XMIN + WIDTH / 2), int(YMIN + HEIGHT / 2), 0],
             'width': WIDTH,
             'height': HEIGHT,
             'normal': [0, 0, 1],
             'rotation': 0,
             'group': 'super_roi',
             'label': {'value': 'super_roi'},
             'lineColor': 'rgb(0, 0, 0)',
             'fillColor': 'rgba(0, 0, 0, 0)',
             'lineWidth': 4.6,
             'type': 'rectangle'},
        ],
        'name': 'superROI'},
    }]

    # add to bounding boxes dataframe (concat returns a new frame)
    element_infos = pd.concat([element_infos, pd.DataFrame([{
        'annidx': len(slide_annotations) - 1,
        'elementidx': 0,
        'type': 'rectangle',
        'group': 'super_roi',
        'xmin': XMIN, 'xmax': XMAX,
        'ymin': YMIN, 'ymax': YMAX,
        'bbox_area': WIDTH * HEIGHT,
    }])], ignore_index=True)

    # find roi and background codes to use later
    roi_codes = list(GTCodes.loc[GTCodes.loc[:, 'is_roi'] == 1, 'GT_code'])
    bck_code = GTCodes.loc[
        GTCodes.loc[:, 'is_background_class'] == 1, 'GT_code']
    if bck_code.shape[0] > 0:
        bck_code = int(bck_code.iloc[0])
    else:
        bck_code = 0

    # add to gtcodes dataframe; 255 is reserved for the super ROI
    assert np.max(GTCodes.loc[:, 'GT_code']) < 255
    GTCodes.loc[:, 'is_roi'] = 0  # treat other ROIs as ordinary annotations
    GTCodes.loc[:, 'is_background_class'] = 0  # we'll adjust later
    GTCodes = pd.concat([GTCodes, pd.DataFrame([{
        'GT_code': 255,
        'overlay_order': 0,
        'color': 'rgb(0,0,0)',
        'group': 'super_roi',
        'is_background_class': 0,
        'is_roi': 1,
    }])], ignore_index=True)
    GTCodes.index = GTCodes.loc[:, 'group']

    # now get mask
    ROI, roiinfo = get_roi_mask(
        slide_annotations=slide_annotations, element_infos=element_infos,
        GTCodes_df=GTCodes.copy(),
        idx_for_roi=element_infos.index[-1],  # <- bounding roi
        **get_roi_mask_kwargs)
    ROI[ROI == 255] = 0

    # replace roi codes with background code
    for roi_code in roi_codes:
        ROI[ROI == roi_code] = bck_code

    # scale back coords
    roiinfo = {k: int(v / sf) for k, v in roiinfo.items()}

    return ROI, roiinfo
def _visualize_annotations_on_rgb(
        rgb, contours_list, linewidth=0.2, x_offset=0, y_offset=0,
        text=False):
    """Draw contour outlines on top of an RGB image and return the render.

    Parameters
    ----------
    rgb : np array
        image to draw on
    contours_list : list of dicts
        each entry has comma-separated 'coords_x' and 'coords_y' strings,
        a 'color' string of the form 'rgb(r,g,b)' and, when text is True,
        a 'group' name.
    linewidth : float
        width of the drawn outlines
    x_offset : int
        subtracted from all x coordinates before drawing
    y_offset : int
        subtracted from all y coordinates before drawing
    text : bool
        whether to write the first five characters of the group name
        next to each contour

    Returns
    -------
    np array
        uint8 rendering with the first three (RGB) channels only

    """
    import matplotlib.pyplot as plt

    # later on flipped by matplotlib for weird reason
    rgb = np.flipud(rgb)

    fig = plt.figure(
        figsize=(rgb.shape[1] / 1000, rgb.shape[0] / 1000), dpi=100)
    try:
        ax = plt.subplot(111)
        ax.imshow(rgb)
        plt.axis('off')
        ax = plt.gca()
        ax.set_xlim(0.0, rgb.shape[1])
        ax.set_ylim(0.0, rgb.shape[0])

        for ann in contours_list:
            xy = np.array([
                [int(j) for j in ann[k].split(',')]
                for k in ('coords_x', 'coords_y')]).T
            xy[:, 0] = xy[:, 0] - x_offset
            # y axis is inverted relative to the (flipped) image
            xy[:, 1] = rgb.shape[0] - (xy[:, 1] - y_offset) + 1
            polygon = mpPolygon(
                xy=xy,
                color=[int(j) / 255 for j in ann['color'].split(
                    'rgb(')[1][:-1].split(',')],
                closed=True, fill=False, linewidth=linewidth,
            )
            ax.add_patch(polygon)

            # add label text
            if text:
                txtshift = 0
                size = 1e-4 * rgb.shape[1]
                ax.text(
                    int(np.min(xy[:, 0])),
                    int(np.max(xy[:, 1])) - txtshift,
                    ann['group'][:5], color='w', fontsize=size,
                    backgroundcolor='none',
                )

        ax.axis('off')
        fig.subplots_adjust(bottom=0, top=1, left=0, right=1)

        buf = io.BytesIO()
        fig.savefig(buf, format='png', pad_inches=0, dpi=1000)
        buf.seek(0)
        rgb_vis = np.uint8(Image.open(buf))[..., :3]
    finally:
        # always release the figure, even if drawing or saving failed
        plt.close(fig)

    return rgb_vis


def _sanity_checks(
        MPP, MAG, mode, bounds, idx_for_roi, get_roi_mask_kwargs,
        get_rgb, get_contours, get_visualization):
    """Validate and normalize get_image_and_mask_from_slide() arguments.

    - MPP takes precedence: MAG is reset to None when both are given.
    - 'wsi' and 'min_bounding_box' modes discard bounds and idx_for_roi.
    - A given idx_for_roi forces 'polygonal_bounds' mode, otherwise given
      bounds force 'manual_bounds' mode.
    - Visualization implies contours and requires rgb.
    - crop_to_roi=False cannot be combined with rgb or visualization. A
      missing 'crop_to_roi' key is treated as True, which is the default
      of get_roi_mask().

    Returns
    -------
    tuple
        the (possibly adjusted) arguments, in the order they were passed.

    Raises
    ------
    AssertionError
        if any of the checks fails.

    """
    # MPP precedes MAG
    if all(j is not None for j in (MPP, MAG)):
        MAG = None

    # some sanity checks

    for mf in (MPP, MAG):
        if mf is not None:
            assert mf > 0, 'MPP or MAG must be positive.'

    if mode in ['wsi', 'min_bounding_box']:
        bounds = None
        idx_for_roi = None

    if idx_for_roi is not None:
        mode = 'polygonal_bounds'
    elif bounds is not None:
        mode = 'manual_bounds'

    assert mode in [
        'wsi', 'min_bounding_box', 'manual_bounds', 'polygonal_bounds'], \
        'mode %s not recognized' % mode

    if get_visualization:
        get_contours = True
        assert get_rgb, 'cannot get visualization without rgb.'

    # callers may pass an empty or partial kwargs dict; fall back to the
    # get_roi_mask() default instead of raising KeyError
    crop_to_roi = (get_roi_mask_kwargs or {}).get('crop_to_roi', True)
    if not crop_to_roi:
        assert not get_rgb, \
            'Handling overflowing annotations while also getting RGB is not currently supported.'
        assert not get_visualization, \
            'Handling overflowing annotations while also getting RGB is not currently supported.'

    return (
        MPP, MAG, mode, bounds, idx_for_roi, get_roi_mask_kwargs,
        get_rgb, get_contours, get_visualization)


def _get_roi_bounds_by_run_mode(
        gc, slide_id, mode, bounds, element_infos, idx_for_roi, sf):
    """Get the region bounds, at base magnification, for the run mode.

    element_infos coordinates are divided by sf to bring them back to
    base magnification; manually given bounds are returned unchanged after
    a non-emptiness check; any other mode uses the full slide size as
    reported by the server.

    Returns
    -------
    dict
        with keys 'XMIN', 'XMAX', 'YMIN', 'YMAX'

    """
    if mode == 'polygonal_bounds':
        # get bounds based on specified polygonal/rotated roi
        elinfo = element_infos.loc[idx_for_roi]
        bounds = {
            'XMIN': int(elinfo['xmin'] / sf),
            'XMAX': int(elinfo['xmax'] / sf),
            'YMIN': int(elinfo['ymin'] / sf),
            'YMAX': int(elinfo['ymax'] / sf),
        }

    elif mode == 'manual_bounds':
        assert (bounds['XMAX'] > bounds['XMIN'])
        assert (bounds['YMAX'] > bounds['YMIN'])

    elif mode == 'min_bounding_box':
        # get minimum box for all annotations in slide
        bounds = {
            'XMIN': int(np.min(element_infos.xmin) / sf),
            'YMIN': int(np.min(element_infos.ymin) / sf),
            'XMAX': int(np.max(element_infos.xmax) / sf),
            'YMAX': int(np.max(element_infos.ymax) / sf),
        }

    else:
        # get scaled up/down version of mask of whole slide
        slide_info = gc.get('/item/%s/tiles' % slide_id)
        bounds = {
            'XMIN': 0,
            'XMAX': slide_info['sizeX'],
            'YMIN': 0,
            'YMAX': slide_info['sizeY'],
        }

    return bounds


def _get_rgb_and_pad_roi(gc, slide_id, bounds, appendStr, ROI, tau=10):
    """Fetch the RGB region and pad/crop the mask to the same size.

    Parameters
    ----------
    gc : object
        girder client used for the tiles/region request
    slide_id : str
        girder id for item (slide)
    bounds : dict
        has keys 'XMIN', 'XMAX', 'YMIN', 'YMAX' of the region to fetch
    appendStr : str
        appended verbatim to the region request string
    ROI : np array
        mask to be matched to the fetched image size
    tau : int
        maximum tolerated absolute size difference (pixels) per axis

    Returns
    -------
    np array
        fetched rgb image
    np array
        mask, zero-padded or cropped at the bottom/right to match rgb

    Raises
    ------
    AssertionError
        if image and mask sizes differ by tau or more along any axis.

    """
    getStr = \
        '/item/%s/tiles/region?left=%d&right=%d&top=%d&bottom=%d&encoding=PNG' \
        % (slide_id,
           bounds['XMIN'], bounds['XMAX'],
           bounds['YMIN'], bounds['YMAX'])
    getStr += appendStr
    resp = gc.get(getStr, jsonResp=False)
    rgb = get_image_from_htk_response(resp)

    # sometimes there's a couple of pixel difference d.t. rounding, so pad
    pad_y = rgb.shape[0] - ROI.shape[0]
    pad_x = rgb.shape[1] - ROI.shape[1]
    assert all(np.abs(j) < tau for j in (pad_y, pad_x)), \
        'too much difference in size between image and mask. Something is wrong!'

    if pad_y > 0:
        ROI = np.pad(ROI, pad_width=((0, pad_y), (0, 0)), mode='constant')
    elif pad_y < 0:
        ROI = ROI[:pad_y, :]

    if pad_x > 0:
        ROI = np.pad(ROI, pad_width=((0, 0), (0, pad_x)), mode='constant')
    elif pad_x < 0:
        ROI = ROI[:, :pad_x]

    return rgb, ROI
def get_image_and_mask_from_slide(
        gc, slide_id, GTCodes_dict,
        MPP=5.0, MAG=None, mode='min_bounding_box',
        bounds=None, idx_for_roi=None,
        slide_annotations=None, element_infos=None,
        get_roi_mask_kwargs=None, get_contours_kwargs=None, linewidth=0.2,
        get_rgb=True, get_contours=True, get_visualization=True, tau=10):
    """Fetch a slide region together with its labeled mask.

    Thin orchestration layer over get_roi_mask() and get_mask_from_slide();
    refer to those for the details of how the mask is built.

    Parameters
    ----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)
    slide_id : str
        girder id for item (slide)
    GTCodes_dict : dict
        ground truth codes, keyed by annotation group name. Each value is
        a dict with the keys:
        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, groups with larger values are painted later
        and therefore overwrite whatever they overlap.
        - GT_code: int, pixel value assigned to this group in the mask
        - is_roi: flag, does this group encode an ROI?
        - is_background_class: flag, is this group the default fill
        value inside the ROI (eg. "everything else is stroma")?
    MPP : float or None
        Microns-per-pixel. Preferred over MAG since it is better defined;
        an MPP of 0.25 often roughly translates to 40x. Wins over MAG
        when both are given.
    MAG : float or None
        Magnification as reported in the slide. When neither MPP nor MAG
        is given, nothing is rescaled (base/scan magnification).
    mode : str
        Which part of the slide to use, one of
        - wsi: scaled up/down version of mask of the whole slide
        - min_bounding_box: minimum box around all annotations in slide
        - manual_bounds: the region given by the 'bounds' param
        - polygonal_bounds: the region of the element at idx_for_roi
    bounds : dict or None
        keys 'XMIN', 'XMAX', 'YMIN', 'YMAX' of the slide region
        (AT BASE MAGNIFICATION). Goes with the 'manual_bounds' mode.
    idx_for_roi : int
        index of ROI within the element_infos dataframe. Goes with the
        'polygonal_bounds' mode.
    slide_annotations : list or None
        Pass to avoid re-fetching the annotations. They must ALREADY have
        been scaled by sf with scale_slide_annotations().
    element_infos : pandas DataFrame.
        Bounding box table whose annidx and elementidx columns index into
        slide_annotations, as produced by
        get_bboxes_from_slide_annotations() on the scaled annotations.
        Required whenever slide_annotations is given.
    get_roi_mask_kwargs : dict
        extra kwargs for get_roi_mask()
    get_contours_kwargs : dict
        extra kwargs for get_contours_from_mask()
    linewidth : float
        visualization line width
    get_rgb: bool
        get rgb image?
    get_contours : bool
        get annotation contours? (relative to final mask)
    get_visualization : bool
        get overlaid annotation bounds over RGB for visualization
    tau : int
        largest tolerated size difference (pixels) between the fetched
        image and the mask. Smaller differences are evened out by
        padding/cropping the mask; larger ones raise an error.

    Returns
    -------
    dict
        Results dict containing one or more of the following keys
        bounds: dict of bounds at scan magnification
        ROI - (mxn) labeled image (mask)
        rgb - (mxnx3 np array) corresponding rgb image
        contours - list, each entry is a dict version of a row from the
        output of masks_to_annotations_handler.get_contours_from_mask()
        visualization - (mxnx3 np array) visualization overlay

    """
    def _fresh_codes_df():
        # a new frame every time: get_roi_mask() modifies what it is given
        return pd.DataFrame.from_dict(GTCodes_dict, orient='index')

    if not get_roi_mask_kwargs:
        get_roi_mask_kwargs = {}
    if not get_contours_kwargs:
        get_contours_kwargs = {}

    # important sanity checks
    checked = _sanity_checks(
        MPP, MAG, mode, bounds, idx_for_roi,
        get_roi_mask_kwargs, get_rgb, get_contours, get_visualization)
    (MPP, MAG, mode, bounds, idx_for_roi,
     get_roi_mask_kwargs, get_rgb, get_contours, get_visualization) = checked

    # calculate the scale factor
    scale, region_suffix = get_scale_factor_and_appendStr(
        gc=gc, slide_id=slide_id, MPP=MPP, MAG=MAG)

    if slide_annotations is None:
        # fetch annotations, scale them and derive the bounding box table
        fetched = gc.get('/annotation/item/' + slide_id)
        slide_annotations = scale_slide_annotations(fetched, sf=scale)
        element_infos = get_bboxes_from_slide_annotations(slide_annotations)
    else:
        assert element_infos is not None, 'must also provide element_infos'

    # The region must be expressed in BASE MAGNIFICATION coordinates before
    # it is handed over to get_mask_from_slide()
    bounds = _get_roi_bounds_by_run_mode(
        gc=gc, slide_id=slide_id, mode=mode, bounds=bounds,
        element_infos=element_infos, idx_for_roi=idx_for_roi, sf=scale)
    result = {'bounds': bounds}

    # get mask for specified area
    if mode != 'polygonal_bounds':
        mask, _ = get_mask_from_slide(
            GTCodes_dict=GTCodes_dict, roiinfo=copy.deepcopy(bounds),
            slide_annotations=slide_annotations,
            element_infos=element_infos, sf=scale,
            get_roi_mask_kwargs=get_roi_mask_kwargs)
    else:
        mask, _ = get_roi_mask(
            slide_annotations=slide_annotations,
            element_infos=element_infos,
            GTCodes_df=_fresh_codes_df(),
            idx_for_roi=idx_for_roi, **get_roi_mask_kwargs)

    # get RGB (this may pad or crop the mask by a few pixels)
    if get_rgb:
        rgb, mask = _get_rgb_and_pad_roi(
            gc=gc, slide_id=slide_id, bounds=bounds,
            appendStr=region_suffix, ROI=mask, tau=tau)
        result['rgb'] = rgb

    # stored only now so that any padding above is reflected
    result['ROI'] = mask

    # get contours
    if get_contours:
        contours_df = get_contours_from_mask(
            MASK=mask, GTCodes_df=_fresh_codes_df(), **get_contours_kwargs)
        contours_list = contours_df.to_dict(orient='records')
        result['contours'] = contours_list

    # get visualization of annotations on RGB
    if get_visualization:
        result['visualization'] = _visualize_annotations_on_rgb(
            rgb=rgb, contours_list=contours_list, linewidth=linewidth)

    return result
def _roi_getter_asis(
        gc, slide_id, GTCodes_dict, slide_annotations, element_infos,
        get_kwargs, monitor='', verbose=False):
    """Download special ROI regions as-is, even if they are very large.

    Generator. For every element whose group is flagged as an ROI (as
    determined by _get_idxs_for_all_rois()), yields the output of
    get_image_and_mask_from_slide() in 'polygonal_bounds' mode. Failures
    are best-effort: a warning is issued and None is yielded instead.
    """
    from pandas import DataFrame

    # get idx of all 'special' roi annotations
    GTCodes_df = DataFrame.from_dict(GTCodes_dict, orient='index')
    idxs_for_all_rois = _get_idxs_for_all_rois(
        GTCodes=GTCodes_df, element_infos=element_infos)

    # go through rois and download as-is
    for roino, idx_for_roi in enumerate(idxs_for_all_rois):

        roistr = '%s: roi %d of %d' % (
            monitor, roino + 1, len(idxs_for_all_rois))
        if verbose:
            print(roistr)

        try:
            roi_out = get_image_and_mask_from_slide(
                gc=gc, slide_id=slide_id, GTCodes_dict=GTCodes_dict,
                mode='polygonal_bounds', idx_for_roi=idx_for_roi,
                slide_annotations=slide_annotations,
                element_infos=element_infos, **get_kwargs)
        except Exception as e:
            problem = '\n   '
            problem += e.__repr__()
            problem += '\n'
            warn(problem)  # noqa B028
            roi_out = None

        yield roi_out


def _roi_getter_tiled(
        gc, slide_id, GTCodes_dict, slide_annotations, element_infos,
        sf, max_roiside, get_kwargs, monitor='', verbose=False):
    """Download special ROI regions in a tiled fashion.

    Generator. Every element of group 'roi' is split into non-overlapping
    tiles of at most max_roiside (in scaled coordinates) per side and the
    output of get_image_and_mask_from_slide() in 'manual_bounds' mode is
    yielded for each tile. Tile bounds are divided by sf before being
    passed on. Failures are best-effort: a warning is issued and None is
    yielded instead. get_kwargs is not modified.
    """
    # isolate rois
    # NOTE(review): the group name is hard-coded here, whereas
    # _roi_getter_asis() relies on the is_roi flag of GTCodes_dict.
    rois = element_infos.loc[element_infos.loc[:, 'group'] == 'roi', :].copy()
    n_rois = rois.shape[0]

    # split ROIs into max_roiside tiled regions. The running number comes
    # from enumerate, not from the dataframe index label, so that progress
    # messages really count from 1 to n_rois.
    for roino, (_roidx, roi) in enumerate(rois.iterrows(), start=1):

        # bounds for tiled sub-rois
        xbounds = list(np.arange(roi['xmin'], roi['xmax'], max_roiside))
        xbounds.append(roi['xmax'])
        ybounds = list(np.arange(roi['ymin'], roi['ymax'], max_roiside))
        ybounds.append(roi['ymax'])

        roistr = f'{monitor}: roi {roino} of {n_rois}'
        if verbose:
            print(roistr)

        subroidx = 0
        nsubrois = (len(xbounds) - 1) * (len(ybounds) - 1)

        # go through tiled sub-rois
        for xmin, xmax in zip(xbounds[:-1], xbounds[1:]):
            for ymin, ymax in zip(ybounds[:-1], ybounds[1:]):

                subroidx += 1
                subroistr = f'{roistr}: sub-roi {subroidx} of {nsubrois}'
                if verbose:
                    print(subroistr)

                # per-tile copy so the caller's kwargs dict stays untouched
                tile_kwargs = dict(get_kwargs)
                tile_kwargs['bounds'] = {
                    'XMIN': xmin / sf, 'XMAX': xmax / sf,
                    'YMIN': ymin / sf, 'YMAX': ymax / sf,
                }

                # get specified region
                try:
                    roi_out = get_image_and_mask_from_slide(
                        gc=gc, slide_id=slide_id, GTCodes_dict=GTCodes_dict,
                        mode='manual_bounds',
                        slide_annotations=slide_annotations,
                        element_infos=element_infos, **tile_kwargs)
                except Exception as e:
                    problem = '\n'
                    problem += e.__repr__()
                    problem += '\n'
                    warn(problem)  # noqa B028
                    roi_out = None

                yield roi_out
def get_all_rois_from_slide(  # noqa: C901
        gc, slide_id, GTCodes_dict, save_directories,
        get_image_and_mask_from_slide_kwargs=None,
        max_roiside=None,
        slide_name=None, verbose=True, monitorPrefix=''):
    """Parse annotations and saves ground truth masks for ALL ROIs.

    Get all ROIs in a single slide. This mainly uses
    get_image_and_mask_from_slide(), which should be referred to
    for implementation details.

    Parameters
    ----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)
    slide_id : str
        girder id for item (slide)
    GTCodes_dict : dict
        the ground truth codes and information dict.
        This is a dict that is indexed by the annotation group name and
        each entry is in turn a dict with the following keys:
        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
        mask. Larger values means this annotation group is overlaid
        last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask)
        Pixels of this value belong to corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
        fill value inside the ROI. For example, you may decide that
        any pixel inside the ROI is considered stroma.
    save_directories : dict
        paths to directories to save data. Each entry is a string, and the
        following keys are allowed
        - ROI: path to save masks (labeled images)
        - rgb: path to save rgb images
        - contours: path to save annotation contours
        - visualization: path to save rgb visualization overlays
    get_image_and_mask_from_slide_kwargs : dict
        kwargs to pass to get_image_and_mask_from_slide()
        default values are assigned if specific parameters are not given.
        The dict itself is not modified. A partially specified
        'get_roi_mask_kwargs' entry is completed with the defaults.
    max_roiside : int or None
        If int, this is the maximum allowed side for a downloaded region. If
        a region-of-interest is larger than this size, then it is tiled into
        non-overlapping regions whose maximal side is max_roiside. If None,
        the ROI is downloaded as-is, even if it was extremely large. If you
        know your slides have very large ROI annotations, the safer option is
        to set a max_roiside. A good value may be 5000-8000 pixels.
    slide_name : str or None
        If not given, it's inferred using a server request using girder
        client (name of the first file of the item, without extension).
    verbose : bool
        Print progress to screen?
    monitorPrefix : str
        text to prepend to printed statements

    Returns
    -------
    list of dicts
        each entry contains the following keys
        - ROI: path to saved mask (labeled image)
        - rgb: path to saved rgb image
        - contours: path to saved annotation contours
        - visualization: path to saved rgb visualization overlay

    Raises
    ------
    Exception
        if any GT_code is not strictly positive.

    """
    from pandas import DataFrame

    # assign defaults if nothing given
    default_keyvalues = {
        'MPP': 5.0, 'MAG': None,
        'get_roi_mask_kwargs': {
            'iou_thresh': 0.0, 'crop_to_roi': True, 'use_shapely': True,
            'verbose': False},
        'get_contours_kwargs': {
            'groups_to_get': None,
            'roi_group': 'roi',
            'get_roi_contour': True,
            'discard_nonenclosed_background': True,
            'background_group': 'mostly_stroma',
            'MIN_SIZE': 10, 'MAX_SIZE': None,
            'verbose': False, 'monitorPrefix': '',
        },
        'get_rgb': True,
        'get_contours': True,
        'get_visualization': True,
    }

    # work on a copy so the caller's dict is never modified
    kvp = dict(get_image_and_mask_from_slide_kwargs or {})
    for k, v in default_keyvalues.items():
        kvp.setdefault(k, v)

    # complete partially given get_roi_mask_kwargs. These defaults mirror
    # the get_roi_mask() signature, so this does not change behavior, but
    # downstream checks expect keys such as 'crop_to_roi' to be present.
    kvp['get_roi_mask_kwargs'] = {
        **default_keyvalues['get_roi_mask_kwargs'],
        **(kvp['get_roi_mask_kwargs'] or {}),
    }

    # convert to df and sanity check
    GTCodes_df = DataFrame.from_dict(GTCodes_dict, orient='index')
    if any(GTCodes_df.loc[:, 'GT_code'] <= 0):
        msg = 'All GT_code must be > 0'
        raise Exception(msg)

    # if not given, assign name of first file associated with girder item
    if slide_name is None:
        resp = gc.get('/item/%s/files' % slide_id)
        # splitext leaves names without an extension intact, whereas
        # slicing at rfind('.') == -1 would chop off the last character
        slide_name = os.path.splitext(resp[0]['name'])[0]

    # get annotations for slide
    slide_annotations = gc.get('/annotation/item/' + slide_id)

    # scale up/down annotations by a factor
    sf, _ = get_scale_factor_and_appendStr(
        gc=gc, slide_id=slide_id, MPP=kvp['MPP'], MAG=kvp['MAG'])
    slide_annotations = scale_slide_annotations(slide_annotations, sf=sf)

    # get bounding box information for all annotations
    element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # define roi_getter, which yields one roi at a time
    if max_roiside is None:
        roig = _roi_getter_asis(
            gc=gc, slide_id=slide_id, GTCodes_dict=GTCodes_dict,
            slide_annotations=slide_annotations,
            element_infos=element_infos,
            get_kwargs=kvp, monitor=monitorPrefix, verbose=verbose,
        )
    else:
        roig = _roi_getter_tiled(
            gc=gc, slide_id=slide_id, GTCodes_dict=GTCodes_dict,
            slide_annotations=slide_annotations,
            element_infos=element_infos, sf=sf,
            max_roiside=max_roiside,
            get_kwargs=kvp, monitor=monitorPrefix, verbose=verbose,
        )

    savenames = []

    for roi_out in roig:

        # if something went wrong, just move on
        if roi_out is None:
            continue

        # now save roi (mask, rgb, contours, vis)

        this_roi_savenames = {}
        ROINAMESTR = '%s_left-%d_top-%d_bottom-%d_right-%d' % (
            slide_name,
            roi_out['bounds']['XMIN'], roi_out['bounds']['YMIN'],
            roi_out['bounds']['YMAX'], roi_out['bounds']['XMAX'])

        for imtype in ['ROI', 'rgb', 'visualization']:
            if imtype in roi_out:
                savename = os.path.join(
                    save_directories[imtype], ROINAMESTR + '.png')
                if verbose:
                    print('   Saving %s\n' % savename)
                imwrite(im=roi_out[imtype], uri=savename)
                this_roi_savenames[imtype] = savename

        if 'contours' in roi_out:
            savename = os.path.join(
                save_directories['contours'], ROINAMESTR + '.csv')
            if verbose:
                print('   Saving %s\n' % savename)
            contours_df = DataFrame(roi_out['contours'])
            contours_df.to_csv(savename)
            this_roi_savenames['contours'] = savename

        savenames.append(this_roi_savenames)

    return savenames