Source code for histomicstk.annotations_and_masks.annotation_and_mask_utils

"""
Created on Sun Aug 11 22:30:06 2019.

@author: tageldim

"""
import copy
import warnings
from io import BytesIO

import numpy as np
from PIL import Image, ImageDraw
from shapely.geometry.polygon import Polygon

Image.MAX_IMAGE_PIXELS = None


[docs] def get_image_from_htk_response(resp): """Given a histomicsTK girder response, get np array image. Parameters ---------- resp : object response from server request Returns ------- Pillow Image object a pillow Image object of the image """ image_content = BytesIO(resp.content) image_content.seek(0) image = Image.open(image_content) image = image.convert('RGB') return np.uint8(image)
[docs] def scale_slide_annotations(slide_annotations, sf): """Scales up or down annotations in a slide. Works in place, but returns slide_annotations anyways. Parameters ---------- slide_annotations : list of dicts response from server request sf : float scale factor to multiply coordinates Returns ------- list of dicts """ if sf == 1.0: return slide_annotations for _annidx, ann in enumerate(slide_annotations): for element in ann['annotation']['elements']: for key in element.keys(): if key in ['height', 'width']: element[key] = int(element[key] * sf) elif key in ['center', 'points']: element[key] = (np.array(element[key]) * sf).astype(int).tolist() return slide_annotations
[docs] def get_scale_factor_and_appendStr(gc, slide_id, MPP=None, MAG=None): """Get how much is request region smaller than base. This also gets the string to append to server request for getting rgb. Parameters ---------- gc : Girder client instance gc should be authoenticated. slide_id : str girder id of slide MPP : float or None Microns-per-pixel -- best use this as it's more well-defined than magnification which is more scanner/manufacturer specific. MPP of 0.25 often roughly translates to 40x MAG : float or None If you prefer to use whatever magnification is reported in slide. If neither MPP or MAG is provided, everything is retrieved without scaling at base (scan) magnification. Returns ------- float how much smaller (0.1 means 10x smaller) is requested image compared to scan magnification (slide coordinates) str string to appnd to server request for getting slide region """ slide_info = gc.get('/item/%s/tiles' % slide_id) if (MPP is not None) and (slide_info['mm_x'] is not None): mm = 0.001 * MPP sf = slide_info['mm_x'] / mm appendStr = '&mm_x=%.8f&mm_y=%.8f' % (mm, mm) elif (MAG is not None) and (slide_info['magnification'] is not None): sf = MAG / slide_info['magnification'] appendStr = '&magnification=%.8f' % MAG else: warnings.warn( 'NO SLIDE MAGNIFICATION FOUND; BASE MAGNIFICATION USED!', RuntimeWarning, stacklevel=1) sf = 1.0 appendStr = '' return sf, appendStr
[docs] def rotate_point_list(point_list, rotation, center=(0, 0)): """Rotate a certain point list around a central point. Modified from. javascript version at: https://github.com/girder/large_image/blob/master/ ... web_client/annotations/rotate.js. Parameters ---------- point_list : list of tuples (x,y) coordinates rotation : float degrees (in radians) center : tuple of ints central point coordinates Returns ------- list of tuples """ point_list_rotated = [] for point in point_list: cos = np.cos(rotation) sin = np.sin(rotation) x = point[0] - center[0] y = point[1] - center[1] point_list_rotated.append(( int(x * cos - y * sin + center[0]), int(x * sin + y * cos + center[1]))) return point_list_rotated
[docs] def get_rotated_rectangular_coords( roi_center, roi_width, roi_height, roi_rotation=0): """Given data on rectangular ROI center/width/height/rotation. Get the unrotated abounding box coordinates around rotated ROI. This of course is applicable to any rotated rectangular annotation. Parameters ---------- roi_center : tuple or list (x, y) format roi_width : float roi_height : float roi_rotation : float Returns ------- dict includes roi corners (x, y) and bounds """ # Get false bounds x_min = roi_center[0] - int(roi_width / 2) x_max = roi_center[0] + int(roi_width / 2) y_min = roi_center[1] - int(roi_height / 2) y_max = roi_center[1] + int(roi_height / 2) # Get pseudo-corners of rectangle (before rotation) roi_corners_false = [ (x_min, y_min), (x_max, y_min), (x_max, y_max), (x_min, y_max)] # Get *true* coordinates of rectangle corners roi_corners = rotate_point_list(roi_corners_false, rotation=roi_rotation, center=roi_center) roi_corners = np.array(roi_corners, dtype=np.int32) # pack into dict roi_info = { 'roi_corners': roi_corners, 'x_min': roi_corners[:, 0].min(), 'x_max': roi_corners[:, 0].max(), 'y_min': roi_corners[:, 1].min(), 'y_max': roi_corners[:, 1].max(), } return roi_info
[docs] def get_bboxes_from_slide_annotations(slide_annotations): """Given a slide annotation list, gets information on bounding boxes. Parameters ---------- slide_annotations : list of dicts response from server request Returns ------- Pandas DataFrame The columns annidx and elementidx encode the dict index of annotation document and element, respectively, in the original slide_annotations list of dictionaries """ from pandas import DataFrame element_infos = DataFrame(columns=[ 'annidx', 'elementidx', 'type', 'group', 'xmin', 'xmax', 'ymin', 'ymax']) for annidx, ann in enumerate(slide_annotations): for elementidx, element in enumerate(ann['annotation']['elements']): elno = element_infos.shape[0] element_infos.loc[elno, 'annidx'] = annidx element_infos.loc[elno, 'elementidx'] = elementidx element_infos.loc[elno, 'type'] = element['type'] # get bounds if element['type'] == 'polyline': coords = np.array(element['points'])[:, :-1] xmin, ymin = (int(j) for j in np.min(coords, axis=0)) xmax, ymax = (int(j) for j in np.max(coords, axis=0)) elif element['type'] == 'rectangle': roiinfo = get_rotated_rectangular_coords( roi_center=element['center'], roi_width=element['width'], roi_height=element['height'], roi_rotation=element['rotation']) xmin, ymin = roiinfo['x_min'], roiinfo['y_min'] xmax, ymax = roiinfo['x_max'], roiinfo['y_max'] else: continue # add group or infer from label if 'group' in element.keys(): element_infos.loc[elno, 'group'] = element['group'] elif 'label' in element.keys(): element_infos.loc[elno, 'group'] = element['label']['value'] element_infos.loc[elno, 'xmin'] = xmin element_infos.loc[elno, 'xmax'] = xmax element_infos.loc[elno, 'ymin'] = ymin element_infos.loc[elno, 'ymax'] = ymax element_infos.loc[elno, 'bbox_area'] = int( (ymax - ymin) * (xmax - xmin)) return element_infos
def _get_coords_from_element(element): # get bounds if element['type'] == 'polyline': coords = np.int32(element['points'])[:, :-1] elif element['type'] == 'rectangle': roiinfo = get_rotated_rectangular_coords( roi_center=element['center'], roi_width=element['width'], roi_height=element['height'], roi_rotation=element['rotation']) coords = roiinfo['roi_corners'] # for rotated rectangles if element['rotation'] != 0: element['type'] = 'polyline' elif element['type'] == 'point': xmin = xmax = int(element['center'][0]) ymin = ymax = int(element['center'][1]) coords = np.array( [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax), (xmin, ymin)], dtype='int32') else: msg = 'Unsupported element type:' raise Exception(msg, element['type']) return coords def _maybe_crop_polygon(vertices, bounds_polygon): """Crop bounds to desired area using shapely polygons.""" all_vertices = [] # First, we get the polygon or polygons which result from # intersection. Keep in mind that a particular annotation may # "ecit then enter" the cropping ROI, so it may be split into # two or more polygons by the splitting process. That's which # this method's input is one set of vertices, but after cropping # it may return one or more sets of vertices try: elpoly = Polygon(vertices).buffer(0) polygon = bounds_polygon.intersection(elpoly) if polygon.geom_type in ('Polygon', 'LineString'): polygons_to_add = [polygon] else: polygons_to_add = [ p for p in polygon if p.geom_type == 'Polygon'] except Exception as e: # if weird shapely errors --> # ignore this polygon collection altogether print(e.__repr__()) return all_vertices # Second, once we have the shapely polygons, assuming no errors, # we parse into usable coordinates that we can add to the table for poly in polygons_to_add: try: if polygon.area > 2: all_vertices.append(np.array( poly.exterior.xy, dtype=np.int32).T) except Exception as e: # weird shapely errors --> ignore this polygon print(e.__repr__()) return all_vertices def _parse_coords_to_str(vertices): return ( ','.join(str(j) for j in vertices[:, 0]), ','.join(str(j) for j in vertices[:, 1])) def _add_element_to_final_df(vertices, cfg): """Add a single element to the final dataframe. Note that we wrap this into a method so that when (if) we split an annotation element into multiple polygons, we can add these separately. """ elno = cfg.element_infos.shape[0] # Add element information to element dataframe cfg.element_infos.loc[elno, 'annidx'] = cfg.annidx cfg.element_infos.loc[elno, 'annotation_girder_id'] = cfg.ann['_id'] cfg.element_infos.loc[elno, 'elementidx'] = cfg.elementidx cfg.element_infos.loc[elno, 'element_girder_id'] = cfg.element['id'] cfg.element_infos.loc[elno, 'color'] = str(cfg.element['lineColor']) # Now we can add offset to ensure coordinates are relative to the # cropping bounds (i.e. they would correspond to an RGB image # of the same region and could be used to create a mask or # to encode object boundaries etc if (cfg.cropping_bounds is not None) or ( cfg.cropping_polygon_vertices is not None): vertices[:, 0] = vertices[:, 0] - cfg.x_shift vertices[:, 1] = vertices[:, 1] - cfg.y_shift # get bounds for this polygon. Remember, these may have been # changed after it was cropped using shapely xmin, ymin = np.min(vertices, axis=0) xmax, ymax = np.max(vertices, axis=0) # parse to string for inclusion in pd dataframe x_coords, y_coords = _parse_coords_to_str(vertices) # add group or infer from label if 'group' in cfg.element.keys(): cfg.element_infos.loc[elno, 'group'] = str(cfg.element['group']) elif 'label' in cfg.element.keys(): cfg.element_infos.loc[elno, 'group'] = str( cfg.element['label']['value']) # add label or infer from group if 'label' in cfg.element.keys(): cfg.element_infos.loc[elno, 'label'] = str( cfg.element['label']['value']) elif 'group' in cfg.element.keys(): cfg.element_infos.loc[elno, 'label'] = cfg.element_infos.loc[ elno, 'group'] cfg.element_infos.loc[elno, 'type'] = str(cfg.element['type']) cfg.element_infos.loc[elno, 'xmin'] = int(xmin) cfg.element_infos.loc[elno, 'xmax'] = int(xmax) cfg.element_infos.loc[elno, 'ymin'] = int(ymin) cfg.element_infos.loc[elno, 'ymax'] = int(ymax) cfg.element_infos.loc[elno, 'bbox_area'] = int( (ymax - ymin) * (xmax - xmin)) cfg.element_infos.loc[elno, 'coords_x'] = x_coords cfg.element_infos.loc[elno, 'coords_y'] = y_coords
[docs] def parse_slide_annotations_into_tables( slide_annotations, cropping_bounds=None, cropping_polygon_vertices=None, use_shapely=False): """Given a slide annotation list, parse into convenient tabular format. If the annotation is a point, then it is just treated as if it is a rectangle with zero area (i.e. xmin=xmax). Rotated rectangles are treated as polygons for simplicity. Parameters ---------- slide_annotations : list of dicts response from server request cropping_bounds : dict or None if given, must have keys XMIN, XMAX, YMIN, YMAX. These are the bounds to which the polygons may be cropped using shapely, if the param use_shapely is True. Otherwise, the polygon coordinates are just shifted relative to these bounds without actually cropping. cropping_polygon_vertices : nd array or None if given, is an (m, 2) nd array of vertices to crop bounds. if the param use_shapely is True. Otherwise, the polygon coordinates are just shifted relative to these bounds without actually cropping. use_shapely : bool see cropping_bounds description. Returns ------- Pandas DataFrame Summary of key properties of the annotation documents. It has the following columns: - annotation_girder_id - _modelType - _version - itemId - created - creatorId - public - updated - updatedId - groups - element_count - element_details Pandas DataFrame The individual annotation elements (polygons, points, rectangles). The columns annidx and elementidx encode the dict index of annotation document and element, respectively, in the original slide_annotations list of dictionaries. It has the following columns: - annidx - annotation_girder_id - elementidx - element_girder_id - type - group - label - color - xmin - xmax - ymin - ymax - bbox_area - coords_x - coords_y """ from pandas import DataFrame # we use this object to pass params to split method into sub-methods # and avoid annoying linting ("method too complex") issue class Cfg: def __init__(self): pass cfg = Cfg() cfg.cropping_bounds = cropping_bounds cfg.cropping_polygon_vertices = cropping_polygon_vertices cfg.use_shapely = use_shapely cfg.annotation_infos = DataFrame(columns=[ 'annotation_girder_id', '_modelType', '_version', 'itemId', 'created', 'creatorId', 'public', 'updated', 'updatedId', 'groups', 'element_count', 'element_details', ]) cfg.element_infos = DataFrame(columns=[ 'annidx', 'annotation_girder_id', 'elementidx', 'element_girder_id', 'type', 'group', 'label', 'color', 'xmin', 'xmax', 'ymin', 'ymax', 'bbox_area', 'coords_x', 'coords_y', ]) if cfg.cropping_bounds is not None: assert cfg.cropping_polygon_vertices is None, \ 'either give cropping bounds or vertices, not both' xmin, xmax, ymin, ymax = ( cfg.cropping_bounds['XMIN'], cfg.cropping_bounds['XMAX'], cfg.cropping_bounds['YMIN'], cfg.cropping_bounds['YMAX']) bounds_polygon = Polygon(np.array([ (xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax), (xmin, ymin), ], dtype='int32')) cfg.x_shift = xmin cfg.y_shift = ymin elif cfg.cropping_polygon_vertices is not None: bounds_polygon = Polygon(np.int32(cfg.cropping_polygon_vertices)) cfg.x_shift, cfg.y_shift = np.min(cfg.cropping_polygon_vertices, 0) # go through annotation elements and add as needed for cfg.annidx, cfg.ann in enumerate(slide_annotations): annno = cfg.annotation_infos.shape[0] # Add annotation document info to annotations dataframe cfg.annotation_infos.loc[annno, 'annotation_girder_id'] = cfg.ann['_id'] for key in [ '_modelType', '_version', 'itemId', 'created', 'creatorId', 'public', 'updated', 'updatedId']: if key in cfg.ann: cfg.annotation_infos.loc[annno, key] = cfg.ann[key] if 'groups' in cfg.ann: cfg.annotation_infos.loc[annno, 'groups'] = str(cfg.ann['groups']) if '_elementQuery' in cfg.ann: cfg.annotation_infos.loc[annno, 'element_count'] = cfg.ann[ '_elementQuery']['count'] cfg.annotation_infos.loc[annno, 'element_details'] = cfg.ann[ '_elementQuery']['details'] for idx, element in enumerate(cfg.ann['annotation']['elements']): cfg.elementidx = idx cfg.element = element coords = _get_coords_from_element(copy.deepcopy(element)) # crop using shapely to desired bounds if needed # IMPORTANT: everything till this point needs to be # relative to the whole slide image if ((cfg.cropping_bounds is None) and (cfg.cropping_polygon_vertices is None)) \ or (element['type'] == 'point') \ or (not use_shapely): all_coords = [coords] else: all_coords = _maybe_crop_polygon(coords, bounds_polygon) # now add polygons one by one for vertices in all_coords: _add_element_to_final_df(vertices, cfg=cfg) return cfg.annotation_infos, cfg.element_infos
[docs] def np_vec_no_jit_iou(bboxes1, bboxes2): """Fast, vectorized IoU. Source: https://medium.com/@venuktan/vectorized-intersection-over-union ... -iou-in-numpy-and-tensor-flow-4fa16231b63d Parameters ---------- bboxes1 : np array columns encode bounding box corners xmin, ymin, xmax, ymax bboxes2 : np array same as bboxes 1 Returns ------- np array IoU values for each pair from bboxes1 & bboxes2 """ x11, y11, x12, y12 = np.split(bboxes1, 4, axis=1) x21, y21, x22, y22 = np.split(bboxes2, 4, axis=1) xA = np.maximum(x11, np.transpose(x21)) yA = np.maximum(y11, np.transpose(y21)) xB = np.minimum(x12, np.transpose(x22)) yB = np.minimum(y12, np.transpose(y22)) interArea = np.maximum((xB - xA + 1), 0) * np.maximum((yB - yA + 1), 0) boxAArea = (x12 - x11 + 1) * (y12 - y11 + 1) boxBArea = (x22 - x21 + 1) * (y22 - y21 + 1) iou = interArea / (boxAArea + np.transpose(boxBArea) - interArea) return iou
def _get_idxs_for_all_rois(GTCodes, element_infos): """Get indices of ROIs within the element_infos dataframe. (Internal) """ roi_labels = list(GTCodes.loc[GTCodes.loc[:, 'is_roi'] == 1, 'group']) idxs_for_all_rois = [] for idx, elinfo in element_infos.iterrows(): if elinfo.group in roi_labels: idxs_for_all_rois.append(idx) return idxs_for_all_rois
[docs] def get_idxs_for_annots_overlapping_roi_by_bbox( element_infos, idx_for_roi, iou_thresh=0.0): """Find indices of **potentially** included annotations within the ROI. We say "potentially" because this uses the IoU of roi and annotation as a fast indicator of potential inclusion. This helps dramatically scale down the number of annotations to look through. Later on, a detailed look at whether the annotation polygons actually overlap the ROI can be done. Parameters ---------- element_infos : pandas DataFrame result from running get_bboxes_from_slide_annotations() idx_for_roi : int index for roi annotation within the element_infos DF iou_thresh : float overlap threshold to be considered within ROI Returns ------- list indices relative to element_infos """ bboxes = np.array( element_infos.loc[:, ['xmin', 'ymin', 'xmax', 'ymax']], dtype='float') iou = np_vec_no_jit_iou(bboxes[idx_for_roi, :][None, ...], bboxes2=bboxes) iou = np.concatenate((np.arange(iou.shape[1])[None, ...], iou)) iou = iou[:, iou[1, :] > iou_thresh].astype(int) overlaps = set(iou[0, :].tolist()) - {idx_for_roi} return list(overlaps)
[docs] def create_mask_from_coords(coords): """Create a binary mask from given vertices coordinates. Source: This is modified from code by Juan Carlos from David Gutman Lab. Parameters ---------- coords : np array must be in the form (e.g. ([x1,y1],[x2,y2],[x3,y3],.....,[xn,yn])), where xn and yn corresponds to the nth vertex coordinate. Returns ------- np array binary mask """ polygon = coords.copy() # use the smallest bounding region, calculated from vertices min_x, min_y = np.min(polygon, axis=0) max_x, max_y = np.max(polygon, axis=0) # get the new width and height width = int(max_x - min_x) height = int(max_y - min_y) # shift all vertices to account for location of the smallest bounding box polygon[:, 0] = polygon[:, 0] - min_x polygon[:, 1] = polygon[:, 1] - min_y # convert to tuple form for masking function (nd array does not work) vertices_tuple = tuple(map(tuple, polygon)) # make the mask img = Image.new('L', (width, height), 0) ImageDraw.Draw(img).polygon(vertices_tuple, outline=1, fill=1) return np.array(img, dtype='int32')
def _get_element_mask(elinfo, slide_annotations): """Get coordinates and mask for annotation element. (Internal) """ element = slide_annotations[int(elinfo['annidx'])][ 'annotation']['elements'][int(elinfo['elementidx'])] if elinfo['type'] == 'polyline': coords = np.array(element['points'])[:, :-1] elif elinfo['type'] == 'rectangle': infoDict = get_rotated_rectangular_coords( roi_center=element['center'], roi_width=element['width'], roi_height=element['height'], roi_rotation=element['rotation']) coords = infoDict['roi_corners'] else: msg = 'cannot create mask from point annotation!' raise Exception(msg) mask = create_mask_from_coords(coords) return coords, mask def _add_element_to_roi(elinfo, ROI, GT_code, element_mask, roiinfo): """Add single polygon to ROI given mask (Internal).""" ymin = int(elinfo.ymin - roiinfo['YMIN']) ymax = int(elinfo.ymax - roiinfo['YMIN']) xmin = int(elinfo.xmin - roiinfo['XMIN']) xmax = int(elinfo.xmax - roiinfo['XMIN']) patch = ROI[ymin:ymax, xmin:xmax] # to account for non-integer pixels if element_mask.shape != patch.shape: element_mask = np.pad(element_mask, pad_width=( (patch.shape[0] - element_mask.shape[0], 0), (patch.shape[1] - element_mask.shape[1], 0)), mode='constant') # add to ROI mask patch[element_mask > 0] = GT_code ROI[ymin:ymax, xmin:xmax] = patch.copy() element = { 'mask': element_mask, 'xmin': xmin, 'xmax': xmax, 'ymin': ymin, 'ymax': ymax, } return ROI, element def _get_and_add_element_to_roi( elinfo, slide_annotations, ROI, roiinfo, roi_polygon, GT_code, use_shapely=True, verbose=True, monitorPrefix=''): """Get element coords and mask and add to ROI (Internal).""" try: coords, element_mask = _get_element_mask( elinfo=elinfo, slide_annotations=slide_annotations) ADD_TO_ROI = True # ignore if outside ROI (precise) if use_shapely: el_polygon = Polygon(coords) if el_polygon.distance(roi_polygon) > 2: if verbose: print('%s: OUTSIDE ROI.' % monitorPrefix) ADD_TO_ROI = False # Add element to ROI mask if ADD_TO_ROI: ROI, _ = _add_element_to_roi( elinfo=elinfo, ROI=ROI, GT_code=GT_code, element_mask=element_mask, roiinfo=roiinfo) except Exception as e: if verbose: print('%s: ERROR! (see below)' % monitorPrefix) print(e) return ROI
[docs] def delete_annotations_in_slide(gc, slide_id): """Delete all annotations in a slide.""" existing_annotations = gc.get('/annotation/item/' + slide_id) for ann in existing_annotations: gc.delete('/annotation/%s' % ann['_id'])
def _simple_add_element_to_roi( elinfo, ROI, roiinfo, GT_code, element=None, verbose=True, monitorPrefix=''): """Get element coords and mask and add to ROI (Internal).""" def _process_coords(k): return np.array([int(j) for j in elinfo[k].split(',')])[..., None] try: if element is None: coords = np.concatenate([ _process_coords(k) for k in ('coords_x', 'coords_y')], 1) element_mask = create_mask_from_coords(coords) else: element_mask = element['mask'] # Add element to ROI mask ROI, element = _add_element_to_roi( elinfo=elinfo, ROI=ROI, GT_code=GT_code, element_mask=element_mask, roiinfo=roiinfo) except Exception as e: if verbose: print('%s: ERROR! (see below)' % monitorPrefix) print(e) return ROI, element