Source code for common.vision.transforms.keypoint_detection
"""
@author: Junguang Jiang
@contact: [email protected]
"""
# TODO needs better documentation
import numpy as np
from PIL import ImageFilter, Image
import torchvision.transforms.functional as F
import torchvision.transforms.transforms as T
import numbers
import random
import math
import warnings
from typing import Type
def wrapper(transform: Type):
""" Wrap a transform for classification to a transform for keypoint detection.
Note that the keypoint detection label will keep the same before and after wrapper.
Args:
transform (class, callable): transform for classification
Returns:
transform for keypoint detection
"""
class WrapperTransform(transform):
def __call__(self, image, **kwargs):
image = super().__call__(image)
return image, kwargs
return WrapperTransform
ToTensor = wrapper(T.ToTensor)
Normalize = wrapper(T.Normalize)
ColorJitter = wrapper(T.ColorJitter)
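# Usage sketch (illustrative, with hypothetical PIL image ``img`` and keypoint array ``kp``):
# a wrapped transform only changes the image and returns the keyword labels as a dict, unchanged.
#
#   image, labels = ToTensor()(img, keypoint2d=kp)
#   # image is a torch.Tensor, labels == {'keypoint2d': kp}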
def resize(image: Image.Image, size: int, interpolation=Image.BILINEAR,
           keypoint2d: np.ndarray=None, intrinsic_matrix: np.ndarray=None):
    """Resize a square PIL Image to ``size`` x ``size`` and rescale the 2D keypoints
    and the camera intrinsic matrix by the same factor."""
    width, height = image.size
    assert width == height
    factor = float(size) / float(width)
    image = F.resize(image, size, interpolation)
    keypoint2d = np.copy(keypoint2d)
    keypoint2d *= factor
    intrinsic_matrix = np.copy(intrinsic_matrix)
    # scale the focal lengths (fx, fy) and the principal point (cx, cy)
    intrinsic_matrix[0][0] *= factor
    intrinsic_matrix[0][2] *= factor
    intrinsic_matrix[1][1] *= factor
    intrinsic_matrix[1][2] *= factor
return image, keypoint2d, intrinsic_matrix
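# Worked example (illustrative, not from the library): resizing a 256x256 image with size=128
# gives factor=0.5, so a keypoint at (200, 100) maps to (100, 50), and the focal lengths
# fx, fy and principal point cx, cy in ``intrinsic_matrix`` are halved as well.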
def crop(image: Image.Image, top, left, height, width, keypoint2d: np.ndarray):
    """Crop the given PIL Image and translate the 2D keypoints into the crop's coordinate frame."""
    image = F.crop(image, top, left, height, width)
keypoint2d = np.copy(keypoint2d)
keypoint2d[:, 0] -= left
keypoint2d[:, 1] -= top
return image, keypoint2d
def resized_crop(img, top, left, height, width, size, interpolation=Image.BILINEAR,
                 keypoint2d: np.ndarray=None, intrinsic_matrix: np.ndarray=None):
    """Crop the given PIL Image and resize it to the desired size.
    Notably used in :class:`~torchvision.transforms.RandomResizedCrop`.
    Args:
        img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image.
        top (int): Vertical component of the top left corner of the crop box.
        left (int): Horizontal component of the top left corner of the crop box.
        height (int): Height of the crop box.
        width (int): Width of the crop box.
        size (int): Desired output size. Same semantics as ``resize``.
        interpolation (int, optional): Desired interpolation. Default is
            ``PIL.Image.BILINEAR``.
        keypoint2d (numpy.ndarray): 2D keypoints, adjusted to follow the crop and resize.
        intrinsic_matrix (numpy.ndarray): camera intrinsic matrix, rescaled with the image.
    Returns:
        tuple: (cropped and resized image, transformed keypoint2d, transformed intrinsic_matrix)
    """
assert isinstance(img, Image.Image), 'img should be PIL Image'
img, keypoint2d = crop(img, top, left, height, width, keypoint2d)
img, keypoint2d, intrinsic_matrix = resize(img, size, interpolation, keypoint2d, intrinsic_matrix)
return img, keypoint2d, intrinsic_matrix
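# Worked example (hypothetical values): cropping a 100x100 box with top=20, left=30 and
# resizing it to size=64 moves a keypoint at (50, 40) first to (50-30, 40-20) = (20, 20)
# after the crop, then to (20*0.64, 20*0.64) = (12.8, 12.8) after the resize; the intrinsic
# matrix is rescaled by the same factor 64/100.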
def center_crop(image, output_size, keypoint2d: np.ndarray):
    """Crop the given PIL Image at the center and shift the keypoints accordingly.
    Args:
        image (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image.
        output_size (sequence): (height, width) of the crop box.
        keypoint2d (numpy.ndarray): 2D keypoints, translated into the crop's coordinate frame.
    Returns:
        tuple: (cropped image, transformed keypoint2d)
    """
width, height = image.size
crop_height, crop_width = output_size
crop_top = int(round((height - crop_height) / 2.))
crop_left = int(round((width - crop_width) / 2.))
return crop(image, crop_top, crop_left, crop_height, crop_width, keypoint2d)
def hflip(image: Image.Image, keypoint2d: np.ndarray):
    """Horizontally flip the image and mirror the keypoint x-coordinates."""
    width, height = image.size
image = F.hflip(image)
keypoint2d = np.copy(keypoint2d)
keypoint2d[:, 0] = width - 1. - keypoint2d[:, 0]
return image, keypoint2d
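# Worked example (illustrative): for a 256-pixel-wide image, a keypoint at x=10 is mirrored
# to x = 256 - 1 - 10 = 245; the y-coordinate is unchanged.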
def rotate(image: Image.Image, angle, keypoint2d: np.ndarray):
    image = F.rotate(image, angle)
    # Image coordinates have the y-axis pointing down, so rotating the image by ``angle``
    # (counter-clockwise on screen) corresponds to applying the standard rotation matrix
    # with ``-angle`` to the keypoints.
    angle = -np.deg2rad(angle)
    keypoint2d = np.copy(keypoint2d)
    rotation_matrix = np.array([
        [np.cos(angle), -np.sin(angle)],
        [np.sin(angle), np.cos(angle)]
    ])
    width, height = image.size
    # rotate the keypoints about the image center
    keypoint2d[:, 0] = keypoint2d[:, 0] - width / 2
    keypoint2d[:, 1] = keypoint2d[:, 1] - height / 2
    keypoint2d = np.matmul(rotation_matrix, keypoint2d.T).T
    keypoint2d[:, 0] = keypoint2d[:, 0] + width / 2
    keypoint2d[:, 1] = keypoint2d[:, 1] + height / 2
return image, keypoint2d
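# Worked example (illustrative): rotating a 200x200 image by angle=90 (counter-clockwise on
# screen) maps a keypoint at (150, 100), i.e. (50, 0) relative to the center, to (100, 50):
# with angle = -pi/2 the matrix sends (50, 0) to (0, -50), which is added back to the center.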
def resize_pad(img, keypoint2d, size, interpolation=Image.BILINEAR):
    """Resize ``img``, keeping its aspect ratio, so that the longer side equals ``size``,
    then zero-pad the shorter side to obtain a ``size`` x ``size`` image. Keypoints are
    rescaled and shifted by the padding offsets accordingly."""
    w, h = img.size
if w < h:
oh = size
ow = int(size * w / h)
img = img.resize((ow, oh), interpolation)
pad_top = pad_bottom = 0
pad_left = math.floor((size - ow) / 2)
pad_right = math.ceil((size - ow) / 2)
keypoint2d = keypoint2d * oh / h
keypoint2d[:, 0] += (size - ow) / 2
else:
ow = size
oh = int(size * h / w)
img = img.resize((ow, oh), interpolation)
pad_top = math.floor((size - oh) / 2)
pad_bottom = math.ceil((size - oh) / 2)
pad_left = pad_right = 0
keypoint2d = keypoint2d * ow / w
keypoint2d[:, 1] += (size - oh) / 2
keypoint2d[:, 0] += (size - ow) / 2
img = np.asarray(img)
img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)), 'constant', constant_values=0)
return Image.fromarray(img), keypoint2d
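# Worked example (hypothetical values): a 100x200 (width x height) image with size=256 is
# resized to 128x256 and padded with 64 zero columns on each side; a keypoint at (50, 100)
# is first scaled to (64, 128) and then shifted to (64 + 64, 128) = (128, 128).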
class Compose(object):
"""Composes several transforms together.
Args:
transforms (list of ``Transform`` objects): list of transforms to compose.
"""
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, image, **kwargs):
for t in self.transforms:
image, kwargs = t(image, **kwargs)
return image, kwargs
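# Usage sketch (illustrative, with hypothetical inputs ``img`` and ``kp``): ``Compose``
# threads the image through each transform and merges the returned keyword labels.
#
#   transform = Compose([ToTensor(),
#                        Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))])
#   image, labels = transform(img, keypoint2d=kp)   # labels == {'keypoint2d': kp}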
class GaussianBlur(object):
    """Blur the image with a Gaussian filter whose radius is sampled uniformly from ``[low, high]``."""
    def __init__(self, low=0, high=0.8):
        self.low = low
        self.high = high
    def __call__(self, image: Image.Image, **kwargs):
radius = np.random.uniform(low=self.low, high=self.high)
image = image.filter(ImageFilter.GaussianBlur(radius))
return image, kwargs
class Resize(object):
    """Resize the input PIL Image (assumed square) to the given size, rescaling
    ``keypoint2d`` and ``intrinsic_matrix`` by the same factor.
    """
def __init__(self, size, interpolation=Image.BILINEAR):
assert isinstance(size, int)
self.size = size
self.interpolation = interpolation
def __call__(self, image, keypoint2d: np.ndarray, intrinsic_matrix: np.ndarray, **kwargs):
image, keypoint2d, intrinsic_matrix = resize(image, self.size, self.interpolation, keypoint2d, intrinsic_matrix)
kwargs.update(keypoint2d=keypoint2d, intrinsic_matrix=intrinsic_matrix)
if 'depth' in kwargs:
kwargs['depth'] = F.resize(kwargs['depth'], self.size)
return image, kwargs
class ResizePad(object):
    """Resize the input PIL Image, keeping its aspect ratio, so that the longer side
    equals ``size``, then zero-pad the shorter side to obtain a ``size`` x ``size`` image.
    """
def __init__(self, size, interpolation=Image.BILINEAR):
self.size = size
self.interpolation = interpolation
def __call__(self, img, keypoint2d, **kwargs):
image, keypoint2d = resize_pad(img, keypoint2d, self.size, self.interpolation)
kwargs.update(keypoint2d=keypoint2d)
return image, kwargs
class CenterCrop(object):
"""Crops the given PIL Image at the center.
"""
def __init__(self, size):
if isinstance(size, numbers.Number):
self.size = (int(size), int(size))
else:
self.size = size
def __call__(self, image, keypoint2d, **kwargs):
"""
Args:
img (PIL Image): Image to be cropped.
Returns:
PIL Image: Cropped image.
"""
image, keypoint2d = center_crop(image, self.size, keypoint2d)
kwargs.update(keypoint2d=keypoint2d)
if 'depth' in kwargs:
kwargs['depth'] = F.center_crop(kwargs['depth'], self.size)
return image, kwargs
class RandomRotation(object):
"""Rotate the image by angle.
Args:
degrees (sequence or float or int): Range of degrees to select from.
If degrees is a number instead of sequence like (min, max), the range of degrees
will be (-degrees, +degrees).
"""
def __init__(self, degrees):
if isinstance(degrees, numbers.Number):
if degrees < 0:
raise ValueError("If degrees is a single number, it must be positive.")
self.degrees = (-degrees, degrees)
else:
if len(degrees) != 2:
raise ValueError("If degrees is a sequence, it must be of len 2.")
self.degrees = degrees
    @staticmethod
def get_params(degrees):
"""Get parameters for ``rotate`` for a random rotation.
Returns:
sequence: params to be passed to ``rotate`` for random rotation.
"""
angle = random.uniform(degrees[0], degrees[1])
return angle
def __call__(self, image, keypoint2d, **kwargs):
"""
Args:
img (PIL Image): Image to be rotated.
Returns:
PIL Image: Rotated image.
"""
angle = self.get_params(self.degrees)
image, keypoint2d = rotate(image, angle, keypoint2d)
kwargs.update(keypoint2d=keypoint2d)
if 'depth' in kwargs:
kwargs['depth'] = F.rotate(kwargs['depth'], angle)
return image, kwargs
class RandomResizedCrop(object):
    """Crop the given PIL Image to a random size and resize it to the given size.
    A square crop (aspect ratio fixed to 1) whose area is a random fraction
    (default: 0.6 to 1.3) of the original area is made. This crop is finally
    resized to the given size.
    Args:
        size: expected output size of each edge
        scale: range of the crop area relative to the original image area
        interpolation: Default: PIL.Image.BILINEAR
    """
def __init__(self, size, scale=(0.6, 1.3), interpolation=Image.BILINEAR):
self.size = size
if scale[0] > scale[1]:
warnings.warn("range should be of kind (min, max)")
self.interpolation = interpolation
self.scale = scale
    @staticmethod
def get_params(img, scale):
"""Get parameters for ``crop`` for a random sized crop.
Args:
img (PIL Image): Image to be cropped.
scale (tuple): range of size of the origin size cropped
Returns:
tuple: params (i, j, h, w) to be passed to ``crop`` for a random
sized crop.
"""
width, height = img.size
area = height * width
for attempt in range(10):
target_area = random.uniform(*scale) * area
aspect_ratio = 1
w = int(round(math.sqrt(target_area * aspect_ratio)))
h = int(round(math.sqrt(target_area / aspect_ratio)))
if 0 < w <= width and 0 < h <= height:
i = random.randint(0, height - h)
j = random.randint(0, width - w)
return i, j, h, w
# Fallback to whole image
return 0, 0, height, width
def __call__(self, image, keypoint2d: np.ndarray, intrinsic_matrix: np.ndarray, **kwargs):
"""
Args:
img (PIL Image): Image to be cropped and resized.
Returns:
PIL Image: Randomly cropped and resized image.
"""
i, j, h, w = self.get_params(image, self.scale)
image, keypoint2d, intrinsic_matrix = resized_crop(image, i, j, h, w, self.size, self.interpolation, keypoint2d, intrinsic_matrix)
kwargs.update(keypoint2d=keypoint2d, intrinsic_matrix=intrinsic_matrix)
if 'depth' in kwargs:
            kwargs['depth'] = F.resized_crop(kwargs['depth'], i, j, h, w, self.size, self.interpolation)
return image, kwargs
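# Worked example (hypothetical values): ``get_params`` returns a square box because the aspect
# ratio is fixed to 1. For a 512x512 image and scale=(0.6, 1.3), an attempt drawing a relative
# area of 0.64 yields w = h = round(sqrt(0.64 * 512 * 512)) = 410 at a random location; attempts
# whose box would exceed the image are rejected, falling back to the full image after 10 tries.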
class RandomApply(T.RandomTransforms):
"""Apply randomly a list of transformations with a given probability.
Args:
transforms (list or tuple or torch.nn.Module): list of transformations
p (float): probability
"""
def __init__(self, transforms, p=0.5):
super(RandomApply, self).__init__(transforms)
self.p = p
def __call__(self, image, **kwargs):
if self.p < random.random():
return image, kwargs
for t in self.transforms:
image, kwargs = t(image, **kwargs)
return image, kwargs
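# End-to-end usage sketch (not from the library docs; inputs ``img``, ``kp`` and the 3x3
# intrinsic matrix ``K`` are hypothetical): a possible training pipeline combining the
# transforms above. Geometric transforms come first (on PIL images), then the wrapped
# tensor transforms.
#
#   train_transform = Compose([
#       RandomResizedCrop(size=256, scale=(0.6, 1.3)),
#       RandomApply([GaussianBlur(high=1.0)], p=0.5),
#       ToTensor(),
#       Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
#   ])
#   image, labels = train_transform(img, keypoint2d=kp, intrinsic_matrix=K)
#   # labels['keypoint2d'] and labels['intrinsic_matrix'] stay consistent with ``image``.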