Source code for menpofit.dlib.fitter

from __future__ import division
from functools import partial
import warnings
import dlib
from pathlib import Path
import numpy as np

from menpo.feature import no_op
from menpo.base import name_of_callable

from menpofit import checks
from menpofit.visualize import print_progress
from menpofit.fitter import (
    noisy_shape_from_bounding_box,
    MultiScaleNonParametricFitter,
    generate_perturbations_from_gt,
)
from menpofit.builder import (
    scale_images,
    rescale_images_to_reference_shape,
    compute_reference_shape,
)
from menpofit.result import Result

from .algorithm import DlibAlgorithm


class DlibERT(MultiScaleNonParametricFitter):
    r"""
    Class for training a multi-scale Ensemble of Regression Trees model. This
    class uses the implementation provided by the official DLib package
    (http://dlib.net/) and makes it multi-scale.

    Parameters
    ----------
    images : `list` of `menpo.image.Image`
        The `list` of training images.
    group : `str` or ``None``, optional
        The landmark group that corresponds to the ground truth shape of each
        image. If ``None`` and the images only have a single landmark group,
        then that is the one that will be used. Note that all the training
        images need to have the specified landmark group.
    bounding_box_group_glob : `glob` or ``None``, optional
        Glob that defines the bounding boxes to be used for training. If
        ``None``, then the bounding boxes of the ground truth shapes are
        used.
    reference_shape : `menpo.shape.PointCloud` or ``None``, optional
        The reference shape that will be used for normalising the size of the
        training images. The normalisation is performed by rescaling all the
        training images so that the scale of their ground truth shapes
        matches the scale of the reference shape. Note that the reference
        shape is rescaled with respect to the `diagonal` before performing
        the normalisation. If ``None``, then the mean shape will be used.
    diagonal : `int` or ``None``, optional
        This parameter is used to rescale the reference shape so that the
        diagonal of its bounding box matches the provided value. In other
        words, this parameter controls the size of the model at the highest
        scale. If ``None``, then the reference shape does not get rescaled.
    scales : `float` or `tuple` of `float`, optional
        The scale value of each scale. They must be provided in ascending
        order, i.e. from lowest to highest scale. If `float`, then a single
        scale is assumed.
    n_perturbations : `int` or ``None``, optional
        The number of perturbations to be generated from each of the bounding
        boxes using `perturb_from_gt_bounding_box`. Note that the total
        number of perturbations is `n_perturbations * n_dlib_perturbations`.
    perturb_from_gt_bounding_box : `function`, optional
        The function that will be used to generate the perturbations.
    n_dlib_perturbations : `int` or ``None`` or `list` of those, optional
        The number of perturbations to be generated by DLib itself. DLib
        calls this the "oversampling amount". If `list`, it must specify a
        value per scale. Note that the total number of perturbations is
        `n_perturbations * n_dlib_perturbations`.
    n_iterations : `int` or `list` of `int`, optional
        The number of iterations (cascades) of each level. If `list`, it must
        specify a value per scale. If `int`, then it defines the total number
        of iterations (cascades) over all scales.
    feature_padding : `float` or `list` of `float`, optional
        When we randomly sample the pixels for the feature pool, we do so in
        a box fit around the provided training landmarks. By default, this
        box is the tightest box that contains the landmarks. However, you can
        expand or shrink the size of the pixel sampling region by setting a
        different value of padding. To explain this precisely, for a padding
        of 0 we say that the pixels are sampled from a box of size 1x1. The
        padding value is added to each side of the box. So a padding of 0.5
        would cause the algorithm to sample pixels from a box that was 2x2,
        effectively multiplying the area pixels are sampled from by 4.
        Similarly, setting the padding to -0.2 would cause it to sample from
        a box 0.6x0.6 in size. If `list`, it must specify a value per scale.
    n_pixel_pairs : `int` or `list` of `int`, optional
        `P` parameter from [1]. At each level of the cascade we randomly
        sample pixels from the image. These pixels are used to generate
        features for the random trees, so in general larger settings of this
        parameter give better accuracy but make the algorithm run slower. If
        `list`, it must specify a value per scale.
    distance_prior_weighting : `float` or `list` of `float`, optional
        To decide how to split nodes in the regression trees, the algorithm
        looks at pairs of pixels in the image. These pixel pairs are sampled
        randomly but with a preference for selecting pixels that are near
        each other. This parameter controls this "nearness" preference. In
        particular, smaller values will make the algorithm prefer to select
        pixels close together and larger values will make it care less about
        picking nearby pixel pairs. Note that this is the inverse of how it
        is defined in [1]. For this object, you should think of
        `distance_prior_weighting` as "the fraction of the bounding box we
        will traverse to find a neighbouring pixel". Nominally, this is
        normalised between 0 and 1, so reasonable settings are values in the
        range (0, 1). If `list`, it must specify a value per scale.
    regularisation_weight : `float` or `list` of `float`, optional
        Boosting regularisation parameter - `nu` from [1]. Larger values may
        cause overfitting but improve performance on the training data. If
        `list`, it must specify a value per scale.
    n_split_tests : `int` or `list` of `int`, optional
        When generating the random trees, we randomly sample `n_split_tests`
        possible split features at each node and pick the one that gives the
        best split. Larger values of this parameter will usually give more
        accurate outputs but take longer to train. It is the equivalent of
        `S` from [1]. If `list`, it must specify a value per scale.
    n_trees : `int` or `list` of `int`, optional
        Number of trees created for each cascade, i.e. `K` from [1]. The
        total number of trees in the learned model is equal to `n_trees`
        multiplied by the number of cascades. If `list`, it must specify a
        value per scale.
    n_tree_levels : `int` or `list` of `int`, optional
        The number of levels in the tree (depth of tree). In particular,
        there are `pow(2, n_tree_levels)` leaves in each tree. Equivalent to
        `F` from [1]. If `list`, it must specify a value per scale.
    verbose : `bool`, optional
        If ``True``, then the progress of building the ERT will be printed.

    References
    ----------
    .. [1] V. Kazemi, and J. Sullivan. "One millisecond face alignment with
        an ensemble of regression trees." Proceedings of the IEEE Conference
        on Computer Vision and Pattern Recognition. 2014.
    """

    def __init__(
        self,
        images,
        group=None,
        bounding_box_group_glob=None,
        reference_shape=None,
        diagonal=None,
        scales=(0.5, 1.0),
        n_perturbations=30,
        n_dlib_perturbations=1,
        perturb_from_gt_bounding_box=noisy_shape_from_bounding_box,
        n_iterations=10,
        feature_padding=0,
        n_pixel_pairs=400,
        distance_prior_weighting=0.1,
        regularisation_weight=0.1,
        n_split_tests=20,
        n_trees=500,
        n_tree_levels=5,
        verbose=False,
    ):
        checks.check_diagonal(diagonal)
        scales = checks.check_scales(scales)
        n_scales = len(scales)

        # Dummy option that is required by _prepare_image of MultiFitter.
        holistic_features = checks.check_callable(no_op, n_scales)

        # Call superclass
        super(DlibERT, self).__init__(
            scales=scales,
            reference_shape=reference_shape,
            holistic_features=holistic_features,
            algorithms=[],
        )

        # Set parameters
        self.diagonal = diagonal
        self.n_perturbations = n_perturbations
        self.n_iterations = checks.check_max_iters(n_iterations, n_scales)
        self._perturb_from_gt_bounding_box = perturb_from_gt_bounding_box

        # DLib options
        self._setup_dlib_options(
            feature_padding,
            n_pixel_pairs,
            distance_prior_weighting,
            regularisation_weight,
            n_split_tests,
            n_trees,
            n_dlib_perturbations,
            n_tree_levels,
        )

        # Set-up algorithms
        for j in range(self.n_scales):
            self.algorithms.append(
                DlibAlgorithm(
                    self._dlib_options_templates[j],
                    n_iterations=self.n_iterations[j],
                )
            )

        # Train DLib over multiple scales
        self._train(
            images,
            group=group,
            bounding_box_group_glob=bounding_box_group_glob,
            verbose=verbose,
        )

    def _setup_dlib_options(
        self,
        feature_padding,
        n_pixel_pairs,
        distance_prior_weighting,
        regularisation_weight,
        n_split_tests,
        n_trees,
        n_dlib_perturbations,
        n_tree_levels,
    ):
        check_int = partial(checks.check_multi_scale_param, self.n_scales, (int,))
        check_float = partial(checks.check_multi_scale_param, self.n_scales, (float,))
        feature_padding = check_int("feature_padding", feature_padding)
        n_pixel_pairs = check_int("n_pixel_pairs", n_pixel_pairs)
        distance_prior_weighting = check_float(
            "distance_prior_weighting", distance_prior_weighting
        )
        regularisation_weight = check_float(
            "regularisation_weight", regularisation_weight
        )
        n_split_tests = check_int("n_split_tests", n_split_tests)
        n_trees = check_int("n_trees", n_trees)
        n_dlib_perturbations = check_int("n_dlib_perturbations", n_dlib_perturbations)
        n_tree_levels = check_int("n_tree_levels", n_tree_levels)
        self._dlib_options_templates = []
        for j in range(self.n_scales):
            new_opts = dlib.shape_predictor_training_options()

            # Size of region within which to sample features for the feature
            # pool, e.g. a padding of 0.5 would cause the algorithm to sample
            # pixels from a box that was 2x2 pixels
            new_opts.feature_pool_region_padding = feature_padding[j]
            # P parameter from Kazemi paper
            new_opts.feature_pool_size = n_pixel_pairs[j]
            # Controls how tight the feature sampling should be. Lower values
            # enforce closer features. The opposite of the explanation in the
            # Kazemi paper, lambda
            new_opts.lambda_param = distance_prior_weighting[j]
            # Boosting regularisation parameter - nu from Kazemi paper.
            # Larger values may cause overfitting but improve performance on
            # the training data
            new_opts.nu = regularisation_weight[j]
            # S from Kazemi paper - number of split features sampled at each
            # node. The one that gives the best split is chosen.
            new_opts.num_test_splits = n_split_tests[j]
            # K from Kazemi paper - number of weak regressors
            new_opts.num_trees_per_cascade_level = n_trees[j]
            # R from Kazemi paper - number of times other shapes are sampled
            # as example initialisations
            new_opts.oversampling_amount = n_dlib_perturbations[j]
            # F from Kazemi paper - number of levels in the tree (depth of
            # tree)
            new_opts.tree_depth = n_tree_levels[j]

            self._dlib_options_templates.append(new_opts)

    def _train(
        self, original_images, group=None, bounding_box_group_glob=None, verbose=False
    ):
        # Dlib does not support incremental builds, so we must be passed a
        # list
        if not isinstance(original_images, list):
            original_images = list(original_images)

        # We use temporary landmark groups - so we need the group key to not
        # be None
        if group is None:
            group = original_images[0].landmarks.group_labels[0]

        # Temporarily store all the bounding boxes for rescaling
        for i in original_images:
            i.landmarks["__gt_bb"] = i.landmarks[group].bounding_box()

        if self.reference_shape is None:
            # If no reference shape was given, use the mean of the first batch
            self._reference_shape = compute_reference_shape(
                [i.landmarks["__gt_bb"] for i in original_images],
                self.diagonal,
                verbose=verbose,
            )

        # Rescale images wrt the scale factor between the existing
        # reference_shape and their ground truth (group) bboxes
        images = rescale_images_to_reference_shape(
            original_images, "__gt_bb", self.reference_shape, verbose=verbose
        )

        # Scaling is done - remove temporary gt bounding boxes
        for i, i2 in zip(original_images, images):
            del i.landmarks["__gt_bb"]
            del i2.landmarks["__gt_bb"]

        # Create a callable that generates perturbations of the bounding
        # boxes of the provided images.
        generated_bb_func = generate_perturbations_from_gt(
            images,
            self.n_perturbations,
            self._perturb_from_gt_bounding_box,
            gt_group=group,
            bb_group_glob=bounding_box_group_glob,
            verbose=verbose,
        )

        # For each scale (low --> high)
        for j in range(self.n_scales):
            # Print progress if asked
            if verbose:
                if len(self.scales) > 1:
                    scale_prefix = "  - Scale {}: ".format(j)
                else:
                    scale_prefix = "  - "
            else:
                scale_prefix = None

            # Rescale images according to the scales. Note that scale_images
            # is smart enough not to rescale the images if the current scale
            # factor equals 1.
            scaled_images, scale_transforms = scale_images(
                images,
                self.scales[j],
                prefix=scale_prefix,
                return_transforms=True,
                verbose=verbose,
            )

            # Get bbox estimations of the current scale. At the first scale,
            # this is done by using generated_bb_func. At the rest of the
            # scales, the current bboxes are attached to the scaled_images
            # with key '__ert_current_bbox_{}'.
            current_bounding_boxes = []
            if j == 0:
                # At the first scale, the current bboxes are created by
                # calling generated_bb_func.
                current_bounding_boxes = [
                    generated_bb_func(im) for im in scaled_images
                ]
            else:
                # At the rest of the scales, extract the current bboxes that
                # were attached to the images
                msg = "{}Extracting bbox estimations from previous scale.".format(
                    scale_prefix
                )
                wrap = partial(
                    print_progress, prefix=msg, end_with_newline=False, verbose=verbose
                )
                for ii in wrap(scaled_images):
                    c_bboxes = []
                    for k in range(self.n_perturbations):
                        c_key = "__ert_current_bbox_{}".format(k)
                        c_bboxes.append(ii.landmarks[c_key])
                    current_bounding_boxes.append(c_bboxes)

            # Extract scaled ground truth shapes for the current scale
            scaled_gt_shapes = [i.landmarks[group] for i in scaled_images]

            # Train the Dlib model. This returns the bbox estimations for the
            # next scale.
            current_bounding_boxes = self.algorithms[j].train(
                scaled_images,
                scaled_gt_shapes,
                current_bounding_boxes,
                prefix=scale_prefix,
                verbose=verbose,
            )

            # Scale the current bbox estimations for the next level. This
            # doesn't have to be done for the last scale. The only thing we
            # need to do at the last scale is to remove any attached
            # landmarks from the training images.
            if j < (self.n_scales - 1):
                for jj, image_bboxes in enumerate(current_bounding_boxes):
                    for k, bbox in enumerate(image_bboxes):
                        c_key = "__ert_current_bbox_{}".format(k)
                        images[jj].landmarks[c_key] = scale_transforms[jj].apply(bbox)
    def fit_from_shape(self, image, initial_shape, gt_shape=None):
        r"""
        Fits the model to an image. Note that it is not possible to
        initialise the fitting process from a shape. Thus, this method raises
        a warning and calls `fit_from_bb` with the bounding box of the
        provided `initial_shape`.

        Parameters
        ----------
        image : `menpo.image.Image` or subclass
            The image to be fitted.
        initial_shape : `menpo.shape.PointCloud`
            The initial shape estimate from which the fitting procedure will
            start. Note that the shape won't actually be used, only its
            bounding box.
        gt_shape : `menpo.shape.PointCloud`, optional
            The ground truth shape associated with the image.

        Returns
        -------
        fitting_result : :map:`MultiScaleNonParametricIterativeResult`
            The result of the fitting procedure.
        """
        warnings.warn(
            "Fitting from an initial shape is not supported by "
            "Dlib - therefore we are falling back to the tightest "
            "bounding box from the given initial_shape"
        )
        tightest_bb = initial_shape.bounding_box()
        return self.fit_from_bb(image, tightest_bb, gt_shape=gt_shape)
    def fit_from_bb(self, image, bounding_box, gt_shape=None):
        r"""
        Fits the model to an image given an initial bounding box.

        Parameters
        ----------
        image : `menpo.image.Image` or subclass
            The image to be fitted.
        bounding_box : `menpo.shape.PointDirectedGraph`
            The initial bounding box from which the fitting procedure will
            start.
        gt_shape : `menpo.shape.PointCloud`, optional
            The ground truth shape associated with the image.

        Returns
        -------
        fitting_result : :map:`MultiScaleNonParametricIterativeResult`
            The result of the fitting procedure.
        """
        # Generate the list of images to be fitted, as well as the correctly
        # scaled initial and ground truth shapes per level. The function also
        # returns the lists of affine and scale transforms per level that are
        # required in order to transform the shapes at the original image
        # space in the fitting result. The affine transforms refer to the
        # transform introduced by the rescaling to the reference shape, as
        # well as a potential affine transform from the features. The scale
        # transforms are the Scale objects that correspond to each level's
        # scale.
        (
            images,
            bounding_boxes,
            gt_shapes,
            affine_transforms,
            scale_transforms,
        ) = self._prepare_image(image, bounding_box, gt_shape=gt_shape)

        # Execute multi-scale fitting
        algorithm_results = self._fit(
            images=images,
            initial_shape=bounding_boxes[0],
            affine_transforms=affine_transforms,
            scale_transforms=scale_transforms,
            return_costs=False,
            gt_shapes=gt_shapes,
        )

        # Return multi-scale fitting result
        return self._fitter_result(
            image=image,
            algorithm_results=algorithm_results,
            affine_transforms=affine_transforms,
            scale_transforms=scale_transforms,
            gt_shape=gt_shape,
        )
    def __str__(self):
        if self.diagonal is not None:
            diagonal = self.diagonal
        else:
            y, x = self.reference_shape.range()
            diagonal = np.sqrt(x ** 2 + y ** 2)

        # Compute scale info strings
        scales_info = []
        lvl_str_tmplt = r"""   - Scale {0}
     - Cascade depth: {1}
     - Depth per tree: {2}
     - Trees per cascade level: {3}
     - Regularisation parameter: {4:.1f}
     - Feature pool of size {5} and padding {6:.1f}
     - Lambda: {7:.1f}
     - {8} split tests
     - Perturbations generated per shape: {9}
     - Total perturbations generated: {10}"""
        for k, s in enumerate(self.scales):
            scales_info.append(
                lvl_str_tmplt.format(
                    s,
                    self._dlib_options_templates[k].cascade_depth,
                    self._dlib_options_templates[k].tree_depth,
                    self._dlib_options_templates[k].num_trees_per_cascade_level,
                    self._dlib_options_templates[k].nu,
                    self._dlib_options_templates[k].feature_pool_size,
                    self._dlib_options_templates[k].feature_pool_region_padding,
                    self._dlib_options_templates[k].lambda_param,
                    self._dlib_options_templates[k].num_test_splits,
                    self._dlib_options_templates[k].oversampling_amount,
                    self._dlib_options_templates[k].oversampling_amount
                    * self.n_perturbations,
                )
            )
        scales_info = "\n".join(scales_info)

        is_custom_perturb_func = (
            self._perturb_from_gt_bounding_box != noisy_shape_from_bounding_box
        )
        if is_custom_perturb_func:
            is_custom_perturb_func = name_of_callable(
                self._perturb_from_gt_bounding_box
            )

        cls_str = r"""{class_title}
 - Images scaled to diagonal: {diagonal:.2f}
 - Perturbations generated per shape: {n_perturbations}
 - Custom perturbation scheme used: {is_custom_perturb_func}
 - Scales: {scales}
{scales_info}
""".format(
            class_title="Ensemble of Regression Trees",
            diagonal=diagonal,
            n_perturbations=self.n_perturbations,
            is_custom_perturb_func=is_custom_perturb_func,
            scales=self.scales,
            scales_info=scales_info,
        )
        return cls_str
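# A minimal training sketch, assuming a directory of images with associated
# ground truth landmarks under a "PTS" group (both the path and the group
# name below are assumptions for illustration; substitute your own data):
if __name__ == "__main__":
    import menpo.io as mio

    # Import the training images together with their landmark files
    training_images = list(mio.import_images("/path/to/training/images/"))

    # Train a two-scale ERT; all DLib-specific parameters keep the defaults
    # documented above
    fitter = DlibERT(training_images, group="PTS", scales=(0.5, 1.0), verbose=True)

    # Fit from a bounding box. Here we simply reuse the ground truth bounding
    # box of the first image as the initialisation; in practice this would
    # come from a face detector.
    test_image = training_images[0]
    test_bb = test_image.landmarks["PTS"].bounding_box()
    result = fitter.fit_from_bb(
        test_image, test_bb, gt_shape=test_image.landmarks["PTS"]
    )
    print(result)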
class DlibWrapper(object):
    r"""
    Wrapper class for fitting a pre-trained ERT model. Pre-trained models are
    provided by the official DLib package (http://dlib.net/).

    Parameters
    ----------
    model : `Path` or `str`
        Path to the pre-trained model.
    """

    def __init__(self, model):
        if isinstance(model, (str, Path)):
            m_path = Path(model)
            if not m_path.exists():
                raise ValueError("Model {} does not exist.".format(m_path))
            model = dlib.shape_predictor(str(m_path))
        # Dlib doesn't expose any information about how the model was built,
        # so we just create dummy options
        self.algorithm = DlibAlgorithm(
            dlib.shape_predictor_training_options(), n_iterations=0
        )
        self.algorithm.dlib_model = model
        self.scales = [1]
    def fit_from_shape(self, image, initial_shape, gt_shape=None):
        r"""
        Fits the model to an image. Note that it is not possible to
        initialise the fitting process from a shape. Thus, this method raises
        a warning and calls `fit_from_bb` with the bounding box of the
        provided `initial_shape`.

        Parameters
        ----------
        image : `menpo.image.Image` or subclass
            The image to be fitted.
        initial_shape : `menpo.shape.PointCloud`
            The initial shape estimate from which the fitting procedure will
            start. Note that the shape won't actually be used, only its
            bounding box.
        gt_shape : `menpo.shape.PointCloud`, optional
            The ground truth shape associated with the image.

        Returns
        -------
        fitting_result : :map:`Result`
            The result of the fitting procedure.
        """
        warnings.warn(
            "Fitting from an initial shape is not supported by "
            "Dlib - therefore we are falling back to the tightest "
            "bounding box from the given initial_shape"
        )
        tightest_bb = initial_shape.bounding_box()
        return self.fit_from_bb(image, tightest_bb, gt_shape=gt_shape)
    def fit_from_bb(self, image, bounding_box, gt_shape=None):
        r"""
        Fits the model to an image given an initial bounding box.

        Parameters
        ----------
        image : `menpo.image.Image` or subclass
            The image to be fitted.
        bounding_box : `menpo.shape.PointDirectedGraph`
            The initial bounding box.
        gt_shape : `menpo.shape.PointCloud`, optional
            The ground truth shape associated with the image.

        Returns
        -------
        fitting_result : :map:`Result`
            The result of the fitting procedure.
        """
        # We get back a NonParametricIterativeResult with one iteration,
        # which is pointless. Simply convert it to a Result instance without
        # passing in an initial shape.
        fit_result = self.algorithm.run(image, bounding_box, gt_shape=gt_shape)
        return Result(
            final_shape=fit_result.final_shape,
            image=image,
            initial_shape=None,
            gt_shape=gt_shape,
        )
    def __str__(self):
        return "Pre-trained DLib Ensemble of Regression Trees model"
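# A minimal fitting sketch for the wrapper, assuming a pre-trained DLib model
# (e.g. shape_predictor_68_face_landmarks.dat) has been downloaded to the
# path below and that the test image carries a "PTS" ground truth group; the
# paths and the group name are assumptions for illustration:
if __name__ == "__main__":
    import menpo.io as mio

    fitter = DlibWrapper("/path/to/shape_predictor_68_face_landmarks.dat")

    # Fit from a bounding box; in practice this would come from a face
    # detector, but here we reuse the ground truth bounding box
    image = mio.import_image("/path/to/face.jpg")
    bb = image.landmarks["PTS"].bounding_box()
    result = fitter.fit_from_bb(image, bb, gt_shape=image.landmarks["PTS"])
    print(result.final_shape)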