from __future__ import division
from functools import partial
import warnings
import dlib
from pathlib import Path
import numpy as np
from menpo.feature import no_op
from menpo.base import name_of_callable
from menpofit import checks
from menpofit.visualize import print_progress
from menpofit.fitter import (
noisy_shape_from_bounding_box,
MultiScaleNonParametricFitter,
generate_perturbations_from_gt,
)
from menpofit.builder import (
scale_images,
rescale_images_to_reference_shape,
compute_reference_shape,
)
from menpofit.result import Result
from .algorithm import DlibAlgorithm
class DlibERT(MultiScaleNonParametricFitter):
    r"""
    Class for training a multi-scale Ensemble of Regression Trees model. This
    class uses the implementation provided by the official DLib package
    (http://dlib.net/) and makes it multi-scale.

    Parameters
    ----------
    images : `list` of `menpo.image.Image`
        The `list` of training images.
    group : `str` or ``None``, optional
        The landmark group that corresponds to the ground truth shape of each
        image. If ``None`` and the images only have a single landmark group,
        then that is the one that will be used. Note that all the training
        images need to have the specified landmark group.
    bounding_box_group_glob : `glob` or ``None``, optional
        Glob that defines the bounding boxes to be used for training. If
        ``None``, then the bounding boxes of the ground truth shapes are used.
    reference_shape : `menpo.shape.PointCloud` or ``None``, optional
        The reference shape that will be used for normalising the size of the
        training images. The normalization is performed by rescaling all the
        training images so that the scale of their ground truth shapes
        matches the scale of the reference shape. Note that the reference
        shape is rescaled with respect to the `diagonal` before performing
        the normalisation. If ``None``, then the mean shape will be used.
    diagonal : `int` or ``None``, optional
        This parameter is used to rescale the reference shape so that the
        diagonal of its bounding box matches the provided value. In other
        words, this parameter controls the size of the model at the highest
        scale. If ``None``, then the reference shape does not get rescaled.
    scales : `float` or `tuple` of `float`, optional
        The scale value of each scale. They must provided in ascending order,
        i.e. from lowest to highest scale. If `float`, then a single scale is
        assumed.
    n_perturbations : `int` or ``None``, optional
        The number of perturbations to be generated from each of the bounding
        boxes using `perturb_from_gt_bounding_box`. Note that the total
        number of perturbations is `n_perturbations * n_dlib_perturbations`.
    perturb_from_gt_bounding_box : `function`, optional
        The function that will be used to generate the perturbations.
    n_dlib_perturbations : `int` or ``None`` or `list` of those, optional
        The number of perturbations to be generated from the part of DLib. DLib
        calls this "oversampling amount". If `list`, it must specify a value per
        scale. Note that the total number of perturbations is
        `n_perturbations * n_dlib_perturbations`.
    n_iterations : `int` or `list` of `int`, optional
        The number of iterations (cascades) of each level. If `list`, it must
        specify a value per scale. If `int`, then it defines the total number of
        iterations (cascades) over all scales.
    feature_padding : `float` or `list` of `float`, optional
        When we randomly sample the pixels for the feature pool we do so in a
        box fit around the provided training landmarks. By default, this box
        is the tightest box that contains the landmarks. However, you can
        expand or shrink the size of the pixel sampling region by setting a
        different value of padding. To explain this precisely, for a padding
        of 0 we say that the pixels are sampled from a box of size 1x1. The
        padding value is added to each side of the box. So a padding of 0.5
        would cause the algorithm to sample pixels from a box that was 2x2,
        effectively multiplying the area pixels are sampled from by 4.
        Similarly, setting the padding to -0.2 would cause it to sample from
        a box 0.6x0.6 in size. If `list`, it must specify a value per scale.
    n_pixel_pairs : `int` or `list` of `int`, optional
        `P` parameter from [1]. At each level of the cascade we randomly sample
        pixels from the image. These pixels are used to generate features for
        the random trees. So in general larger settings of this parameter
        give better accuracy but make the algorithm run slower. If `list`, it
        must specify a value per scale.
    distance_prior_weighting : `float` or `list` of `float`, optional
        To decide how to split nodes in the regression trees the algorithm
        looks at pairs of pixels in the image. These pixel pairs are sampled
        randomly but with a preference for selecting pixels that are near
        each other. This parameter controls this "nearness" preference. In
        particular, smaller values will make the algorithm prefer to select
        pixels close together and larger values will make it care less about
        picking nearby pixel pairs. Note that this is the inverse of how it is
        defined in [1]. For this object, you should think of
        `distance_prior_weighting` as "the fraction of the bounding box will
        we traverse to find a neighboring pixel". Nominally, this is
        normalized between 0 and 1. So reasonable settings are values in the
        range (0, 1). If `list`, it must specify a value per scale.
    regularisation_weight : `float` or `list` of `float`, optional
        Boosting regularization parameter - `nu` from [1]. Larger values may
        cause overfitting but improve performance on training data. If `list`,
        it must specify a value per scale.
    n_split_tests : `int` or `list` of `int`, optional
        When generating the random trees we randomly sample `n_split_tests`
        possible split features at each node and pick the one that gives the
        best split. Larger values of this parameter will usually give more
        accurate outputs but take longer to train. It is equivalent of `S`
        from [1]. If `list`, it must specify a value per scale.
    n_trees : `int` or `list` of `int`, optional
        Number of trees created for each cascade. The total number of trees
        in the learned model is equal n_trees * n_tree_levels. Equivalent to
        `K` from [1]. If `list`, it must specify a value per scale.
    n_tree_levels : `int` or `list` of `int`, optional
        The number of levels in the tree (depth of tree). In particular,
        there are pow(2, n_tree_levels) leaves in each tree. Equivalent to
        `F` from [1]. If `list`, it must specify a value per scale.
    verbose : `bool`, optional
        If ``True``, then the progress of building ERT will be printed.

    References
    ----------
    .. [1] V. Kazemi, and J. Sullivan. "One millisecond face alignment with
        an ensemble of regression trees." Proceedings of the IEEE Conference
        on Computer Vision and Pattern Recognition. 2014.
    """

    def __init__(
        self,
        images,
        group=None,
        bounding_box_group_glob=None,
        reference_shape=None,
        diagonal=None,
        scales=(0.5, 1.0),
        n_perturbations=30,
        n_dlib_perturbations=1,
        perturb_from_gt_bounding_box=noisy_shape_from_bounding_box,
        n_iterations=10,
        feature_padding=0,
        n_pixel_pairs=400,
        distance_prior_weighting=0.1,
        regularisation_weight=0.1,
        n_split_tests=20,
        n_trees=500,
        n_tree_levels=5,
        verbose=False,
    ):
        checks.check_diagonal(diagonal)
        scales = checks.check_scales(scales)
        n_scales = len(scales)
        # Dummy option that is required by _prepare_image of MultiFitter.
        holistic_features = checks.check_callable(no_op, n_scales)
        # Call superclass
        super(DlibERT, self).__init__(
            scales=scales,
            reference_shape=reference_shape,
            holistic_features=holistic_features,
            algorithms=[],
        )
        # Set parameters
        self.diagonal = diagonal
        self.n_perturbations = n_perturbations
        self.n_iterations = checks.check_max_iters(n_iterations, n_scales)
        self._perturb_from_gt_bounding_box = perturb_from_gt_bounding_box
        # DLib options
        self._setup_dlib_options(
            feature_padding,
            n_pixel_pairs,
            distance_prior_weighting,
            regularisation_weight,
            n_split_tests,
            n_trees,
            n_dlib_perturbations,
            n_tree_levels,
        )
        # Set-up algorithms
        for j in range(self.n_scales):
            self.algorithms.append(
                DlibAlgorithm(
                    self._dlib_options_templates[j], n_iterations=self.n_iterations[j]
                )
            )
        # Train DLIB over multiple scales
        self._train(
            images,
            group=group,
            bounding_box_group_glob=bounding_box_group_glob,
            verbose=verbose,
        )

    def _setup_dlib_options(
        self,
        feature_padding,
        n_pixel_pairs,
        distance_prior_weighting,
        regularisation_weight,
        n_split_tests,
        n_trees,
        n_dlib_perturbations,
        n_tree_levels,
    ):
        # Validate and broadcast every option to a per-scale list, then build
        # one dlib options template per scale.
        check_int = partial(checks.check_multi_scale_param, self.n_scales, (int,))
        check_float = partial(checks.check_multi_scale_param, self.n_scales, (float,))
        feature_padding = check_int("feature_padding", feature_padding)
        n_pixel_pairs = check_int("n_pixel_pairs", n_pixel_pairs)
        distance_prior_weighting = check_float(
            "distance_prior_weighting", distance_prior_weighting
        )
        regularisation_weight = check_float(
            "regularisation_weight", regularisation_weight
        )
        n_split_tests = check_int("n_split_tests", n_split_tests)
        n_trees = check_int("n_trees", n_trees)
        n_dlib_perturbations = check_int("n_dlib_perturbations", n_dlib_perturbations)
        n_tree_levels = check_int("n_tree_levels", n_tree_levels)
        self._dlib_options_templates = []
        for j in range(self.n_scales):
            new_opts = dlib.shape_predictor_training_options()
            # Size of region within which to sample features for the feature
            # pool, e.g a padding of 0.5 would cause the algorithm to sample
            # pixels from a box that was 2x2 pixels
            new_opts.feature_pool_region_padding = feature_padding[j]
            # P parameter from Kazemi paper
            new_opts.feature_pool_size = n_pixel_pairs[j]
            # Controls how tight the feature sampling should be. Lower values
            # enforce closer features. Opposite of explanation from Kazemi
            # paper, lambda
            new_opts.lambda_param = distance_prior_weighting[j]
            # Boosting regularization parameter - nu from Kazemi paper, larger
            # values may cause overfitting but improve performance on training
            # data
            new_opts.nu = regularisation_weight[j]
            # S from Kazemi paper - Number of split features at each node to
            # sample. The one that gives the best split is chosen.
            new_opts.num_test_splits = n_split_tests[j]
            # K from Kazemi paper - number of weak regressors
            new_opts.num_trees_per_cascade_level = n_trees[j]
            # R from Kazemi paper - amount of times other shapes are sampled
            # as example initialisations
            new_opts.oversampling_amount = n_dlib_perturbations[j]
            # F from Kazemi paper - number of levels in the tree (depth of tree)
            new_opts.tree_depth = n_tree_levels[j]
            self._dlib_options_templates.append(new_opts)

    def _train(
        self, original_images, group=None, bounding_box_group_glob=None, verbose=False
    ):
        # Dlib does not support incremental builds, so we must be passed a list
        if not isinstance(original_images, list):
            original_images = list(original_images)
        # We use temporary landmark groups - so we need the group key to not be
        # None
        if group is None:
            group = original_images[0].landmarks.group_labels[0]
        # Temporarily store all the bounding boxes for rescaling
        for i in original_images:
            i.landmarks["__gt_bb"] = i.landmarks[group].bounding_box()
        if self.reference_shape is None:
            # If no reference shape was given, use the mean of the first batch
            self._reference_shape = compute_reference_shape(
                [i.landmarks["__gt_bb"] for i in original_images],
                self.diagonal,
                verbose=verbose,
            )
        # Rescale images wrt the scale factor between the existing
        # reference_shape and their ground truth (group) bboxes
        images = rescale_images_to_reference_shape(
            original_images, "__gt_bb", self.reference_shape, verbose=verbose
        )
        # Scaling is done - remove temporary gt bounding boxes
        for i, i2 in zip(original_images, images):
            del i.landmarks["__gt_bb"]
            del i2.landmarks["__gt_bb"]
        # Create a callable that generates perturbations of the bounding boxes
        # of the provided images.
        generated_bb_func = generate_perturbations_from_gt(
            images,
            self.n_perturbations,
            self._perturb_from_gt_bounding_box,
            gt_group=group,
            bb_group_glob=bounding_box_group_glob,
            verbose=verbose,
        )
        # For each scale (low --> high)
        for j in range(self.n_scales):
            # Print progress if asked
            if verbose:
                if len(self.scales) > 1:
                    scale_prefix = "  - Scale {}: ".format(j)
                else:
                    scale_prefix = "  - "
            else:
                scale_prefix = None
            # Rescale images according to scales. Note that scale_images is smart
            # enough in order not to rescale the images if the current scale
            # factor equals to 1.
            scaled_images, scale_transforms = scale_images(
                images,
                self.scales[j],
                prefix=scale_prefix,
                return_transforms=True,
                verbose=verbose,
            )
            # Get bbox estimations of current scale. If we are at the first
            # scale, this is done by using generated_bb_func. If we are at the
            # rest of the scales, then the current bboxes are attached on the
            # scaled_images with key '__ert_current_bbox_{}'.
            current_bounding_boxes = []
            if j == 0:
                # At the first scale, the current bboxes are created by calling
                # generated_bb_func.
                current_bounding_boxes = [generated_bb_func(im) for im in scaled_images]
            else:
                # At the rest of the scales, extract the current bboxes that
                # were attached to the images
                msg = "{}Extracting bbox estimations from previous scale.".format(
                    scale_prefix
                )
                wrap = partial(
                    print_progress, prefix=msg, end_with_newline=False, verbose=verbose
                )
                for ii in wrap(scaled_images):
                    c_bboxes = []
                    for k in range(self.n_perturbations):
                        c_key = "__ert_current_bbox_{}".format(k)
                        c_bboxes.append(ii.landmarks[c_key])
                    current_bounding_boxes.append(c_bboxes)
            # Extract scaled ground truth shapes for current scale
            scaled_gt_shapes = [i.landmarks[group] for i in scaled_images]
            # Train the Dlib model. This returns the bbox estimations for the
            # next scale.
            current_bounding_boxes = self.algorithms[j].train(
                scaled_images,
                scaled_gt_shapes,
                current_bounding_boxes,
                prefix=scale_prefix,
                verbose=verbose,
            )
            # Scale the current bbox estimations for the next level. This
            # doesn't have to be done for the last scale. The only thing we need
            # to do at the last scale is to remove any attached landmarks from
            # the training images.
            if j < (self.n_scales - 1):
                for jj, image_bboxes in enumerate(current_bounding_boxes):
                    for k, bbox in enumerate(image_bboxes):
                        c_key = "__ert_current_bbox_{}".format(k)
                        images[jj].landmarks[c_key] = scale_transforms[jj].apply(bbox)

    def fit_from_shape(self, image, initial_shape, gt_shape=None):
        r"""
        Fits the model to an image. Note that it is not possible to
        initialise the fitting process from a shape. Thus, this method raises a
        warning and calls `fit_from_bb` with the bounding box of the provided
        `initial_shape`.

        Parameters
        ----------
        image : `menpo.image.Image` or subclass
            The image to be fitted.
        initial_shape : `menpo.shape.PointCloud`
            The initial shape estimate from which the fitting procedure
            will start. Note that the shape won't actually be used, only its
            bounding box.
        gt_shape : `menpo.shape.PointCloud`, optional
            The ground truth shape associated to the image.

        Returns
        -------
        fitting_result : :map:`MultiScaleNonParametricIterativeResult`
            The result of the fitting procedure.
        """
        warnings.warn(
            "Fitting from an initial shape is not supported by "
            "Dlib - therefore we are falling back to the tightest "
            "bounding box from the given initial_shape"
        )
        tightest_bb = initial_shape.bounding_box()
        return self.fit_from_bb(image, tightest_bb, gt_shape=gt_shape)

    def fit_from_bb(self, image, bounding_box, gt_shape=None):
        r"""
        Fits the model to an image given an initial bounding box.

        Parameters
        ----------
        image : `menpo.image.Image` or subclass
            The image to be fitted.
        bounding_box : `menpo.shape.PointDirectedGraph`
            The initial bounding box from which the fitting procedure
            will start.
        gt_shape : `menpo.shape.PointCloud`, optional
            The ground truth shape associated to the image.

        Returns
        -------
        fitting_result : :map:`MultiScaleNonParametricIterativeResult`
            The result of the fitting procedure.
        """
        # Generate the list of images to be fitted, as well as the correctly
        # scaled initial and ground truth shapes per level. The function also
        # returns the lists of affine and scale transforms per level that are
        # required in order to transform the shapes at the original image
        # space in the fitting result. The affine transforms refer to the
        # transform introduced by the rescaling to the reference shape as well
        # as potential affine transform from the features. The scale
        # transforms are the Scale objects that correspond to each level's
        # scale.
        (
            images,
            bounding_boxes,
            gt_shapes,
            affine_transforms,
            scale_transforms,
        ) = self._prepare_image(image, bounding_box, gt_shape=gt_shape)
        # Execute multi-scale fitting
        algorithm_results = self._fit(
            images=images,
            initial_shape=bounding_boxes[0],
            affine_transforms=affine_transforms,
            scale_transforms=scale_transforms,
            return_costs=False,
            gt_shapes=gt_shapes,
        )
        # Return multi-scale fitting result
        return self._fitter_result(
            image=image,
            algorithm_results=algorithm_results,
            affine_transforms=affine_transforms,
            scale_transforms=scale_transforms,
            gt_shape=gt_shape,
        )

    def __str__(self):
        if self.diagonal is not None:
            diagonal = self.diagonal
        else:
            y, x = self.reference_shape.range()
            diagonal = np.sqrt(x ** 2 + y ** 2)
        # Compute scale info strings
        scales_info = []
        lvl_str_tmplt = r"""   - Scale {0}
     - Cascade depth: {1}
     - Depth per tree: {2}
     - Trees per cascade level: {3}
     - Regularisation parameter: {4:.1f}
     - Feature pool of size {5} and padding {6:.1f}
     - Lambda: {7:.1f}
     - {8} split tests
     - Perturbations generated per shape: {9}
     - Total perturbations generated: {10}"""
        for k, s in enumerate(self.scales):
            scales_info.append(
                lvl_str_tmplt.format(
                    s,
                    self._dlib_options_templates[k].cascade_depth,
                    self._dlib_options_templates[k].tree_depth,
                    self._dlib_options_templates[k].num_trees_per_cascade_level,
                    self._dlib_options_templates[k].nu,
                    self._dlib_options_templates[k].feature_pool_size,
                    self._dlib_options_templates[k].feature_pool_region_padding,
                    self._dlib_options_templates[k].lambda_param,
                    self._dlib_options_templates[k].num_test_splits,
                    self._dlib_options_templates[k].oversampling_amount,
                    self._dlib_options_templates[k].oversampling_amount
                    * self.n_perturbations,
                )
            )
        scales_info = "\n".join(scales_info)
        is_custom_perturb_func = (
            self._perturb_from_gt_bounding_box != noisy_shape_from_bounding_box
        )
        if is_custom_perturb_func:
            is_custom_perturb_func = name_of_callable(
                self._perturb_from_gt_bounding_box
            )
        cls_str = r"""{class_title}
 - Images scaled to diagonal: {diagonal:.2f}
 - Perturbations generated per shape: {n_perturbations}
 - Custom perturbation scheme used: {is_custom_perturb_func}
 - Scales: {scales}
{scales_info}
""".format(
            class_title="Ensemble of Regression Trees",
            diagonal=diagonal,
            n_perturbations=self.n_perturbations,
            is_custom_perturb_func=is_custom_perturb_func,
            scales=self.scales,
            scales_info=scales_info,
        )
        return cls_str
class DlibWrapper(object):
    r"""
    Wrapper class for fitting a pre-trained ERT model. Pre-trained models are
    provided by the official DLib package (http://dlib.net/).

    Parameters
    ----------
    model : `Path` or `str`
        Path to the pre-trained model.
    """

    def __init__(self, model):
        if isinstance(model, (str, Path)):
            m_path = Path(model)
            # m_path is already a Path - no need to re-wrap before checking
            if not m_path.exists():
                raise ValueError("Model {} does not exist.".format(m_path))
            model = dlib.shape_predictor(str(m_path))
        # Dlib doesn't expose any information about how the model was built,
        # so we just create dummy options
        self.algorithm = DlibAlgorithm(
            dlib.shape_predictor_training_options(), n_iterations=0
        )
        self.algorithm.dlib_model = model
        self.scales = [1]

    def fit_from_shape(self, image, initial_shape, gt_shape=None):
        r"""
        Fits the model to an image. Note that it is not possible to
        initialise the fitting process from a shape. Thus, this method raises a
        warning and calls `fit_from_bb` with the bounding box of the provided
        `initial_shape`.

        Parameters
        ----------
        image : `menpo.image.Image` or subclass
            The image to be fitted.
        initial_shape : `menpo.shape.PointCloud`
            The initial shape estimate from which the fitting procedure
            will start. Note that the shape won't actually be used, only its
            bounding box.
        gt_shape : `menpo.shape.PointCloud`
            The ground truth shape associated to the image.

        Returns
        -------
        fitting_result : :map:`Result`
            The result of the fitting procedure.
        """
        warnings.warn(
            "Fitting from an initial shape is not supported by "
            "Dlib - therefore we are falling back to the tightest "
            "bounding box from the given initial_shape"
        )
        tightest_bb = initial_shape.bounding_box()
        return self.fit_from_bb(image, tightest_bb, gt_shape=gt_shape)

    def fit_from_bb(self, image, bounding_box, gt_shape=None):
        r"""
        Fits the model to an image given an initial bounding box.

        Parameters
        ----------
        image : `menpo.image.Image` or subclass
            The image to be fitted.
        bounding_box : `menpo.shape.PointDirectedGraph`
            The initial bounding box.
        gt_shape : `menpo.shape.PointCloud`
            The ground truth shape associated to the image.

        Returns
        -------
        fitting_result : :map:`Result`
            The result of the fitting procedure.
        """
        # We get back a NonParametricIterativeResult with one iteration,
        # which is pointless. Simply convert it to a Result instance without
        # passing in an initial shape.
        fit_result = self.algorithm.run(image, bounding_box, gt_shape=gt_shape)
        return Result(
            final_shape=fit_result.final_shape,
            image=image,
            initial_shape=None,
            gt_shape=gt_shape,
        )

    def __str__(self):
        return "Pre-trained DLib Ensemble of Regression Trees model"