# Source code for menpofit.visualize.textutils

from collections import OrderedDict
import numpy as np

from menpo.visualize import print_progress as menpo_print_progress


def print_progress(iterable, prefix='', n_items=None, offset=0,
                   show_bar=True, show_count=True, show_eta=True,
                   end_with_newline=True, verbose=True):
    r"""
    Print the remaining time needed to compute over an iterable.

    To use, wrap an existing iterable with this function before processing in
    a for loop (see example).

    The estimate of the remaining time is based on a moving average of the last
    100 items completed in the loop.

    This method is identical to `menpo.visualize.print_progress`, but adds a
    `verbose` flag which allows the printing to be skipped if necessary.

    Note that this is a generator function: nothing is consumed from
    ``iterable`` (and nothing is printed) until the returned generator is
    iterated.

    Parameters
    ----------
    iterable : `iterable`
        An iterable that will be processed. The iterable is passed through by
        this function, with the time taken for each complete iteration logged.
    prefix : `str`, optional
        If provided a string that will be prepended to the progress report at
        each level.
    n_items : `int`, optional
        Allows for ``iterable`` to be a generator whose length will be assumed
        to be `n_items`. If not provided, then ``iterable`` needs to be
        `Sizable`.
    offset : `int`, optional
        Useful in combination with ``n_items`` - report back the progress as
        if `offset` items have already been handled. ``n_items``  will be left
        unchanged.
    show_bar : `bool`, optional
        If False, The progress bar (e.g. [=========      ]) will be hidden.
    show_count : `bool`, optional
        If False, The item count (e.g. (4/25)) will be hidden.
    show_eta : `bool`, optional
        If False, The estimated time to finish (e.g. - 00:00:03 remaining)
        will be hidden.
    end_with_newline : `bool`, optional
        If False, there will be no new line added at the end of the dynamic
        printing. This means the next print statement will overwrite the
        dynamic report presented here. Useful if you want to follow up a
        print_progress with a second print_progress, where the second
        overwrites the first on the same line.
    verbose : `bool`, optional
        Printing is performed only if set to ``True``. If ``False``, the
        items of ``iterable`` are yielded unchanged and all the other
        reporting options are ignored.

    Yields
    ------
    item
        The items of ``iterable``, in their original order.

    Raises
    ------
    ValueError
        ``offset`` provided without ``n_items``

    Examples
    --------
    This for loop: ::

        from time import sleep
        for i in print_progress(range(10)):
            sleep(1)

    prints a progress report of the form: ::

        [=============       ] 70% (7/10) - 00:00:03 remaining
    """
    if verbose:
        # Let menpo's print_progress wrap the iterable and do the reporting.
        items = menpo_print_progress(iterable, prefix=prefix, n_items=n_items,
                                     offset=offset, show_bar=show_bar,
                                     show_count=show_count, show_eta=show_eta,
                                     end_with_newline=end_with_newline)
    else:
        # Skip the verbosity: pass the iterable through untouched.
        items = iterable

    # An explicit loop is used instead of ``yield from`` so that the function
    # keeps working on interpreters where ``yield from`` is not available.
    for item in items:
        yield item


def statistics_table(errors, method_names, auc_max_error, auc_error_step,
                     auc_min_error=0., stats_types=None, stats_names=None,
                     sort_by=None, precision=4):
    r"""
    Function that generates a table with statistical measures on the fitting
    results of various methods using pandas. It supports multiple types of
    statistical measures.

    **Note that the returned object is a pandas table which can be further
    converted to Latex tabular or simply a string.** See the examples for
    more details.

    **Note also that this function sets the global pandas option**
    ``display.precision`` **to** `precision` **as a side effect.**

    Parameters
    ----------
    errors : `list` of `list` of `float`
        A `list` that contains `lists` of `float` with the errors per method.
        If the first element is not a `list`, then `errors` is assumed to
        hold the errors of a single method and gets wrapped in a `list`.
    method_names : `list` of `str`
        The `list` with the names that will appear for each method. Note that
        it must have the same length as `errors`.
    auc_max_error : `float`
        The maximum error value for computing the area under the curve.
    auc_error_step : `float`
        The sampling step of the error bins for computing the area under the
        curve.
    auc_min_error : `float`, optional
        The minimum error value for computing the area under the curve.
    stats_types : `list` of `str` or ``None``, optional
        The types of statistical measures to compute. Possible options are:

        ======== ========================================================
        Value    Description
        ======== ========================================================
        `mean`   The mean value of the errors.
        `std`    The standard deviation of the errors.
        `median` The median value of the errors.
        `mad`    The median absolute deviation of the errors.
        `max`    The max value of the errors.
        `auc`    The area under the curve based on the CED of the errors.
        `fr`     The failure rate (percentage of images that failed).
        ======== ========================================================

        If ``None``, then all of them will be used with the above order.
    stats_names : `list` of `str`, optional
        The `list` with the names that will appear for each statistical measure
        type selected in `stats_types`. Note that it must have the same
        length as `stats_types`. If ``None``, then the values of
        `stats_types` are used as names.
    sort_by : `str` or ``None``, optional
        The column to use for sorting the methods. If ``None``, then no
        sorting is performed and the methods will appear in the provided
        order of `method_names`. It must be one of the measures selected in
        `stats_types`. Sorting is descending for `auc` and ascending for all
        the other measures. Possible options are:

        ======== ========================================================
        Value    Description
        ======== ========================================================
        `mean`   The mean value of the errors.
        `std`    The standard deviation of the errors.
        `median` The median value of the errors.
        `mad`    The median absolute deviation of the errors.
        `max`    The max value of the errors.
        `auc`    The area under the curve based on the CED of the errors.
        `fr`     The failure rate (percentage of images that failed).
        ======== ========================================================

    precision : `int`, optional
        The precision of the reported values, i.e. the number of decimals.

    Raises
    ------
    ValueError
        stat_type must be selected from [mean, std, median, mad, max, auc, fr]
    ValueError
        sort_by must be selected from [mean, std, median, mad, max, auc, fr]
    ValueError
        stats_types and stats_names must have the same length

    Returns
    -------
    table : `pandas.DataFrame`
        The pandas table. It can be further converted to various format,
        such as Latex tabular or `str`.

    Examples
    --------
    Let us create some errors for 3 methods sampled from Normal distributions
    with different mean and standard deviations: ::

        import numpy as np
        from menpofit.visualize import statistics_table

        method_names = ['Method_1', 'Method_2', 'Method_3']
        errors = [list(np.random.normal(0.07, 0.02, 400)),
                  list(np.random.normal(0.06, 0.03, 400)),
                  list(np.random.normal(0.08, 0.04, 400))]

    We can create a pandas `DataFrame` as: ::

        tab = statistics_table(errors, method_names, auc_max_error=0.1,
                               auc_error_step=0.001, sort_by='auc')
        tab

    Pandas offers excellent functionalities. For example, the table can be
    converted to an `str` as: ::

        print(tab.to_string())

    or to a Latex tabular as: ::

        print(tab.to_latex())

    """
    # Resolve the defaults of the statistics selection
    supported_types = ['mean', 'std', 'median', 'mad', 'max', 'auc', 'fr']
    if stats_types is None:
        stats_types = supported_types
    if stats_names is None:
        stats_names = stats_types

    # Validate all the arguments up front, so that invalid input is rejected
    # before any statistics are computed
    if len(stats_types) != len(stats_names):
        raise ValueError('stats_types and stats_names must have the same '
                         'length')
    for stat_type in stats_types:
        if stat_type not in supported_types:
            raise ValueError('stat_type must be selected from [mean, std, '
                             'median, mad, max, auc, fr]')
    if sort_by is not None and sort_by not in stats_types:
        raise ValueError('sort_by must be selected from [mean, std, '
                         'median, mad, max, auc, fr]')

    # Imported locally, as in the original implementation, so that pandas is
    # only required when this function is actually used
    from menpofit.error import compute_statistical_measures
    import pandas as pn

    # Make sure errors is a list of lists
    if not isinstance(errors[0], list):
        errors = [errors]

    # Compute statistics (one value per method for each measure)
    means, stds, medians, mads, maxs, aucs, frs = compute_statistical_measures(
            errors, step_error=auc_error_step, max_error=auc_max_error,
            min_error=auc_min_error)
    values_per_type = {'mean': means, 'std': stds, 'median': medians,
                       'mad': mads, 'max': maxs, 'auc': aucs, 'fr': frs}

    # Create data dict, keeping the columns in the requested order
    data = OrderedDict()
    for stat_type, stat_name in zip(stats_types, stats_names):
        data[stat_name] = np.array(values_per_type[stat_type])

    # Create pandas table
    tab = pn.DataFrame(data, index=method_names)

    # Sort table. A larger AUC is better, so it is the only measure that is
    # sorted in descending order.
    if sort_by is not None:
        tab.sort_values(by=stats_names[stats_types.index(sort_by)],
                        inplace=True, ascending=(sort_by != 'auc'))

    # Set precision. The fully qualified option name is required: the short
    # 'precision' pattern is ambiguous on pandas versions that also define
    # 'styler.format.precision'.
    pn.set_option('display.precision', precision)

    return tab