Source code for flexcode.post_processing

import numpy as np

from .loss_functions import cde_loss


[docs]def normalize(cde_estimates, tol=1e-6, max_iter=200):
    """Normalizes conditional density estimates to be non-negative and
    integrate to one.

    Assumes densities are evaluated on the unit grid.

    :param cde_estimates: a numpy array or matrix of conditional density estimates.
    :param tol: float, the tolerance to accept for abs(area - 1).
    :param max_iter: int, the maximal number of search iterations.
    :returns: the normalized conditional density estimates.
    :rtype: numpy array or matrix.

    """
    if cde_estimates.ndim == 1:
        _normalize(cde_estimates, tol, max_iter)
    else:
        np.apply_along_axis(_normalize, 1, cde_estimates, tol=tol, max_iter=max_iter)


[docs]def _normalize(density, tol=1e-6, max_iter=500):
    """Normalizes a density estimate to be non-negative and integrate to
    one.

    Assumes density is evaluated on the unit grid.

    :param density: a numpy array of density estimates.
    :param z_grid: an array, the grid points at the density is estimated.
    :param tol: float, the tolerance to accept for abs(area - 1).
    :param max_iter: int, the maximal number of search iterations.
    :returns: the normalized density estimate.
    :rtype: numpy array.

    """
    hi = np.max(density)
    lo = 0.0

    area = np.mean(np.maximum(density, 0.0))
    if area == 0.0:
        # replace with uniform if all negative density
        density[:] = 1.0
    elif area < 1:
        density /= area
        density[density < 0.0] = 0.0
        return

    for _ in range(max_iter):
        mid = (hi + lo) / 2
        area = np.mean(np.maximum(density - mid, 0.0))
        if abs(1.0 - area) <= tol:
            break
        if area < 1.0:
            hi = mid
        else:
            lo = mid

    # update in place
    density -= mid
    density[density < 0.0] = 0.0


[docs]def sharpen(cde_estimates, alpha):
    """Sharpens conditional density estimates.

    Assumes densities are evaluated on the unit grid.

    :param cde_estimates: a numpy array or matrix of conditional density estimates.
    :param alpha: float, the exponent to which the estimate is raised.
    :returns: the sharpened conditional density estimate.
    :rtype: numpy array or matrix.

    """
    cde_estimates **= alpha
    normalize(cde_estimates)


[docs]def choose_sharpen(cde_estimates, z_grid, true_z, alpha_grid):
    """Chooses the sharpen parameter by minimizing cde loss.

    :param cde_estimates: a numpy matrix of conditional density estimates
    :param true_z: an array of the true z values corresponding to the cde_estimates.
    :param alpha_grid: an array of candidate sharpen parameter values.
    :returns: the sharpen parameter value from alpha_grid which minimizes cde loss.
    :rtype: float

    """
    best_alpha = None
    best_loss = np.inf
    for alpha in alpha_grid:
        tmp_estimates = cde_estimates.copy()
        sharpen(tmp_estimates, alpha)
        loss = cde_loss(tmp_estimates, z_grid, true_z)
        if loss < best_loss:
            best_loss = loss
            best_alpha = alpha
    return best_alpha


[docs]def remove_bumps(cde_estimates, delta):
    """Removes bumps in conditional density estimates

    Assumes that cde_estimates are on the unit grid.

    :param cde_estimates: a numpy array or matrix of conditional density estimates.
    :param delta: float, the threshold for bump removal
    :returns: the conditional density estimates with bumps removed
    :rtype: numpy array or matrix

    """
    if cde_estimates.ndim == 1:
        _remove_bumps(cde_estimates, delta)
    else:
        np.apply_along_axis(_remove_bumps, 1, cde_estimates, delta=delta)


[docs]def _remove_bumps(density, delta):
    """Removes bumps in conditional density estimates.

    Assumes estimates are on the unit grid.

    :param density: a numpy array of conditional density estimate.
    :param delta: float, the threshold for bump removal.
    :returns: the conditional density estimate with bumps removed.
    :rtype: numpy array.

    """
    bin_size = 1.0 / len(density)
    area = 0.0
    left_idx = 0
    removed_area = 0.0
    for right_idx, val in enumerate(density):
        if val <= 0.0:
            if area < delta:
                density[left_idx : (right_idx + 1)] = 0.0
                removed_area += area
            left_idx = right_idx + 1
            area = 0.0
        else:
            area += val * bin_size
    if area < delta:  # final check at end
        density[left_idx:] = 0.0
        removed_area += area
    _normalize(density)


[docs]def choose_bump_threshold(cde_estimates, z_grid, true_z, delta_grid):
    """Chooses the bump threshold which minimizes cde loss.

    :param cde_estimates: a numpy array or matrix of conditional density estimates.
    :param z_grid: an array, the grid points at which the density is estimated.b
    :param true_z: the true z values corresponding to the conditional
    denstity estimates.
    :param delta_grid: an array of candidate bump threshold values
    :returns: the bump threshold value from delta_grid which minimizes CDE loss
    :rtype: float

    """
    best_delta = None
    best_loss = np.inf
    for delta in delta_grid:
        tmp_estimates = cde_estimates.copy()
        remove_bumps(tmp_estimates, delta)
        normalize(tmp_estimates)
        loss = cde_loss(tmp_estimates, z_grid, true_z)
        if loss < best_loss:
            best_loss = loss
            best_delta = delta
    return best_delta