Source code for flexcode.post_processing

import numpy as np

from .loss_functions import cde_loss


[docs]def normalize(cde_estimates, tol=1e-6, max_iter=200): """Normalizes conditional density estimates to be non-negative and integrate to one. Assumes densities are evaluated on the unit grid. :param cde_estimates: a numpy array or matrix of conditional density estimates. :param tol: float, the tolerance to accept for abs(area - 1). :param max_iter: int, the maximal number of search iterations. :returns: the normalized conditional density estimates. :rtype: numpy array or matrix. """ if cde_estimates.ndim == 1: _normalize(cde_estimates, tol, max_iter) else: np.apply_along_axis(_normalize, 1, cde_estimates, tol=tol, max_iter=max_iter)
[docs]def _normalize(density, tol=1e-6, max_iter=500): """Normalizes a density estimate to be non-negative and integrate to one. Assumes density is evaluated on the unit grid. :param density: a numpy array of density estimates. :param z_grid: an array, the grid points at the density is estimated. :param tol: float, the tolerance to accept for abs(area - 1). :param max_iter: int, the maximal number of search iterations. :returns: the normalized density estimate. :rtype: numpy array. """ hi = np.max(density) lo = 0.0 area = np.mean(np.maximum(density, 0.0)) if area == 0.0: # replace with uniform if all negative density density[:] = 1.0 elif area < 1: density /= area density[density < 0.0] = 0.0 return for _ in range(max_iter): mid = (hi + lo) / 2 area = np.mean(np.maximum(density - mid, 0.0)) if abs(1.0 - area) <= tol: break if area < 1.0: hi = mid else: lo = mid # update in place density -= mid density[density < 0.0] = 0.0
[docs]def sharpen(cde_estimates, alpha): """Sharpens conditional density estimates. Assumes densities are evaluated on the unit grid. :param cde_estimates: a numpy array or matrix of conditional density estimates. :param alpha: float, the exponent to which the estimate is raised. :returns: the sharpened conditional density estimate. :rtype: numpy array or matrix. """ cde_estimates **= alpha normalize(cde_estimates)
[docs]def choose_sharpen(cde_estimates, z_grid, true_z, alpha_grid): """Chooses the sharpen parameter by minimizing cde loss. :param cde_estimates: a numpy matrix of conditional density estimates :param true_z: an array of the true z values corresponding to the cde_estimates. :param alpha_grid: an array of candidate sharpen parameter values. :returns: the sharpen parameter value from alpha_grid which minimizes cde loss. :rtype: float """ best_alpha = None best_loss = np.inf for alpha in alpha_grid: tmp_estimates = cde_estimates.copy() sharpen(tmp_estimates, alpha) loss = cde_loss(tmp_estimates, z_grid, true_z) if loss < best_loss: best_loss = loss best_alpha = alpha return best_alpha
[docs]def remove_bumps(cde_estimates, delta): """Removes bumps in conditional density estimates Assumes that cde_estimates are on the unit grid. :param cde_estimates: a numpy array or matrix of conditional density estimates. :param delta: float, the threshold for bump removal :returns: the conditional density estimates with bumps removed :rtype: numpy array or matrix """ if cde_estimates.ndim == 1: _remove_bumps(cde_estimates, delta) else: np.apply_along_axis(_remove_bumps, 1, cde_estimates, delta=delta)
[docs]def _remove_bumps(density, delta): """Removes bumps in conditional density estimates. Assumes estimates are on the unit grid. :param density: a numpy array of conditional density estimate. :param delta: float, the threshold for bump removal. :returns: the conditional density estimate with bumps removed. :rtype: numpy array. """ bin_size = 1.0 / len(density) area = 0.0 left_idx = 0 removed_area = 0.0 for right_idx, val in enumerate(density): if val <= 0.0: if area < delta: density[left_idx : (right_idx + 1)] = 0.0 removed_area += area left_idx = right_idx + 1 area = 0.0 else: area += val * bin_size if area < delta: # final check at end density[left_idx:] = 0.0 removed_area += area _normalize(density)
[docs]def choose_bump_threshold(cde_estimates, z_grid, true_z, delta_grid): """Chooses the bump threshold which minimizes cde loss. :param cde_estimates: a numpy array or matrix of conditional density estimates. :param z_grid: an array, the grid points at which the density is estimated.b :param true_z: the true z values corresponding to the conditional denstity estimates. :param delta_grid: an array of candidate bump threshold values :returns: the bump threshold value from delta_grid which minimizes CDE loss :rtype: float """ best_delta = None best_loss = np.inf for delta in delta_grid: tmp_estimates = cde_estimates.copy() remove_bumps(tmp_estimates, delta) normalize(tmp_estimates) loss = cde_loss(tmp_estimates, z_grid, true_z) if loss < best_loss: best_loss = loss best_delta = delta return best_delta