Source code for nestcheck.estimators

#!/usr/bin/env python
"""
Functions for estimating quantities from nested sampling runs.
Each estimator function should have arguments:

.. code-block:: python

    def estimator_func(self, ns_run, logw=None, simulate=False):
        ...

Any additional arguments required for the function should be keyword
arguments.

The ``logw`` argument allows the log weights for the points in the run to be
provided - this is useful if many estimators are being calculated from
the same run as it allows ``logw`` to only be calculated once. If it is not
specified, ``logw`` is calculated from the run when required.

The simulate argument is passed to ``ns_run_utils.get_logw``, and is only used
if the function needs to calculate ``logw``.
"""

import functools
import numpy as np
import scipy
import nestcheck.ns_run_utils


# Estimators
# ----------

[docs]def count_samples(ns_run, **kwargs): r"""Number of samples in run. Unlike most estimators this does not require log weights, but for convenience will not throw an error if they are specified. Parameters ---------- ns_run: dict Nested sampling run dict (see the data_processing module docstring for more details). Returns ------- int """ kwargs.pop('logw', None) kwargs.pop('simulate', None) if kwargs: raise TypeError('Unexpected **kwargs: {0}'.format(kwargs)) return ns_run['logl'].shape[0]
[docs]def logz(ns_run, logw=None, simulate=False): r"""Natural log of Bayesian evidence :math:`\log \mathcal{Z}`. Parameters ---------- ns_run: dict Nested sampling run dict (see the data_processing module docstring for more details). logw: None or 1d numpy array, optional Log weights of samples. simulate: bool, optional Passed to ns_run_utils.get_logw if logw needs to be calculated. Returns ------- float """ if logw is None: logw = nestcheck.ns_run_utils.get_logw(ns_run, simulate=simulate) return scipy.special.logsumexp(logw)
[docs]def evidence(ns_run, logw=None, simulate=False): r"""Bayesian evidence :math:`\log \mathcal{Z}`. Parameters ---------- ns_run: dict Nested sampling run dict (see the data_processing module docstring for more details). logw: None or 1d numpy array, optional Log weights of samples. simulate: bool, optional Passed to ns_run_utils.get_logw if logw needs to be calculated. Returns ------- float """ if logw is None: logw = nestcheck.ns_run_utils.get_logw(ns_run, simulate=simulate) return np.exp(scipy.special.logsumexp(logw))
[docs]def param_mean(ns_run, logw=None, simulate=False, param_ind=0, handle_indexerror=False): """Mean of a single parameter (single component of theta). Parameters ---------- ns_run: dict Nested sampling run dict (see the data_processing module docstring for more details). logw: None or 1d numpy array, optional Log weights of samples. simulate: bool, optional Passed to ns_run_utils.get_logw if logw needs to be calculated. param_ind: int, optional Index of parameter for which the mean should be calculated. This corresponds to the column of ns_run['theta'] which contains the parameter. handle_indexerror: bool, optional Make the function function return nan rather than raising an IndexError if param_ind >= ndim. This is useful when applying the same list of estimators to data sets of different dimensions. Returns ------- float """ if logw is None: logw = nestcheck.ns_run_utils.get_logw(ns_run, simulate=simulate) w_relative = np.exp(logw - logw.max()) try: return (np.sum(w_relative * ns_run['theta'][:, param_ind]) / np.sum(w_relative)) except IndexError: if handle_indexerror: return np.nan else: raise
[docs]def param_cred(ns_run, logw=None, simulate=False, probability=0.5, param_ind=0): """One-tailed credible interval on the value of a single parameter (component of theta). Parameters ---------- ns_run: dict Nested sampling run dict (see the data_processing module docstring for more details). logw: None or 1d numpy array, optional Log weights of samples. simulate: bool, optional Passed to ns_run_utils.get_logw if logw needs to be calculated. probability: float, optional Quantile to estimate - must be in open interval (0, 1). For example, use 0.5 for the median and 0.84 for the upper 84% quantile. Passed to weighted_quantile. param_ind: int, optional Index of parameter for which the credible interval should be calculated. This corresponds to the column of ns_run['theta'] which contains the parameter. Returns ------- float """ if logw is None: logw = nestcheck.ns_run_utils.get_logw(ns_run, simulate=simulate) w_relative = np.exp(logw - logw.max()) # protect against overflow return weighted_quantile(probability, ns_run['theta'][:, param_ind], w_relative)
[docs]def param_squared_mean(ns_run, logw=None, simulate=False, param_ind=0): """Mean of the square of single parameter (second moment of its posterior distribution). Parameters ---------- ns_run: dict Nested sampling run dict (see the data_processing module docstring for more details). logw: None or 1d numpy array, optional Log weights of samples. simulate: bool, optional Passed to ns_run_utils.get_logw if logw needs to be calculated. param_ind: int, optional Index of parameter for which the second moment should be calculated. This corresponds to the column of ns_run['theta'] which contains the parameter. Returns ------- float """ if logw is None: logw = nestcheck.ns_run_utils.get_logw(ns_run, simulate=simulate) w_relative = np.exp(logw - logw.max()) # protect against overflow w_relative /= np.sum(w_relative) return np.sum(w_relative * (ns_run['theta'][:, param_ind] ** 2))
[docs]def r_mean(ns_run, logw=None, simulate=False): """Mean of the radial coordinate (magnitude of theta vector). Parameters ---------- ns_run: dict Nested sampling run dict (see the data_processing module docstring for more details). logw: None or 1d numpy array, optional Log weights of samples. simulate: bool, optional Passed to ns_run_utils.get_logw if logw needs to be calculated. Returns ------- float """ if logw is None: logw = nestcheck.ns_run_utils.get_logw(ns_run, simulate=simulate) w_relative = np.exp(logw - logw.max()) r = np.sqrt(np.sum(ns_run['theta'] ** 2, axis=1)) return np.sum(w_relative * r) / np.sum(w_relative)
[docs]def r_cred(ns_run, logw=None, simulate=False, probability=0.5): """One-tailed credible interval on the value of the radial coordinate (magnitude of theta vector). Parameters ---------- ns_run: dict Nested sampling run dict (see the data_processing module docstring for more details). logw: None or 1d numpy array, optional Log weights of samples. simulate: bool, optional Passed to ns_run_utils.get_logw if logw needs to be calculated. probability: float, optional Quantile to estimate - must be in open interval (0, 1). For example, use 0.5 for the median and 0.84 for the upper 84% quantile. Passed to weighted_quantile. Returns ------- float """ if logw is None: logw = nestcheck.ns_run_utils.get_logw(ns_run, simulate=simulate) w_relative = np.exp(logw - logw.max()) # protect against overflow r = np.sqrt(np.sum(ns_run['theta'] ** 2, axis=1)) return weighted_quantile(probability, r, w_relative)
# Helper functions # ----------------
[docs]def get_latex_name(func_in, **kwargs): """ Produce a latex formatted name for each function for use in labelling results. Parameters ---------- func_in: function kwargs: dict, optional Kwargs for function. Returns ------- latex_name: str Latex formatted name for the function. """ if isinstance(func_in, functools.partial): func = func_in.func assert not set(func_in.keywords) & set(kwargs), ( 'kwargs={0} and func_in.keywords={1} contain repeated keys' .format(kwargs, func_in.keywords)) kwargs.update(func_in.keywords) else: func = func_in param_ind = kwargs.pop('param_ind', 0) probability = kwargs.pop('probability', 0.5) kwargs.pop('handle_indexerror', None) if kwargs: raise TypeError('Unexpected **kwargs: {0}'.format(kwargs)) ind_str = r'{\hat{' + str(param_ind + 1) + '}}' latex_name_dict = { 'count_samples': r'samples', 'logz': r'$\mathrm{log} \mathcal{Z}$', 'evidence': r'$\mathcal{Z}$', 'r_mean': r'$\overline{|\theta|}$', 'param_mean': r'$\overline{\theta_' + ind_str + '}$', 'param_squared_mean': r'$\overline{\theta^2_' + ind_str + '}$'} # Add credible interval names if probability == 0.5: cred_str = r'$\mathrm{median}(' else: # format percent without trailing zeros percent_str = ('%f' % (probability * 100)).rstrip('0').rstrip('.') cred_str = r'$\mathrm{C.I.}_{' + percent_str + r'\%}(' latex_name_dict['param_cred'] = cred_str + r'\theta_' + ind_str + ')$' latex_name_dict['r_cred'] = cred_str + r'|\theta|)$' try: return latex_name_dict[func.__name__] except KeyError as err: err.args = err.args + ('get_latex_name not yet set up for ' + func.__name__,) raise
[docs]def weighted_quantile(probability, values, weights): """ Get quantile estimate for input probability given weighted samples using linear interpolation. Parameters ---------- probability: float Quantile to estimate - must be in open interval (0, 1). For example, use 0.5 for the median and 0.84 for the upper 84% quantile. values: 1d numpy array Sample values. weights: 1d numpy array Corresponding sample weights (same shape as values). Returns ------- quantile: float """ assert 1 > probability > 0, ( 'credible interval prob= ' + str(probability) + ' not in (0, 1)') assert values.shape == weights.shape assert values.ndim == 1 assert weights.ndim == 1 sorted_inds = np.argsort(values) quantiles = np.cumsum(weights[sorted_inds]) - (0.5 * weights[sorted_inds]) quantiles /= np.sum(weights) return np.interp(probability, quantiles, values[sorted_inds])