#!/usr/bin/env python
"""
Functions for estimating quantities from nested sampling runs.
Each estimator function should have arguments:
.. code-block:: python
def estimator_func(self, ns_run, logw=None, simulate=False):
...
Any additional arguments required for the function should be keyword
arguments.
The ``logw`` argument allows the log weights for the points in the run to be
provided - this is useful if many estimators are being calculated from
the same run as it allows ``logw`` to only be calculated once. If it is not
specified, ``logw`` is calculated from the run when required.
The simulate argument is passed to ``ns_run_utils.get_logw``, and is only used
if the function needs to calculate ``logw``.
"""
import functools
import numpy as np
import scipy
import nestcheck.ns_run_utils
# Estimators
# ----------
[docs]def count_samples(ns_run, **kwargs):
r"""Number of samples in run.
Unlike most estimators this does not require log weights, but for
convenience will not throw an error if they are specified.
Parameters
----------
ns_run: dict
Nested sampling run dict (see the data_processing module
docstring for more details).
Returns
-------
int
"""
kwargs.pop('logw', None)
kwargs.pop('simulate', None)
if kwargs:
raise TypeError('Unexpected **kwargs: {0}'.format(kwargs))
return ns_run['logl'].shape[0]
[docs]def logz(ns_run, logw=None, simulate=False):
r"""Natural log of Bayesian evidence :math:`\log \mathcal{Z}`.
Parameters
----------
ns_run: dict
Nested sampling run dict (see the data_processing module
docstring for more details).
logw: None or 1d numpy array, optional
Log weights of samples.
simulate: bool, optional
Passed to ns_run_utils.get_logw if logw needs to be
calculated.
Returns
-------
float
"""
if logw is None:
logw = nestcheck.ns_run_utils.get_logw(ns_run, simulate=simulate)
return scipy.special.logsumexp(logw)
[docs]def evidence(ns_run, logw=None, simulate=False):
r"""Bayesian evidence :math:`\log \mathcal{Z}`.
Parameters
----------
ns_run: dict
Nested sampling run dict (see the data_processing module
docstring for more details).
logw: None or 1d numpy array, optional
Log weights of samples.
simulate: bool, optional
Passed to ns_run_utils.get_logw if logw needs to be
calculated.
Returns
-------
float
"""
if logw is None:
logw = nestcheck.ns_run_utils.get_logw(ns_run, simulate=simulate)
return np.exp(scipy.special.logsumexp(logw))
[docs]def param_mean(ns_run, logw=None, simulate=False, param_ind=0,
handle_indexerror=False):
"""Mean of a single parameter (single component of theta).
Parameters
----------
ns_run: dict
Nested sampling run dict (see the data_processing module
docstring for more details).
logw: None or 1d numpy array, optional
Log weights of samples.
simulate: bool, optional
Passed to ns_run_utils.get_logw if logw needs to be
calculated.
param_ind: int, optional
Index of parameter for which the mean should be calculated. This
corresponds to the column of ns_run['theta'] which contains the
parameter.
handle_indexerror: bool, optional
Make the function function return nan rather than raising an
IndexError if param_ind >= ndim. This is useful when applying
the same list of estimators to data sets of different dimensions.
Returns
-------
float
"""
if logw is None:
logw = nestcheck.ns_run_utils.get_logw(ns_run, simulate=simulate)
w_relative = np.exp(logw - logw.max())
try:
return (np.sum(w_relative * ns_run['theta'][:, param_ind])
/ np.sum(w_relative))
except IndexError:
if handle_indexerror:
return np.nan
else:
raise
[docs]def param_cred(ns_run, logw=None, simulate=False, probability=0.5,
param_ind=0):
"""One-tailed credible interval on the value of a single parameter
(component of theta).
Parameters
----------
ns_run: dict
Nested sampling run dict (see the data_processing module
docstring for more details).
logw: None or 1d numpy array, optional
Log weights of samples.
simulate: bool, optional
Passed to ns_run_utils.get_logw if logw needs to be
calculated.
probability: float, optional
Quantile to estimate - must be in open interval (0, 1).
For example, use 0.5 for the median and 0.84 for the upper
84% quantile. Passed to weighted_quantile.
param_ind: int, optional
Index of parameter for which the credible interval should be
calculated. This corresponds to the column of ns_run['theta']
which contains the parameter.
Returns
-------
float
"""
if logw is None:
logw = nestcheck.ns_run_utils.get_logw(ns_run, simulate=simulate)
w_relative = np.exp(logw - logw.max()) # protect against overflow
return weighted_quantile(probability, ns_run['theta'][:, param_ind],
w_relative)
[docs]def param_squared_mean(ns_run, logw=None, simulate=False, param_ind=0):
"""Mean of the square of single parameter (second moment of its
posterior distribution).
Parameters
----------
ns_run: dict
Nested sampling run dict (see the data_processing module
docstring for more details).
logw: None or 1d numpy array, optional
Log weights of samples.
simulate: bool, optional
Passed to ns_run_utils.get_logw if logw needs to be
calculated.
param_ind: int, optional
Index of parameter for which the second moment should be
calculated. This corresponds to the column of ns_run['theta']
which contains the parameter.
Returns
-------
float
"""
if logw is None:
logw = nestcheck.ns_run_utils.get_logw(ns_run, simulate=simulate)
w_relative = np.exp(logw - logw.max()) # protect against overflow
w_relative /= np.sum(w_relative)
return np.sum(w_relative * (ns_run['theta'][:, param_ind] ** 2))
[docs]def r_mean(ns_run, logw=None, simulate=False):
"""Mean of the radial coordinate (magnitude of theta vector).
Parameters
----------
ns_run: dict
Nested sampling run dict (see the data_processing module
docstring for more details).
logw: None or 1d numpy array, optional
Log weights of samples.
simulate: bool, optional
Passed to ns_run_utils.get_logw if logw needs to be
calculated.
Returns
-------
float
"""
if logw is None:
logw = nestcheck.ns_run_utils.get_logw(ns_run, simulate=simulate)
w_relative = np.exp(logw - logw.max())
r = np.sqrt(np.sum(ns_run['theta'] ** 2, axis=1))
return np.sum(w_relative * r) / np.sum(w_relative)
[docs]def r_cred(ns_run, logw=None, simulate=False, probability=0.5):
"""One-tailed credible interval on the value of the radial coordinate
(magnitude of theta vector).
Parameters
----------
ns_run: dict
Nested sampling run dict (see the data_processing module
docstring for more details).
logw: None or 1d numpy array, optional
Log weights of samples.
simulate: bool, optional
Passed to ns_run_utils.get_logw if logw needs to be
calculated.
probability: float, optional
Quantile to estimate - must be in open interval (0, 1).
For example, use 0.5 for the median and 0.84 for the upper
84% quantile. Passed to weighted_quantile.
Returns
-------
float
"""
if logw is None:
logw = nestcheck.ns_run_utils.get_logw(ns_run, simulate=simulate)
w_relative = np.exp(logw - logw.max()) # protect against overflow
r = np.sqrt(np.sum(ns_run['theta'] ** 2, axis=1))
return weighted_quantile(probability, r, w_relative)
# Helper functions
# ----------------
[docs]def get_latex_name(func_in, **kwargs):
"""
Produce a latex formatted name for each function for use in labelling
results.
Parameters
----------
func_in: function
kwargs: dict, optional
Kwargs for function.
Returns
-------
latex_name: str
Latex formatted name for the function.
"""
if isinstance(func_in, functools.partial):
func = func_in.func
assert not set(func_in.keywords) & set(kwargs), (
'kwargs={0} and func_in.keywords={1} contain repeated keys'
.format(kwargs, func_in.keywords))
kwargs.update(func_in.keywords)
else:
func = func_in
param_ind = kwargs.pop('param_ind', 0)
probability = kwargs.pop('probability', 0.5)
kwargs.pop('handle_indexerror', None)
if kwargs:
raise TypeError('Unexpected **kwargs: {0}'.format(kwargs))
ind_str = r'{\hat{' + str(param_ind + 1) + '}}'
latex_name_dict = {
'count_samples': r'samples',
'logz': r'$\mathrm{log} \mathcal{Z}$',
'evidence': r'$\mathcal{Z}$',
'r_mean': r'$\overline{|\theta|}$',
'param_mean': r'$\overline{\theta_' + ind_str + '}$',
'param_squared_mean': r'$\overline{\theta^2_' + ind_str + '}$'}
# Add credible interval names
if probability == 0.5:
cred_str = r'$\mathrm{median}('
else:
# format percent without trailing zeros
percent_str = ('%f' % (probability * 100)).rstrip('0').rstrip('.')
cred_str = r'$\mathrm{C.I.}_{' + percent_str + r'\%}('
latex_name_dict['param_cred'] = cred_str + r'\theta_' + ind_str + ')$'
latex_name_dict['r_cred'] = cred_str + r'|\theta|)$'
try:
return latex_name_dict[func.__name__]
except KeyError as err:
err.args = err.args + ('get_latex_name not yet set up for ' +
func.__name__,)
raise
[docs]def weighted_quantile(probability, values, weights):
"""
Get quantile estimate for input probability given weighted samples using
linear interpolation.
Parameters
----------
probability: float
Quantile to estimate - must be in open interval (0, 1).
For example, use 0.5 for the median and 0.84 for the upper
84% quantile.
values: 1d numpy array
Sample values.
weights: 1d numpy array
Corresponding sample weights (same shape as values).
Returns
-------
quantile: float
"""
assert 1 > probability > 0, (
'credible interval prob= ' + str(probability) + ' not in (0, 1)')
assert values.shape == weights.shape
assert values.ndim == 1
assert weights.ndim == 1
sorted_inds = np.argsort(values)
quantiles = np.cumsum(weights[sorted_inds]) - (0.5 * weights[sorted_inds])
quantiles /= np.sum(weights)
return np.interp(probability, quantiles, values[sorted_inds])