Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions bayes_kit/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# functions
from .autocorr import autocorr
from .ess import ess, ess_imse, ess_ipse
from .iat import iat, iat_imse, iat_ipse
from .rhat import rhat

# classes
from .ensemble import Stretcher
from .hmc import HMCDiag
from .mala import MALA
from .metropolis import Metropolis, MetropolisHastings
Expand Down
33 changes: 33 additions & 0 deletions bayes_kit/autocorr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import numpy as np
import numpy.typing as npt

FloatType = np.float64
VectorType = npt.NDArray[FloatType]

def autocorr(chain: VectorType) -> VectorType:
    """Return the sample autocorrelations of the sequence at every lag
    from 0 to `len(chain) - 1`.  The result therefore has the same
    length as the input.

    The estimates are computed with NumPy's fast Fourier transform:
    the chain is centered, zero-padded to a power-of-two length, and
    the inverse transform of its power spectrum yields autocovariances,
    which are normalized by the sample variance.

    Parameters:
        chain: sequence whose autocorrelation is returned

    Returns:
        autocorrelation estimates at all lags for the specified sequence

    Raises:
        ValueError: if the size of the chain is less than 2
    """
    num_draws = len(chain)
    if num_draws < 2:
        raise ValueError(f"autocorr requires len(chain) >= 2, but {len(chain)=}")
    # pad to the next power of two >= 2 * N - 1 to avoid circular wraparound
    fft_size = 2 ** np.ceil(np.log2(2 * num_draws - 1)).astype("int")
    centered = chain - np.mean(chain)
    spectrum = np.fft.fft(centered, fft_size)
    power = np.abs(spectrum) ** 2
    # autocovariances via inverse FFT, normalized to autocorrelations
    autocov = np.fft.ifft(power).real / np.var(chain) / num_draws
    return autocov[:num_draws]
131 changes: 26 additions & 105 deletions bayes_kit/ess.py
Original file line number Diff line number Diff line change
@@ -1,100 +1,31 @@
import numpy as np
import numpy.typing as npt
import bayes_kit.autocorr as autocorr
from bayes_kit.iat import iat, iat_ipse, iat_imse

FloatType = np.float64
IntType = np.int64
VectorType = npt.NDArray[FloatType]

def autocorr_fft(chain: VectorType) -> VectorType:
"""
Return sample autocorrelations at all lags for the specified sequence.
Algorithmically, this function calls a fast Fourier transform (FFT).

Parameters:
chain: sequence whose autocorrelation is returned

Returns:
autocorrelation estimates at all lags for the specified sequence
"""
size = 2 ** np.ceil(np.log2(2 * len(chain) - 1)).astype("int")
var = np.var(chain)
ndata = chain - np.mean(chain)
fft = np.fft.fft(ndata, size)
pwr = np.abs(fft) ** 2
N = len(ndata)
acorr = np.fft.ifft(pwr).real / var / N
return acorr

def autocorr_np(chain: VectorType) -> VectorType:
"""
Return sample autocorrelations at all lags for the specified sequence.
Algorithmically, this function delegates to the Numpy `correlation()` function.

Parameters:
chain: sequence whose autocorrelation is returned

Returns:
autocorrelation estimates at all lags for the specified sequence
"""
chain_ctr = chain - np.mean(chain)
N = len(chain_ctr)
acorrN = np.correlate(chain_ctr, chain_ctr, "full")[N - 1 :]
return acorrN / N

def autocorr(chain: VectorType) -> VectorType:
"""
Return sample autocorrelations at all lags for the specified sequence.
Algorithmically, this function delegates to `autocorr_fft`.

Parameters:
chain: sequence whose autocorrelation is returned

Returns:
autocorrelation estimates at all lags for the specified sequence
"""
return autocorr_fft(chain)

def first_neg_pair_start(chain: VectorType) -> IntType:
"""
Return the index of first element of the sequence whose sum with the following
element is negative, or the length of the sequence if there is no such element.

Parameters:
chain: input sequence

Return:
index of first element whose sum with following element is negative, or
the number of elements if there is no such element
"""
N = len(chain)
n = 0
while n + 1 < N:
if chain[n] + chain[n + 1] < 0:
return n
n = n + 2
return N

def ess_ipse(chain: VectorType) -> FloatType:
    """
    Return an estimate of the effective sample size (ESS) of the
    specified Markov chain using the initial positive sequence
    estimator (IPSE) of the integrated autocorrelation time.

    The ESS is the chain length divided by the chain's integrated
    autocorrelation time, here estimated by `iat_ipse(chain)`.

    Parameters:
        chain: Markov chain whose ESS is returned

    Return:
        estimated effective sample size for the specified Markov chain

    Raises:
        ValueError: if there are fewer than 4 elements in the chain
    """
    if len(chain) < 4:
        raise ValueError(f"ess_ipse(chain) requires len(chain) >= 4, but {len(chain)=}")
    return len(chain) / iat_ipse(chain)


def ess_imse(chain: VectorType) -> FloatType:
"""
Expand All @@ -106,33 +37,23 @@ def ess_imse(chain: VectorType) -> FloatType:
This estimator was introduced in the following paper.

Geyer, C.J., 1992. Practical Markov chain Monte Carlo. Statistical Science
7(4):473--483.
7(4):473--483.

Parameters:
chain: Markov chain whose ESS is returned
chain: Markov chain whose ESS is returned

Return:
estimated effective sample size for the specified Markov chain
estimated effective sample size for the specified Markov chain

Throws:
ValueError: if there are fewer than 4 elements in the chain
ValueError: if there are fewer than 4 elements in the chain
"""
if len(chain) < 4:
raise ValueError(f"ess requires len(chains) >=4, but {len(chain) = }")
acor = autocorr(chain)
n = first_neg_pair_start(acor)
prev_min = acor[1] + acor[2]
# convex minorization uses slow loop
accum = prev_min
i = 3
while i + 1 < n:
minprev = min(prev_min, acor[i] + acor[i + 1])
accum = accum + minprev
i = i + 2
# end diff code
sigma_sq_hat = acor[0] + 2 * accum
ess = len(chain) / sigma_sq_hat
return ess
raise ValueError(
f"ess_imse(chain) requires len(chain) >=4, but {len(chain) = }"
)
return len(chain) / iat_imse(chain)


def ess(chain: VectorType) -> FloatType:
"""
Expand All @@ -141,14 +62,14 @@ def ess(chain: VectorType) -> FloatType:
to `ess_imse()`.

Parameters:
chain: Markov chains whose ESS is returned
chain: Markov chains whose ESS is returned

Return:
estimated effective sample size for the specified Markov chain
estimated effective sample size for the specified Markov chain

Throws:
ValueError: if there are fewer than 4 elements in the chain
"""
return ess_imse(chain)


ValueError: if there are fewer than 4 elements in the chain
"""
if len(chain) < 4:
raise ValueError(f"ess(chain) requires len(chain) >=4, but {len(chain) = }")
return len(chain) / iat(chain)
158 changes: 158 additions & 0 deletions bayes_kit/iat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
import numpy as np
import numpy.typing as npt
import bayes_kit.autocorr as autocorr

FloatType = np.float64
IntType = np.int64
VectorType = npt.NDArray[FloatType]


def _end_pos_pairs(acor: VectorType) -> IntType:
"""
Return the index 1 past the last positive pair of autocorrelations
starting on an even index. The sequence `acor` should contain
autocorrelations from a Markov chain with values at the lag given by
the index (i.e., `acor[0]` is autocorrelation at lag 0 and `acor[5]`
is autocorrelation at lag 5).

The even index pairs are (0, 1), (2, 3), (4, 5), ... This function
scans the pairs in order, and returns 1 plus the second index of the
last such pair that has a positive sum.

Examples:
```python
_end_pos_pairs([]) = 0
_end_pos_pairs([1]) = 0
_end_pos_pairs([1, 0.4]) = 2
_end_pos_pairs([1, -0.4]) = 2
_end_pos_pairs([1, -0.5, 0.25, -0.3]) == 2
_end_pos_pairs([1, -0.5, 0.25, -0.1]) == 4
_end_pos_pairs([1, -0.5, 0.25, -0.3, 0.05]) == 2
_end_pos_pairs([1, -0.5, 0.25, -0.1, 0.05]) == 4
```

Parameters:
acor (VectorType): Input sequence of autocorrelations at lag given by index.

Returns:
The index 1 past the last positive pair of values starting on an even index.
"""
N = len(acor)
n = 0
while n + 1 < N:
if acor[n] + acor[n + 1] < 0:
return n
n += 2
return n


def iat_ipse(chain: VectorType) -> FloatType:
    """
    Return an estimate of the integrated autocorrelation time (IAT)
    of the specified Markov chain using the initial positive sequence
    estimator (IPSE).

    The integrated autocorrelation time of a chain is defined to be
    the sum of the autocorrelations at every lag (positive and negative).
    If `autocorr[n]` is the autocorrelation at lag `n`, then

    ```
    IAT = SUM_{n in Z} autocorr[n],
    ```

    where `Z = {..., -2, -1, 0, 1, 2, ...}` is the set of integers.

    Because the autocorrelations are symmetric, `autocorr[n] == autocorr[-n]` and
    `autocorr[0] = 1`, if we double count the non-negative entries, we will have
    counted `autocorr[0]`, which is 1, twice, so we subtract 1, to get

    ```
    IAT = -1 + 2 * SUM_{n in Nat} autocorr[n],
    ```

    where `Nat = {0, 1, 2, ...}` is the set of natural numbers.

    References:
        Geyer, Charles J. 2011. "Introduction to Markov Chain Monte Carlo."
        In Handbook of Markov Chain Monte Carlo, edited by Steve Brooks,
        Andrew Gelman, Galin L. Jones, and Xiao-Li Meng, 3-48. Chapman &
        Hall/CRC.

    Parameters:
        chain: A Markov chain.

    Return:
        An estimate of the integrated autocorrelation time (IAT) for the specified chain.

    Raises:
        ValueError: if there are fewer than 4 elements in the chain
    """
    if len(chain) < 4:
        # message names this function ("ess ... len(chains)" was a copy-paste slip)
        raise ValueError(f"iat_ipse requires len(chain) >= 4, but {len(chain)=}")
    acor = autocorr(chain)
    # truncate at the end of the initial positive sequence of lag pairs
    n = _end_pos_pairs(acor)
    # double-count the positive lags, then remove the doubly counted lag-0 term (= 1)
    return 2 * acor[0:n].sum() - 1


def iat_imse(chain: VectorType) -> FloatType:
    """
    Return an estimate of the integrated autocorrelation time (IAT)
    of the specified Markov chain using the initial monotone sequence
    estimator (IMSE).

    The IMSE imposes a monotonic downward condition on the sum of pairs,
    replacing each sum with the minimum of the sum and the minimum of
    the previous sums.

    References:
        Geyer, C.J., 1992. Practical Markov chain Monte Carlo. Statistical Science
        7(4):473--483.

        Geyer, Charles J. 2011. "Introduction to Markov Chain Monte Carlo."
        In Handbook of Markov Chain Monte Carlo, edited by Steve Brooks,
        Andrew Gelman, Galin L. Jones, and Xiao-Li Meng, 3-48. Chapman &
        Hall/CRC.

    Parameters:
        chain: A Markov chain.

    Return:
        An estimate of integrated autocorrelation time (IAT) for the specified chain.

    Raises:
        ValueError: If there are fewer than 4 elements in the chain.
    """
    if len(chain) < 4:
        # message names this function ("iat ... len(chains)" was imprecise)
        raise ValueError(f"iat_imse requires len(chain) >= 4, but {len(chain)=}")
    acor = autocorr(chain)
    # truncate at the end of the initial positive sequence of lag pairs
    n = _end_pos_pairs(acor)
    prev_min = acor[0] + acor[1]
    acor_sum = prev_min
    i = 2
    while i + 1 < n:
        # enforce monotone downward condition (slow loop)
        prev_min = min(prev_min, acor[i] + acor[i + 1])
        acor_sum += prev_min
        i += 2
    # double-count positive lags, subtract the doubly counted lag-0 term (= 1)
    return 2 * acor_sum - 1


def iat(chain: VectorType) -> FloatType:
    """
    Return an estimate of the integrated autocorrelation time (IAT)
    of the specified Markov chain.  Evaluation delegates to the
    initial monotone sequence estimator, `iat_imse(chain)`.

    The IAT can be less than one in cases where the Markov chain is
    anti-correlated.

    Parameters:
        chain: A Markov chain.

    Return:
        The integrated autocorrelation time (IAT) for the specified chain.

    Raises:
        ValueError: If there are fewer than 4 elements in the chain.
    """
    return iat_imse(chain)
Loading