Changes from 2 commits
55 changes: 46 additions & 9 deletions README.md
@@ -71,24 +71,38 @@ draws from target log density, which may be used for Monte Carlo
estimates of posterior expectations and quantiles for uncertainty
quantification.

#### Random-walk Metropolis sampler
#### Metropolis sampler

Random-walk Metropolis (RWM) is a diffusive sampler that requires a
target log density function and a symmetric pseudorandom proposal
generator.
Metropolis is a diffusive sampler that requires a target log density
function and a symmetric pseudorandom proposal generator.

#### Metropolis-adjusted Langevin sampler
* **Random-Walk Metropolis (RWM)**: uses a zero-centered normal proposal,
resulting in Markov chains that are random walks

Metropolis-adjusted Langevin (MALA) is a diffusive sampler that adjusts
proposals with gradient-based information. MALA requires a target log
density and gradient function.
#### Metropolis-Hastings sampler

Metropolis-Hastings (MH) is a sampler that requires a target log
density function, a (not necessarily symmetric) proposal generator,
and a way to evaluate a proposal's log density.

* **Metropolis-adjusted Langevin (MALA)**: uses a random proposal with a
gradient adjustment. MALA requires a target log density and gradient
function.

* **Affine-Invariant Walker**: maintains an ensemble of parameter values
and conditions each MH proposal on complementary members of the ensemble.
Affine-invariant walkers require only a target log density function; a
usage sketch follows below.
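
A minimal usage sketch for the ensemble sampler, mirroring the
`test/test_ensemble.py` test added in this PR (`StdNormal` is the test
model used there, and the constructor arguments follow
`AffineInvariantWalker` in `bayes_kit/ensemble.py`):

```python
import numpy as np
from bayes_kit.ensemble import AffineInvariantWalker
from test.models.std_normal import StdNormal

model = StdNormal()
sampler = AffineInvariantWalker(model, a=2, walkers=10)
# sample() advances every walker once and returns a walkers x dimension array;
# copy the row because the returned array is the sampler's internal state
draws = np.array([sampler.sample()[0].copy() for _ in range(10_000)])
print(draws.mean(axis=0), draws.var(axis=0, ddof=1))  # expect roughly 0 and 1
```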

#### Hamiltonian Monte Carlo sampler

Hamiltonian Monte Carlo (HMC) simulates Hamiltonian dynamics with a
potential energy function equal to the negative log density. It
requires a target log density, gradient function, and optionally a
metric.
metric. Technically, HMC composes a Gibbs kernel that refreshes the
momentum (normally distributed, corresponding to a quadratic kinetic
energy) with a deterministic Metropolis sampler whose proposals are
generated by simulating the Hamiltonian dynamics.
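
As a rough, self-contained illustration of that composition (momentum
refresh, leapfrog simulation of the dynamics, Metropolis accept/reject),
here is a sketch for a standard normal target; this is not the package's
`HMCDiag` API, and `stepsize` and `steps` are hypothetical tuning constants:

```python
import numpy as np

def log_density(theta):  # standard normal target
    return -0.5 * np.sum(theta ** 2)

def grad(theta):  # gradient of the target log density
    return -theta

def hmc_step(theta, stepsize=0.1, steps=20):
    rho = np.random.normal(size=theta.shape)  # Gibbs refresh of the momentum
    theta_star, rho_star = theta.copy(), rho.copy()
    for _ in range(steps):  # leapfrog integration of the Hamiltonian dynamics
        rho_star = rho_star + 0.5 * stepsize * grad(theta_star)
        theta_star = theta_star + stepsize * rho_star
        rho_star = rho_star + 0.5 * stepsize * grad(theta_star)
    # Metropolis accept/reject on the joint (position, momentum) log density
    log_accept = (log_density(theta_star) - 0.5 * np.sum(rho_star ** 2)) - (
        log_density(theta) - 0.5 * np.sum(rho ** 2)
    )
    return theta_star if np.log(np.random.uniform()) < log_accept else theta
```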


### Sequential Monte Carlo samplers

@@ -108,6 +122,29 @@ p(theta | y)^t[n] * p(theta),
where the temperature `t[n]` runs from 0 to 1 across iterations.



### Posterior analysis

#### R-hat

R-hat is a statistic over multiple Markov chains that converges to 1
if the chains have the same stationary distribution (under some
mild assumptions).
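
For example (a sketch using the `rhat` function from `bayes_kit/rhat.py`,
which appears later in this diff), two chains drawn from the same
distribution should yield a value near 1:

```python
import numpy as np
from bayes_kit.rhat import rhat

chains = [np.random.normal(size=10_000), np.random.normal(size=10_000)]
print(rhat(chains))  # approximately 1 for chains sharing a stationary distribution
```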


#### Effective sample size

The effective sample size of a Markov chain for estimating a specific
expectation is the number of independent draws that would lead to the
same standard error.
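
For instance (a sketch assuming the `ess` estimator in `bayes_kit/ess.py`),
a chain of effectively independent draws has an effective sample size close
to its length:

```python
import numpy as np
from bayes_kit.ess import ess

chain = np.random.normal(size=10_000)  # stand-in for one Markov chain of draws
print(ess(chain))  # close to 10_000 for (nearly) independent draws
```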

#### Standard error

The standard error of an unbiased estimate of an expectation gives the
scale of the estimate's approximately normal error distribution.
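
Concretely (again assuming `bayes_kit.ess.ess`), the Monte Carlo standard
error of a posterior-mean estimate scales the draws' standard deviation by
the square root of the effective sample size:

```python
import numpy as np
from bayes_kit.ess import ess

chain = np.random.normal(size=10_000)  # stand-in for one Markov chain of draws
std_err = np.std(chain, ddof=1) / np.sqrt(ess(chain))
print(std_err)  # roughly 1 / sqrt(10_000) = 0.01 for this chain
```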



## Dependencies

`bayes-kit` only depends on a single external package,
2 changes: 1 addition & 1 deletion bayes_kit/__init__.py
@@ -1,4 +1,4 @@
from .hmc import HMCDiag
from .rwm import RandomWalkMetropolis
from .ensemble import Stretcher
from .ensemble import AffineInvariantWalker
from .smc import TemperedLikelihoodSMC
121 changes: 72 additions & 49 deletions bayes_kit/ensemble.py
@@ -1,68 +1,91 @@
from typing import Callable, Optional, Tuple
from typing import Callable, Iterator, Optional, Tuple
from numpy.typing import NDArray
import numpy as np

from .model_types import LogDensityModel

Sample = NDArray[np.float64]

class Stretcher:
class AffineInvariantWalker:
"""
An implementation of the affine-invariant ensemble sampler of
Goodman and Weare (2010).
References:
Goodman, J. and Weare, J., 2010. Ensemble samplers with affine invariance.
*Communications in Applied Mathematics and Computational Science*
5(1):65--80.
"""

# def __init__(
# self,
# model: LogDensityModel,
# a: Optional[float] = None,
# walkers: Optional[int] = None
# init: Optional[NDarray[np.float64]] = None)
# ):
# self._model = model
# self._dim = self._model.dims()
# if a != None and a < 1:
# raise ValueError(f"stretch bound must be greater than or equal to 1; found {a=}")
# self._a = a
# self._sqrt_a = np.sqrt(a)
# self._inv_sqrt_a = 1 / self._sqrt_a
# if walkers != NONE and (walkers <= 0 or walkers % 2 != 0) :
# raise ValueError(f"walkers must be strictly positive, even integer; found {walkers=}")
# self._walkers = walkers or 2 * self._dim
# self._halfwalkers = a / 2
# self._drawshape = (self._walkers, self._dim)
# if init != None and init.shape != self._drawshape:
# raise ValueError(f"init must be shape of draw {self._drawshape}; found {init.shape=}")
# self._thetas = init or np.random.normal(size=self._drawshape)
# self._firsthalf = range(halfwalkers)
# self._secondhalf = range(halfwalkers, walkers)
def __init__(
self,
model: LogDensityModel,
a: Optional[float] = None,
walkers: Optional[int] = None,
init: Optional[NDArray[np.float64]] = None
):
"""
Initialize the sampler with a log density model and, optionally, the
proposal bound, number of walkers, and initial parameter values.
Parameters:
model: class used to evaluate log densities
a: bound on the stretch proposal (default 2)
walkers: an even number of walkers to use (defaults to 2 times the dimensionality of `model`)
init: `walkers` x `dimension` array of initial positions (defaults to standard normal)
Throws:
ValueError: if `a` is provided and not >= 1, if `walkers` is provided and not a strictly positive, even integer,
or if `init` is provided and is not an `NDArray` of shape `walkers` x `dimension`
"""
self._model = model
self._dim = self._model.dims()
if a is not None and np.float64(a) < 1:
raise ValueError(f"stretch bound must be greater than or equal to 1; found {a=}")
self._a = np.float64(a or 2.0)
self._sqrt_a = np.sqrt(self._a)
self._inv_sqrt_a = 1 / self._sqrt_a
self._walkers = np.int64(walkers or 2 * self._dim)
if self._walkers < 2 or self._walkers % 2 != 0:
raise ValueError(f"walkers must be strictly positive, even integer; found {walkers=}")
self._halfwalkers = self._walkers // 2
self._drawshape = (int(self._walkers), self._dim)
self._thetas = np.asarray(init if init is not None else np.random.normal(size=self._drawshape))
if self._thetas.shape != self._drawshape:
raise ValueError(f"init must be shape of draw {self._drawshape}; found {self._thetas.shape=}")
self._firsthalf = range(0, int(self._halfwalkers))
self._secondhalf = range(int(self._halfwalkers), int(self._walkers))

# def __iter__(self):
# return self
def __iter__(self) -> Iterator[Sample]:
return self

# def __next__(self):
# return self.sample
def __next__(self) -> Sample:
return self.sample()

# def draw_z(self):
# """Return random draw z in (1/a, a) with p(z) propto 1 / sqrt(z)"""
# return np.square(np.random.uniform(self._inv_sqrt_a, self._sqrt_a))
def draw_z(self) -> Sample:
Collaborator
  • What's a z?

  • Should we prefix "private" methods with an underscore, as we do with fields? Doing so can make it more obvious what's the intended API.

Collaborator Author
@bob-carpenter bob-carpenter Feb 9, 2023

z is the name of the variable in the paper.

The doc explains how the function works:

Return a random draw of `z` in `(1/a, a)` with `p(z) propto 1 / sqrt(z)`

The usage shows that it's an interpolation ratio:

        z = self.draw_z()
        theta_star: NDArray[np.float64] = theta_j + z * (theta_k - theta_j)

Are you asking indirectly what it is because you think it should have a more descriptive variable name?

I'm OK prefixing internal methods with an underscore. I wasn't even aware of that convention. Please bear with me for a couple of years as I come up to speed on Python!

"""Return random draw z in (1/a, a) with p(z) propto 1 / sqrt(z)"""
return np.asarray(np.square(np.random.uniform(self._inv_sqrt_a, self._sqrt_a)))

# def stretch_move(self, theta_k: NDarray[np.float64], theta_j: NDarray[np.float64]):
# z = self.draw_z()
# theta_star = theta_j + z * (theta_k - theta_j) # (1 - z) * theta_j + z * theta_k
# log_q = (self._dims - 1) * np.log(z) + self._model.log_density(theta_star) - self._model.log_density(theta_k)
# if np.log(np.random.uniform()) < log_q:
# return theta_star
# return theta_k
def stretch_move(self, theta_k: NDArray[np.float64], theta_j: NDArray[np.float64]) -> Sample:
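# propose along the line through theta_k and the complementary walker theta_j, scaled by z;
# the (self._dim - 1) * np.log(z) term is the z**(dim - 1) factor in Goodman and Weare's
# stretch-move acceptance probability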
z = self.draw_z()
theta_star = np.asarray(theta_j + z * (theta_k - theta_j)) # (1 - z) * theta_j + z * theta_k
print(f"{theta_k=} {theta_j=} {z=} {theta_star=}")
log_q = (self._dim - 1) * np.log(z) + self._model.log_density(theta_star) - self._model.log_density(theta_k)
log_u = np.log(np.random.uniform())
print(f"{log_q=} {log_u=}")
if log_u < log_q:
return theta_star
return theta_k

# def sample(self) -> NDarray[np.float64]
# js = np.random.choice(secondhalf, size=self._halfwalkers)
# for k in firsthalf:
# self._thetas[k] = stretch_move(self._thetas[k], self._thetas[js[k]])
# js = np.random.choice(firsthalf, size=self._halfwalkers)
# for k in secondhalf:
# self_thetas[k] = stretch_move(self._thetas[k], self._thetas[js[k]])
# return self._thetas
def sample(self) -> Sample:
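# update the first half of the ensemble conditioning on walkers from the second half, then vice versa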
print(f"IN: {self._thetas=}")
js = np.random.choice(self._secondhalf, size=self._halfwalkers, replace=False)
for k in self._firsthalf:
self._thetas[k] = self.stretch_move(self._thetas[k], self._thetas[js[k]])
js = np.random.choice(self._firsthalf, size=self._halfwalkers, replace=False)
for k in self._secondhalf:
self._thetas[k] = self.stretch_move(self._thetas[k], self._thetas[js[k - self._halfwalkers]])
print(f"OUT: {self._thetas=}")
return self._thetas


# TODO(carpenter): cache log density rather than recomputing for self
12 changes: 6 additions & 6 deletions bayes_kit/ess.py
@@ -2,7 +2,7 @@
import numpy.typing as npt

FloatType = np.float64
IntType = np.int64
IntType = int
VectorType = npt.NDArray[FloatType]

def autocorr_fft(chain: VectorType) -> VectorType:
@@ -22,7 +22,7 @@ def autocorr_fft(chain: VectorType) -> VectorType:
fft = np.fft.fft(ndata, size)
pwr = np.abs(fft) ** 2
N = len(ndata)
acorr = np.fft.ifft(pwr).real / var / N
acorr: VectorType = np.fft.ifft(pwr).real / var / N
return acorr

def autocorr_np(chain: VectorType) -> VectorType:
@@ -39,7 +39,7 @@ def autocorr_np(chain: VectorType) -> VectorType:
chain_ctr = chain - np.mean(chain)
N = len(chain_ctr)
acorrN = np.correlate(chain_ctr, chain_ctr, "full")[N - 1 :]
return acorrN / N
return np.asarray(acorrN / N)

def autocorr(chain: VectorType) -> VectorType:
"""
@@ -92,9 +92,9 @@ def ess_ipse(chain: VectorType) -> FloatType:
raise ValueError(f"ess requires len(chains) >=4, but {len(chain) = }")
acor = autocorr(chain)
n = first_neg_pair_start(acor)
sigma_sq_hat = acor[0] + 2 * sum(acor[1:n])
sigma_sq_hat = acor[0] + 2 * acor[1:n].sum()
ess = len(chain) / sigma_sq_hat
return ess
return np.float64(ess)

def ess_imse(chain: VectorType) -> FloatType:
"""
@@ -132,7 +132,7 @@ def ess_imse(chain: VectorType) -> FloatType:
# end diff code
sigma_sq_hat = acor[0] + 2 * accum
ess = len(chain) / sigma_sq_hat
return ess
return np.float64(ess)

def ess(chain: VectorType) -> FloatType:
"""
6 changes: 3 additions & 3 deletions bayes_kit/rhat.py
@@ -29,10 +29,10 @@ def rhat(chains: list[SeqType]) -> FloatType:
"""
if len(chains) < 2:
raise ValueError(f"rhat requires len(chains) >= 2, but {len(chains) = }")
chain_lengths = [len(chain) for chain in chains]
chain_lengths = [len(np.asarray(chain)) for chain in chains]
mean_chain_length = np.mean(chain_lengths)
means = [np.mean(chain) for chain in chains]
vars = [np.var(chain, ddof=1) for chain in chains]
means = [np.mean(np.asarray(chain)) for chain in chains]
vars = [np.var(np.asarray(chain), ddof=1) for chain in chains]
r_hat: np.float64 = np.sqrt(
(mean_chain_length - 1) / mean_chain_length + np.var(means, ddof=1) / np.mean(vars)
)
1 change: 1 addition & 0 deletions bayes_kit/rwm.py
@@ -36,3 +36,4 @@ def sample(self) -> Sample:
self._theta = np.asanyarray(theta_star)
self._log_p_theta = log_p_theta_star
return self._theta, self._log_p_theta

20 changes: 20 additions & 0 deletions test/test_ensemble.py
@@ -0,0 +1,20 @@
from test.models.std_normal import StdNormal
from bayes_kit.ensemble import AffineInvariantWalker
import numpy as np

def test_aiw_std_normal() -> None:
Collaborator
@jsoules jsoules Feb 9, 2023

I appreciate the elegance of delegating to the actual sampling test, however I think there are two potential issues:

  • This can get a bit burdensome in test execution time; and
  • it's not clear that the "sampler worked" base test is the right level of granularity for the features being tested.

On the second point:

  • If the run_sampling_test call passes with the sampler's default parameters, it isn't clear that rerunning it with different parameters can capture how those parameters have changed the underlying behavior. Like, would specifying a different value for a be expected to produce a tighter tolerance, convergence after fewer iterations, etc.? For another example, as written, we test that an initialization value can be passed successfully, but it's not clear whether that value was actually honored, e.g. by seeing whether choosing a good initial value results in faster convergence/tighter tolerance/etc. Essentially, if the tolerances in run_sampling_test are loose enough, that function might pass cases where the desired behavior from the extra sampler parameters isn't actually implemented.
  • Conversely, we may not need to run even 10k iterations to confirm that the desired behavior is being honored. For instance, when setting the number of walkers, it may be enough to construct the sampler object, and then assert on the dimensions of its _thetas member (along with maybe some tests for e.g. stretch_move specifically, to make sure that function is actually using all available walkers appropriately).

I'm not proposing to test a bunch of volatile implementation details, but piercing the veil a little bit might lead to more specific tests and a faster test suite overall.

Collaborator Author

I'm not entirely sure what you're asking me to do here.

this can get a bit burdensome in test execution time

Our whole test suite, including this one, runs in 13s on my desktop. Is this really a concern or can I just ignore this for now?

it's not clear that the "sampler worked" base test is the right level of granularity
If the run_sampling_test call passes with the sampler's default parameters, it isn't clear that rerunning it with different parameters can capture how those parameters have changed the underlying behavior.

There's no good theory around this to test in the affine-invariant case. The hyperparameter a is similar to the proposal scale in random-walk Metropolis. There will be an optimal value. In many cases we know what that is for Metropolis but there's not much theory around the affine-invariant ensemble methods. Should we start by testing the behavior of differently scaled or differently shaped proposals in Metropolis?

If we're willing to open up the box and test internals, I could maybe rewrite parts of the code to expose as functions rather than just as code. For example, we could test that the variance of proposals is higher with larger a.

assert on the dimensions of its _thetas member (along with maybe some tests for e.g. stretch_move specifically, to make sure that function is actually using all available walkers appropriately).

That won't test that they're being used appropriately. Everything's stochastic all the way down in that updating a single walker involves selecting a random complementary walker, proposing, then doing an MH accept/reject step. I'm not sure how to test things like whether we have a uniform distribution over all complementary walkers. Again, we'd have to decompose the sampler into finer grained pieces and test those.

For another example, as written, we test that an initialization value can be passed successfully, but it's not clear whether that value was actually honored, e.g. by seeing whether choosing a good initial value results in faster convergence/tighter tolerance/etc.

Again, same problem for Metropolis and every other sampler that we use. And again, we can open up the box and test that we really are setting the values appropriately on initialization. We might want to test that everything breaks if each ensemble member is initialized at the same value.

Collaborator

First off, this does not seem to me to be a blocker for the PR going through--as @roualdes says, it's definitely a good step forward!--and I don't intend to single out this PR for particular scrutiny, it just happened to be what made me realize that we need to revisit the topic of testing strategy.

Also, I wasn't clear enough in my earlier comment. I'm not requesting even-stricter tests; I just want to make sure that each test adds new information. If we have several tests that don't capture different specific behavior of the subject-under-test, one option is to write them to test more specific details; another is to say "These are testing the same thing, maybe we'd be fine cutting some of them to save runtime." Both are valid responses, as long as we're satisfied with the resulting coverage.

Here, I'm hearing you say that for the affine-invariant sampler, there's no good theory for how changing the hyperparameters should affect overall algorithmic performance. To me that sounds like it means that even if we rerun the convergence test, we don't have an outcome to assert on afterwards. So I'm thinking there's no need to spend time rerunning it--I'd be happy to cut entirely the tests against a specific initialization point, initializing with different random generators, etc.

(That said, if we do want to test those things, rerunning the full integration test for them will take longer & probably give less new information than testing something more specific. So for the initialization point, instead of repeating the test for convergence, I'd pick a start point out in the boonies with a model centered at 0, take one draw, and assert that the result is within $APPROPRIATE_VALUE of the initialization; for the random generators, pass in a mock generator and assert that that mock was used; etc.)

I do love the idea of testing for higher-order properties that any correct implementation ought to have--you mentioned ensuring correct distribution over walkers; hearing you mention it, that sounds like a great thing to check. For that specifically, we can mock _stretch_move with a function that captures the parameters it was called with, run 500 or so draws (which will be instantaneous, since we don't actually need to evaluate the densities), and then do some assertions on the distribution of parameters used to call it--I'm happy to help with setting this up, it'll be good for me too to figure out the mechanics. But again, I don't think that needs to be present for this PR to go through. (Similarly with checking for higher proposal variance with higher a--that sounds useful to add, and maybe something we can test either through a test against _draw_z, or with some clever mocking or a minor refactor, but I think that's more an "open an issue" thing than a blocker for the PR.)

Our whole test suite, including this one, runs in 13s on my desktop. Is this really a concern or can I just ignore this for now?

Basically, it's a real concern that you can ignore for now 😄 I'm just looking at the possible matrix of parameters and thinking that even if each call to run_sampling_tests takes (say) half a second, the overall runtime could get noticeable. (And again, my objection to that is in the context where we don't have a good theoretical basis to test for new outcomes from the additional integration test calls--I'm perfectly sanguine about taking the extra time if it gives us key information, but it sounds like it might not here.)

Should we start by testing the behavior of differently scaled or differently shaped proposals in Metropolis?

I think this gets into the tension between testing the algorithm and testing the implementation of the algorithm. For the purposes of testing the implementation, I don't think we need to go beyond testing that Metropolis calls what it's given; past that point, it seems like we'd be more testing the Metropolis algorithm itself rather than this implementation of it.

Collaborator Author

I think this is the key issue that @jsoules raised:

we'd be more testing the Metropolis algorithm itself rather than this implementation of it

I agree that we should test our implementations. And that what we have now mainly tests the algorithm.

We can test that our implementation does the right thing with changes in a, namely that it gives you a broader range of proposals. That's easy to check. And that's why we were talking about mock objects on the way to lunch. It does require refactoring the code so we can test the internal details of the algorithm. Otherwise, all we have is something we can test end-to-end. You can see that I just pushed the new tests for ESS and added the IAT functionality, where the IAT functionality is required for the ESS functionality. We can do the same thing here.

Part of what I was doing with all those calls is making sure that we have code coverage. The tests can be simplified.

Collaborator

Sounds like we're on the same page!

Re: doing the right thing with changes in a, would you consider hitting _draw_z for N draws each with M different values of a (well, _inv_sqrt_a and _sqrt_a) to be sufficient? Or would you prefer to test only through "public" parts of the API?

My understanding was that the z interpolation factor was pretty specific to this sampler and so isn't likely to have outside consumers if promoted to a public API endpoint, so I'm comfortable leaving the code as currently structured and just testing the private function if it avoids some of the refactoring. Also happy to go along with a bigger restructure if there's a practical or policy reason to do so--just looking for an appropriately minimized amount of effort here.

(To be clear, I'm imagining a test that in pseudocode says basically:

def test_variance_of_interpolation_factor_increases_as_a_increases():
    def test_an_a(a: float) -> list[Sample]:
        sampler = ...  # instantiate an AffineInvariantWalker with the given a
        return [sampler._draw_z() for _ in range(100)]
    a_vals = [1, 2, 5, 10, 100]
    zs = [test_an_a(a) for a in a_vals]
    # Do something to fix dimensions, then
    assert var(zs[0]) < var(zs[1]) < var(zs[2]) < var(zs[3]) < var(zs[4])

Naturally we could get a lot more specific about the statistical properties we'd expect from _draw_z() and the tolerance to which we'd want those to be realized, this is just what I had in mind right now.)

# init with draw from posterior
init = np.random.normal(loc=0, scale=1, size=[1])
model = StdNormal()
sampler = AffineInvariantWalker(model, a = 2, walkers=10)
M = 10000  # enough draws for the 0.1 tolerances below
# copy each draw: sample() returns a view of the walker state, which is updated in place
draws = np.array([sampler.sample()[0].copy() for _ in range(M)])
mean = draws.mean(axis=0)
var = draws.var(axis=0, ddof=1)
np.testing.assert_allclose(mean, model.posterior_mean(), atol=0.1)
np.testing.assert_allclose(var, model.posterior_variance(), atol=0.1)
7 changes: 0 additions & 7 deletions test/test_rwm.py
@@ -16,14 +16,7 @@ def test_rwm_std_normal() -> None:
np.testing.assert_allclose(mean, model.posterior_mean(), atol=0.1)
np.testing.assert_allclose(var, model.posterior_variance(), atol=0.1)

accept = M - (draws[: M - 1] == draws[1:]).sum()
print(f"{accept=}")
print(f"{draws[1:10]=}")
print(f"{mean=} {var=}")


def test_rwm_repr() -> None:

init = np.random.normal(loc=0, scale=1, size=[1])
model = StdNormal()
