-
Notifications
You must be signed in to change notification settings - Fork 4
affine invariant sampler + type cleanup #10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
5751a61
6d32f46
4165598
00b8660
6ace036
1ced4d6
56935cc
d47759c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| from .hmc import HMCDiag | ||
| from .rwm import RandomWalkMetropolis | ||
| from .ensemble import Stretcher | ||
| from .ensemble import AffineInvariantWalker | ||
| from .smc import TemperedLikelihoodSMC |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,68 +1,91 @@ | ||
| from typing import Callable, Optional, Tuple | ||
| from typing import Callable, Iterator, Optional, Tuple | ||
| from numpy.typing import NDArray | ||
| import numpy as np | ||
|
|
||
| from .model_types import LogDensityModel | ||
|
|
||
Sample = NDArray[np.float64]


class AffineInvariantWalker:
    """
    An implementation of the affine-invariant ensemble sampler of
    Goodman and Weare (2010).

    The sampler maintains an ensemble of walkers split into two halves.
    Each walker is updated with a "stretch move": it is moved along the
    line through itself and a randomly chosen walker from the
    complementary half, with a Metropolis-Hastings accept/reject step
    that preserves the target density.

    References:
        Goodman, J. and Weare, J., 2010. Ensemble samplers with affine
        invariance. *Communications in Applied Mathematics and
        Computational Science* 5(1):65--80.
    """

    def __init__(
        self,
        model: LogDensityModel,
        a: Optional[float] = None,
        walkers: Optional[int] = None,
        init: Optional[NDArray[np.float64]] = None,
    ):
        """
        Initialize the sampler with a log density model, and optionally
        proposal bounds, number of walkers, and initial parameter values.

        Parameters:
            model: class used to evaluate log densities
            a: bound on the stretch proposal, must be >= 1 (default 2)
            walkers: an even, strictly positive number of walkers
                (default 2 * dimensionality of `model`)
            init: `walkers` x `dimension` array of initial positions
                (defaults to standard normal draws)

        Throws:
            ValueError: if `a` is provided and not >= 1, if `walkers` is
                provided and not a strictly positive even integer, or if
                `init` is provided and not of shape `walkers` x `dimension`
        """
        self._model = model
        self._dim = self._model.dims()
        if a is not None and a < 1:
            raise ValueError(f"stretch bound must be greater than or equal to 1; found {a=}")
        self._a = np.float64(a) if a is not None else np.float64(2.0)
        # BUG FIX: derive sqrt(a) from the defaulted value; the original
        # computed np.sqrt(np.float64(a)), which raises TypeError on the
        # default path where a is None.
        self._sqrt_a = np.sqrt(self._a)
        self._inv_sqrt_a = 1 / self._sqrt_a
        # `is not None` (not `or`) so an explicit walkers=0 is rejected below
        # instead of silently falling back to the default.
        self._walkers = int(walkers) if walkers is not None else 2 * self._dim
        if self._walkers < 2 or self._walkers % 2 != 0:
            raise ValueError(f"walkers must be strictly positive, even integer; found {walkers=}")
        self._halfwalkers = self._walkers // 2
        self._drawshape = (self._walkers, self._dim)
        # BUG FIX: `init or ...` raises ValueError (ambiguous truth value)
        # for any array with more than one element; test against None.
        if init is None:
            self._thetas = np.random.normal(size=self._drawshape)
        else:
            self._thetas = np.asarray(init)
            if self._thetas.shape != self._drawshape:
                raise ValueError(f"init must be shape of draw {self._drawshape}; found {self._thetas.shape=}")
        self._firsthalf = range(self._halfwalkers)
        self._secondhalf = range(self._halfwalkers, self._walkers)

    def __iter__(self) -> Iterator[Sample]:
        """Return this sampler as an iterator over ensemble draws."""
        return self

    def __next__(self) -> Sample:
        """Return the next ensemble draw."""
        return self.sample()

    def draw_z(self) -> Sample:
        """Return random draw z in (1/a, a) with p(z) propto 1 / sqrt(z)"""
        # squaring a uniform draw on (1/sqrt(a), sqrt(a)) yields the
        # 1/sqrt(z) density on (1/a, a)
        return np.asarray(np.square(np.random.uniform(self._inv_sqrt_a, self._sqrt_a)))

    def stretch_move(self, theta_k: NDArray[np.float64], theta_j: NDArray[np.float64]) -> Sample:
        """
        Return the next position for walker `theta_k` after a stretch move
        using the complementary walker `theta_j`.

        Parameters:
            theta_k: position of the walker being updated
            theta_j: position of a walker from the complementary half

        Returns:
            the accepted proposal, or `theta_k` unchanged on rejection
        """
        z = self.draw_z()
        # proposal: (1 - z) * theta_j + z * theta_k
        theta_star = np.asarray(theta_j + z * (theta_k - theta_j))
        # MH log acceptance ratio; (dim - 1) * log(z) is the Jacobian
        # correction for the stretch move
        log_q = (self._dim - 1) * np.log(z) + self._model.log_density(theta_star) - self._model.log_density(theta_k)
        # CLEANUP: removed leftover debug print statements from the original
        if np.log(np.random.uniform()) < log_q:
            return theta_star
        return theta_k

    def sample(self) -> Sample:
        """
        Advance every walker one step and return the ensemble positions.

        Each half of the ensemble is updated in turn, pairing each of its
        walkers with a distinct, randomly chosen walker from the other half.

        Returns:
            `walkers` x `dimension` array of current positions; this is a
            live view of internal state, not a copy
        """
        js = np.random.choice(self._secondhalf, size=self._halfwalkers, replace=False)
        for k in self._firsthalf:
            self._thetas[k] = self.stretch_move(self._thetas[k], self._thetas[js[k]])
        js = np.random.choice(self._firsthalf, size=self._halfwalkers, replace=False)
        for k in self._secondhalf:
            self._thetas[k] = self.stretch_move(self._thetas[k], self._thetas[js[k - self._halfwalkers]])
        return self._thetas


# TODO(carpenter): cache log density rather than recomputing for self
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| from test.models.std_normal import StdNormal | ||
| from bayes_kit.ensemble import AffineInvariantWalker | ||
| import numpy as np | ||
|
|
||
| def test_aiw_std_normal() -> None: | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I appreciate the elegance of delegating to the actual sampling test, however I think there are two potential issues:
On the second point:
I'm not proposing to test a bunch of volatile implementation details, but piercing the veil a little bit might lead to more specific tests and a faster test suite overall.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not entirely sure what you're asking me to do here.
Our whole test suite, including this one, runs in 13s on my desktop. Is this really a concern or can I just ignore this for now?
There's no good theory around this to test in the affine-invariant case. The hyperparameter If we're willing to open up the box and test internals, I could maybe rewrite parts of the code to expose as functions rather than just as code. For example, we could test that the variance of proposals is higher with larger
That won't test that they're being used appropriately. Everything's stochastic all the way down in that updating a single walker involves selecting a random complementary walking, proposing, then doing an MH accept/reject step. I'm not sure how to test things like that we have a uniform distribution over all complementary walkers. Again, we'd have to decompose the sampler into finer grained pieces and test those.
Again, same problem for Metropolis and every other sampler that we use. And again, we can open up the box and test that we really are setting the values appropriately on initialization. We might want to test that everything breaks if each ensemble member is initialized at the same value.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. First off, this does not seem to me to be a blocker for the PR going through--as @roualdes says, it's definitely a good step forward!--and I don't intend to single out this PR for particular scrutiny, it just happened to be what made me realize that we need to revisit the topic of testing strategy. Also, I wasn't clear enough in my earlier comment. I'm not requesting even-stricter tests; I just want to make sure that each test adds new information. If we have several tests that don't capture different specific behavior of the subject-under-test, one option is to write them to test more specific details; another is to say "These are testing the same thing, maybe we'd be fine cutting some of them to save runtime." Both are valid responses, as long as we're satisfied with the resulting coverage. Here, I'm hearing you say that for the affine-invariant sampler, there's no good theory for how changing the hyperparameters should affect overall algorithmic performance. To me that sounds like it means that even if we rerun the convergence test, we don't have an outcome to assert on afterwards. So I'm thinking there's no need to spend time rerunning it--I'd be happy to cut entirely the tests against a specific initialization point, initializing with different random generators, etc. (That said, if we do want to test those things, rerunning the full integration test for them will take longer & probably give less new information than testing something more specific. So for the initialization point, instead of repeating the test for convergence, I'd pick a start point out in the boonies with a model centered at 0, take one draw, and assert that the result is within $APPROPRIATE_VALUE of the initialization; for the random generators, pass in a mock generator and assert that that mock was used; etc.) 
I do love the idea of testing for higher-order properties that any correct implementation ought to have--you mentioned ensuring correct distribution over walkers; hearing you mention it, that sounds like a great thing to check. For that specifically, we can mock
Basically, it's a real concern that you can ignore for now 😄 I'm just looking at the possible matrix of parameters and thinking that even if each call to
I think this gets into the tension between testing the algorithm and testing the implementation of the algorithm. For the purposes of testing the implementation, I don't think we need to go beyond testing that Metropolis calls what it's given; past that point, it seems like we'd be more testing the Metropolis algorithm itself rather than this implementation of it.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is the key issue that @jsoules raised:
I agree that we should test our implementations. And that what we have now mainly tests the algorithm. We can test our implementation does the right thing with changes in a, namely that it gives you a broader range of proposals. That's easy to check. And that's why we were talking about mock objects on the way to lunch. It does require refactoring the code so we can test the internal details of the algorithm. Otherwise, all we have is something we can test end-to-end. You can see that I just pushed the new tests for ESS and added the IAT functionality, where the IAT funcitonality is required for the ESS functionality. We can do the same thing here. Part of what I was doing with all those calls is making sure that we have code coverage. The tests can be simplified.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds like we're on the same page! Re: doing the right thing with changes in My understanding was that the (To be clear, I'm imagining a test that in pseudocode says basically: def test_variance_of_interpolation_factor_increases_as_a_increases():
def test_an_a(a: int) -> Sample[]:
sampler = # instantiate an AffineInvariantWalker with the given a
return [sampler._draw_z() for _ in range(100)]
as = [1, 2, 5, 10, 100]
zs = [test_an_a(a) for a in as]
# Do something to fix dimensions, then
assert var(z[0]) < var(z[1]) < var(z[2]) < var(z[3]) < var(z[4]) Naturally we could get a lot more specific about the statistical properties we'd expect from |
||
def test_aiw_std_normal() -> None:
    """End-to-end check that the affine-invariant walker recovers the
    posterior mean and variance of a standard normal target.

    BUG FIX: the original body contained an early `return 1` that made
    the assertions below unreachable (and a pytest test should return
    None), leftover debug `print` calls, and an unused `init` variable;
    it also used only M = 10 draws, far too few for the atol=0.1
    assertions to pass reliably.
    """
    model = StdNormal()
    sampler = AffineInvariantWalker(model, a=2, walkers=10)
    # short burn-in so the standard-normal initialization is forgotten
    for _ in range(100):
        sampler.sample()
    M = 1000
    # track walker 0 only; draws are autocorrelated, hence loose tolerances
    draws = np.array([sampler.sample()[0] for _ in range(M)])
    mean = draws.mean(axis=0)
    var = draws.var(axis=0, ddof=1)
    np.testing.assert_allclose(mean, model.posterior_mean(), atol=0.1)
    np.testing.assert_allclose(var, model.posterior_variance(), atol=0.1)
Uh oh!
There was an error while loading. Please reload this page.