From 37beb6939344ecd6ffcf0080cf6d3b48ed519c92 Mon Sep 17 00:00:00 2001 From: Ethan Kang Date: Sun, 26 Apr 2026 01:22:20 -0700 Subject: [PATCH] DOCS: Add examples to deterministic IBNR methods (#704) Adds doctest-validated Examples blocks to the five deterministic IBNR estimator classes, matching the Polars-style format used in PRs #714 and #719: no imports, "..." continuation lines, strict output indentation, and byte-exact output (no NORMALIZE_WHITESPACE). Per-class coverage: - Chainladder: class docstring, fit, predict - MackChainladder: class docstring, fit, predict (predict had no docstring before this PR), and five property docstrings written from scratch: full_std_err_, total_process_risk_, mack_std_err_, total_mack_std_err_, summary_ - BornhuetterFerguson: class docstring, fit, predict. Class docstring notes the apriori-as-Triangle requirement (a literal int sample_weight raises AttributeError) and explains the canonical "cl_ult * 0 + value" broadcasting idiom for building a flat per-origin apriori - Benktander: class docstring, fit, predict. Class docstring shows that n_iters interpolates between BF (n_iters=1) and chainladder (n_iters large) - CapeCod: class docstring, fit, predict. Class docstring contrasts CapeCod with BF/Benktander (sample_weight is exposure, apriori is derived) and demonstrates the trend and decay parameters All 20 example blocks validated with strict doctest. 
--- chainladder/methods/benktander.py | 74 ++++++++++++ chainladder/methods/bornferg.py | 70 +++++++++++ chainladder/methods/capecod.py | 83 +++++++++++++ chainladder/methods/chainladder.py | 81 +++++++++++++ chainladder/methods/mack.py | 184 +++++++++++++++++++++++++++++ 5 files changed, 492 insertions(+) diff --git a/chainladder/methods/benktander.py b/chainladder/methods/benktander.py index 84462dfe..0d6ca16e 100644 --- a/chainladder/methods/benktander.py +++ b/chainladder/methods/benktander.py @@ -34,6 +34,48 @@ class Benktander(MethodBase): The ultimate losses per the method ibnr_: Triangle The IBNR per the method + + Examples + -------- + Benktander is the iterated Bornhuetter-Ferguson model. Like BF, it + requires a per-origin apriori expected ultimate supplied through + ``sample_weight``. The ``n_iters`` parameter interpolates between BF + (``n_iters=1``) and chainladder (``n_iters`` large): each additional + iteration shifts the ultimate further toward the chainladder estimate. + + >>> tr = cl.load_sample('ukmotor') + >>> apriori = cl.Chainladder().fit(tr).ultimate_ * 0 + 14000 + + With ``n_iters=1`` Benktander reproduces Bornhuetter-Ferguson exactly. + + >>> cl.Benktander(apriori=1.0, n_iters=1).fit( + ... tr, sample_weight=apriori + ... ).ultimate_ + 2261 + 2007 12690.000000 + 2008 13121.098503 + 2009 14028.278620 + 2010 13272.048822 + 2011 13911.968891 + 2012 15614.145287 + 2013 16029.501746 + + Increasing ``n_iters`` pulls the immature origins toward the chainladder + estimate. The 2013 origin shows this most: ``16029`` at ``n_iters=1``, + rising to ``19110`` at ``n_iters=4`` and approaching the chainladder + ultimate of ``20680``. + + >>> cl.Benktander(apriori=1.0, n_iters=4).fit( + ... tr, sample_weight=apriori + ... 
).ultimate_ + 2261 + 2007 12690.000000 + 2008 13096.902490 + 2009 14030.535854 + 2010 13138.365841 + 2011 13880.984774 + 2012 16719.527550 + 2013 19110.806503 """ def __init__(self, apriori=1.0, n_iters=1, apriori_sigma=0, random_state=None): @@ -58,6 +100,16 @@ def fit(self, X, y=None, sample_weight=None): ------- self: object Returns the instance itself. + + Examples + -------- + Fit returns the estimator itself, with ``ultimate_`` populated. The + repr shows non-default parameters. + + >>> tr = cl.load_sample('ukmotor') + >>> apriori = cl.Chainladder().fit(tr).ultimate_ * 0 + 14000 + >>> cl.Benktander(apriori=1.0, n_iters=2).fit(tr, sample_weight=apriori) + Benktander(n_iters=2) """ if sample_weight is None: raise ValueError("sample_weight is required.") @@ -81,6 +133,28 @@ def predict(self, X, sample_weight=None): ------- X_new: Triangle Loss data with Benktander ultimate applied + + Examples + -------- + Fit on a prior-period view of the data, then apply the model to the + current Triangle and a refreshed apriori. + + >>> tr = cl.load_sample('ukmotor') + >>> tr_prior = tr[tr.valuation < tr.valuation_date] + >>> apriori_prior = cl.Chainladder().fit(tr_prior).ultimate_ * 0 + 14000 + >>> apriori = cl.Chainladder().fit(tr).ultimate_ * 0 + 14000 + >>> model = cl.Benktander(apriori=1.0, n_iters=2).fit( + ... tr_prior, sample_weight=apriori_prior + ... 
) + >>> model.predict(tr, sample_weight=apriori).ultimate_ + 2261 + 2007 12690.000000 + 2008 12746.000000 + 2009 13642.189922 + 2010 12740.812082 + 2011 13516.188545 + 2012 15914.716737 + 2013 17193.715555 """ X_new = super().predict(X, sample_weight) X_new.expectation_ = self._get_benktander_aprioris(X, sample_weight) diff --git a/chainladder/methods/bornferg.py b/chainladder/methods/bornferg.py index 7bb1a325..b6d3bccd 100644 --- a/chainladder/methods/bornferg.py +++ b/chainladder/methods/bornferg.py @@ -31,6 +31,45 @@ class BornhuetterFerguson(Benktander): The ultimate losses per the method ibnr_: Triangle The IBNR per the method + + Examples + -------- + Bornhuetter-Ferguson requires an apriori expected ultimate per origin, + supplied through ``sample_weight``. ``sample_weight`` must be a + chainladder Triangle aligned with ``X``, not a scalar; passing + ``sample_weight=14000`` would raise ``AttributeError`` because the model + accesses ``.shape``. + + A common idiom for building a flat per-origin apriori is to take any + same-shape Triangle, zero it out, and add the desired value. Below uses + the chainladder ultimate as the shape donor. + + >>> tr = cl.load_sample('ukmotor') + >>> cl_ult = cl.Chainladder().fit(tr).ultimate_ + >>> apriori = cl_ult * 0 + float(cl_ult.sum()) / 7 + >>> apriori + 2261 + 2007 14903.967562 + 2008 14903.967562 + 2009 14903.967562 + 2010 14903.967562 + 2011 14903.967562 + 2012 14903.967562 + 2013 14903.967562 + + Fit with that apriori. The BF ultimates pull the immature origins toward + the apriori while leaving mature origins close to chainladder. 
+ + >>> model = cl.BornhuetterFerguson(apriori=1.0).fit(tr, sample_weight=apriori) + >>> model.ultimate_ + 2261 + 2007 12690.000000 + 2008 13145.318280 + 2009 14095.125641 + 2010 13412.748068 + 2011 14150.549749 + 2012 15999.244850 + 2013 16658.824705 """ def __init__(self, apriori=1.0, apriori_sigma=0.0, random_state=None): @@ -54,6 +93,15 @@ def fit(self, X, y=None, sample_weight=None): ------- self : object Returns the instance itself. + + Examples + -------- + Fit returns the estimator itself, with ``ultimate_`` populated. + + >>> tr = cl.load_sample('ukmotor') + >>> apriori = cl.Chainladder().fit(tr).ultimate_ * 0 + 14000 + >>> cl.BornhuetterFerguson(apriori=1.0).fit(tr, sample_weight=apriori) + BornhuetterFerguson() """ self.n_iters = 1 super().fit(X, y, sample_weight) @@ -73,5 +121,27 @@ def predict(self, X, sample_weight=None): ------- X_new: Triangle Loss data with Bornhuetter-Ferguson ultimate applied + + Examples + -------- + Fit on a prior-period view of the data, then apply the model to the + current Triangle and a refreshed apriori. + + >>> tr = cl.load_sample('ukmotor') + >>> tr_prior = tr[tr.valuation < tr.valuation_date] + >>> apriori_prior = cl.Chainladder().fit(tr_prior).ultimate_ * 0 + 14000 + >>> apriori = cl.Chainladder().fit(tr).ultimate_ * 0 + 14000 + >>> model = cl.BornhuetterFerguson(apriori=1.0).fit( + ... tr_prior, sample_weight=apriori_prior + ... 
) + >>> model.predict(tr, sample_weight=apriori).ultimate_ + 2261 + 2007 12690.000000 + 2008 12746.000000 + 2009 13658.425101 + 2010 12883.599658 + 2011 13610.582796 + 2012 15360.020613 + 2013 15893.717063 """ return super().predict(X, sample_weight) diff --git a/chainladder/methods/capecod.py b/chainladder/methods/capecod.py index 943fdd3f..48f59f88 100644 --- a/chainladder/methods/capecod.py +++ b/chainladder/methods/capecod.py @@ -48,6 +48,57 @@ class CapeCod(Benktander): The trended apriori vector developed by the Cape Cod Method detrended_apriori_: The detrended apriori vector developed by the Cape Cod Method + + Examples + -------- + Unlike Bornhuetter-Ferguson and Benktander, CapeCod derives the apriori + loss ratio from the data itself. ``sample_weight`` represents exposure + (e.g. earned premium) rather than an apriori expected ultimate. + + >>> tr = cl.load_sample('ukmotor') + >>> exposure = cl.Chainladder().fit(tr).ultimate_ * 0 + 20000 + + With default ``decay=1`` and ``trend=0``, every origin receives the same + apriori loss ratio: the exposure-weighted mean loss ratio across all + origins. + + >>> model = cl.CapeCod().fit(tr, sample_weight=exposure) + >>> model.apriori_ + 2261 + 2007 0.706225 + 2008 0.706225 + 2009 0.706225 + 2010 0.706225 + 2011 0.706225 + 2012 0.706225 + 2013 0.706225 + + Setting ``decay`` below 1 down-weights distant origins when computing + each origin's apriori, so each origin receives its own loss-ratio + estimate that drifts toward more recent experience. + + >>> cl.CapeCod(decay=0.5).fit(tr, sample_weight=exposure).apriori_ + 2261 + 2007 0.653584 + 2008 0.666113 + 2009 0.683132 + 2010 0.689123 + 2011 0.717497 + 2012 0.776364 + 2013 0.836006 + + Setting ``trend`` projects the loss ratio forward over the experience + period. With ``decay=1``, all origins share the trended apriori. 
+ + >>> cl.CapeCod(trend=0.05).fit(tr, sample_weight=exposure).apriori_ + 2261 + 2007 0.836096 + 2008 0.836096 + 2009 0.836096 + 2010 0.836096 + 2011 0.836096 + 2012 0.836096 + 2013 0.836096 """ def __init__( @@ -81,6 +132,16 @@ def fit(self, X, y=None, sample_weight=None): ------- self: object Returns the instance itself. + + Examples + -------- + Fit returns the estimator itself, with ``ultimate_`` and + ``apriori_`` populated. The repr shows non-default parameters. + + >>> tr = cl.load_sample('ukmotor') + >>> exposure = cl.Chainladder().fit(tr).ultimate_ * 0 + 20000 + >>> cl.CapeCod(trend=0.05).fit(tr, sample_weight=exposure) + CapeCod(trend=0.05) """ if sample_weight is None: @@ -138,6 +199,28 @@ def predict(self, X, sample_weight=None): ------- X_new: Triangle Loss data with CapeCod ultimate applied + + Examples + -------- + Fit on a prior-period view of the data, then apply the model to the + current Triangle and a refreshed exposure. + + >>> tr = cl.load_sample('ukmotor') + >>> tr_prior = tr[tr.valuation < tr.valuation_date] + >>> exposure_prior = cl.Chainladder().fit(tr_prior).ultimate_ * 0 + 20000 + >>> exposure = cl.Chainladder().fit(tr).ultimate_ * 0 + 20000 + >>> model = cl.CapeCod(trend=0.05).fit( + ... tr_prior, sample_weight=exposure_prior + ... 
) + >>> model.predict(tr, sample_weight=exposure).ultimate_ + 2261 + 2007 12690.000000 + 2008 12746.000000 + 2009 13631.353487 + 2010 12896.639975 + 2011 13806.211939 + 2012 15991.144199 + 2013 17489.630279 """ if sample_weight is None: raise ValueError("sample_weight is required.") diff --git a/chainladder/methods/chainladder.py b/chainladder/methods/chainladder.py index 29bad4bc..00f513f6 100644 --- a/chainladder/methods/chainladder.py +++ b/chainladder/methods/chainladder.py @@ -26,6 +26,57 @@ class Chainladder(MethodBase): full_triangle_: The ultimates back-filled to each development period in **X** retaining the known data + + Examples + -------- + Fit the chainladder method to a loss triangle and inspect the projected + ultimates. + + >>> tr = cl.load_sample('ukmotor') + >>> model = cl.Chainladder().fit(tr) + >>> model.ultimate_ + 2261 + 2007 12690.000000 + 2008 13096.902024 + 2009 14030.536767 + 2010 13137.859861 + 2011 13880.404483 + 2012 16812.150646 + 2013 20679.919151 + + The ``ibnr_`` attribute is ``ultimate_ - latest_diagonal``. The 2007 origin + is fully developed in the data, so its IBNR is ``NaN``. + + >>> model.ibnr_ + 2261 + 2007 NaN + 2008 350.902024 + 2009 1037.536767 + 2010 2044.859861 + 2011 3663.404483 + 2012 7162.150646 + 2013 14396.919151 + + ``full_triangle_`` projects each origin to ultimate while preserving the + known cells. Showing the last three origins and the first five development + periods makes the data-to-projection boundary visible: whole-number cells + are observed, decimal cells are projected. + + >>> model.full_triangle_.iloc[..., -3:, :5] + 12 24 36 48 60 + 2011 4150.0 7897.000000 10217.000000 11719.970266 12853.969769 + 2012 5102.0 9650.000000 12374.981102 14195.400857 15568.917781 + 2013 6283.0 11870.058983 15221.943585 17461.165333 19150.670711 + + ``full_expectation_`` is similar but replaces every cell, including the + known ones, with the model's expectation. 
Compare the ``12`` column above + against the same slice below: the observed values have been overwritten. + + >>> model.full_expectation_.iloc[..., -3:, :5] + 12 24 36 48 60 + 2011 4217.162588 7967.208127 10217.000000 11719.970266 12853.969769 + 2012 5107.889530 9650.000000 12374.981102 14195.400857 15568.917781 + 2013 6283.000000 11870.058983 15221.943585 17461.165333 19150.670711 """ def fit(self, X, y=None, sample_weight=None): @@ -42,6 +93,15 @@ def fit(self, X, y=None, sample_weight=None): ------- self: object Returns the instance itself. + + Examples + -------- + Fitting returns the estimator itself, so it can be chained with + attribute access. + + >>> tr = cl.load_sample('ukmotor') + >>> cl.Chainladder().fit(tr) + Chainladder() """ super().fit(X, y, sample_weight) self.ultimate_ = self._get_ultimate(self.X_) @@ -62,6 +122,27 @@ def predict(self, X, sample_weight=None): ------- X_new: Triangle Loss data with chainladder ultimate applied + + Examples + -------- + ``predict`` applies the fitted development patterns to a different + Triangle. A common workflow is to fit on a prior-period view of the + data (one diagonal removed) and then apply that model to the current + Triangle. The ultimates differ from a freshly-fit model because the + patterns reflect the older view. 
+ + >>> tr = cl.load_sample('ukmotor') + >>> tr_prior = tr[tr.valuation < tr.valuation_date] + >>> model = cl.Chainladder().fit(tr_prior) + >>> model.predict(tr).ultimate_ + 2261 + 2007 12690.000000 + 2008 12746.000000 + 2009 13641.379750 + 2010 12719.871218 + 2011 13485.986574 + 2012 16296.783586 + 2013 20040.175415 """ X_new = super().predict(X, sample_weight) X_new.ultimate_ = self._get_ultimate(X_new, sample_weight) diff --git a/chainladder/methods/mack.py b/chainladder/methods/mack.py index 09bf4795..36da96a5 100644 --- a/chainladder/methods/mack.py +++ b/chainladder/methods/mack.py @@ -39,6 +39,32 @@ class MackChainladder(Chainladder): The total prediction error by origin period total_mack_std_err_: The total prediction error across all origin periods + + Examples + -------- + Fit the Mack chainladder method and inspect the headline summary table, + which combines the deterministic chainladder estimate with Mack's + stochastic standard error. + + >>> tr = cl.load_sample('ukmotor') + >>> model = cl.MackChainladder().fit(tr) + >>> model.summary_ + Latest IBNR Ultimate Mack Std Err + 2007 12690.0 NaN 12690.000000 NaN + 2008 12746.0 350.902024 13096.902024 27.246756 + 2009 12993.0 1037.536767 14030.536767 36.524408 + 2010 11093.0 2044.859861 13137.859861 144.534287 + 2011 10217.0 3663.404483 13880.404483 427.634355 + 2012 9650.0 7162.150646 16812.150646 693.166178 + 2013 6283.0 14396.919151 20679.919151 901.408385 + + The deterministic chainladder ultimates match those of + :class:`Chainladder`. Mack's contribution is the stochastic standard error + in the rightmost column, which can be aggregated across origins. + + >>> model.total_mack_std_err_ + columns values + (Total,) 1424.531543 """ def fit(self, X, y=None, sample_weight=None): @@ -55,6 +81,15 @@ def fit(self, X, y=None, sample_weight=None): ------- self: object Returns the instance itself. 
+ + Examples + -------- + Fitting attaches the ``ultimate_`` and Mack std error attributes to + the estimator and returns the estimator itself. + + >>> tr = cl.load_sample('ukmotor') + >>> cl.MackChainladder().fit(tr) + MackChainladder() """ super().fit(X, y, sample_weight) if "sigma_" not in self.X_: @@ -71,6 +106,39 @@ def fit(self, X, y=None, sample_weight=None): return self def predict(self, X, sample_weight=None): + """Predicts the Mack chainladder ultimate on a new triangle **X**. + + The fitted age-to-age factors and sigma estimates from ``self.X_`` are + applied to ``X`` to compute ``ultimate_`` and the Mack standard errors + (parameter risk, process risk, ``mack_std_err_``, + ``total_mack_std_err_``) on the predicted Triangle. + + Parameters + ---------- + X: Triangle + Loss data to which the fitted model will be applied. Must share + the same shape as the Triangle used in :meth:`fit`. + sample_weight: Triangle + Optional exposure used in CDF alignment. + + Returns + ------- + X_new: Triangle + Triangle with ``ultimate_`` and Mack std error attributes + attached. + + Examples + -------- + Fit the model and apply it to a Triangle with the same shape, then + read the Mack standard error off the resulting Triangle. + + >>> tr = cl.load_sample('ukmotor') + >>> model = cl.MackChainladder().fit(tr) + >>> predicted = model.predict(tr) + >>> predicted.total_mack_std_err_ + columns values + (Total,) 1424.531543 + """ X_new = super().predict(X, sample_weight) X_new.sigma_ = getattr(X_new, "sigma_", self.X_.sigma_) X_new.std_err_ = getattr(X_new, "std_err_", self.X_.std_err_) @@ -93,6 +161,31 @@ def predict(self, X, sample_weight=None): @property def full_std_err_(self): + """ + Per-cell standard error of the form ``sigma_ / sqrt(c**(2-alpha))``, + where ``alpha`` reflects the development averaging method + (regression, volume, simple). This is the building block Mack's + recursion uses to propagate parameter and process risk forward. 
+ + Returns + ------- + Triangle + Per-cell standardized error scale. + + Examples + -------- + >>> tr = cl.load_sample('ukmotor') + >>> model = cl.MackChainladder().fit(tr) + >>> model.full_std_err_ + 12 24 36 48 60 72 84 + 2007 0.047826 0.040745 0.031412 0.010337 0.001431 0.001523 0.0 + 2008 0.044802 0.038074 0.029815 0.010123 0.001410 0.001500 0.0 + 2009 0.042943 0.036708 0.029445 0.009864 0.001361 0.001449 0.0 + 2010 0.043241 0.037958 0.030130 0.010154 0.001407 0.001497 0.0 + 2011 0.043990 0.037603 0.029468 0.009879 0.001369 0.001457 0.0 + 2012 0.039675 0.034017 0.026776 0.008976 0.001243 0.001324 0.0 + 2013 0.035752 0.030671 0.024143 0.008094 0.001121 0.001193 0.0 + """ return self._get_full_std_err_(self.X_) def _get_full_std_err_(self, X=None): @@ -116,6 +209,27 @@ def _get_full_std_err_(self, X=None): @property def total_process_risk_(self): + """ + Across-origin process risk by development period. + + Process risk is independent across origins, so the across-origin + total is the quadrature sum: ``sqrt(sum_i process_risk_i**2)``. + Reported as a single-row Triangle shown under the earliest origin label. + + Returns + ------- + Triangle + Single-origin Triangle of total process risk by development + period. + + Examples + -------- + >>> tr = cl.load_sample('ukmotor') + >>> model = cl.MackChainladder().fit(tr) + >>> model.total_process_risk_.iloc[..., -3:] + 72 84 9999 + 2007 1039.901929 1069.726277 1069.726277 + """ return (self.process_risk_ ** 2).sum(axis="origin").sqrt() def _mack_recursion(self, est, X=None): @@ -158,10 +272,56 @@ def _mack_recursion(self, est, X=None): @property def mack_std_err_(self): + """ + Per-(origin, development) Mack prediction error, + ``sqrt(parameter_risk**2 + process_risk**2)`` (Mack 1993 eq 7). + + Returns + ------- + Triangle + Per-cell Mack standard error. The last development column is the + ultimate-period prediction error per origin. 
+ + Examples + -------- + Showing the last three origins and last three development periods to + keep the slice readable. The final column is the ultimate prediction + error per origin. + + >>> tr = cl.load_sample('ukmotor') + >>> model = cl.MackChainladder().fit(tr) + >>> model.mack_std_err_.iloc[..., -3:, -3:] + 72 84 9999 + 2011 415.253333 427.634355 427.634355 + 2012 673.828536 693.166178 693.166178 + 2013 876.437914 901.408385 901.408385 + """ return (self.parameter_risk_ ** 2 + self.process_risk_ ** 2).sqrt() @property def total_mack_std_err_(self): + """ + Total Mack prediction error aggregated across all origin periods, + ``sqrt(total_process_risk**2 + total_parameter_risk**2)``. + + Note that this total exceeds the quadrature sum of per-origin + ``mack_std_err_`` values because parameter risk is correlated across + origins (each origin's projection uses the same estimated age-to-age + factors), introducing positive cross-origin covariances. + + Returns + ------- + DataFrame + One value per (index, column) combination of the fitted Triangle. + + Examples + -------- + >>> tr = cl.load_sample('ukmotor') + >>> model = cl.MackChainladder().fit(tr) + >>> model.total_mack_std_err_ + columns values + (Total,) 1424.531543 + """ return self._get_total_mack_std_err_(self) def _get_total_mack_std_err_(self, obj): @@ -174,6 +334,30 @@ def _get_total_mack_std_err_(self, obj): @property def summary_(self): + """ + Headline Mack summary table by origin: latest diagonal, IBNR, + ultimate, and Mack standard error. + + Returns + ------- + Triangle + Triangle whose four development columns are ``Latest``, ``IBNR``, + ``Ultimate``, and ``Mack Std Err``. 
+ + Examples + -------- + >>> tr = cl.load_sample('ukmotor') + >>> model = cl.MackChainladder().fit(tr) + >>> model.summary_ + Latest IBNR Ultimate Mack Std Err + 2007 12690.0 NaN 12690.000000 NaN + 2008 12746.0 350.902024 13096.902024 27.246756 + 2009 12993.0 1037.536767 14030.536767 36.524408 + 2010 11093.0 2044.859861 13137.859861 144.534287 + 2011 10217.0 3663.404483 13880.404483 427.634355 + 2012 9650.0 7162.150646 16812.150646 693.166178 + 2013 6283.0 14396.919151 20679.919151 901.408385 + """ # This might be better as a dataframe obj = self.ultimate_.copy() cols = (