diff --git a/docs/Project.toml b/docs/Project.toml index a6e35558..530c5d25 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,15 +1,18 @@ [deps] +CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" Optim = "429524aa-4258-5aef-a3af-852621145aeb" RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" [compat] DataFrames = "1" Documenter = "1" -Optim = "1.6.2" \ No newline at end of file +Optim = "1.6.2" diff --git a/docs/src/api.md b/docs/src/api.md index fccdc818..5f667f79 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -2,7 +2,7 @@ ```@meta DocTestSetup = quote - using CategoricalArrays, DataFrames, Distributions, GLM, RDatasets + using CategoricalArrays, DataFrames, Distributions, GLM, RDatasets, StableRNGs end ``` diff --git a/docs/src/examples.md b/docs/src/examples.md index 6a127207..a5a6113a 100644 --- a/docs/src/examples.md +++ b/docs/src/examples.md @@ -61,7 +61,7 @@ julia> dof(ols) 3 julia> dof_residual(ols) -1.0 +1 julia> round(aic(ols); digits=5) 5.84252 @@ -214,8 +214,8 @@ sales ^ 2 -6.94594e-9 3.72614e-9 -1.86 0.0725 -1.45667e-8 6.7487e-10 ```jldoctest julia> data = DataFrame(X=[1,2,2], Y=[1,0,1]) 3×2 DataFrame - Row │ X Y - │ Int64 Int64 + Row │ X Y + │ Int64 Int64 ─────┼────────────── 1 │ 1 1 2 │ 2 0 @@ -319,8 +319,8 @@ julia> using GLM, RDatasets julia> form = dataset("datasets", "Formaldehyde") 6×2 DataFrame - Row │ Carb OptDen - │ Float64 Float64 + Row │ Carb OptDen + │ Float64 Float64 ─────┼────────────────── 1 │ 0.1 0.086 2 │ 0.3 0.269 @@ -473,8 +473,8 @@ julia> dobson = DataFrame(Counts = [18.,17,15,20,10,21,25,13,13], Outcome = categorical([1,2,3,1,2,3,1,2,3]), Treatment = categorical([1,1,1,2,2,2,3,3,3])) 9×3 DataFrame - Row │ Counts Outcome Treatment - │ Float64 Cat… Cat… + Row │ Counts Outcome Treatment + │ Float64 Cat… Cat… ─────┼───────────────────────────── 1 │ 18.0 1 1 2 │ 17.0 2 1 @@ -513,29 +513,8 @@ In this example, we choose the best model from a set of λs, based on minimum BI ```jldoctest; filter = r"(\d*)\.(\d{6})\d+" => s"\1.\2" julia> using GLM, RDatasets, StatsBase, DataFrames, Optim -julia> trees = DataFrame(dataset("datasets", "trees")) -31×3 DataFrame - Row │ Girth Height Volume - │ Float64 Int64 Float64 -─────┼────────────────────────── - 1 │ 8.3 70 10.3 - 2 │ 8.6 65 10.3 - 3 │ 8.8 63 10.2 - 4 │ 10.5 72 16.4 - 5 │ 10.7 81 18.8 - 6 │ 10.8 83 19.7 - 7 │ 11.0 66 15.6 - 8 │ 11.0 75 18.2 - ⋮ │ ⋮ ⋮ ⋮ - 25 │ 16.3 77 42.6 - 26 │ 17.3 81 55.4 - 27 │ 17.5 82 55.7 - 28 │ 17.9 80 58.3 - 29 │ 18.0 80 51.5 - 30 │ 18.0 80 51.0 - 31 │ 20.6 87 77.0 - 16 rows omitted - +julia> trees = DataFrame(dataset("datasets", "trees")); + julia> bic_glm(λ) = bic(glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(λ))); julia> optimal_bic = optimize(bic_glm, -1.0, 1.0); @@ -554,9 +533,9 @@ Coefficients: ──────────────────────────────────────────────────────────────────────────── (Intercept) -1.07586 0.352543 -3.05 0.0023 -1.76684 -0.384892 Height 0.0232172 0.00523331 4.44 <1e-05 0.0129601 0.0334743 -Girth 0.242837 0.00922555 26.32 <1e-99 0.224756 0.260919 +Girth 0.242837 0.00922556 26.32 <1e-99 0.224756 0.260919 ──────────────────────────────────────────────────────────────────────────── julia> round(optimal_bic.minimum, digits=5) 156.37638 -``` \ No newline at end of file +``` diff --git a/docs/src/index.md b/docs/src/index.md index 3e2d18c5..1bdfba34 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -17,6 +17,7 @@ The [RDatasets package](https://github.com/johnmyleswhite/RDatasets.jl) is usefu Two methods can be used to fit a Generalized Linear Model (GLM): `glm(formula, data, family, link)` and `glm(X, y, family, link)`. Their arguments must be: + - `formula`: a [StatsModels.jl `Formula` object](https://juliastats.org/StatsModels.jl/stable/formula/) referring to columns in `data`; for example, if column names are `:Y`, `:X1`, and `:X2`, then a valid formula is `@formula(Y ~ X1 + X2)` @@ -123,10 +124,115 @@ x: 4 -0.032673 0.0797865 -0.41 0.6831 -0.191048 0.125702 ─────────────────────────────────────────────────────────────────────────── ``` +## Weighting + +Both `lm` and `glm` allow weighted estimation. The four different +[types of weights](https://juliastats.org/StatsBase.jl/stable/weights/) defined in +[StatsBase.jl](https://github.com/JuliaStats/StatsBase.jl) can be used to fit a model: + +- `AnalyticWeights` describe a non-random relative importance (usually between 0 and 1) for + each observation. These weights may also be referred to as reliability weights, precision + weights or inverse variance weights. These are typically used when the observations being + weighted are aggregate values (e.g., averages) with differing variances. +- `FrequencyWeights` describe the number of times (or frequency) each observation was seen. + These weights may also be referred to as case weights or repeat weights. +- `ProbabilityWeights` represent the inverse of the sampling probability for each observation, + providing a correction mechanism for under- or over-sampling certain population groups. + These weights may also be referred to as sampling weights. +- `UnitWeights` attribute a weight of 1 to each observation, which corresponds + to unweighted regression (the default). + +To indicate which kind of weights should be used, the vector of weights must be wrapped in +one of the three weights types, and then passed to the `weights` keyword argument. +Short-hand functions `aweights`, `fweights`, and `pweights` can be used to construct +`AnalyticWeights`, `FrequencyWeights`, and `ProbabilityWeights`, respectively. + +We illustrate the API with randomly generated data. + +```jldoctest weights +julia> using StableRNGs, DataFrames, GLM + +julia> data = DataFrame(y = rand(StableRNG(1), 100), x = randn(StableRNG(2), 100), weights = repeat([1, 2, 3, 4], 25)); + +julia> m = lm(@formula(y ~ x), data) +LinearModel + +y ~ 1 + x + +Coefficients: +────────────────────────────────────────────────────────────────────────── + Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95% +────────────────────────────────────────────────────────────────────────── +(Intercept) 0.517369 0.0280232 18.46 <1e-32 0.461758 0.57298 +x -0.0500249 0.0307201 -1.63 0.1066 -0.110988 0.0109382 +────────────────────────────────────────────────────────────────────────── + +julia> m_aweights = lm(@formula(y ~ x), data, wts=aweights(data.weights)) +LinearModel + +y ~ 1 + x + +Coefficients: +────────────────────────────────────────────────────────────────────────── + Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95% +────────────────────────────────────────────────────────────────────────── +(Intercept) 0.51673 0.0270707 19.09 <1e-34 0.463009 0.570451 +x -0.0478667 0.0308395 -1.55 0.1239 -0.109067 0.0133333 +────────────────────────────────────────────────────────────────────────── + +julia> m_fweights = lm(@formula(y ~ x), data, wts=fweights(data.weights)) +LinearModel + +y ~ 1 + x + +Coefficients: +───────────────────────────────────────────────────────────────────────────── + Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95% +───────────────────────────────────────────────────────────────────────────── +(Intercept) 0.51673 0.0170172 30.37 <1e-84 0.483213 0.550246 +x -0.0478667 0.0193863 -2.47 0.0142 -0.0860494 -0.00968394 +───────────────────────────────────────────────────────────────────────────── + +julia> m_pweights = lm(@formula(y ~ x), data, wts=pweights(data.weights)) +LinearModel + +y ~ 1 + x + +Coefficients: +─────────────────────────────────────────────────────────────────────────── + Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95% +─────────────────────────────────────────────────────────────────────────── +(Intercept) 0.51673 0.0287193 17.99 <1e-32 0.459737 0.573722 +x -0.0478667 0.0265532 -1.80 0.0745 -0.100561 0.00482739 +─────────────────────────────────────────────────────────────────────────── + +``` + +!!! warning + +In the old API, weights were passed as `AbstractVectors` and were silently treated in +the internal computation of standard errors and related quantities as `FrequencyWeights`. +Passing weights as `AbstractVector` is still allowed for backward compatibility, but it +is deprecated. When weights are passed following the old API, they are now coerced to +`FrequencyWeights` and a deprecation warning is issued. + +The type of the weights will affect the variance of the estimated coefficients and the +quantities involving this variance. The coefficient point estimates will be the same +regardless of the type of weights. + +```jldoctest weights +julia> loglikelihood(m_aweights) +-16.29630756138424 + +julia> loglikelihood(m_fweights) +-25.518609617564483 +``` + ## Comparing models with F-test Comparisons between two or more linear models can be performed using the `ftest` function, which computes an F-test between each pair of subsequent models and reports fit statistics: + ```jldoctest julia> using DataFrames, GLM, StableRNGs @@ -149,6 +255,7 @@ F-test: 2 models fitted on 50 observations ## Methods applied to fitted models Many of the methods provided by this package have names similar to those in [R](http://www.r-project.org). + - `adjr2`: adjusted R² for a linear model (an alias for `adjr²`) - `aic`: Akaike's Information Criterion - `aicc`: corrected Akaike's Information Criterion for small sample sizes @@ -175,9 +282,8 @@ Many of the methods provided by this package have names similar to those in [R]( - `stderror`: standard errors of the coefficients - `vcov`: variance-covariance matrix of the coefficient estimates - -Note that the canonical link for negative binomial regression is `NegativeBinomialLink`, but -in practice one typically uses `LogLink`. +Note that the canonical link for negative binomial regression is `NegativeBinomialLink`, +but in practice one typically uses `LogLink`. ```jldoctest methods julia> using GLM, DataFrames, StatsBase @@ -209,7 +315,9 @@ julia> round.(predict(mdl, test_data); digits=8) 9.33333333 ``` -The [`cooksdistance`](@ref) method computes [Cook's distance](https://en.wikipedia.org/wiki/Cook%27s_distance) for each observation used to fit a linear model, giving an estimate of the influence of each data point. +The [`cooksdistance`](@ref) method computes +[Cook's distance](https://en.wikipedia.org/wiki/Cook%27s_distance) for each observation +used to fit a linear model, giving an estimate of the influence of each data point. Note that it's currently only implemented for linear models without weights. ```jldoctest methods @@ -223,45 +331,47 @@ julia> round.(cooksdistance(mdl); digits=8) ## Separation of response object and predictor object The general approach in this code is to separate functionality related -to the response from that related to the linear predictor. This +to the response from that related to the linear predictor. This allows for greater generality by mixing and matching different -subtypes of the abstract type ```LinPred``` and the abstract type ```ModResp```. +subtypes of the abstract type `LinPred` and the abstract type `ModResp`. -A ```LinPred``` type incorporates the parameter vector and the model -matrix. The parameter vector is a dense numeric vector but the model -matrix can be dense or sparse. A ```LinPred``` type must incorporate +A `LinPred` type incorporates the parameter vector and the model +matrix. The parameter vector is a dense numeric vector but the model +matrix can be dense or sparse. A `LinPred` type must incorporate some form of a decomposition of the weighted model matrix that allows -for the solution of a system ```X'W * X * delta=X'wres``` where ```W``` is a +for the solution of a system `X'W * X * delta=X'wres` where `W` is a diagonal matrix of "X weights", provided as a vector of the square -roots of the diagonal elements, and ```wres``` is a weighted residual vector. +roots of the diagonal elements, and `wres` is a weighted residual vector. -Currently there are two dense predictor types, ```DensePredQR``` and -```DensePredChol```, and the usual caveats apply. The Cholesky +Currently there are two dense predictor types, `DensePredQR` and +`DensePredChol`, and the usual caveats apply. The Cholesky version is faster but somewhat less accurate than that QR version. The skeleton of a distributed predictor type is in the code -but not yet fully fleshed out. Because Julia by default uses +but not yet fully fleshed out. Because Julia by default uses OpenBLAS, which is already multi-threaded on multicore machines, there may not be much advantage in using distributed predictor types. -A ```ModResp``` type must provide methods for the ```wtres``` and -```sqrtxwts``` generics. Their values are the arguments to the -```updatebeta``` methods of the ```LinPred``` types. The -```Float64``` value returned by ```updatedelta``` is the value of the +A `ModResp` type must provide methods for the `wtres` and +`sqrtxwts` generics. Their values are the arguments to the +`updatebeta` methods of the `LinPred` types. The +`Float64` value returned by `updatedelta` is the value of the convergence criterion. -Similarly, ```LinPred``` types must provide a method for the -```linpred``` generic. In general ```linpred``` takes an instance of -a ```LinPred``` type and a step factor. Methods that take only an instance -of a ```LinPred``` type use a default step factor of 1. The value of -```linpred``` is the argument to the ```updatemu``` method for -```ModResp``` types. The ```updatemu``` method returns the updated +Similarly, `LinPred` types must provide a method for the +`linpred` generic. In general `linpred` takes an instance of +a `LinPred` type and a step factor. Methods that take only an instance +of a `LinPred` type use a default step factor of 1. The value of +`linpred` is the argument to the `updatemu` method for +`ModResp` types. The `updatemu` method returns the updated deviance. ## Debugging failed fits + In the rare cases when a fit of a generalized linear model fails, it can be useful to enable more output from the fitting steps. This can be done through the Julia logging mechanism by setting `ENV["JULIA_DEBUG"] = GLM`. Enabling debug output will result in ouput like the following + ```julia ┌ Debug: Iteration: 1, deviance: 5.129147109764238, diff.dev.:0.05057195315968688 └ @ GLM ~/.julia/dev/GLM/src/glmfit.jl:418 diff --git a/ext/GLMSparseArraysExt.jl b/ext/GLMSparseArraysExt.jl index 7e299f2f..375c1b9c 100644 --- a/ext/GLMSparseArraysExt.jl +++ b/ext/GLMSparseArraysExt.jl @@ -1,83 +1,119 @@ module GLMSparseArraysExt using GLM, LinearAlgebra, SparseArrays - +import GLM: AbstractWeights, UnitWeights, BlasReal, uweights +using LinearAlgebra: LinearAlgebra ## QR -mutable struct SparsePredQR{T,M<:SparseMatrixCSC,F} <: GLM.LinPred +mutable struct SparsePredQR{T,M<:SparseMatrixCSC,F,W<:AbstractWeights} <: GLM.LinPred X::M # model matrix beta0::Vector{T} # base vector for coefficients delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} qr::F + wts::W scratch::M end -function SparsePredQR(X::SparseMatrixCSC{T}) where {T} + +function SparsePredQR(X::SparseMatrixCSC{T}, wts::AbstractWeights) where {T} # The one(float(T))* part is because of a promotion issue in SPQR.jl on Julia 1.9 - fqr = qr(sparse(one(float(T))*I, size(X)...)) - return SparsePredQR{eltype(X),typeof(X),typeof(fqr)}(X, - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - fqr, - similar(X)) + fqr = qr(sparse(one(float(T)) * I, size(X)...)) + return SparsePredQR{eltype(X),typeof(X),typeof(fqr),typeof(wts)}(X, + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + fqr, + wts, + similar(X)) end -GLM.qrpred(X::SparseMatrixCSC, pivot::Bool) = SparsePredQR(X) +function GLM.qrpred(X::SparseMatrixCSC, pivot::Bool, + wts::AbstractWeights=uweights(size(X, 1))) + return SparsePredQR(X, wts) +end function GLM.delbeta!(p::SparsePredQR{T}, r::Vector{T}, wt::Vector{T}) where {T} wtsqrt = sqrt.(wt) Wsqrt = Diagonal(wtsqrt) scr = mul!(p.scratch, Wsqrt, p.X) p.qr = qr(scr) - return p.delbeta = p.qr \ (Wsqrt*r) + return p.delbeta = p.qr \ (Wsqrt * r) end -function GLM.delbeta!(p::SparsePredQR{T}, r::Vector{T}) where {T} +function GLM.delbeta!(p::SparsePredQR{T,M,F,<:UnitWeights}, + r::Vector{T}) where {T<:BlasReal,M,F} p.qr = qr(p.X) return p.delbeta = p.qr \ r end +function GLM.delbeta!(p::SparsePredQR{T,M,F,<:AbstractWeights}, + r::Vector{T}) where {T<:BlasReal,M,F} + W = Diagonal(sqrt.(p.wts)) + p.qr = qr(W * p.X) + return p.delbeta = p.qr \ (W * r) +end + function GLM.inverse(x::SparsePredQR{T}) where {T} - Rinv = UpperTriangular(x.qr.R) \ Diagonal(ones(T, size(x.qr.R, 2))) - pinv = invperm(x.qr.pcol) - RinvRinvt = Rinv*Rinv' - return RinvRinvt[pinv, pinv] + rnk = GLM.linpred_rank(x) + ipiv = invperm(x.qr.pcol) + if rnk < size(x.X, 2) + ## rank deficient + Rinv = view(x.qr.R, 1:rnk, 1:rnk) \ Diagonal(ones(T, rnk)) + xinv = similar(Rinv, size(x.X, 2), size(x.X, 2)) + xinv[1:rnk, 1:rnk] .= Rinv * Rinv' + xinv[(rnk + 1):end, :] .= NaN + xinv[:, (rnk + 1):end] .= NaN + xinv = xinv[ipiv, ipiv] + else + Rinv = UpperTriangular(x.qr.R) \ Diagonal(ones(T, rnk)) + xinv = Rinv * Rinv' + xinv = xinv[ipiv, ipiv] + end + return xinv end ## Cholesky -mutable struct SparsePredChol{T,M<:SparseMatrixCSC,C} <: GLM.LinPred +mutable struct SparsePredChol{T,M<:SparseMatrixCSC,C,W<:AbstractWeights} <: GLM.LinPred X::M # model matrix Xt::M # X' beta0::Vector{T} # base vector for coefficients delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} chol::C - scratch::M + wts::W + scratchm1::M end -function SparsePredChol(X::SparseMatrixCSC{T}) where {T} + +function SparsePredChol(X::SparseMatrixCSC{T}, wts::AbstractVector) where {T} chol = cholesky(sparse(I, size(X, 2), size(X, 2))) - return SparsePredChol{eltype(X),typeof(X),typeof(chol)}(X, - X', - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - chol, - similar(X)) + return SparsePredChol{eltype(X),typeof(X),typeof(chol),typeof(wts)}(X, + X', + zeros(T, + size(X, 2)), + zeros(T, + size(X, 2)), + zeros(T, + size(X, 2)), + chol, + wts, + similar(X)) end -GLM.cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X) +function GLM.cholpred(X::SparseMatrixCSC, pivot::Bool=false, + wts::AbstractWeights=uweights(size(X, 1))) + return SparsePredChol(X, wts) +end function GLM.delbeta!(p::SparsePredChol{T}, r::Vector{T}, wt::Vector{T}) where {T} - scr = mul!(p.scratch, Diagonal(wt), p.X) - XtWX = p.Xt*scr - c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) + scr = mul!(p.scratchm1, Diagonal(wt), p.X) + XtWX = p.Xt * scr + c = cholesky!(p.chol, Symmetric(XtWX)) return p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) end function GLM.delbeta!(p::SparsePredChol{T}, r::Vector{T}) where {T} - scr = p.scratch = p.X - XtWX = p.Xt*scr - c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) + scr = mul!(p.scratchm1, Diagonal(p.wts), p.X) + XtWX = p.Xt * scr + c = cholesky!(p.chol, Symmetric(XtWX)) return p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) end @@ -88,7 +124,9 @@ function GLM.invchol(x::SparsePredChol) return cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), size(x.X, 2)) end - GLM.inverse(x::SparsePredChol) = GLM.invchol(x) +GLM.linpred_rank(p::SparsePredChol) = rank(sparse(p.chol)) +GLM.linpred_rank(p::SparsePredQR) = rank(p.qr) + end diff --git a/src/GLM.jl b/src/GLM.jl index b024e1d6..a38f55c6 100644 --- a/src/GLM.jl +++ b/src/GLM.jl @@ -1,5 +1,4 @@ module GLM - using Distributions, LinearAlgebra, Printf, Reexport, Statistics, StatsBase using LinearAlgebra: copytri!, QRCompactWY, Cholesky, CholeskyPivoted, BlasReal using Printf: @sprintf @@ -14,10 +13,11 @@ import Statistics: cor using StatsAPI import StatsBase: coef, coeftable, coefnames, confint, deviance, nulldeviance, dof, dof_residual, - loglikelihood, nullloglikelihood, nobs, stderror, vcov, + leverage, loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict, predict!, fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², - PValue + PValue, + fweights, pweights, aweights import SpecialFunctions: erfc, erfcinv, digamma, trigamma import StatsModels: hasintercept using Tables: Tables @@ -62,12 +62,16 @@ export devresid, # vector of squared deviance residuals formula, # extract the formula from a model glm, # general interface + leverage, # leverage linpred, # linear predictor lm, # linear model negbin, # interface to fitting negative binomial regression nobs, # total number of observations predict, # make predictions - ftest # compare models with an F test + ftest, # compare models with an F test + fweights, + pweights, + aweights const FP = AbstractFloat const FPVector{T<:FP} = AbstractArray{T,1} @@ -101,15 +105,17 @@ const COMMON_FIT_KWARGS_DOCS = """ The Cholesky decomposition is faster and more computationally efficient than QR, but is less numerically stable and thus may fail or produce less accurate estimates for some models. - - `wts::Vector`: Prior frequency (a.k.a. case) weights of observations. - Such weights are equivalent to repeating each observation a number of times equal - to its weight. Do note that this interpretation gives equal point estimates but - different standard errors from analytical (a.k.a. inverse variance) weights and - from probability (a.k.a. sampling) weights which are the default in some other - software. - Can be length 0 to indicate no weighting (default). + - `wts::AbstractWeights`: Weights of observations. + The weights can be of type `AnalyticWeights`, `FrequencyWeights`, + `ProbabilityWeights`, or `UnitWeights`. `AnalyticWeights` describe a non-random + relative importance (usually between 0 and 1) for each observation. These weights may + also be referred to as reliability weights, precision weights or inverse variance weights. + `FrequencyWeights` describe the number of times (or frequency) each observation was seen. + `ProbabilityWeights` represent the inverse of the sampling probability for each observation, + providing a correction mechanism for under- or over-sampling certain population groups. `UnitWeights` + (default) describes the case in which all weights are equal to 1 (so no weighting takes place) - `contrasts::AbstractDict{Symbol}`: a `Dict` mapping term names - (as `Symbol`s) to term types (e.g. `ContinuousTerm`) or contrasts + (as `Symbol`s) to term types (e.g., `ContinuousTerm`) or contrasts (e.g., `HelmertCoding()`, `SeqDiffCoding(; levels=["a", "b", "c"])`, etc.). If contrasts are not provided for a variable, the appropriate term type will be guessed based on the data type from the data column: diff --git a/src/ftest.jl b/src/ftest.jl index 1077d5bd..7ddeca36 100644 --- a/src/ftest.jl +++ b/src/ftest.jl @@ -28,12 +28,12 @@ function StatsModels.isnested(mod1::LinPredModel, mod2::LinPredModel; atol::Real rtol = Base.rtoldefault(typeof(pred1[1, 1])) nresp = size(pred2, 1) return norm(view(qr(pred2).Q'pred1, (npreds2 + 1):nresp, :)) <= - max(atol, rtol*norm(pred1)) + max(atol, rtol * norm(pred1)) end -_diffn(t::NTuple{N,T}) where {N,T} = ntuple(i->t[i]-t[i+1], N-1) +_diffn(t::NTuple{N,T}) where {N,T} = ntuple(i -> t[i] - t[i + 1], N - 1) -_diff(t::NTuple{N,T}) where {N,T} = ntuple(i->t[i+1]-t[i], N-1) +_diff(t::NTuple{N,T}) where {N,T} = ntuple(i -> t[i + 1] - t[i], N - 1) """ ftest(mod::LinearModel) @@ -57,7 +57,10 @@ F-statistic: 241.62 on 12 observations and 1 degrees of freedom, p-value: <1e-07 function ftest(mod::LinearModel) hasintercept(mod) || throw(ArgumentError("ftest only works for models with an intercept")) - + wts = weights(mod) + if wts isa ProbabilityWeights + throw(ArgumentError("`ftest` for probability weighted models is not currently supported.")) + end rss = deviance(mod) tss = nulldeviance(mod) @@ -136,15 +139,15 @@ function ftest(mods::LinearModel...; atol::Real=0.0) forward = length(mods) == 1 || dof(mods[1]) <= dof(mods[2]) if forward for i in 2:length(mods) - if dof(mods[i-1]) >= dof(mods[i]) || - !StatsModels.isnested(mods[i-1], mods[i]; atol=atol) + if dof(mods[i - 1]) >= dof(mods[i]) || + !StatsModels.isnested(mods[i - 1], mods[i]; atol=atol) throw(ArgumentError("F test is only valid for nested models")) end end else for i in 2:length(mods) - if dof(mods[i]) >= dof(mods[i-1]) || - !StatsModels.isnested(mods[i], mods[i-1]; atol=atol) + if dof(mods[i]) >= dof(mods[i - 1]) || + !StatsModels.isnested(mods[i], mods[i - 1]; atol=atol) throw(ArgumentError("F test is only valid for nested models")) end end @@ -185,7 +188,7 @@ function show(io::IO, ftr::FTestResult{N}) where {N} nc = 9 nr = N - outrows = Matrix{String}(undef, nr+1, nc) + outrows = Matrix{String}(undef, nr + 1, nc) outrows[1, :] = ["", "DOF", "ΔDOF", "SSR", "ΔSSR", "R²", "ΔR²", "F*", "p(>F)"] @@ -200,15 +203,15 @@ function show(io::IO, ftr::FTestResult{N}) where {N} r2vals[1], " ", " ", " "] for i in 2:nr - outrows[i+1, :] = ["[$i]", - @sprintf("%.0d", ftr.dof[i]), @sprintf("%.0d", Δdof[i-1]), - @sprintf("%.4f", ftr.ssr[i]), @sprintf("%.4f", Δssr[i-1]), - r2vals[i], @sprintf("%.4f", ΔR²[i-1]), - @sprintf("%.4f", ftr.fstat[i]), string(PValue(ftr.pval[i]))] + outrows[i + 1, :] = ["[$i]", + @sprintf("%.0d", ftr.dof[i]), @sprintf("%.0d", Δdof[i - 1]), + @sprintf("%.4f", ftr.ssr[i]), @sprintf("%.4f", Δssr[i - 1]), + r2vals[i], @sprintf("%.4f", ΔR²[i - 1]), + @sprintf("%.4f", ftr.fstat[i]), string(PValue(ftr.pval[i]))] end colwidths = length.(outrows) max_colwidths = [maximum(view(colwidths, :, i)) for i in 1:nc] - totwidth = sum(max_colwidths) + 2*8 + totwidth = sum(max_colwidths) + 2 * 8 println(io, "F-test: $N models fitted on $(ftr.nobs) observations") println(io, '─'^totwidth) @@ -218,9 +221,9 @@ function show(io::IO, ftr::FTestResult{N}) where {N} cur_cell = outrows[r, c] cur_cell_len = length(cur_cell) - padding = " "^(max_colwidths[c]-cur_cell_len) + padding = " "^(max_colwidths[c] - cur_cell_len) if c > 1 - padding = " "*padding + padding = " " * padding end print(io, padding) @@ -231,3 +234,7 @@ function show(io::IO, ftr::FTestResult{N}) where {N} end return print(io, '─'^totwidth) end + +function ftest(r::LinearModel{T,<:ProbabilityWeights}) where {T} + throw(ArgumentError("`ftest` for probability weighted models is not currently supported.")) +end diff --git a/src/glmfit.jl b/src/glmfit.jl index 3bb7cd76..d13b20f2 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -3,7 +3,7 @@ The response vector and various derived vectors in a generalized linear model. """ -struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link} <: ModResp +struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link,W<:AbstractWeights} <: ModResp "`y`: response vector" y::V d::D @@ -17,15 +17,17 @@ struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link} <: ModResp mu::V "`offset:` offset added to `Xβ` to form `eta`. Can be of length 0" offset::V - "`wts:` prior case weights. Can be of length 0." - wts::V + "`wts`: prior case weights" + wts::W "`wrkwt`: working case weights for the Iteratively Reweighted Least Squares (IRLS) algorithm" wrkwt::V "`wrkresid`: working residuals for IRLS" wrkresid::V end -function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V) where {V<:FPVector,D,L} +link(rr::GlmResp) = rr.d + +function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVector,D,L,W} n = length(y) nη = length(η) nμ = length(μ) @@ -35,41 +37,55 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V) where {V<:FPVec # Check y values checky(y, d) + ## We don't support custom types of weights that a user may define + if !(wts isa Union{FrequencyWeights,AnalyticWeights,ProbabilityWeights,UnitWeights}) + throw(ArgumentError("The type of `wts` was $W. The supported weights types are " * + "`FrequencyWeights`, `AnalyticWeights`, `ProbabilityWeights` and `UnitWeights`.")) + end + # Lengths of y, η, and η all need to be n if !(nη == nμ == n) throw(DimensionMismatch("lengths of η, μ, and y ($nη, $nμ, $n) are not equal")) end # Lengths of wts and off can be either n or 0 - if lw != 0 && lw != n - throw(DimensionMismatch("wts must have length $n or length 0 but was $lw")) + if lw != n + throw(DimensionMismatch("wts must have length $n but was $lw")) end if lo != 0 && lo != n throw(DimensionMismatch("offset must have length $n or length 0 but was $lo")) end - return GlmResp{V,D,L}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y)) + return GlmResp{V,D,L,W}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y)) end -function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::FPVector) +function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::AbstractWeights) # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly _y = convert(Vector{float(eltype(y))}, y) _off = convert(Vector{float(eltype(off))}, off) - _wts = convert(Vector{float(eltype(wts))}, wts) η = similar(_y) μ = similar(_y) - r = GlmResp(_y, d, l, η, μ, _off, _wts) - initialeta!(r.eta, d, l, _y, _wts, _off) + r = GlmResp(_y, d, l, η, μ, _off, wts) + initialeta!(r.eta, d, l, _y, wts, _off) updateμ!(r, r.eta) return r end function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, - wts::AbstractVector{<:Real}) where {D,L} - return GlmResp(float(y), d, l, float(off), float(wts)) + wts::AbstractWeights) where {D,L} + return GlmResp(float(y), d, l, float(off), wts) +end + +function deviance(r::GlmResp) + wts = weights(r) + d = sum(r.devresid) + return wts isa ProbabilityWeights ? d * nobs(r) / sum(wts) : d end -deviance(r::GlmResp) = sum(r.devresid) +weights(r::GlmResp) = r.wts +function isweighted(r::GlmResp) + return weights(r) isa Union{AnalyticWeights,FrequencyWeights,ProbabilityWeights} +end """ cancancel(r::GlmResp{V,D,L}) @@ -97,14 +113,14 @@ function updateμ! end function updateμ!(r::GlmResp{T}, linPr::T) where {T<:FPVector} isempty(r.offset) ? copyto!(r.eta, linPr) : broadcast!(+, r.eta, linPr, r.offset) updateμ!(r) - if !isempty(r.wts) - map!(*, r.devresid, r.devresid, r.wts) + if isweighted(r) map!(*, r.wrkwt, r.wrkwt, r.wts) + map!(*, r.devresid, r.devresid, r.wts) end return r end -function updateμ!(r::GlmResp{V,D,L}) where {V<:FPVector,D,L} +function updateμ!(r::GlmResp) y, η, μ, wrkres, wrkwt, dres = r.y, r.eta, r.mu, r.wrkresid, r.wrkwt, r.devresid @inbounds for i in eachindex(y, η, μ, wrkres, wrkwt, dres) @@ -125,7 +141,7 @@ function _weights_residuals(yᵢ, ηᵢ, μᵢ, omμᵢ, dμdηᵢ, l::LogitLink else wrkresᵢ = ifelse(yᵢ == 1, omμᵢ, yᵢ - μᵢ) / dμdηᵢ end - wrkwtᵢ = μᵢ*omμᵢ + wrkwtᵢ = μᵢ * omμᵢ return wrkresᵢ, wrkwtᵢ end @@ -133,13 +149,13 @@ end function _weights_residuals(yᵢ, ηᵢ, μᵢ, omμᵢ, dμdηᵢ, l::ProbitLink) # Since μomμ will underflow before dμdη for Probit, we can just check the # former to decide when to evaluate with the tail approximation. - μomμᵢ = μᵢ*omμᵢ + μomμᵢ = μᵢ * omμᵢ if iszero(μomμᵢ) - wrkresᵢ = 1/abs(ηᵢ) + wrkresᵢ = 1 / abs(ηᵢ) wrkwtᵢ = dμdηᵢ else wrkresᵢ = ifelse(yᵢ == 1, omμᵢ, yᵢ - μᵢ) / dμdηᵢ - wrkwtᵢ = abs2(dμdηᵢ)/μomμᵢ + wrkwtᵢ = abs2(dμdηᵢ) / μomμᵢ end return wrkresᵢ, wrkwtᵢ @@ -157,11 +173,11 @@ function _weights_residuals(yᵢ, ηᵢ, μᵢ, omμᵢ, dμdηᵢ, l::CloglogLi # converges to -1 wrkresᵢ = -one(emη) else - wrkresᵢ = (yᵢ - μᵢ)/omμᵢ*emη + wrkresᵢ = (yᵢ - μᵢ) / omμᵢ * emη end end - wrkwtᵢ = exp(2*ηᵢ)/expm1(exp(ηᵢ)) + wrkwtᵢ = exp(2 * ηᵢ) / expm1(exp(ηᵢ)) # We know that both limits are zero so we'll convert NaNs wrkwtᵢ = ifelse(isnan(wrkwtᵢ), zero(wrkwtᵢ), wrkwtᵢ) @@ -170,8 +186,8 @@ end # Fallback for remaining link functions function _weights_residuals(yᵢ, ηᵢ, μᵢ, omμᵢ, dμdηᵢ, l::Link01) - wrkresᵢ = ifelse(yᵢ == 1, omμᵢ, yᵢ - μᵢ)/dμdηᵢ - wrkwtᵢ = abs2(dμdηᵢ)/(μᵢ*omμᵢ) + wrkresᵢ = ifelse(yᵢ == 1, omμᵢ, yᵢ - μᵢ) / dμdηᵢ + wrkwtᵢ = abs2(dμdηᵢ) / (μᵢ * omμᵢ) return wrkresᵢ, wrkwtᵢ end @@ -241,8 +257,7 @@ end function GeneralizedLinearModel(rr::GlmResp, pp::LinPred, f::Union{FormulaTerm,Nothing}, fit::Bool) - return GeneralizedLinearModel(rr, pp, f, fit, 0, NaN, - NaN, NaN) + return GeneralizedLinearModel(rr, pp, f, fit, 0, NaN, NaN, NaN) end function coeftable(mm::AbstractGLM; level::Real=0.95) @@ -250,10 +265,10 @@ function coeftable(mm::AbstractGLM; level::Real=0.95) se = stderror(mm) zz = cc ./ se p = 2 * ccdf.(Ref(Normal()), abs.(zz)) - ci = se*quantile(Normal(), (1-level)/2) - levstr = isinteger(level*100) ? string(Integer(level*100)) : string(level*100) + ci = se * quantile(Normal(), (1 - level) / 2) + levstr = isinteger(level * 100) ? string(Integer(level * 100)) : string(level * 100) cn = coefnames(mm) - return CoefTable(hcat(cc, se, zz, p, cc+ci, cc-ci), + return CoefTable(hcat(cc, se, zz, p, cc + ci, cc - ci), ["Coef.", "Std. Error", "z", "Pr(>|z|)", "Lower $levstr%", "Upper $levstr%"], cn, 4, 3) @@ -261,33 +276,36 @@ end function confint(obj::AbstractGLM; level::Real=0.95) return hcat(coef(obj), coef(obj)) + - stderror(obj)*quantile(Normal(), (1.0 - level)/2.0)*[1.0 -1.0] + stderror(obj) * quantile(Normal(), (1.0 - level) / 2.0) * [1.0 -1.0] end deviance(m::AbstractGLM) = deviance(m.rr) function nulldeviance(m::GeneralizedLinearModel) r = m.rr - wts = weights(r.wts) + wts = weights(r) y = r.y d = r.d offset = r.offset hasint = hasintercept(m) dev = zero(eltype(y)) if isempty(offset) # Faster method - if !isempty(wts) + if isweighted(m) mu = hasint ? mean(y, wts) : - linkinv(r.link, zero(eltype(y))*zero(eltype(wts))/1) + linkinv(r.link, zero(eltype(y)) * zero(eltype(wts)) / 1) @inbounds for i in eachindex(y, wts) dev += wts[i] * devresid(d, y[i], mu) end else - mu = hasint ? mean(y) : linkinv(r.link, zero(eltype(y))/1) + mu = hasint ? mean(y) : linkinv(r.link, zero(eltype(y)) / 1) @inbounds for i in eachindex(y) dev += devresid(d, y[i], mu) end end + if wts isa ProbabilityWeights + dev /= sum(wts) / nobs(m) + end else X = fill(1.0, length(y), hasint ? 1 : 0) nullm = fit(GeneralizedLinearModel, @@ -303,46 +321,59 @@ end function loglikelihood(m::AbstractGLM) r = m.rr - wts = r.wts y = r.y mu = r.mu - d = r.d + wts = weights(r) + d = link(r) ll = zero(eltype(mu)) - if !isempty(wts) - ϕ = deviance(m)/sum(wts) - @inbounds for i in eachindex(y, mu, wts) + n = nobs(r) + N = length(y) + δ = deviance(r) + ϕ = δ / n + if wts isa Union{FrequencyWeights,UnitWeights} + @inbounds for i in eachindex(y, mu) ll += loglik_obs(d, y[i], mu[i], wts[i], ϕ) end - else - ϕ = deviance(m)/length(y) - @inbounds for i in eachindex(y, mu) - ll += loglik_obs(d, y[i], mu[i], 1, ϕ) + elseif wts isa AnalyticWeights + if d isa Union{Bernoulli,Binomial} + throw(ArgumentError("The `loglikelihood` for analytic weighted models with `Bernoulli` and `Binomial` families is not supported.")) end + @inbounds for i in eachindex(y, mu, wts) + ll += loglik_apweights_obs(d, y[i], mu[i], wts[i], δ, sum(wts), N) + end + else + throw(ArgumentError("The `loglikelihood` for probability weighted models is not currently supported.")) end return ll end function nullloglikelihood(m::GeneralizedLinearModel) r = m.rr - wts = r.wts + wts = weights(m) + sumwt = sum(wts) y = r.y d = r.d offset = r.offset hasint = hasintercept(m) ll = zero(eltype(y)) if isempty(r.offset) # Faster method - if !isempty(wts) - mu = hasint ? mean(y, weights(wts)) : linkinv(r.link, zero(ll)/1) - ϕ = nulldeviance(m)/sum(wts) + mu = hasint ? mean(y, wts) : linkinv(r.link, zero(ll) / 1) + δ = nulldeviance(m) + ϕ = nulldeviance(m) / nobs(m) + N = length(y) + if wts isa Union{FrequencyWeights,UnitWeights} @inbounds for i in eachindex(y, wts) ll += loglik_obs(d, y[i], mu, wts[i], ϕ) end - else - mu = hasint ? mean(y) : linkinv(r.link, zero(ll)/1) - ϕ = nulldeviance(m)/length(y) - @inbounds for i in eachindex(y) - ll += loglik_obs(d, y[i], mu, 1, ϕ) + elseif wts isa AnalyticWeights + if d isa Union{Bernoulli,Binomial} + throw(ArgumentError("The `nullloglikelihood` for analytic weighted models with `Bernoulli` and `Binomial` families is not supported.")) end + @inbounds for i in eachindex(y, mu, wts) + ll += loglik_apweights_obs(d, y[i], mu[i], wts[i], δ, sum(wts), N) + end + else + throw(ArgumentError("The `nullloglikelihood` for probability weighted models is not currently supported.")) end else X = fill(1.0, length(y), hasint ? 1 : 0) @@ -361,7 +392,6 @@ dof(obj::GeneralizedLinearModel) = linpred_rank(obj) + dispersion_parameter(obj. function _fit!(m::AbstractGLM, maxiter::Integer, minstepfac::Real, atol::Real, rtol::Real, start) - # Return early if model has the fit flag set m.fit && return m @@ -374,7 +404,7 @@ function _fit!(m::AbstractGLM, maxiter::Integer, minstepfac::Real, lp = r.mu # Initialize β, μ, and compute deviance - if start == nothing || isempty(start) + if start === nothing || isempty(start) # Compute beta update based on default response value # if no starting values have been passed delbeta!(p, wrkresp(r), r.wrkwt) @@ -412,7 +442,7 @@ function _fit!(m::AbstractGLM, maxiter::Integer, minstepfac::Real, ## If the deviance isn't declining then half the step size ## The rtol*dev term is to avoid failure when deviance ## is unchanged except for rouding errors. - while !isfinite(dev) || dev > devold + rtol*dev + while !isfinite(dev) || dev > devold + rtol * dev f /= 2 f > minstepfac || error("step-halving failed at beta0 = $(p.beta0)") try @@ -425,8 +455,8 @@ function _fit!(m::AbstractGLM, maxiter::Integer, minstepfac::Real, p.beta0 .+= p.delbeta .* f # Test for convergence - @debug "IRLS optimization" iteration=i deviance=dev diff_dev=(devold - dev) - if devold - dev < max(rtol*devold, atol) + @debug "IRLS optimization" iteration = i deviance = dev diff_dev = (devold - dev) + if devold - dev < max(rtol * devold, atol) cvg = true break end @@ -464,7 +494,7 @@ end const FIT_GLM_DOC = """ In the first method, `formula` must be a [StatsModels.jl `Formula` object](https://juliastats.org/StatsModels.jl/stable/formula/) - and `data` a table (in the [Tables.jl](https://tables.juliadata.org/stable/) definition, e.g. a data frame). + and `data` a table (in the [Tables.jl](https://tables.juliadata.org/stable/) definition, e.g., a data frame). In the second method, `X` must be a matrix holding values of the independent variable(s) in columns (including if appropriate the intercept), and `y` must be a vector holding values of the dependent variable. @@ -503,21 +533,30 @@ function fit(::Type{M}, l::Link=canonicallink(d); dropcollinear::Bool=true, method::Symbol=:qr, - wts::AbstractVector{<:Real}=similar(y, 0), + wts::AbstractWeights=uweights(length(y)), offset::AbstractVector{<:Real}=similar(y, 0), fitargs...) where {M<:AbstractGLM} - # Check that X and y have the same number of observations if size(X, 1) != size(y, 1) throw(DimensionMismatch("number of rows in X and y must match")) end - rr = GlmResp(y, d, l, offset, wts) + # For backward compatibility accept wts as AbstractArray and coerce them to FrequencyWeights + _wts = convert_weights(wts) + if !(wts isa AbstractWeights) && isempty(_wts) + Base.depwarn("Using `wts` of zero length for unweighted regression is deprecated in favor of " * + "explicitly using `UnitWeights(length(y))`." * + " Proceeding by coercing `wts` to UnitWeights of size $(length(y)).", + :fit) + _wts = uweights(length(y)) + end + + rr = GlmResp(y, d, l, offset, _wts) if method === :cholesky - res = M(rr, cholpred(X, dropcollinear), nothing, false) + res = M(rr, cholpred(X, dropcollinear, _wts), nothing, false) elseif method === :qr - res = M(rr, qrpred(X, dropcollinear), nothing, false) + res = M(rr, qrpred(X, dropcollinear, _wts), nothing, false) else throw(ArgumentError("The only supported values for keyword argument `method` are `:cholesky` and `:qr`.")) end @@ -530,8 +569,7 @@ function fit(::Type{M}, y::AbstractVector, d::UnivariateDistribution, l::Link=canonicallink(d); kwargs...) where {M<:AbstractGLM} - return fit(M, float(X), float(y), d, - l; kwargs...) + return fit(M, float(X), float(y), d, l; kwargs...) end function fit(::Type{M}, @@ -546,20 +584,28 @@ function fit(::Type{M}, contrasts::AbstractDict{Symbol}=Dict{Symbol,Any}(), fitargs...) where {M<:AbstractGLM} f, (y, X) = modelframe(f, data, contrasts, M) - + wts = wts === nothing ? uweights(length(y)) : wts + _wts = convert_weights(wts) + if !(wts isa AbstractWeights) && isempty(_wts) + Base.depwarn("Using `wts` of zero length for unweighted regression is deprecated in favor of " * + "explicitly using `UnitWeights(length(y))`." * + " Proceeding by coercing `wts` to UnitWeights of size $(length(y)).", + :fit) + _wts = uweights(length(y)) + end # Check that X and y have the same number of observations if size(X, 1) != size(y, 1) throw(DimensionMismatch("number of rows in X and y must match")) end off = offset === nothing ? similar(y, 0) : offset - wts = wts === nothing ? similar(y, 0) : wts - rr = GlmResp(y, d, l, off, wts) + + rr = GlmResp(y, d, l, off, _wts) if method === :cholesky - res = M(rr, cholpred(X, dropcollinear), f, false) + res = M(rr, cholpred(X, dropcollinear, _wts), f, false) elseif method === :qr - res = M(rr, qrpred(X, dropcollinear), f, false) + res = M(rr, qrpred(X, dropcollinear, _wts), f, false) else throw(ArgumentError("The only supported values for keyword argument `method` are `:cholesky` and `:qr`.")) end @@ -660,7 +706,7 @@ $PREDICT_COMMON """ function predict!(res::Union{AbstractVector, NamedTuple{(:prediction, :lower, :upper), - <: NTuple{3,AbstractVector}}}, + <:NTuple{3,AbstractVector}}}, mm::AbstractGLM, newX::AbstractMatrix; offset::FPVector=eltype(newX)[], interval::Union{Symbol,Nothing}=nothing, @@ -689,12 +735,12 @@ function predict!(res::Union{AbstractVector, length(mu) == length(lower) == length(upper) == size(newX, 1) || throw(DimensionMismatch("length of vectors in `res` must equal the number of rows in `newX`")) mu .= linkinv.(Link(mm), eta) - normalquantile = quantile(Normal(), (1 + level)/2) + normalquantile = quantile(Normal(), (1 + level) / 2) # Compute confidence intervals in two steps # (2nd step varies depending on `interval_method`) # 1. Estimate variance for eta based on variance for coefficients # through the diagonal of newX*vcov(mm)*newX' - vcovXnewT = vcov(mm)*newX' + vcovXnewT = vcov(mm) * newX' stdeta = [sqrt(dot(view(newX, i, :), view(vcovXnewT, :, i))) for i in axes(newX, 1)] if interval_method == :delta @@ -721,25 +767,12 @@ function initialeta!(eta::AbstractVector, dist::UnivariateDistribution, link::Link, y::AbstractVector, - wts::AbstractVector, + wts::AbstractWeights, off::AbstractVector) n = length(y) - lw = length(wts) lo = length(off) - if lw == n - @inbounds @simd for i in eachindex(y, eta, wts) - μ = mustart(dist, y[i], wts[i]) - eta[i] = linkfun(link, μ) - end - elseif lw == 0 - @inbounds @simd for i in eachindex(y, eta) - μ = mustart(dist, y[i], 1) - eta[i] = linkfun(link, μ) - end - else - throw(ArgumentError("length of wts must be either $n or 0 but was $lw")) - end + _initialeta!(eta, dist, link, y, wts) if lo == n @inbounds @simd for i in eachindex(eta, off) @@ -752,6 +785,24 @@ function initialeta!(eta::AbstractVector, return eta end +function _initialeta!(eta::AbstractVector, + dist::UnivariateDistribution, + link::Link, + y::AbstractVector, + wts::AbstractWeights) + if wts isa UnitWeights + @inbounds @simd for i in eachindex(y, eta) + μ = mustart(dist, y[i], 1) + eta[i] = linkfun(link, μ) + end + else + @inbounds @simd for i in eachindex(y, eta, wts) + μ = mustart(dist, y[i], wts[i]) + eta[i] = linkfun(link, μ) + end + end +end + # Helper function to check that the values of y are in the allowed domain function checky(y, d::Distribution) if any(x -> !insupport(d, x), y) @@ -765,3 +816,67 @@ function checky(y, d::Binomial) end return nothing end + +function nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:AbstractWeights} + return oftype(sum(one(eltype(weights(r)))), length(r.y)) +end +nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = sum(r.wts) + +function residuals(r::GlmResp; weighted::Bool=false) + y, η, μ = r.y, r.eta, r.mu + dres = similar(μ) + + @inbounds for i in eachindex(y, μ) + μi = μ[i] + yi = y[i] + dres[i] = sqrt(max(0, devresid(r.d, yi, μi))) * sign(yi - μi) + end + + if weighted + dres .*= sqrt.(weights(r)) + end + + return dres +end + +function momentmatrix(m::GeneralizedLinearModel) + X = modelmatrix(m; weighted=false) + r = varstruct(m) + if link(m) isa Union{Gamma,InverseGaussian} + r .*= sum(working_weights(m)) / sum(abs2, r) + end + return Diagonal(r) * X +end + +function varstruct(x::GeneralizedLinearModel) + wrkwts = working_weights(x) + wts = weights(x) + wrkres = working_residuals(x) + if wts isa ProbabilityWeights + return wrkwts .* wrkres .* (nobs(x) / sum(wts)) + else + return wrkwts .* wrkres + end +end + +function invloglikhessian(m::GeneralizedLinearModel) + r = varstruct(m) + wts = weights(m) + return inverse(m.pp) * sum(wts) / nobs(m) +end + +function StatsBase.cooksdistance(m::GeneralizedLinearModel) + h = leverage(m) + hh = h ./ (1 .- h) .^ 2 + Rp = pearson_residuals(m) + return (Rp .^ 2) .* hh ./ (dispersion(m)^2 * dof(m)) +end + +function pearson_residuals(m::GeneralizedLinearModel) + y = m.rr.y + μ = predict(m) + v = glmvar.(m.rr.d, μ) + w = weights(m) + r = ((y .- μ) .* sqrt.(w)) ./ sqrt.(v) + return r +end diff --git a/src/glmtools.jl b/src/glmtools.jl index ee845acc..b8556eca 100644 --- a/src/glmtools.jl +++ b/src/glmtools.jl @@ -198,12 +198,12 @@ LogitLink() """ function canonicallink end -linkfun(::CauchitLink, μ::Real) = tan(pi * (μ - oftype(μ, 1/2))) -linkinv(::CauchitLink, η::Real) = oftype(η, 1/2) + atan(η) / pi +linkfun(::CauchitLink, μ::Real) = tan(pi * (μ - oftype(μ, 1 / 2))) +linkinv(::CauchitLink, η::Real) = oftype(η, 1 / 2) + atan(η) / pi function inverselink(::CauchitLink, η::Real) # atan decays so slowly that we don't need to be careful when evaluating μ μ = atan(η) / π - μ += one(μ)/2 + μ += one(μ) / 2 return μ, inv(π * (1 + abs2(η))), 1 - μ end @@ -337,7 +337,7 @@ glmvar(::Union{Bernoulli,Binomial}, μ::Real) = μ * (1 - μ) glmvar(::Gamma, μ::Real) = abs2(μ) glmvar(::Geometric, μ::Real) = μ * (1 + μ) glmvar(::InverseGaussian, μ::Real) = μ^3 -glmvar(d::NegativeBinomial, μ::Real) = μ * (1 + μ/d.r) +glmvar(d::NegativeBinomial, μ::Real) = μ * (1 + μ / d.r) glmvar(::Normal, μ::Real) = one(μ) glmvar(::Poisson, μ::Real) = μ @@ -370,11 +370,11 @@ true """ function mustart end -mustart(::Bernoulli, y, wt) = (y + oftype(y, 1/2)) / 2 -mustart(::Binomial, y, wt) = (wt * y + oftype(y, 1/2)) / (wt + one(y)) +mustart(::Bernoulli, y, wt) = (y + oftype(y, 1 / 2)) / 2 +mustart(::Binomial, y, wt) = (wt * y + oftype(y, 1 / 2)) / (wt + one(y)) function mustart(::Union{Gamma,InverseGaussian}, y, wt) fy = float(y) - return iszero(y) ? oftype(y, 1/10) : fy + return iszero(y) ? oftype(y, 1 / 10) : fy end function mustart(::Geometric, y, wt) fy = float(y) @@ -382,12 +382,12 @@ function mustart(::Geometric, y, wt) end function mustart(::NegativeBinomial, y, wt) fy = float(y) - return iszero(y) ? fy + oftype(fy, 1/6) : fy + return iszero(y) ? fy + oftype(fy, 1 / 6) : fy end mustart(::Normal, y, wt) = y function mustart(::Poisson, y, wt) fy = float(y) - return fy + oftype(fy, 1/10) + return fy + oftype(fy, 1 / 10) end """ @@ -430,7 +430,7 @@ function devresid(::Binomial, y, μ::Real) elseif y == 0 return -2 * log1p(-μ) else - return 2 * (y * (log(y) - log(μ)) + (1 - y)*(log1p(-y) - log1p(-μ))) + return 2 * (y * (log(y) - log(μ)) + (1 - y) * (log1p(-y) - log1p(-μ))) end end devresid(::Gamma, y, μ::Real) = -2 * (log(y / μ) - (y - μ) / μ) @@ -442,7 +442,7 @@ devresid(::InverseGaussian, y, μ::Real) = abs2(y - μ) / (y * abs2(μ)) function devresid(d::NegativeBinomial, y, μ::Real) μ == 0 && return convert(float(promote_type(typeof(μ), typeof(y))), NaN) θ = d.r - return 2 * (xlogy(y, y / μ) + xlogy(y + θ, (μ + θ)/(y + θ))) + return 2 * (xlogy(y, y / μ) + xlogy(y + θ, (μ + θ) / (y + θ))) end devresid(::Normal, y, μ::Real) = abs2(y - μ) devresid(::Poisson, y, μ::Real) = 2 * (xlogy(y, y / μ) - (y - μ)) @@ -490,23 +490,44 @@ The loglikelihood of a fitted model is the sum of these values over all the obse """ function loglik_obs end -loglik_obs(::Bernoulli, y, μ, wt, ϕ) = wt*logpdf(Bernoulli(μ), y) -loglik_obs(::Binomial, y, μ, wt, ϕ) = logpdf(Binomial(Int(wt), μ), _safe_int(y*wt)) -loglik_obs(::Gamma, y, μ, wt, ϕ) = wt*logpdf(Gamma(inv(ϕ), μ*ϕ), y) +loglik_obs(::Bernoulli, y, μ, wt, ϕ) = wt * logpdf(Bernoulli(μ), y) +loglik_obs(::Binomial, y, μ, wt, ϕ) = logpdf(Binomial(Int(wt), μ), _safe_int(y * wt)) +loglik_obs(::Gamma, y, μ, wt, ϕ) = wt * logpdf(Gamma(inv(ϕ), μ * ϕ), y) # In Distributions.jl, a Geometric distribution characterizes the number of failures before # the first success in a sequence of independent Bernoulli trials with success rate p. # The mean of Geometric distribution is (1 - p) / p. # Hence, p = 1 / (1 + μ). loglik_obs(::Geometric, y, μ, wt, ϕ) = wt * logpdf(Geometric(1 / (μ + 1)), y) -loglik_obs(::InverseGaussian, y, μ, wt, ϕ) = wt*logpdf(InverseGaussian(μ, inv(ϕ)), y) -loglik_obs(::Normal, y, μ, wt, ϕ) = wt*logpdf(Normal(μ, sqrt(ϕ)), y) -loglik_obs(::Poisson, y, μ, wt, ϕ) = wt*logpdf(Poisson(μ), y) +loglik_obs(::InverseGaussian, y, μ, wt, ϕ) = wt * logpdf(InverseGaussian(μ, inv(ϕ)), y) +loglik_obs(::Normal, y, μ, wt, ϕ) = wt * logpdf(Normal(μ, sqrt(ϕ)), y) +loglik_obs(::Poisson, y, μ, wt, ϕ) = wt * logpdf(Poisson(μ), y) # We use the following parameterization for the Negative Binomial distribution: # (Γ(θ+y) / (Γ(θ) * y!)) * μ^y * θ^θ / (μ+θ)^{θ+y} # The parameterization of NegativeBinomial(r=θ, p) in Distributions.jl is # Γ(θ+y) / (y! * Γ(θ)) * p^θ(1-p)^y # Hence, p = θ/(μ+θ) function loglik_obs(d::NegativeBinomial, y, μ, wt, ϕ) - return wt*logpdf(NegativeBinomial(d.r, d.r/(μ+d.r)), - y) + return wt * logpdf(NegativeBinomial(d.r, d.r / (μ + d.r)), y) +end + +## Slight different interface for analytic and probability weights +## ϕ is the deviance - not the deviance/n nor sum(wt) +## sumwt is sum(wt) +## n is the number of observations + +function loglik_apweights_obs(::Gamma, y, μ, wt, ϕ, sumwt, n) + return wt * logpdf(Gamma(inv(ϕ / sumwt), μ * ϕ / sumwt), y) +end +function loglik_apweights_obs(::Geometric, y, μ, wt, ϕ, sumwt, n) + return wt * logpdf(Geometric(1 / (μ + 1)), y) +end +function loglik_apweights_obs(::InverseGaussian, y, μ, wt, ϕ, sumwt, n) + return -(wt * (1 + log(2π * (ϕ / sumwt))) + 3 * log(y) * wt) / 2 +end +function loglik_apweights_obs(::Normal, y, μ, wt, ϕ, sumwt, n) + return ((-log(2π * ϕ / n) - 1) + log(wt)) / 2 +end +loglik_apweights_obs(::Poisson, y, μ, wt, ϕ, sumwt, n) = wt * logpdf(Poisson(μ), y) +function loglik_apweights_obs(d::NegativeBinomial, y, μ, wt, ϕ, sumwt, n) + return wt * logpdf(NegativeBinomial(d.r, d.r / (μ + d.r)), y) end diff --git a/src/linpred.jl b/src/linpred.jl index 943e0130..cf213b87 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -33,29 +33,40 @@ A `LinPred` type with a dense QR decomposition of `X` - `qr`: either a `QRCompactWY` or `QRPivoted` object created from `X`, with optional row weights. - `scratchm1`: scratch Matrix{T} of the same size as `X` """ -mutable struct DensePredQR{T<:BlasReal,Q<:Union{QRCompactWY,QRPivoted}} <: DensePred +mutable struct DensePredQR{T<:BlasReal,Q<:Union{QRCompactWY,QRPivoted}, + W<:AbstractWeights} <: DensePred X::Matrix{T} # model matrix beta0::Vector{T} # base coefficient vector delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} qr::Q + wts::W scratchm1::Matrix{T} - function DensePredQR(X::AbstractMatrix, pivot::Bool=false) + function DensePredQR(X::AbstractMatrix, pivot::Bool, + wts::W) where {W<:AbstractWeights} n, p = size(X) T = typeof(float(zero(eltype(X)))) Q = pivot ? QRPivoted : QRCompactWY fX = float(X) - cfX = fX === X ? copy(fX) : fX + if wts isa UnitWeights + cfX = fX === X ? copy(fX) : fX + else + cfX = Diagonal(sqrt.(wts)) * fX + end F = pivot ? qr!(cfX, ColumnNorm()) : qr!(cfX) - return new{T,Q}(Matrix{T}(X), - zeros(T, p), - zeros(T, p), - zeros(T, p), - F, - similar(X, T)) + return new{T,Q,W}(Matrix{T}(X), + zeros(T, p), + zeros(T, p), + zeros(T, p), + F, + wts, + similar(X, T)) end end + +DensePredQR(X::AbstractMatrix) = DensePredQR(X, false, uweights(size(X, 1))) + """ delbeta!(p::LinPred, r::Vector) @@ -64,12 +75,13 @@ Evaluate and return `p.delbeta` the increment to the coefficient vector from res function delbeta! end function delbeta!(p::DensePredQR{T,<:QRCompactWY}, r::Vector{T}) where {T<:BlasReal} - p.delbeta = p.qr \ r + r̃ = p.wts isa UnitWeights ? r : sqrt.(p.wts) .* r + p.delbeta = p.qr \ r̃ return p end -function delbeta!(p::DensePredQR{T,<:QRCompactWY}, r::Vector{T}, - wt::Vector{T}) where {T<:BlasReal} +function delbeta!(p::DensePredQR{T,<:QRCompactWY,<:AbstractWeights}, r::Vector{T}, + wt::AbstractVector) where {T<:BlasReal} X = p.X wtsqrt = sqrt.(wt) sqrtW = Diagonal(wtsqrt) @@ -81,23 +93,23 @@ function delbeta!(p::DensePredQR{T,<:QRCompactWY}, r::Vector{T}, end function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}) where {T<:BlasReal} + r̃ = p.wts isa UnitWeights ? r : sqrt.(p.wts) .* r rnk = linpred_rank(p) if rnk == length(p.delbeta) - p.delbeta = p.qr \ r + p.delbeta = p.qr \ r̃ else R = UpperTriangular(view(parent(p.qr.R), 1:rnk, 1:rnk)) piv = p.qr.p fill!(p.delbeta, 0) - p.delbeta[1:rnk] = R \ view(p.qr.Q'r, 1:rnk) + p.delbeta[1:rnk] = R \ view(p.qr.Q' * r̃, 1:rnk) invpermute!(p.delbeta, piv) end return p end -function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}, - wt::Vector{T}) where {T<:BlasReal} +function delbeta!(p::DensePredQR{T,<:QRPivoted,<:AbstractWeights}, r::Vector{T}, + wt::AbstractVector{T}) where {T<:BlasReal} X = p.X - W = Diagonal(wt) wtsqrt = sqrt.(wt) sqrtW = Diagonal(wtsqrt) mul!(p.scratchm1, sqrtW, X) @@ -110,7 +122,7 @@ function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}, for k in (rnk + 1):length(p.delbeta) p.delbeta[k] = zero(T) end - p.delbeta[1:rnk] = R \ view(p.qr.Q'*r̃, 1:rnk) + p.delbeta[1:rnk] = R \ view(p.qr.Q' * r̃, 1:rnk) invpermute!(p.delbeta, p.qr.p) return p @@ -129,32 +141,52 @@ A `LinPred` type with a dense Cholesky factorization of `X'X` - `scratchbeta`: scratch vector of length `p`, used in `linpred!` method - `chol`: a `Cholesky` object created from `X'X`, possibly using row weights. - `scratchm1`: scratch Matrix{T} of the same size as `X` -- `scratchm2`: scratch Matrix{T} os the same size as `X'X` +- `scratchm2`: scratch Matrix{T} of the same size as `X'X` """ -mutable struct DensePredChol{T<:BlasReal,C} <: DensePred +mutable struct DensePredChol{T<:BlasReal,C,W<:AbstractWeights} <: DensePred X::Matrix{T} # model matrix beta0::Vector{T} # base vector for coefficients delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} chol::C + wts::W scratchm1::Matrix{T} scratchm2::Matrix{T} end -function DensePredChol(X::AbstractMatrix, pivot::Bool) - F = Hermitian(float(X'X)) - T = eltype(F) + +function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) + if wts isa UnitWeights + F = Hermitian(float(X'X)) + T = eltype(F) + scr = similar(X, T) + else + T = float(promote_type(eltype(wts), eltype(X))) + scr = similar(X, T) + mul!(scr, Diagonal(wts), X) + F = Hermitian(float(scr'X)) + end F = pivot ? cholesky!(F, RowMaximum(); tol=(-one(T)), check=false) : cholesky!(F) return DensePredChol(Matrix{T}(X), zeros(T, size(X, 2)), zeros(T, size(X, 2)), zeros(T, size(X, 2)), F, - similar(X, T), + wts, + scr, similar(cholfactors(F))) end -cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot) -qrpred(X::AbstractMatrix, pivot::Bool=false) = DensePredQR(X, pivot) +function DensePredChol(X::AbstractMatrix, pivot::Bool) + return DensePredChol(X, pivot, uweights(size(X, 1))) +end + +function cholpred(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights=uweights(size(X, 1))) + return DensePredChol(X, pivot, wts) +end +function qrpred(X::AbstractMatrix, pivot::Bool=false, + wts::AbstractWeights=uweights(size(X, 1))) + return DensePredQR(X, pivot, wts) +end cholfactors(c::Union{Cholesky,CholeskyPivoted}) = c.factors cholesky!(p::DensePredChol{T}) where {T<:FP} = p.chol @@ -165,14 +197,18 @@ function cholesky(p::DensePredChol{T}) where {T<:FP} return Cholesky(copy(cholfactors(c)), c.uplo, c.info) end -function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}) where {T<:BlasReal} - ldiv!(p.chol, mul!(p.delbeta, transpose(p.X), r)) +function delbeta!(p::DensePredChol{T,<:Cholesky,<:AbstractWeights}, + r::Vector{T}) where {T<:BlasReal} + X = p.wts isa UnitWeights ? p.X : mul!(p.scratchm1, Diagonal(p.wts), p.X) + ldiv!(p.chol, mul!(p.delbeta, transpose(X), r)) return p end -function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}) where {T<:BlasReal} +function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, + r::Vector{T}) where {T<:BlasReal} ch = p.chol - delbeta = mul!(p.delbeta, adjoint(p.X), r) + X = p.wts isa UnitWeights ? p.scratchm1 .= p.X : mul!(p.scratchm1, Diagonal(p.wts), p.X) + delbeta = mul!(p.delbeta, adjoint(X), r) rnk = linpred_rank(p) if rnk == length(delbeta) ldiv!(ch, delbeta) @@ -187,7 +223,7 @@ function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}) where {T< return p end -function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}, +function delbeta!(p::DensePredChol{T,<:Cholesky,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where {T<:BlasReal} scr = mul!(p.scratchm1, Diagonal(wt), p.X) cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(scr), p.X), :U)) @@ -196,7 +232,7 @@ function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}, return p end -function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}, +function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where {T<:BlasReal} piv = p.chol.p # inverse vector delbeta = p.delbeta @@ -229,32 +265,30 @@ function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}, return p end -function invqr(p::DensePredQR{T,<: QRCompactWY}) where {T} +function invqr(p::DensePredQR{T,<:QRCompactWY,<:AbstractWeights}) where {T} Rinv = inv(p.qr.R) - return Rinv*Rinv' + return Rinv * Rinv' end -function invqr(p::DensePredQR{T,<: QRPivoted}) where {T} +function invqr(p::DensePredQR{T,<:QRPivoted,<:AbstractWeights}) where {T} rnk = linpred_rank(p) k = length(p.delbeta) + ipiv = invperm(p.qr.p) if rnk == k Rinv = inv(p.qr.R) - xinv = Rinv*Rinv' - ipiv = invperm(p.qr.p) - return xinv[ipiv, ipiv] + xinv = Rinv * Rinv' else Rsub = UpperTriangular(view(p.qr.R, 1:rnk, 1:rnk)) RsubInv = inv(Rsub) xinv = fill(convert(T, NaN), (k, k)) - xinv[1:rnk, 1:rnk] = RsubInv*RsubInv' - ipiv = invperm(p.qr.p) - return xinv[ipiv, ipiv] + xinv[1:rnk, 1:rnk] = RsubInv * RsubInv' end + return xinv[ipiv, ipiv] end invchol(x::DensePred) = inv(cholesky!(x)) -function invchol(x::DensePredChol{T,<: CholeskyPivoted}) where {T} +function invchol(x::DensePredChol{T,<:CholeskyPivoted}) where {T} ch = x.chol rnk = linpred_rank(x) p = length(x.delbeta) @@ -272,7 +306,46 @@ end inverse(x::DensePred) = invchol(x) inverse(x::DensePredQR) = invqr(x) -vcov(x::LinPredModel) = rmul!(inverse(x.pp), dispersion(x, true)) +working_residuals(x::LinPredModel) = x.rr.wrkresid +working_weights(x::LinPredModel) = x.rr.wrkwt + +function vcov(x::LinPredModel) + if weights(x) isa ProbabilityWeights + ## n-1 degrees of freedom - This is coherent with the `R` package `survey`, + ## `STATA` uses n-k + s = nobs(x) / (nobs(x) - 1) + mm = momentmatrix(x) + A = invloglikhessian(x) + if link(x) isa Union{Gamma,InverseGaussian} + r = varstruct(x) + A ./= sum(working_weights(x)) / sum(abs2, r) + end + _vcov(x.pp, mm, A) .* s + else + rmul!(inverse(x.pp), dispersion(x, true)) + end +end + +link(x::LinPredModel) = link(x.rr) + +function _vcov(pp::LinPred, Z::AbstractMatrix, A::AbstractMatrix) + if linpred_rank(pp) < size(Z, 2) + nancols = [all(isnan, col) for col in eachcol(A)] + nnancols = .!nancols + idx, nidx = findall(nancols), findall(nnancols) + Zv = view(Z, :, nidx) + B = Zv'Zv + Av = view(A, nidx, nidx) + V = similar(A, (size(A)...)) + V[nidx, nidx] = Av * B * Av + V[idx, :] .= NaN + V[:, idx] .= NaN + else + B = Z'Z + V = A * B * A + end + return V +end function cor(x::LinPredModel) Σ = vcov(x) @@ -300,38 +373,83 @@ function modelframe(f::FormulaTerm, data, contrasts::AbstractDict, ::Type{M}) wh return f, modelcols(f, data) end -modelmatrix(obj::LinPredModel) = obj.pp.X +function modelmatrix(obj::LinPredModel; weighted::Bool=false) + return modelmatrix(obj.pp; weighted=weighted) +end + +function modelmatrix(pp::LinPred; weighted::Bool=false) + return weighted ? Diagonal(sqrt.(pp.wts)) * pp.X : pp.X +end + +function leverage(x::LinPredModel) + h = vec(leverage(x.pp)) + return working_weights(x) .* h +end + +function leverage(pp::DensePredChol{T,<:CholeskyPivoted}) where {T} + X = modelmatrix(pp) + rnk = rank(pp.chol) + A = inverse(pp) + p = pp.chol.p[1:rnk] + Xv = @view X[:, p] + Av = @view A[p, p] + return diag(Xv * Av * Xv') +end + +function leverage(pp::DensePredChol{T,<:Cholesky}) where {T} + X = modelmatrix(pp) + return sum(x -> x^2, X / pp.chol.U; dims=2) +end + +function leverage(pp::DensePredQR{T,<:QRPivoted}) where {T} + X = modelmatrix(pp) + rnk = linpred_rank(pp) + R = UpperTriangular(view(parent(pp.qr.R), 1:rnk, 1:rnk)) + return sum(x -> x^2, view(X, :, pp.qr.p[1:rnk]) / R; dims=2) +end + +function leverage(pp::DensePredQR{T,<:QRCompactWY}) where {T} + X = modelmatrix(pp) + return sum(x -> x^2, X / pp.qr.R; dims=2) +end + response(obj::LinPredModel) = obj.rr.y fitted(m::LinPredModel) = m.rr.mu predict(mm::LinPredModel) = fitted(mm) -residuals(obj::LinPredModel) = residuals(obj.rr) function StatsModels.formula(obj::LinPredModel) obj.formula === nothing && throw(ArgumentError("model was fitted without a formula")) return obj.formula end +function residuals(obj::LinPredModel; weighted::Bool=false) + return residuals(obj.rr; weighted=weighted) +end + """ nobs(obj::LinearModel) nobs(obj::GLM) -For linear and generalized linear models, returns the number of rows, or, -when prior weights are specified, the sum of weights. +For linear and generalized linear models, return the number of rows when +the model is unweighted or uses analytical or probability weights. +If the model uses frequency weights, return the sum of weights. """ -function nobs(obj::LinPredModel) - if isempty(obj.rr.wts) - oftype(sum(one(eltype(obj.rr.wts))), length(obj.rr.y)) - else - sum(obj.rr.wts) - end +nobs(obj::LinPredModel) = nobs(obj.rr) + +weights(m::LinPredModel) = weights(m.rr) +weights(pp::LinPred) = pp.wts + +isweighted(m::LinPredModel) = isweighted(m.pp) +function isweighted(pp::LinPred) + return weights(pp) isa Union{FrequencyWeights,AnalyticWeights,ProbabilityWeights} end coef(x::LinPred) = x.beta0 coef(obj::LinPredModel) = coef(obj.pp) function coefnames(x::LinPredModel) return x.formula === nothing ? ["x$i" for i in 1:length(coef(x))] : - coefnames(formula(x).rhs) + StatsModels.vectorize(coefnames(formula(x).rhs)) end dof_residual(obj::LinPredModel) = nobs(obj) - linpred_rank(obj) diff --git a/src/lm.jl b/src/lm.jl index 1b10fa4c..4fc4a1dd 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -7,39 +7,44 @@ Encapsulates the response for a linear model - `mu`: current value of the mean response vector or fitted value - `offset`: optional offset added to the linear predictor to form `mu` -- `wts`: optional vector of prior frequency (a.k.a. case) weights for observations +- `wts`: optional weights for observations (as `AbstractWeights`) - `y`: observed response vector Either or both `offset` and `wts` may be of length 0 """ -mutable struct LmResp{V<:FPVector} <: ModResp # response in a linear model +mutable struct LmResp{V<:FPVector,W<:AbstractWeights} <: ModResp # response in a linear model mu::V # mean response offset::V # offset added to linear predictor (may have length 0) - wts::V # prior weights (may have length 0) + wts::W # prior weights (may have length 0) y::V # response - function LmResp{V}(mu::V, off::V, wts::V, y::V) where {V} + function LmResp{V,W}(mu::V, off::V, wts::W, y::V) where {V,W} n = length(y) - length(mu) == n || error("mismatched lengths of mu and y") - ll = length(off) - ll == 0 || ll == n || error("length of offset is $ll, must be $n or 0") - ll = length(wts) - ll == 0 || ll == n || error("length of wts is $ll, must be $n or 0") - return new{V}(mu, off, wts, y) + nμ = length(mu) + lw = length(wts) + lo = length(off) + if nμ != n + throw(DimensionMismatch("lengths of `mu` and `y` ($nμ, $n) are not equal")) + end + + # Lengths of wts and off can be either n or 0 + if lw != n + throw(DimensionMismatch("`wts` must have length $n but was $lw")) + end + if lo != 0 && lo != n + throw(DimensionMismatch("offset must have length $n but was $lo")) + end + return new{V,W}(mu, off, wts, y) end end -function LmResp(y::AbstractVector{<:Real}, - wts::Union{Nothing,AbstractVector{<:Real}}=nothing) +function LmResp(y::AbstractVector{<:Real}, wts::AbstractWeights) # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly _y = convert(Vector{float(eltype(y))}, y) - _wts = if wts === nothing - similar(_y, 0) - else - convert(Vector{float(eltype(wts))}, wts) - end - return LmResp{typeof(_y)}(zero(_y), zero(_y), _wts, _y) + return LmResp{typeof(_y),typeof(wts)}(zero(_y), zero(_y), wts, _y) end +LmResp(y::AbstractVector{<:Real}) = LmResp(y, uweights(length(y))) + function updateμ!(r::LmResp{V}, linPr::V) where {V<:FPVector} n = length(linPr) length(r.y) == n || error("length(linPr) is $n, should be $(length(r.y))") @@ -53,25 +58,55 @@ function deviance(r::LmResp) y = r.y mu = r.mu wts = r.wts - v = zero(eltype(y)) + zero(eltype(y)) * zero(eltype(wts)) - if isempty(wts) - @inbounds @simd for i in eachindex(y, mu) + if wts isa UnitWeights + v = zero(eltype(y)) + zero(eltype(y)) + @inbounds @simd for i in eachindex(y, mu, wts) v += abs2(y[i] - mu[i]) end else + v = zero(eltype(y)) + zero(eltype(y)) * zero(eltype(wts)) @inbounds @simd for i in eachindex(y, mu, wts) - v += abs2(y[i] - mu[i])*wts[i] + v += abs2(y[i] - mu[i]) * wts[i] end end - return v + return wts isa ProbabilityWeights ? v ./ (sum(wts) / length(y)) : v end -function loglikelihood(r::LmResp) - n = isempty(r.wts) ? length(r.y) : sum(r.wts) - return -n/2 * (log(2π * deviance(r)/n) + 1) +weights(r::LmResp) = r.wts +function isweighted(r::LmResp) + return weights(r) isa Union{AnalyticWeights,FrequencyWeights,ProbabilityWeights} end -residuals(r::LmResp) = r.y - r.mu +nobs(r::LmResp{<:Any,W}) where {W<:FrequencyWeights} = sum(r.wts) +function nobs(r::LmResp{<:Any,W}) where {W<:AbstractWeights} + return oftype(sum(one(eltype(r.wts))), length(r.y)) +end + +function loglikelihood(r::LmResp{T,<:Union{UnitWeights,FrequencyWeights}}) where {T} + n = nobs(r) + return -n / 2 * (log(2π * deviance(r) / n) + 1) +end + +function loglikelihood(r::LmResp{T,<:AnalyticWeights}) where {T} + N = length(r.y) + n = sum(log, weights(r)) + return (n - N * (log(2π * deviance(r) / N) + 1)) / 2 +end + +function loglikelihood(r::LmResp{T,<:ProbabilityWeights}) where {T} + throw(ArgumentError("The `loglikelihood` for probability weighted models is not currently supported.")) +end + +function residuals(r::LmResp; weighted::Bool=false) + wts = weights(r) + if weighted && isweighted(r) + sqrt.(wts) .* (r.y .- r.mu) + else + r.y .- r.mu + end +end + +link(rr::LmResp) = IdentityLink() """ LinearModel @@ -94,11 +129,7 @@ end LinearAlgebra.cholesky(x::LinearModel) = cholesky(x.pp) function StatsBase.fit!(obj::LinearModel) - if isempty(obj.rr.wts) - delbeta!(obj.pp, obj.rr.y) - else - delbeta!(obj.pp, obj.rr.y, obj.rr.wts) - end + delbeta!(obj.pp, obj.rr.y) obj.pp.beta0 .= obj.pp.delbeta updateμ!(obj.rr, linpred(obj.pp, zero(eltype(obj.rr.y)))) return obj @@ -127,31 +158,55 @@ Fit a linear model to data. $FIT_LM_DOC """ + +function convert_weights(wts) + if wts isa Union{FrequencyWeights,AnalyticWeights,ProbabilityWeights,UnitWeights} + wts + elseif wts isa AbstractVector + Base.depwarn("Passing weights as vector is deprecated in favor of explicitly using " * + "`AnalyticWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * + "by coercing `wts` to `FrequencyWeights`", + :fit) + fweights(wts) + else + throw(ArgumentError("`wts` should be an `AbstractVector` coercible to `AbstractWeights`")) + end +end + function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}; - wts::AbstractVector{<:Real}=similar(y, 0), - dropcollinear::Bool=true, - method::Symbol=:qr) + wts::Union{AbstractWeights,AbstractVector{<:Real}}=uweights(length(y)), + dropcollinear::Bool=true, method::Symbol=:qr) + # For backward compatibility accept wts as AbstractArray and coerce them to FrequencyWeights + _wts = convert_weights(wts) + if isempty(_wts) + Base.depwarn("Using `wts` of zero length for unweighted regression is deprecated in favor of " * + "explicitly using `UnitWeights(length(y))`." * + " Proceeding by coercing `wts` to UnitWeights of size $(length(y)).", + :fit) + _wts = uweights(length(y)) + end + if method === :cholesky - fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear), nothing)) + fit!(LinearModel(LmResp(y, _wts), cholpred(X, dropcollinear, _wts), nothing)) elseif method === :qr - fit!(LinearModel(LmResp(y, wts), qrpred(X, dropcollinear), nothing)) + fit!(LinearModel(LmResp(y, _wts), qrpred(X, dropcollinear, _wts), nothing)) else throw(ArgumentError("The only supported values for keyword argument `method` are `:cholesky` and `:qr`.")) end end function fit(::Type{LinearModel}, f::FormulaTerm, data; - wts::Union{AbstractVector{<:Real},Nothing}=nothing, + wts::Union{AbstractWeights,AbstractVector{<:Real}}=uweights(0), dropcollinear::Bool=true, method::Symbol=:qr, contrasts::AbstractDict{Symbol}=Dict{Symbol,Any}()) f, (y, X) = modelframe(f, data, contrasts, LinearModel) - wts === nothing && (wts = similar(y, 0)) - + _wts = convert_weights(wts) + _wts = isempty(_wts) ? uweights(length(y)) : _wts if method === :cholesky - fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear), f)) + fit!(LinearModel(LmResp(y, _wts), cholpred(X, dropcollinear, _wts), f)) elseif method === :qr - fit!(LinearModel(LmResp(y, wts), qrpred(X, dropcollinear), f)) + fit!(LinearModel(LmResp(y, _wts), qrpred(X, dropcollinear, _wts), f)) else throw(ArgumentError("The only supported values for keyword argument `method` are `:cholesky` and `:qr`.")) end @@ -187,45 +242,52 @@ For linear models, the deviance of the null model is equal to the total sum of s """ function nulldeviance(obj::LinearModel) y = obj.rr.y - wts = obj.rr.wts - + wts = obj.pp.wts if hasintercept(obj) - if isempty(wts) - m = mean(y) - else - m = mean(y, weights(wts)) - end + m = mean(y, wts) else m = zero(eltype(y)) end - v = zero(eltype(y))*zero(eltype(wts)) - if isempty(wts) - @inbounds @simd for yi in y - v += abs2(yi - m) + v = zero(eltype(y)) * zero(eltype(wts)) + if wts isa UnitWeights + @inbounds @simd for i in eachindex(y) + v += abs2(y[i] - m) end else @inbounds @simd for i in eachindex(y, wts) - v += abs2(y[i] - m)*wts[i] + v += abs2(y[i] - m) * wts[i] end end return v end +function nullloglikelihood(m::LinearModel) + wts = weights(m) + if wts isa Union{UnitWeights,FrequencyWeights} + n = nobs(m) + -n / 2 * (log(2π * nulldeviance(m) / n) + 1) + else + # N = length(m.rr.y) + # n = sum(log, wts) + # (n - N * (log(2π * nulldeviance(m)/N) + 1))/2 + throw(ArgumentError("The `nullloglikelihood` for probability weighted models is not currently supported.")) + end +end + loglikelihood(obj::LinearModel) = loglikelihood(obj.rr) -function nullloglikelihood(obj::LinearModel) - r = obj.rr - n = isempty(r.wts) ? length(r.y) : sum(r.wts) - return -n/2 * (log(2π * nulldeviance(obj)/n) + 1) +r2(obj::LinearModel) = 1 - deviance(obj) / nulldeviance(obj) +function adjr2(obj::LinearModel) + return 1 - (1 - r²(obj)) * (nobs(obj) - hasintercept(obj)) / dof_residual(obj) end -r2(obj::LinearModel) = 1 - deviance(obj)/nulldeviance(obj) -adjr2(obj::LinearModel) = 1 - (1 - r²(obj))*(nobs(obj)-hasintercept(obj))/dof_residual(obj) +working_residuals(x::LinearModel) = residuals(x) +working_weights(x::LinearModel) = x.pp.wts function dispersion(x::LinearModel, sqr::Bool=false) dofr = dof_residual(x) - ssqr = deviance(x.rr)/dofr + ssqr = deviance(x.rr) / dofr dofr > 0 || return oftype(ssqr, Inf) return sqr ? ssqr : sqrt(ssqr) end @@ -237,14 +299,14 @@ function coeftable(mm::LinearModel; level::Real=0.95) tt = cc ./ se if dofr > 0 p = ccdf.(Ref(FDist(1, dofr)), abs2.(tt)) - ci = se*quantile(TDist(dofr), (1-level)/2) + ci = se * quantile(TDist(dofr), (1 - level) / 2) else p = [isnan(t) ? NaN : 1.0 for t in tt] ci = [isnan(t) ? NaN : -Inf for t in tt] end - levstr = isinteger(level*100) ? string(Integer(level*100)) : string(level*100) + levstr = isinteger(level * 100) ? string(Integer(level * 100)) : string(level * 100) cn = coefnames(mm) - return CoefTable(hcat(cc, se, tt, p, cc+ci, cc-ci), + return CoefTable(hcat(cc, se, tt, p, cc + ci, cc - ci), ["Coef.", "Std. Error", "t", "Pr(>|t|)", "Lower $levstr%", "Upper $levstr%"], cn, 4, 3) @@ -303,21 +365,20 @@ function StatsModels.predict!(res::Union{AbstractVector, prediction, lower, upper = res length(prediction) == length(lower) == length(upper) == size(newx, 1) || throw(DimensionMismatch("length of vectors in `res` must equal the number of rows in `newx`")) - length(mm.rr.wts) == 0 || + isweighted(mm) && error("prediction with confidence intervals not yet implemented for weighted regression") - dev = deviance(mm) dofr = dof_residual(mm) - ret = diag(newx*vcov(mm)*newx') + ret = diag(newx * vcov(mm) * newx') if interval == :prediction - ret .+= dev/dofr + ret .+= dev / dofr elseif interval != :confidence error("only :confidence and :prediction intervals are defined") end - ret .= quantile(TDist(dofr), (1 - level)/2) .* sqrt.(ret) + ret .= quantile(TDist(dofr), (1 - level) / 2) .* sqrt.(ret) prediction .= newx * coef(mm) lower .= prediction .+ ret - upper .= prediction - + ret + upper .= prediction - +ret end return res end @@ -325,7 +386,24 @@ end function confint(obj::LinearModel; level::Real=0.95) return hcat(coef(obj), coef(obj)) + stderror(obj) * - quantile(TDist(dof_residual(obj)), (1.0 - level)/2.0) * [1.0 -1.0] + quantile(TDist(dof_residual(obj)), (1.0 - level) / 2.0) * [1.0 -1.0] +end + +function momentmatrix(m::LinearModel) + X = modelmatrix(m) + r = residuals(m) + mm = X .* r + isweighted(m) && (mm .*= weights(m)) + return mm +end + +invloglikhessian(m::LinearModel) = inverse(m.pp) + +function varstruct(x::LinearModel) + wrkwt = working_weights(x) + wrkres = working_residuals(x) + r = wrkwt .* wrkres + return r, one(eltype(r)) end """ @@ -334,23 +412,12 @@ end Compute [Cook's distance](https://en.wikipedia.org/wiki/Cook%27s_distance) for each observation in linear model `obj`, giving an estimate of the influence of each data point. -Currently only implemented for linear models without weights. """ function StatsBase.cooksdistance(obj::LinearModel) - u = residuals(obj) + u = residuals(obj; weighted=isweighted(obj)) mse = dispersion(obj, true) - k = dof(obj)-1 - d_res = dof_residual(obj) - X = modelmatrix(obj) - XtX = crossmodelmatrix(obj) - k == size(X, 2) || - throw(ArgumentError("Models with collinear terms are not currently supported.")) - wts = obj.rr.wts - if isempty(wts) - hii = diag(X * inv(XtX) * X') - else - throw(ArgumentError("Weighted models are not currently supported.")) - end - D = @. u^2 * (hii / (1 - hii)^2) / (k*mse) + k = dof(obj) - 1 + hii = leverage(obj) + D = @. u^2 * (hii / (1 - hii)^2) / (k * mse) return D end diff --git a/src/negbinfit.jl b/src/negbinfit.jl index 1abee2ec..2a0a26e8 100644 --- a/src/negbinfit.jl +++ b/src/negbinfit.jl @@ -1,23 +1,28 @@ -function mle_for_θ(y::AbstractVector, μ::AbstractVector, wts::AbstractVector; +function mle_for_θ(y::AbstractVector, μ::AbstractVector, wts::AbstractWeights; maxiter=30, tol=1.e-6) function first_derivative(θ::Real) - tmp(yi, μi) = (yi+θ)/(μi+θ) + log(μi+θ) - 1 - log(θ) - digamma(θ+yi) + digamma(θ) - return unit_weights ? sum(tmp(yi, μi) for (yi, μi) in zip(y, μ)) : + function tmp(yi, μi) + return (yi + θ) / (μi + θ) + log(μi + θ) - 1 - log(θ) - digamma(θ + yi) + + digamma(θ) + end + return wts isa UnitWeights ? sum(tmp(yi, μi) for (yi, μi) in zip(y, μ)) : sum(wti * tmp(yi, μi) for (wti, yi, μi) in zip(wts, y, μ)) end function second_derivative(θ::Real) - tmp(yi, μi) = -(yi+θ)/(μi+θ)^2 + 2/(μi+θ) - 1/θ - trigamma(θ+yi) + trigamma(θ) - return unit_weights ? sum(tmp(yi, μi) for (yi, μi) in zip(y, μ)) : + function tmp(yi, μi) + return -(yi + θ) / (μi + θ)^2 + 2 / (μi + θ) - 1 / θ - trigamma(θ + yi) + + trigamma(θ) + end + return wts isa UnitWeights ? sum(tmp(yi, μi) for (yi, μi) in zip(y, μ)) : sum(wti * tmp(yi, μi) for (wti, yi, μi) in zip(wts, y, μ)) end - unit_weights = length(wts) == 0 - if unit_weights + if wts isa UnitWeights n = length(y) - θ = n / sum((yi/μi - 1)^2 for (yi, μi) in zip(y, μ)) + θ = n / sum((yi / μi - 1)^2 for (yi, μi) in zip(y, μ)) else n = sum(wts) - θ = n / sum(wti * (yi/μi - 1)^2 for (wti, yi, μi) in zip(wts, y, μ)) + θ = n / sum(wti * (yi / μi - 1)^2 for (wti, yi, μi) in zip(wts, y, μ)) end δ, converged = one(θ), false @@ -69,7 +74,7 @@ In both cases, `link` may specify the link function function negbin(F, D, args...; - wts::Union{Nothing,AbstractVector}=nothing, + wts::Union{Nothing,AbstractWeights}=nothing, initialθ::Real=Inf, dropcollinear::Bool=true, method::Symbol=:qr, @@ -107,7 +112,7 @@ function negbin(F, wts = regmodel.rr.wts lw, ly = length(wts), length(y) if lw != ly && lw != 0 - throw(ArgumentError("length of wts must be either $ly or 0 but was $lw")) + throw(ArgumentError("length of `wts` must be $ly but was $lw")) end θ = mle_for_θ(y, μ, wts; maxiter=maxiter, tol=rtol) @@ -118,11 +123,11 @@ function negbin(F, converged = false for i in 1:maxiter - if abs(ll0 - ll)/d + abs(δ) <= rtol + if abs(ll0 - ll) / d + abs(δ) <= rtol converged = true break end - @debug "NegativeBinomial dispersion optimization" iteration=i θ=θ + @debug "NegativeBinomial dispersion optimization" iteration = i θ = θ regmodel = glm(F, D, NegativeBinomial(θ), args...; dropcollinear=dropcollinear, method=method, maxiter=maxiter, atol=atol, rtol=rtol, kwargs...) diff --git a/test/analytic_weights.jl b/test/analytic_weights.jl new file mode 100644 index 00000000..2c01636a --- /dev/null +++ b/test/analytic_weights.jl @@ -0,0 +1,843 @@ +rng = StableRNG(123) + +x1 = rand(rng, 25) +x2 = ifelse.(randn(rng, 25) .> 0, 1, 0) +y = ifelse.(0.004 .- 0.01 .* x1 .+ 1.5 .* x2 .+ randn(rng, 25) .> 0, 1, 0) +w = rand(rng, 25) * 6 +w = floor.(w) .+ 1 +df = DataFrame(; y=y, x1=x1, x2=x2, w=w) + +clotting = DataFrame(; u=log.([5, 10, 15, 20, 30, 40, 60, 80, 100]), + lot1=[118, 58, 42, 35, 27, 25, 21, 19, 18], + w=[1.5, 2.0, 1.1, 4.5, 2.4, 3.5, 5.6, 5.4, 6.7]) + +quine.aweights = log.(3 .+ 3 .* quine.Days) + +dobson = DataFrame(; Counts=[18.0, 17, 15, 20, 10, 20, 25, 13, 12], + Outcome=categorical(repeat(string.('A':'C'); outer=3)), + Treatment=categorical(repeat(string.('a':'c'); inner=3)), + w=[1, 2, 1, 2, 3, 4, 3, 2, 1]) + +itr = Iterators.product((:qr, :cholesky), (true, false)) + +@testset "Linear model ftest with $dmethod method with dropcollinear=$drop" for (dmethod, + drop) in + itr + + model_0 = lm(@formula(y ~ x1), df; wts=aweights(df.w), method=dmethod, + dropcollinear=drop) + model_1 = lm(@formula(y ~ x1 + x2), df; wts=aweights(df.w), + method=dmethod, dropcollinear=drop) + X = hcat(ones(length(df.y)), df.x1, df.x2) + model_2 = lm(X, y; wts=aweights(df.w), method=dmethod, dropcollinear=drop) + @test ftest(model_1).fstat ≈ 1.551275 rtol = 1e-05 + @test ftest(model_2) === ftest(model_1) + @test ftest(model_0, model_1).fstat[2] ≈ 1.7860438 rtol = 1e-05 + @test ftest(model_0, model_2).fstat[2] ≈ 1.7860438 rtol = 1e-05 +end + +@testset "GLM: Binomial with LogitLink link - AnalyticWeights with $dmethod method" for (dmethod, + drop) in + itr + + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), LogitLink(), wts=aweights(df.w), + method=dmethod, dropcollinear=drop, atol=1e-08, rtol=1e-08) + @test deviance(model) ≈ 39.58120350785813 rtol = 1e-06 + @test coef(model) ≈ [0.6333582770515337, 1.8861277804531265, 18.61281712203539] rtol = 1e-06 + @test stderror(model) ≈ [0.9021013750843575, 2.063002891039618, 2337.217357530545] rtol = 1e-07 + @test_throws ArgumentError loglikelihood(model) + @test_throws ArgumentError nullloglikelihood(model) + @test_throws ArgumentError aic(model) + @test_throws ArgumentError bic(model) + @test cooksdistance(model) ≈ [2.672806e-01; 2.516262e-02; 1.367146e-01; 3.382852e-02; + 3.708084e-10; 2.144406e-02; 2.290360e-01; 2.113025e-02; 9.451432e-03; + 2.145286e-03; 3.021442e-10; 2.085282e-10; 3.571678e-12; 2.593863e-11; + 1.727869e-10; 3.657217e-10; 7.122122e-02; 6.682159e-02; 2.762688e+00; + 2.051375e-01; 9.093471e-10; 2.544133e-10; 2.780846e-10; 2.258433e-10; + 3.242331e-02] rtol = 1e-04 + @test GLM.momentmatrix(model) ≈ [1.095702695280752 0.1983501744547734 0.0; + 0.6292210259386299 0.2312324009639611 0.0; + -0.869357286789858 -0.5816508224007081 -0.0; + 0.3287258318630744 0.0140466407925222 0.0; + 2.08484144507e-8 9.1314170785019e-9 2.085e-8; + 0.5274699949608909 0.2549210423027525 0.0; + -0.8883810924640739 -0.678699816121261 -0.0; + 0.169878909537636 0.15705018022658265 0.0; + 0.3346238606654708 0.1723463870596023 0.0; + 0.17263445723951362 0.085461258206114 0.0; + 1.90303118479143e-8 1.3346718776025414e-8 1.90e-8; + 1.60927355387381e-8 1.1161427644959331e-8 1.61e-8; + 2.2803477805569965e-9 1.9982067289774133e-9 2.28e-9; + 6.018177387787048e-9 5.682479294453922e-9 6.02e-9; + 1.4765287153134531e-8 1.0914686191045351e-8 1.48e-8; + 2.0721163707495327e-8 1.1594416669570361e-8 2.07e-8; + 0.8473483434776684 0.4295002258959193 0.0; + 0.5426271690329769 0.35055009398999054 0.0; + -4.541414638754089 -1.2097095921684677 -0.0; + 1.2385948537889822 0.31353946330992544 0.0; + 3.067144219729067e-8 1.078462894246146e-8 3.07e-8; + 1.7613428517425065e-8 9.289130102493344e-9 1.76e-8; + 1.8334365002191494e-8 1.3220774075528698e-8 1.83e-8; + 1.6687848932140258e-8 3.1458514759844027e-9 1.67e-8; + 0.4123258762224241 0.2630623634882926 0.0] rtol = 1e-07 +end + +@testset "GLM: Binomial with ProbitLink link - AnalyticWeights with $dmethod method" for (dmethod, + drop) in + itr + + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), ProbitLink(), + wts=aweights(df.w), method=dmethod, dropcollinear=drop, rtol=1e-09) + @test deviance(model) ≈ 39.595360462143866 rtol = 1e-06 + @test coef(model) ≈ [0.42120722997197313, 1.0416447141541567, 4.916910225354065] rtol = 1e-07 + @test stderror(model) ≈ [0.5216506352923727, 1.1455457218079563, 325.2782732702344] rtol = 1e-07 + @test_throws ArgumentError loglikelihood(model) + @test_throws ArgumentError nullloglikelihood(model) + @test_throws ArgumentError aic(model) + @test_throws ArgumentError bic(model) + @test GLM.momentmatrix(model) ≈ [1.8176341588673794 0.32903820904987535 0.0; + 1.0975399310212473 0.40333489019264895 0.0; + -1.6205390909958874 -1.0842353418245372 -0.0; + 0.5269343763678408 0.02251620404797942 0.0; + 2.1411260807372573e-7 9.377938695085409e-8 2.14e-7; + 0.9490575664910063 0.45867015444762754 0.0; + -1.7015508605205314 -1.2999401562600912 -0.0; + 0.33388087355781 0.3086672236664311 0.0; + 0.6070565699014323 0.31266152495891864 0.0; + 0.3115689943654785 0.15423965008066182 0.0; + 6.62696579317951e-8 4.647756142241091e-8 6.63e-8; + 5.7919749595281985e-8 4.0171361342870654e-8 5.79e-8; + 3.7134640590626956e-9 3.2540075395089014e-9 3.71e-9; + 7.229998935730756e-9 6.826704599068171e-9 7.23e-9; + 4.373923181354305e-8 3.233259092972413e-8 4.37e-8; + 1.3039185369174096e-7 7.296006649823636e-8 1.30e-7; + 1.5339832954887218 0.7775387501543354 0.0; + 1.016200760890786 0.6564899300523522 0.0; + -7.736929921295398 -2.060908127581581 -0.0; + 2.0943898311662204 0.5301764831468441 0.0; + 4.4180333118496e-7 1.553459717259102e-7 4.42e-7; + 1.2636718773015955e-7 6.664467660855266e-8 1.26e-7; + 5.869289625690482e-8 4.232301043195289e-8 5.86e-8; + 4.453972209837739e-7 8.396249934481249e-8 4.45e-7; + 0.7707735136764122 0.49175061259680825 0.0] rtol = 1e-07 +end + +@testset "GLM: Binomial with CauchitLink link - AnalyticWeights - method $dmethod" for (dmethod, + drop) in + itr + + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), CauchitLink(), + wts=aweights(df.w), + method=dmethod, dropcollinear=drop, rtol=1e-08, atol=1e-08) + @test deviance(model) ≈ 39.627559015619845 rtol = 1e-07 + @test_throws ArgumentError loglikelihood(model) + @test_throws ArgumentError nullloglikelihood(model) + @test_throws ArgumentError aic(model) + @test_throws ArgumentError bic(model) + @test GLM.momentmatrix(model) ≈ [1.003054020887253 0.1815783979426737 0.0; + 0.4264622162277366 0.15672057689370572 0.0; + -0.41221991029044563 -0.27579920646405165 -0.0; + 0.39009720364187195 0.016669074233287458 0.0; + 0.0 0.0 0.0; + 0.311923278855423 0.15074944190941555 0.0; + -0.37849361968132017 -0.28915918208960645 -0.0; + 0.08167834727345773 0.07551025135974303 0.0; + 0.1919243490466221 0.0988496997230555 0.0; + 0.10090666946769812 0.049953010957329104 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.4897032828746528 0.2482186602895976 0.0; + 0.28232074585439737 0.18238593576314036 0.0; + -3.7015013060867705 -0.985979478109429 -0.0; + 0.9986020018483154 0.25278737010890295 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.0 4.7109001281144596e-17 0.0; + 0.21554272008110664 0.1375154474822352 0.0] rtol = 1e-07 +end + +@testset "GLM: Binomial with CloglogLink link - AnalyticWeights with $dmethod method" for (dmethod, + drop) in + itr + + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), CloglogLink(), + wts=aweights(df.w), + method=dmethod, dropcollinear=drop, rtol=5e-10, atol=1e-10) + @test deviance(model) ≈ 39.61484762863061 rtol = 1e-07 + @test coef(model) ≈ [0.12095167614339054, 0.8666201161364425, 2.71457411130009] rtol = 1e-07 + @test stderror(model) ≈ [0.46442064138194333, 0.9661962332997427, 462.67067410332123] rtol = 1e-07 + @test_throws ArgumentError loglikelihood(model) + @test_throws ArgumentError nullloglikelihood(model) + @test_throws ArgumentError aic(model) + @test_throws ArgumentError bic(model) + @test GLM.momentmatrix(model) ≈ [1.9242952153533148 0.3483465846271526 0.0; + 1.2514530854268051 0.45989642702311906 0.0; + -2.0153062620933504 -1.348357645985017 -0.0; + 0.5261952160086218 0.02248461930760421 0.0; + 1.5917320997016487e-7 6.971642718424943e-8 1.5e-7; + 1.1286597324859617 0.5454701085543264 0.0; + -2.188084897309478 -1.6716393787069568 -0.0; + 0.44263603677181956 0.4092095336554625 0.0; + 0.7298024393361812 0.3758811862272388 0.0; + 0.37203439025955143 0.1841725435114846 0.0; + 1.79124103038821e-9 1.2562689715087133e-9 0.0; + 1.7546756596715808e-9 1.216989204144432e-9 0.0; + 5.7157964501561416e-12 5.00859694540867e-12 0.0; + 3.1204806380028555e-12 2.9464180717205265e-12 0.0; + 6.677005273849182e-10 4.93572637661486e-10 6.7e-10; + 2.403535247525871e-8 1.3448853323683519e-8 2.4e-8; + 1.839078160139044 0.9321839020515984 0.0; + 1.2724386238625014 0.8220257013418844 0.0; + -8.529708800751662 -2.2720829026754306 -0.0; + 2.2835873542705203 0.5780701827470168 0.0; + 7.796454326518419e-7 2.7413731130574843e-7 7.79e-7; + 3.441301868580375e-8 1.8149050735676883e-8 3.44e-8; + 1.181434863206281e-9 8.519238822580661e-10 1.18e-9; + 3.115862778487023e-6 5.873759740112369e-7 3.11e-6; + 0.9629196988432743 0.6143391585021523 0.0] rtol = 1e-05 +end + +@testset "GLM: Gamma with InverseLink link - AnalyticWeights with $dmethod method" for (dmethod, + drop) in + itr + + model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), InverseLink(), + wts=aweights(clotting.w), method=dmethod, dropcollinear=drop, atol=1e-07, + rtol=1e-08) + @test deviance(model) ≈ 0.03933389380881642 rtol = 1e-07 + @test loglikelihood(model) ≈ -43.359078787690514 rtol = 1e-07 + @test coef(model) ≈ [-0.017217012596343607, 0.015649040406186487] rtol = 1e-07 + @test stderror(model) ≈ [0.0009144223353860925, 0.0003450913537314497] rtol = 1e-04 + @test aic(model) ≈ 92.71815757538103 rtol = 1e-07 + @test bic(model) ≈ 93.30983130738969 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [1900.1063511093867 3058.103199132267; + -1643.317155973023 -3783.877586404854; + -420.13783432322964 -1137.7543467296691; + -981.2887166533023 -2939.6782781526754; + 313.30087123532877 1065.5981029180723; + -186.60227446859759 -688.353296378139; + 324.34628373045786 1327.9854430687467; + 430.8197010892654 1887.863404915401; + 262.77277766267576 1210.113361381432] rtol = 1e-07 +end + +@testset "GLM: Gamma with IdentityLink link - AnalyticWeights with $dmethod method" for (dmethod, + drop) in + itr + + model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), IdentityLink(), + wts=aweights(clotting.w), method=dmethod, dropcollinear=drop, + rtol=1e-10, atol=1e-10, minstepfac=0.00001) + @test deviance(model) ≈ 1.3435348802929383 rtol = 1e-07 + @test loglikelihood(model) ≈ -101.19916126647321 rtol = 1e-07 + @test coef(model) ≈ [86.45700434128152, -15.320695650698417] rtol = 1e-05 + @test stderror(model) ≈ [16.07962739541372, 3.766841480457265] rtol = 1e-05 + @test GLM.aic(model) ≈ 208.39832253294642 rtol = 1e-07 + @test GLM.bic(model) ≈ 208.9899962649551 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [0.26061914480947884 0.4194503323625281; + 0.06148544891860896 0.14157547811603585; + -0.019061929106842457 -0.051620660951180786; + -0.1795782998461795 -0.5379685084791557; + -0.1764962075232437 -0.6002984389013568; + -0.2277661940139623 -0.8402020334398342; + -0.3204523427685144 -1.3120423070655995; + -0.054878647210950426 -0.2404796937532563; + 0.6561290267416002 3.0215858321118008] rtol = 1e-04 +end + +@testset "GLM: Gamma with LogLink link - AnalyticWeights with $dmethod method" for (dmethod, + drop) in + itr + + model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), LogLink(), + wts=aweights(clotting.w), method=dmethod, dropcollinear=drop, atol=1e-09, + rtol=1e-09) + @test deviance(model) ≈ 0.41206342934199663 rtol = 1e-07 + @test loglikelihood(model) ≈ -81.79777246247532 rtol = 1e-07 + @test coef(model) ≈ [5.325107090308856, -0.5495682740033511] rtol = 1e-07 + @test stderror(model) ≈ [0.20287310816341905, 0.053062600599660774] rtol = 1e-07 + @test GLM.aic(model) ≈ 169.59554492495064 rtol = 1e-07 + @test GLM.bic(model) ≈ 170.18721865695932 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [14.39716447431257 23.171342336508012; + 0.0374983950207553 0.0863432453859933; + -2.5490869750808054 -6.903055495494598; + -12.821435846444906 -38.40958915849704; + -8.713283462827741 -29.635596899449876; + -6.520303896525519 -24.05261507847203; + -4.123729229896082 -16.88396834850135; + 3.70269025008355 16.225287295813413; + 16.590486289982852 76.40201283367323] rtol = 1e-07 +end + +@testset "GLM: Gamma with InverseLink link - AnalyticWeights with $dmethod method" for (dmethod, + drop) in + itr + + model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), InverseLink(), + wts=aweights(clotting.w), method=dmethod, dropcollinear=drop, atol=1e-09, + rtol=1e-09) + @test deviance(model) ≈ 0.03933389380881642 rtol = 1e-07 + @test loglikelihood(model) ≈ -43.359078787690514 rtol = 1e-07 + @test coef(model) ≈ [-0.017217012596343607, 0.015649040406186487] rtol = 1e-07 + @test stderror(model) ≈ [0.0009144223353860925, 0.0003450913537314497] rtol = 1e-07 + @test GLM.aic(model) ≈ 92.71815757538103 rtol = 1e-07 + @test GLM.bic(model) ≈ 93.30983130738969 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [1900.1063511093867 3058.103199132267; + -1643.317155973023 -3783.877586404854; + -420.13783432322964 -1137.7543467296691; + -981.2887166533023 -2939.6782781526754; + 313.30087123532877 1065.5981029180723; + -186.60227446859759 -688.353296378139; + 324.34628373045786 1327.9854430687467; + 430.8197010892654 1887.863404915401; + 262.77277766267576 1210.113361381432] rtol = 1e-07 +end + +@testset "GLM: InverseGaussian with InverseSquareLink link - AnalyticWeights with $dmethod method" for (dmethod, + drop) in + itr + + model = glm(@formula(lot1 ~ 1 + u), clotting, InverseGaussian(), InverseSquareLink(), + wts=aweights(clotting.w), method=dmethod, dropcollinear=drop, atol=1e-09, + rtol=1e-09) + @test deviance(model) ≈ 0.021377370485120707 rtol = 1e-07 + @test loglikelihood(model) ≈ -86.82546665077861 rtol = 1e-07 + @test coef(model) ≈ [-0.0012633718975150973, 0.0008126490405747128] rtol = 1e-07 + @test stderror(model) ≈ [0.00016779409928094252, 9.025235597677238e-5] rtol = 1e-07 + @test GLM.aic(model) ≈ 179.65093330155722 rtol = 1e-07 + @test GLM.bic(model) ≈ 180.2426070335659 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [28815.030725087538 46376.00289690935; + -21039.070620903 -48444.250382140235; + -6195.618377983015 -16778.045594449453; + -15686.073415243622 -46991.276375382586; + -1716.0787284468345 -5836.722477919495; + -2086.203482054124 -7695.75316205041; + 3418.087237993986 13994.826896081435; + 6065.271775021221 26578.18246467872; + 8424.676595366931 38797.069483575455] rtol = 1e-06 +end + +@testset "GLM: NegativeBinomial with LogLink link - AnalyticWeights - method: dmethod" for (dmethod, + drop) in + itr + + model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2), + LogLink(), wts=aweights(quine.aweights), method=dmethod, + dropcollinear=drop, atol=1e-08, rtol=1e-08) + @test deviance(model) ≈ 624.7631999565588 rtol = 1e-07 + @test loglikelihood(model) ≈ -2004.5939464322778 rtol = 1e-07 + @test coef(model) ≈ [3.02411915515531, -0.4641576651688563, 0.0718560942992554, + -0.47848540911607984, 0.09677889908013552, 0.3562972562034356, + 0.3480161821981514] rtol = 1e-07 + @test stderror(model) ≈ [0.1950707397084349, 0.13200639191036218, 0.1373161597645507, + 0.2088476016141468, 0.20252412726336674, 0.21060778935484836, + 0.16126722793064027] rtol = 1e-07 + ## Tests below are broken because dof(model)==8 instead of 7 + @test_broken aic(model) ≈ 4023.1878928645556 rtol = 1e-07 + @test_broken bic(model) ≈ 4044.073139216514 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ + [-3.866780529709063 -0.0 -3.866780529709063 -0.0 -0.0 -0.0 -3.866780529709063 + -4.370085797122667 -0.0 -4.370085797122667 -0.0 -0.0 -0.0 -4.370085797122667 + -3.956562495375882 -0.0 -3.956562495375882 -0.0 -0.0 -0.0 -3.956562495375882 + -4.102299119258251 -0.0 -4.102299119258251 -0.0 -0.0 -0.0 -0.0 + -4.102299119258251 -0.0 -4.102299119258251 -0.0 -0.0 -0.0 -0.0 + -2.8243330916399567 -0.0 -2.8243330916399567 -0.0 -0.0 -0.0 -0.0 + -0.7247974261272416 -0.0 -0.7247974261272416 -0.0 -0.0 -0.0 -0.0 + -0.0382123316932152 -0.0 -0.0382123316932152 -0.0 -0.0 -0.0 -0.0 + -3.813241073891047 -0.0 -3.813241073891047 -3.813241073891047 -0.0 -0.0 -3.813241073891047 + -3.813241073891047 -0.0 -3.813241073891047 -3.813241073891047 -0.0 -0.0 -3.813241073891047 + -1.593192001014045 -0.0 -1.593192001014045 -1.593192001014045 -0.0 -0.0 -1.593192001014045 + -2.7127578570401822 -0.0 -2.7127578570401822 -2.7127578570401822 -0.0 -0.0 -0.0 + 0.14484002662039835 0.0 0.14484002662039835 0.14484002662039835 0.0 0.0 0.0 + -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331 + -0.6279394841753847 -0.0 -0.6279394841753847 -0.0 -0.6279394841753847 -0.0 -0.6279394841753847 + 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412 + 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628 + -2.991376095035898 -0.0 -2.991376095035898 -0.0 -2.991376095035898 -0.0 -0.0 + -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0 + -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0 + -2.226530220466526 -0.0 -2.226530220466526 -0.0 -2.226530220466526 -0.0 -0.0 + 5.713017320814697 0.0 5.713017320814697 0.0 5.713017320814697 0.0 0.0 + 6.908456992944485 0.0 6.908456992944485 0.0 6.908456992944485 0.0 0.0 + 8.12839634400043 0.0 8.12839634400043 0.0 8.12839634400043 0.0 0.0 + -4.628254089687799 -0.0 -4.628254089687799 -0.0 -0.0 -4.628254089687799 -0.0 + -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0 + -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0 + -0.9503472567532946 -0.0 -0.9503472567532946 -0.0 -0.0 -0.9503472567532946 -0.0 + 0.6731546773300909 0.0 0.6731546773300909 0.0 0.0 0.6731546773300909 0.0 + 1.2423198758199778 0.0 1.2423198758199778 0.0 0.0 1.2423198758199778 0.0 + 1.8236065476231822 0.0 1.8236065476231822 0.0 0.0 1.8236065476231822 0.0 + -4.171836641319677 -0.0 -0.0 -0.0 -0.0 -0.0 -4.171836641319677 + -3.9882995353410657 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 + -3.0399730926465205 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 + 1.309672612863431 0.0 0.0 0.0 0.0 0.0 0.0 + 10.661189363296968 0.0 0.0 0.0 0.0 0.0 0.0 + -3.7634043246260425 -0.0 -0.0 -3.7634043246260425 -0.0 -0.0 -3.7634043246260425 + -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 + -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 + -3.0720683496525485 -0.0 -0.0 -3.0720683496525485 -0.0 -0.0 -3.0720683496525485 + -1.885334027349047 -0.0 -0.0 -1.885334027349047 -0.0 -0.0 -1.885334027349047 + 2.106807550276347 0.0 0.0 2.106807550276347 0.0 0.0 2.106807550276347 + 3.0150038937286183 0.0 0.0 3.0150038937286183 0.0 0.0 3.0150038937286183 + 6.387064937752826 0.0 0.0 6.387064937752826 0.0 0.0 6.387064937752826 + 17.72394862307137 0.0 0.0 17.72394862307137 0.0 0.0 17.72394862307137 + 18.296957173355864 0.0 0.0 18.296957173355864 0.0 0.0 18.296957173355864 + -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0 + -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0 + -0.8508688349707806 -0.0 -0.0 -0.8508688349707806 -0.0 -0.0 -0.0 + 2.2977798382338515 0.0 0.0 2.2977798382338515 0.0 0.0 0.0 + 3.4686807301080997 0.0 0.0 3.4686807301080997 0.0 0.0 0.0 + -4.658715933989554 -0.0 -0.0 -0.0 -4.658715933989554 -0.0 -4.658715933989554 + -4.187227633826471 -0.0 -0.0 -0.0 -4.187227633826471 -0.0 -4.187227633826471 + -4.04126740785402 -0.0 -0.0 -0.0 -4.04126740785402 -0.0 -4.04126740785402 + -2.940568463040927 -0.0 -0.0 -0.0 -2.940568463040927 -0.0 -2.940568463040927 + 4.342318636532548 0.0 0.0 0.0 4.342318636532548 0.0 4.342318636532548 + 4.653011109293142 0.0 0.0 0.0 4.653011109293142 0.0 4.653011109293142 + 8.523536317826032 0.0 0.0 0.0 8.523536317826032 0.0 8.523536317826032 + 15.787943104351504 0.0 0.0 0.0 15.787943104351504 0.0 15.787943104351504 + -3.6818016272511183 -0.0 -0.0 -0.0 -3.6818016272511183 -0.0 -0.0 + -2.057196136670586 -0.0 -0.0 -0.0 -0.0 -2.057196136670586 -0.0 + -3.834339745304657 -0.0 -0.0 -0.0 -0.0 -3.834339745304657 -0.0 + -4.1780090350069425 -0.0 -0.0 -0.0 -0.0 -4.1780090350069425 -0.0 + -4.491340364181187 -0.0 -0.0 -0.0 -0.0 -4.491340364181187 -0.0 + -4.3190736545666875 -0.0 -0.0 -0.0 -0.0 -4.3190736545666875 -0.0 + -3.731819061288569 -0.0 -0.0 -0.0 -0.0 -3.731819061288569 -0.0 + -2.238272513055515 -0.0 -0.0 -0.0 -0.0 -2.238272513055515 -0.0 + 1.9859737921268132 0.0 0.0 0.0 0.0 1.9859737921268132 0.0 + 3.2559592797891495 0.0 0.0 0.0 0.0 3.2559592797891495 0.0 + -3.8426774654770597 -3.8426774654770597 -3.8426774654770597 -0.0 -0.0 -0.0 -3.8426774654770597 + -0.9876822943882244 -0.9876822943882244 -0.9876822943882244 -0.0 -0.0 -0.0 -0.9876822943882244 + 23.20842027925341 23.20842027925341 23.20842027925341 0.0 0.0 0.0 23.20842027925341 + -1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0 + -1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0 + -3.2888901738202923 -3.2888901738202923 -3.2888901738202923 -0.0 -0.0 -0.0 -0.0 + -2.758113321833414 -2.758113321833414 -2.758113321833414 -0.0 -0.0 -0.0 -0.0 + -1.306843142455193 -1.306843142455193 -1.306843142455193 -0.0 -0.0 -0.0 -0.0 + -0.8751747276035264 -0.8751747276035264 -0.8751747276035264 -0.0 -0.0 -0.0 -0.0 + -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644 + -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644 + -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847 + -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847 + -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847 + -0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.0 -0.0 -0.6051770620747217 + 2.697606708942873 2.697606708942873 2.697606708942873 2.697606708942873 0.0 0.0 2.697606708942873 + -2.628558928820721 -2.628558928820721 -2.628558928820721 -2.628558928820721 -0.0 -0.0 -0.0 + -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -0.0 -0.0 -0.0 + 0.11268811798135936 0.11268811798135936 0.11268811798135936 0.0 0.11268811798135936 0.0 0.11268811798135936 + 3.1826005245112854 3.1826005245112854 3.1826005245112854 0.0 3.1826005245112854 0.0 3.1826005245112854 + 5.692953263520725 5.692953263520725 5.692953263520725 0.0 5.692953263520725 0.0 5.692953263520725 + -2.7839804243079254 -2.7839804243079254 -2.7839804243079254 -0.0 -2.7839804243079254 -0.0 -0.0 + -1.9433894208611948 -1.9433894208611948 -1.9433894208611948 -0.0 -1.9433894208611948 -0.0 -0.0 + -2.962526696741388 -2.962526696741388 -2.962526696741388 -0.0 -2.962526696741388 -0.0 -0.0 + -3.4432739212266052 -3.4432739212266052 -3.4432739212266052 -0.0 -3.4432739212266052 -0.0 -0.0 + -3.0516553688541084 -3.0516553688541084 -3.0516553688541084 -0.0 -3.0516553688541084 -0.0 -0.0 + 0.3128048727055356 0.3128048727055356 0.3128048727055356 0.0 0.3128048727055356 0.0 0.0 + 5.983398649554576 5.983398649554576 5.983398649554576 0.0 5.983398649554576 0.0 0.0 + -1.9961184031161041 -1.9961184031161041 -1.9961184031161041 -0.0 -0.0 -1.9961184031161041 -0.0 + 4.212201806010905 4.212201806010905 4.212201806010905 0.0 0.0 4.212201806010905 0.0 + -3.152192412974143 -3.152192412974143 -3.152192412974143 -0.0 -0.0 -3.152192412974143 -0.0 + -2.03792823060008 -2.03792823060008 -2.03792823060008 -0.0 -0.0 -2.03792823060008 -0.0 + 2.9007973162738843 2.9007973162738843 2.9007973162738843 0.0 0.0 2.9007973162738843 0.0 + 9.364366020386104 9.364366020386104 9.364366020386104 0.0 0.0 9.364366020386104 0.0 + 24.059031354439128 24.059031354439128 24.059031354439128 0.0 0.0 24.059031354439128 0.0 + 2.864621620127876 2.864621620127876 0.0 0.0 0.0 0.0 2.864621620127876 + -1.374372490365048 -1.374372490365048 -0.0 -0.0 -0.0 -0.0 -0.0 + -0.9287032240778311 -0.9287032240778311 -0.0 -0.0 -0.0 -0.0 -0.0 + 3.919550403175515 3.919550403175515 0.0 0.0 0.0 0.0 0.0 + 12.426707944681816 12.426707944681816 0.0 0.0 0.0 0.0 0.0 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617 + -1.8681224147832116 -1.8681224147832116 -0.0 -1.8681224147832116 -0.0 -0.0 -1.8681224147832116 + -2.778411659017331 -2.778411659017331 -0.0 -2.778411659017331 -0.0 -0.0 -2.778411659017331 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617 + -0.18952790670731487 -0.18952790670731487 -0.0 -0.18952790670731487 -0.0 -0.0 -0.18952790670731487 + 2.1145280030507307 2.1145280030507307 0.0 2.1145280030507307 0.0 0.0 2.1145280030507307 + -1.7407825357737137 -1.7407825357737137 -0.0 -1.7407825357737137 -0.0 -0.0 -0.0 + 4.548120970699322 4.548120970699322 0.0 4.548120970699322 0.0 0.0 0.0 + -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0 + -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0 + -0.6449075179371568 -0.6449075179371568 -0.0 -0.6449075179371568 -0.0 -0.0 -0.0 + 17.819813171012125 17.819813171012125 0.0 17.819813171012125 0.0 0.0 0.0 + -1.999110422648601 -1.999110422648601 -0.0 -0.0 -1.999110422648601 -0.0 -1.999110422648601 + -3.9564518053768536 -3.9564518053768536 -0.0 -0.0 -3.9564518053768536 -0.0 -3.9564518053768536 + -2.1216196203872557 -2.1216196203872557 -0.0 -0.0 -2.1216196203872557 -0.0 -2.1216196203872557 + -3.601990642806918 -3.601990642806918 -0.0 -0.0 -3.601990642806918 -0.0 -3.601990642806918 + -3.601990642806918 -3.601990642806918 -0.0 -0.0 -3.601990642806918 -0.0 -3.601990642806918 + -3.8495441274063715 -3.8495441274063715 -0.0 -0.0 -3.8495441274063715 -0.0 -3.8495441274063715 + -3.6199500530041027 -3.6199500530041027 -0.0 -0.0 -3.6199500530041027 -0.0 -3.6199500530041027 + -3.209822061567088 -3.209822061567088 -0.0 -0.0 -3.209822061567088 -0.0 -3.209822061567088 + -2.702521155801149 -2.702521155801149 -0.0 -0.0 -2.702521155801149 -0.0 -2.702521155801149 + -2.921923505820458 -2.921923505820458 -0.0 -0.0 -2.921923505820458 -0.0 -0.0 + -3.058405902935942 -3.058405902935942 -0.0 -0.0 -0.0 -3.058405902935942 -0.0 + -3.1473667781351766 -3.1473667781351766 -0.0 -0.0 -0.0 -3.1473667781351766 -0.0 + 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 1.4593378269316923 0.0 + -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 -3.7560337640183694 -0.0 + -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 -3.7560337640183694 -0.0 + -3.8041614268127484 -3.8041614268127484 -0.0 -0.0 -0.0 -3.8041614268127484 -0.0 + -1.3131162740760067 -1.3131162740760067 -0.0 -0.0 -0.0 -1.3131162740760067 -0.0 + -0.18645252170591944 -0.18645252170591944 -0.0 -0.0 -0.0 -0.18645252170591944 -0.0 + 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 1.4593378269316923 0.0 + 8.572921389223637 8.572921389223637 0.0 0.0 0.0 8.572921389223637 0.0] rtol = 1e-04 +end + +@testset "GLM: NegativeBinomial with LogLink link - AnalyticWeights with $dmethod method" for (dmethod, + drop) in + itr + + model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, + NegativeBinomial(2), LogLink(), wts=aweights(quine.aweights), + method=dmethod, dropcollinear=drop, rtol=1e-08, atol=1e-08) + @test deviance(model) ≈ 624.7631999565588 rtol = 1e-07 + @test loglikelihood(model) ≈ -2004.5939464322778 rtol = 1e-07 + @test coef(model) ≈ [3.02411915515531, -0.4641576651688563, 0.0718560942992554, + -0.47848540911607984, 0.09677889908013552, 0.3562972562034356, + 0.3480161821981514] rtol = 1e-07 + @test stderror(model) ≈ [0.1950707397084349, 0.13200639191036218, 0.1373161597645507, + 0.2088476016141468, 0.20252412726336674, 0.21060778935484836, + 0.16126722793064027] rtol = 1e-07 + @test_broken GLM.aic(model) ≈ 4023.1878928645556 rtol = 1e-07 + @test_broken GLM.bic(model) ≈ 4044.073139216514 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ + [-3.866780529709063 -0.0 -3.866780529709063 -0.0 -0.0 -0.0 -3.866780529709063 + -4.370085797122667 -0.0 -4.370085797122667 -0.0 -0.0 -0.0 -4.370085797122667 + -3.956562495375882 -0.0 -3.956562495375882 -0.0 -0.0 -0.0 -3.956562495375882 + -4.102299119258251 -0.0 -4.102299119258251 -0.0 -0.0 -0.0 -0.0 + -4.102299119258251 -0.0 -4.102299119258251 -0.0 -0.0 -0.0 -0.0 + -2.8243330916399567 -0.0 -2.8243330916399567 -0.0 -0.0 -0.0 -0.0 + -0.7247974261272416 -0.0 -0.7247974261272416 -0.0 -0.0 -0.0 -0.0 + -0.0382123316932152 -0.0 -0.0382123316932152 -0.0 -0.0 -0.0 -0.0 + -3.813241073891047 -0.0 -3.813241073891047 -3.813241073891047 -0.0 -0.0 -3.813241073891047 + -3.813241073891047 -0.0 -3.813241073891047 -3.813241073891047 -0.0 -0.0 -3.813241073891047 + -1.593192001014045 -0.0 -1.593192001014045 -1.593192001014045 -0.0 -0.0 -1.593192001014045 + -2.7127578570401822 -0.0 -2.7127578570401822 -2.7127578570401822 -0.0 -0.0 -0.0 + 0.14484002662039835 0.0 0.14484002662039835 0.14484002662039835 0.0 0.0 0.0 + -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331 + -0.6279394841753847 -0.0 -0.6279394841753847 -0.0 -0.6279394841753847 -0.0 -0.6279394841753847 + 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412 + 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628 + -2.991376095035898 -0.0 -2.991376095035898 -0.0 -2.991376095035898 -0.0 -0.0 + -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0 + -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0 + -2.226530220466526 -0.0 -2.226530220466526 -0.0 -2.226530220466526 -0.0 -0.0 + 5.713017320814697 0.0 5.713017320814697 0.0 5.713017320814697 0.0 0.0 + 6.908456992944485 0.0 6.908456992944485 0.0 6.908456992944485 0.0 0.0 + 8.12839634400043 0.0 8.12839634400043 0.0 8.12839634400043 0.0 0.0 + -4.628254089687799 -0.0 -4.628254089687799 -0.0 -0.0 -4.628254089687799 -0.0 + -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0 + -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0 + -0.9503472567532946 -0.0 -0.9503472567532946 -0.0 -0.0 -0.9503472567532946 -0.0 + 0.6731546773300909 0.0 0.6731546773300909 0.0 0.0 0.6731546773300909 0.0 + 1.2423198758199778 0.0 1.2423198758199778 0.0 0.0 1.2423198758199778 0.0 + 1.8236065476231822 0.0 1.8236065476231822 0.0 0.0 1.8236065476231822 0.0 + -4.171836641319677 -0.0 -0.0 -0.0 -0.0 -0.0 -4.171836641319677 + -3.9882995353410657 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 + -3.0399730926465205 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 + 1.309672612863431 0.0 0.0 0.0 0.0 0.0 0.0 + 10.661189363296968 0.0 0.0 0.0 0.0 0.0 0.0 + -3.7634043246260425 -0.0 -0.0 -3.7634043246260425 -0.0 -0.0 -3.7634043246260425 + -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 + -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 + -3.0720683496525485 -0.0 -0.0 -3.0720683496525485 -0.0 -0.0 -3.0720683496525485 + -1.885334027349047 -0.0 -0.0 -1.885334027349047 -0.0 -0.0 -1.885334027349047 + 2.106807550276347 0.0 0.0 2.106807550276347 0.0 0.0 2.106807550276347 + 3.0150038937286183 0.0 0.0 3.0150038937286183 0.0 0.0 3.0150038937286183 + 6.387064937752826 0.0 0.0 6.387064937752826 0.0 0.0 6.387064937752826 + 17.72394862307137 0.0 0.0 17.72394862307137 0.0 0.0 17.72394862307137 + 18.296957173355864 0.0 0.0 18.296957173355864 0.0 0.0 18.296957173355864 + -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0 + -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0 + -0.8508688349707806 -0.0 -0.0 -0.8508688349707806 -0.0 -0.0 -0.0 + 2.2977798382338515 0.0 0.0 2.2977798382338515 0.0 0.0 0.0 + 3.4686807301080997 0.0 0.0 3.4686807301080997 0.0 0.0 0.0 + -4.658715933989554 -0.0 -0.0 -0.0 -4.658715933989554 -0.0 -4.658715933989554 + -4.187227633826471 -0.0 -0.0 -0.0 -4.187227633826471 -0.0 -4.187227633826471 + -4.04126740785402 -0.0 -0.0 -0.0 -4.04126740785402 -0.0 -4.04126740785402 + -2.940568463040927 -0.0 -0.0 -0.0 -2.940568463040927 -0.0 -2.940568463040927 + 4.342318636532548 0.0 0.0 0.0 4.342318636532548 0.0 4.342318636532548 + 4.653011109293142 0.0 0.0 0.0 4.653011109293142 0.0 4.653011109293142 + 8.523536317826032 0.0 0.0 0.0 8.523536317826032 0.0 8.523536317826032 + 15.787943104351504 0.0 0.0 0.0 15.787943104351504 0.0 15.787943104351504 + -3.6818016272511183 -0.0 -0.0 -0.0 -3.6818016272511183 -0.0 -0.0 + -2.057196136670586 -0.0 -0.0 -0.0 -0.0 -2.057196136670586 -0.0 + -3.834339745304657 -0.0 -0.0 -0.0 -0.0 -3.834339745304657 -0.0 + -4.1780090350069425 -0.0 -0.0 -0.0 -0.0 -4.1780090350069425 -0.0 + -4.491340364181187 -0.0 -0.0 -0.0 -0.0 -4.491340364181187 -0.0 + -4.3190736545666875 -0.0 -0.0 -0.0 -0.0 -4.3190736545666875 -0.0 + -3.731819061288569 -0.0 -0.0 -0.0 -0.0 -3.731819061288569 -0.0 + -2.238272513055515 -0.0 -0.0 -0.0 -0.0 -2.238272513055515 -0.0 + 1.9859737921268132 0.0 0.0 0.0 0.0 1.9859737921268132 0.0 + 3.2559592797891495 0.0 0.0 0.0 0.0 3.2559592797891495 0.0 + -3.8426774654770597 -3.8426774654770597 -3.8426774654770597 -0.0 -0.0 -0.0 -3.8426774654770597 + -0.9876822943882244 -0.9876822943882244 -0.9876822943882244 -0.0 -0.0 -0.0 -0.9876822943882244 + 23.20842027925341 23.20842027925341 23.20842027925341 0.0 0.0 0.0 23.20842027925341 + -1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0 + -1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0 + -3.2888901738202923 -3.2888901738202923 -3.2888901738202923 -0.0 -0.0 -0.0 -0.0 + -2.758113321833414 -2.758113321833414 -2.758113321833414 -0.0 -0.0 -0.0 -0.0 + -1.306843142455193 -1.306843142455193 -1.306843142455193 -0.0 -0.0 -0.0 -0.0 + -0.8751747276035264 -0.8751747276035264 -0.8751747276035264 -0.0 -0.0 -0.0 -0.0 + -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644 + -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644 + -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847 + -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847 + -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847 + -0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.0 -0.0 -0.6051770620747217 + 2.697606708942873 2.697606708942873 2.697606708942873 2.697606708942873 0.0 0.0 2.697606708942873 + -2.628558928820721 -2.628558928820721 -2.628558928820721 -2.628558928820721 -0.0 -0.0 -0.0 + -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -0.0 -0.0 -0.0 + 0.11268811798135936 0.11268811798135936 0.11268811798135936 0.0 0.11268811798135936 0.0 0.11268811798135936 + 3.1826005245112854 3.1826005245112854 3.1826005245112854 0.0 3.1826005245112854 0.0 3.1826005245112854 + 5.692953263520725 5.692953263520725 5.692953263520725 0.0 5.692953263520725 0.0 5.692953263520725 + -2.7839804243079254 -2.7839804243079254 -2.7839804243079254 -0.0 -2.7839804243079254 -0.0 -0.0 + -1.9433894208611948 -1.9433894208611948 -1.9433894208611948 -0.0 -1.9433894208611948 -0.0 -0.0 + -2.962526696741388 -2.962526696741388 -2.962526696741388 -0.0 -2.962526696741388 -0.0 -0.0 + -3.4432739212266052 -3.4432739212266052 -3.4432739212266052 -0.0 -3.4432739212266052 -0.0 -0.0 + -3.0516553688541084 -3.0516553688541084 -3.0516553688541084 -0.0 -3.0516553688541084 -0.0 -0.0 + 0.3128048727055356 0.3128048727055356 0.3128048727055356 0.0 0.3128048727055356 0.0 0.0 + 5.983398649554576 5.983398649554576 5.983398649554576 0.0 5.983398649554576 0.0 0.0 + -1.9961184031161041 -1.9961184031161041 -1.9961184031161041 -0.0 -0.0 -1.9961184031161041 -0.0 + 4.212201806010905 4.212201806010905 4.212201806010905 0.0 0.0 4.212201806010905 0.0 + -3.152192412974143 -3.152192412974143 -3.152192412974143 -0.0 -0.0 -3.152192412974143 -0.0 + -2.03792823060008 -2.03792823060008 -2.03792823060008 -0.0 -0.0 -2.03792823060008 -0.0 + 2.9007973162738843 2.9007973162738843 2.9007973162738843 0.0 0.0 2.9007973162738843 0.0 + 9.364366020386104 9.364366020386104 9.364366020386104 0.0 0.0 9.364366020386104 0.0 + 24.059031354439128 24.059031354439128 24.059031354439128 0.0 0.0 24.059031354439128 0.0 + 2.864621620127876 2.864621620127876 0.0 0.0 0.0 0.0 2.864621620127876 + -1.374372490365048 -1.374372490365048 -0.0 -0.0 -0.0 -0.0 -0.0 + -0.9287032240778311 -0.9287032240778311 -0.0 -0.0 -0.0 -0.0 -0.0 + 3.919550403175515 3.919550403175515 0.0 0.0 0.0 0.0 0.0 + 12.426707944681816 12.426707944681816 0.0 0.0 0.0 0.0 0.0 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617 + -1.8681224147832116 -1.8681224147832116 -0.0 -1.8681224147832116 -0.0 -0.0 -1.8681224147832116 + -2.778411659017331 -2.778411659017331 -0.0 -2.778411659017331 -0.0 -0.0 -2.778411659017331 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617 + -0.18952790670731487 -0.18952790670731487 -0.0 -0.18952790670731487 -0.0 -0.0 -0.18952790670731487 + 2.1145280030507307 2.1145280030507307 0.0 2.1145280030507307 0.0 0.0 2.1145280030507307 + -1.7407825357737137 -1.7407825357737137 -0.0 -1.7407825357737137 -0.0 -0.0 -0.0 + 4.548120970699322 4.548120970699322 0.0 4.548120970699322 0.0 0.0 0.0 + -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0 + -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0 + -0.6449075179371568 -0.6449075179371568 -0.0 -0.6449075179371568 -0.0 -0.0 -0.0 + 17.819813171012125 17.819813171012125 0.0 17.819813171012125 0.0 0.0 0.0 + -1.999110422648601 -1.999110422648601 -0.0 -0.0 -1.999110422648601 -0.0 -1.999110422648601 + -3.9564518053768536 -3.9564518053768536 -0.0 -0.0 -3.9564518053768536 -0.0 -3.9564518053768536 + -2.1216196203872557 -2.1216196203872557 -0.0 -0.0 -2.1216196203872557 -0.0 -2.1216196203872557 + -3.601990642806918 -3.601990642806918 -0.0 -0.0 -3.601990642806918 -0.0 -3.601990642806918 + -3.601990642806918 -3.601990642806918 -0.0 -0.0 -3.601990642806918 -0.0 -3.601990642806918 + -3.8495441274063715 -3.8495441274063715 -0.0 -0.0 -3.8495441274063715 -0.0 -3.8495441274063715 + -3.6199500530041027 -3.6199500530041027 -0.0 -0.0 -3.6199500530041027 -0.0 -3.6199500530041027 + -3.209822061567088 -3.209822061567088 -0.0 -0.0 -3.209822061567088 -0.0 -3.209822061567088 + -2.702521155801149 -2.702521155801149 -0.0 -0.0 -2.702521155801149 -0.0 -2.702521155801149 + -2.921923505820458 -2.921923505820458 -0.0 -0.0 -2.921923505820458 -0.0 -0.0 + -3.058405902935942 -3.058405902935942 -0.0 -0.0 -0.0 -3.058405902935942 -0.0 + -3.1473667781351766 -3.1473667781351766 -0.0 -0.0 -0.0 -3.1473667781351766 -0.0 + 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 1.4593378269316923 0.0 + -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 -3.7560337640183694 -0.0 + -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 -3.7560337640183694 -0.0 + -3.8041614268127484 -3.8041614268127484 -0.0 -0.0 -0.0 -3.8041614268127484 -0.0 + -1.3131162740760067 -1.3131162740760067 -0.0 -0.0 -0.0 -1.3131162740760067 -0.0 + -0.18645252170591944 -0.18645252170591944 -0.0 -0.0 -0.0 -0.18645252170591944 -0.0 + 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 1.4593378269316923 0.0 + 8.572921389223637 8.572921389223637 0.0 0.0 0.0 8.572921389223637 0.0] rtol = 1e-04 +end + +@testset "GLM: NegativeBinomial with SqrtLink link - AnalyticWeights with $dmethod method" for (dmethod, + drop) in + itr + + model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2), + SqrtLink(), wts=aweights(quine.aweights), + method=dmethod, dropcollinear=drop, rtol=1e-08, atol=1e-09) + @test deviance(model) ≈ 626.6464732988984 rtol = 1e-07 + @test loglikelihood(model) ≈ -2005.5355831034462 rtol = 1e-07 + @test coef(model) ≈ [4.733877229152363, -1.007977895471349, 0.02522392818548873, + -0.9859743168046422, 0.2132095063819721, 0.7456070470961186, + 0.5840284357554036] rtol = 1e-07 + @test stderror(model) ≈ [0.42307979153860564, 0.286636744566765, 0.29612422536777805, + 0.42042723748229144, 0.45565954626859695, 0.4766324296069839, + 0.3235019638755972] rtol = 1e-06 + @test_broken GLM.aic(model) ≈ 4025.0711662068925 rtol = 1e-07 + @test_broken GLM.bic(model) ≈ 4045.956412558851 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ + [-1.4294351675636041 -0.0 -1.4294351675636041 -0.0 -0.0 -0.0 -1.4294351675636041 + -1.5410055711037194 -0.0 -1.5410055711037194 -0.0 -0.0 -0.0 -1.5410055711037194 + -1.3571249039047424 -0.0 -1.3571249039047424 -0.0 -0.0 -0.0 -1.3571249039047424 + -1.7394058711709879 -0.0 -1.7394058711709879 -0.0 -0.0 -0.0 -0.0 + -1.7394058711709879 -0.0 -1.7394058711709879 -0.0 -0.0 -0.0 -0.0 + -1.229734152157926 -0.0 -1.229734152157926 -0.0 -0.0 -0.0 -0.0 + -0.3742348640443611 -0.0 -0.3742348640443611 -0.0 -0.0 -0.0 -0.0 + -0.09370480172054219 -0.0 -0.09370480172054219 -0.0 -0.0 -0.0 -0.0 + -1.7293809063089827 -0.0 -1.7293809063089827 -1.7293809063089827 -0.0 -0.0 -1.7293809063089827 + -1.7293809063089827 -0.0 -1.7293809063089827 -1.7293809063089827 -0.0 -0.0 -1.7293809063089827 + -0.6748210571645206 -0.0 -0.6748210571645206 -0.6748210571645206 -0.0 -0.0 -0.6748210571645206 + -1.5016227445218024 -0.0 -1.5016227445218024 -1.5016227445218024 -0.0 -0.0 -0.0 + -0.058778966482651636 -0.0 -0.058778966482651636 -0.058778966482651636 -0.0 -0.0 -0.0 + -1.6582836355486288 -0.0 -1.6582836355486288 -0.0 -1.6582836355486288 -0.0 -1.6582836355486288 + 0.11341508381030255 0.0 0.11341508381030255 0.0 0.11341508381030255 0.0 0.11341508381030255 + 2.4651888863431344 0.0 2.4651888863431344 0.0 2.4651888863431344 0.0 2.4651888863431344 + 2.9517152556309942 0.0 2.9517152556309942 0.0 2.9517152556309942 0.0 2.9517152556309942 + -1.2288386266845785 -0.0 -1.2288386266845785 -0.0 -1.2288386266845785 -0.0 -0.0 + -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -0.0 + -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -0.0 + -0.9274622643228648 -0.0 -0.9274622643228648 -0.0 -0.9274622643228648 -0.0 -0.0 + 2.212861910664014 0.0 2.212861910664014 0.0 2.212861910664014 0.0 0.0 + 2.6862849076558937 0.0 2.6862849076558937 0.0 2.6862849076558937 0.0 0.0 + 3.1694781034873523 0.0 3.1694781034873523 0.0 3.1694781034873523 0.0 0.0 + -1.6534192741665588 -0.0 -1.6534192741665588 -0.0 -0.0 -1.6534192741665588 -0.0 + -0.702446330017668 -0.0 -0.702446330017668 -0.0 -0.0 -0.702446330017668 -0.0 + -0.702446330017668 -0.0 -0.702446330017668 -0.0 -0.0 -0.702446330017668 -0.0 + -0.23123674216762394 -0.0 -0.23123674216762394 -0.0 -0.0 -0.23123674216762394 -0.0 + 0.3871584524726257 0.0 0.3871584524726257 0.0 0.0 0.3871584524726257 0.0 + 0.6036586921589513 0.0 0.6036586921589513 0.0 0.0 0.6036586921589513 0.0 + 0.8246522973739006 0.0 0.8246522973739006 0.0 0.0 0.8246522973739006 0.0 + -1.560441651521342 -0.0 -0.0 -0.0 -0.0 -0.0 -1.560441651521342 + -1.7419685003857353 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 + -1.4153955925789807 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 + 0.23770439864218734 0.0 0.0 0.0 0.0 0.0 0.0 + 3.853247675936175 0.0 0.0 0.0 0.0 0.0 0.0 + -1.7692672149493731 -0.0 -0.0 -1.7692672149493731 -0.0 -0.0 -1.7692672149493731 + -1.7282440188407218 -0.0 -0.0 -1.7282440188407218 -0.0 -0.0 -1.7282440188407218 + -1.7282440188407218 -0.0 -0.0 -1.7282440188407218 -0.0 -0.0 -1.7282440188407218 + -1.4769837741682987 -0.0 -0.0 -1.4769837741682987 -0.0 -0.0 -1.4769837741682987 + -0.9582774689727417 -0.0 -0.0 -0.9582774689727417 -0.0 -0.0 -0.9582774689727417 + 0.8052632685284861 0.0 0.0 0.8052632685284861 0.0 0.0 0.8052632685284861 + 1.2077994352773953 0.0 0.0 1.2077994352773953 0.0 0.0 1.2077994352773953 + 2.7042310768987665 0.0 0.0 2.7042310768987665 0.0 0.0 2.7042310768987665 + 7.744950633464035 0.0 0.0 7.744950633464035 0.0 0.0 7.744950633464035 + 7.999933021719232 0.0 0.0 7.999933021719232 0.0 0.0 7.999933021719232 + -1.7392669278461907 -0.0 -0.0 -1.7392669278461907 -0.0 -0.0 -0.0 + -1.7392669278461907 -0.0 -0.0 -1.7392669278461907 -0.0 -0.0 -0.0 + -0.7262212443523531 -0.0 -0.0 -0.7262212443523531 -0.0 -0.0 -0.0 + 0.7835691668207807 0.0 0.0 0.7835691668207807 0.0 0.0 0.0 + 1.3489349925379315 0.0 0.0 1.3489349925379315 0.0 0.0 0.0 + -1.6522100272913283 -0.0 -0.0 -0.0 -1.6522100272913283 -0.0 -1.6522100272913283 + -1.4590418232851277 -0.0 -0.0 -0.0 -1.4590418232851277 -0.0 -1.4590418232851277 + -1.4015111702500997 -0.0 -0.0 -0.0 -1.4015111702500997 -0.0 -1.4015111702500997 + -0.9738202253475602 -0.0 -0.0 -0.0 -0.9738202253475602 -0.0 -0.9738202253475602 + 1.8091899079230156 0.0 0.0 0.0 1.8091899079230156 0.0 1.8091899079230156 + 1.9274245415701026 0.0 0.0 0.0 1.9274245415701026 0.0 1.9274245415701026 + 3.399094699981504 0.0 0.0 0.0 3.399094699981504 0.0 3.399094699981504 + 6.157344170373497 0.0 0.0 0.0 6.157344170373497 0.0 6.157344170373497 + -1.5082203700488148 -0.0 -0.0 -0.0 -1.5082203700488148 -0.0 -0.0 + -0.7518968254083281 -0.0 -0.0 -0.0 -0.0 -0.7518968254083281 -0.0 + -1.403623374340758 -0.0 -0.0 -0.0 -0.0 -1.403623374340758 -0.0 + -1.5307566638052945 -0.0 -0.0 -0.0 -0.0 -1.5307566638052945 -0.0 + -1.6487615285777935 -0.0 -0.0 -0.0 -0.0 -1.6487615285777935 -0.0 + -1.5960112869101046 -0.0 -0.0 -0.0 -0.0 -1.5960112869101046 -0.0 + -1.3904968459197917 -0.0 -0.0 -0.0 -0.0 -1.3904968459197917 -0.0 + -0.8618818687491527 -0.0 -0.0 -0.0 -0.0 -0.8618818687491527 -0.0 + 0.6414580291618693 0.0 0.0 0.0 0.0 0.6414580291618693 0.0 + 1.0942097094869556 0.0 0.0 0.0 0.0 1.0942097094869556 0.0 + -1.7282583231217719 -1.7282583231217719 -1.7282583231217719 -0.0 -0.0 -0.0 -1.7282583231217719 + -0.31744768418403196 -0.31744768418403196 -0.31744768418403196 -0.0 -0.0 -0.0 -0.31744768418403196 + 11.375280355235768 11.375280355235768 11.375280355235768 0.0 0.0 0.0 11.375280355235768 + -1.0256901959548927 -1.0256901959548927 -1.0256901959548927 -0.0 -0.0 -0.0 -0.0 + -1.0256901959548927 -1.0256901959548927 -1.0256901959548927 -0.0 -0.0 -0.0 -0.0 + -1.7598032161223125 -1.7598032161223125 -1.7598032161223125 -0.0 -0.0 -0.0 -0.0 + -1.491030768800334 -1.491030768800334 -1.491030768800334 -0.0 -0.0 -0.0 -0.0 + -0.7301769140610584 -0.7301769140610584 -0.7301769140610584 -0.0 -0.0 -0.0 -0.0 + -0.5034045168716083 -0.5034045168716083 -0.5034045168716083 -0.0 -0.0 -0.0 -0.0 + -1.113506915630734 -1.113506915630734 -1.113506915630734 -1.113506915630734 -0.0 -0.0 -1.113506915630734 + -1.113506915630734 -1.113506915630734 -1.113506915630734 -1.113506915630734 -0.0 -0.0 -1.113506915630734 + -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -0.0 -0.0 -1.6237007081122545 + -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -0.0 -0.0 -1.6237007081122545 + -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -0.0 -0.0 -1.6237007081122545 + -0.07026189294137537 -0.07026189294137537 -0.07026189294137537 -0.07026189294137537 -0.0 -0.0 -0.07026189294137537 + 2.0844355685058127 2.0844355685058127 2.0844355685058127 2.0844355685058127 0.0 0.0 2.0844355685058127 + -1.7313903927438412 -1.7313903927438412 -1.7313903927438412 -1.7313903927438412 -0.0 -0.0 -0.0 + -1.480745232754872 -1.480745232754872 -1.480745232754872 -1.480745232754872 -0.0 -0.0 -0.0 + 0.21539031393949493 0.21539031393949493 0.21539031393949493 0.0 0.21539031393949493 0.0 0.21539031393949493 + 1.6360787089859707 1.6360787089859707 1.6360787089859707 0.0 1.6360787089859707 0.0 1.6360787089859707 + 2.7952193074887086 2.7952193074887086 2.7952193074887086 0.0 2.7952193074887086 0.0 2.7952193074887086 + -1.448364418208364 -1.448364418208364 -1.448364418208364 -0.0 -1.448364418208364 -0.0 -0.0 + -0.9833503482488964 -0.9833503482488964 -0.9833503482488964 -0.0 -0.9833503482488964 -0.0 -0.0 + -1.5017276161539084 -1.5017276161539084 -1.5017276161539084 -0.0 -1.5017276161539084 -0.0 -0.0 + -1.7640356839137032 -1.7640356839137032 -1.7640356839137032 -0.0 -1.7640356839137032 -0.0 -0.0 + -1.5776069676233444 -1.5776069676233444 -1.5776069676233444 -0.0 -1.5776069676233444 -0.0 -0.0 + 0.06361165131312438 0.06361165131312438 0.06361165131312438 0.0 0.06361165131312438 0.0 0.0 + 2.8475608847598153 2.8475608847598153 2.8475608847598153 0.0 2.8475608847598153 0.0 0.0 + -0.8892460264142052 -0.8892460264142052 -0.8892460264142052 -0.0 -0.0 -0.8892460264142052 -0.0 + 1.7743695974457907 1.7743695974457907 1.7743695974457907 0.0 0.0 1.7743695974457907 0.0 + -1.4305200814192562 -1.4305200814192562 -1.4305200814192562 -0.0 -0.0 -1.4305200814192562 -0.0 + -0.9478929479399423 -0.9478929479399423 -0.9478929479399423 -0.0 -0.0 -0.9478929479399423 -0.0 + 1.2024302930353608 1.2024302930353608 1.2024302930353608 0.0 0.0 1.2024302930353608 0.0 + 4.02280289664674 4.02280289664674 4.02280289664674 0.0 0.0 4.02280289664674 0.0 + 10.440933185941839 10.440933185941839 10.440933185941839 0.0 0.0 10.440933185941839 0.0 + 1.262517093518885 1.262517093518885 0.0 0.0 0.0 0.0 1.262517093518885 + -0.9176184029771589 -0.9176184029771589 -0.0 -0.0 -0.0 -0.0 -0.0 + -0.6982138187318754 -0.6982138187318754 -0.0 -0.0 -0.0 -0.0 -0.0 + 1.7133696015602422 1.7133696015602422 0.0 0.0 0.0 0.0 0.0 + 5.976953806399672 5.976953806399672 0.0 0.0 0.0 0.0 0.0 + -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735 + -1.1866621929181271 -1.1866621929181271 -0.0 -1.1866621929181271 -0.0 -0.0 -1.1866621929181271 + -1.1194589330024307 -1.1194589330024307 -0.0 -1.1194589330024307 -0.0 -0.0 -1.1194589330024307 + -1.6605118926433484 -1.6605118926433484 -0.0 -1.6605118926433484 -0.0 -0.0 -1.6605118926433484 + -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735 + -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735 + -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735 + -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735 + -1.1866621929181271 -1.1866621929181271 -0.0 -1.1866621929181271 -0.0 -0.0 -1.1866621929181271 + -0.016084003453250676 -0.016084003453250676 -0.0 -0.016084003453250676 -0.0 -0.0 -0.016084003453250676 + 1.4107278812149031 1.4107278812149031 0.0 1.4107278812149031 0.0 0.0 1.4107278812149031 + -1.1128985115655265 -1.1128985115655265 -0.0 -1.1128985115655265 -0.0 -0.0 -0.0 + 3.7957001151581404 3.7957001151581404 0.0 3.7957001151581404 0.0 0.0 0.0 + -0.7046958095802869 -0.7046958095802869 -0.0 -0.7046958095802869 -0.0 -0.0 -0.0 + -0.7046958095802869 -0.7046958095802869 -0.0 -0.7046958095802869 -0.0 -0.0 -0.0 + -0.2475403067755282 -0.2475403067755282 -0.0 -0.2475403067755282 -0.0 -0.0 -0.0 + 14.054845699928913 14.054845699928913 0.0 14.054845699928913 0.0 0.0 0.0 + -0.8850373634971601 -0.8850373634971601 -0.0 -0.0 -0.8850373634971601 -0.0 -0.8850373634971601 + -1.7594068536637126 -1.7594068536637126 -0.0 -0.0 -1.7594068536637126 -0.0 -1.7594068536637126 + -0.9681259531090506 -0.9681259531090506 -0.0 -0.0 -0.9681259531090506 -0.0 -0.9681259531090506 + -1.5970364987524888 -1.5970364987524888 -0.0 -0.0 -1.5970364987524888 -0.0 -1.5970364987524888 + -1.5970364987524888 -1.5970364987524888 -0.0 -0.0 -1.5970364987524888 -0.0 -1.5970364987524888 + -1.7082890535667876 -1.7082890535667876 -0.0 -0.0 -1.7082890535667876 -0.0 -1.7082890535667876 + -1.6168827210404924 -1.6168827210404924 -0.0 -0.0 -1.6168827210404924 -0.0 -1.6168827210404924 + -1.4399676449006795 -1.4399676449006795 -0.0 -0.0 -1.4399676449006795 -0.0 -1.4399676449006795 + -1.2202487676722908 -1.2202487676722908 -0.0 -0.0 -1.2202487676722908 -0.0 -1.2202487676722908 + -1.5079358693315765 -1.5079358693315765 -0.0 -0.0 -1.5079358693315765 -0.0 -0.0 + -1.3842064467607202 -1.3842064467607202 -0.0 -0.0 -0.0 -1.3842064467607202 -0.0 + -1.5208922216041325 -1.5208922216041325 -0.0 -0.0 -0.0 -1.5208922216041325 -0.0 + 0.3453894161447818 0.3453894161447818 0.0 0.0 0.0 0.3453894161447818 0.0 + -1.717557999730545 -1.717557999730545 -0.0 -0.0 -0.0 -1.717557999730545 -0.0 + -1.717557999730545 -1.717557999730545 -0.0 -0.0 -0.0 -1.717557999730545 -0.0 + -1.76269912849327 -1.76269912849327 -0.0 -0.0 -0.0 -1.76269912849327 -0.0 + -0.7863622513628796 -0.7863622513628796 -0.0 -0.0 -0.0 -0.7863622513628796 -0.0 + -0.32795262618891574 -0.32795262618891574 -0.0 -0.0 -0.0 -0.32795262618891574 -0.0 + 0.3453894161447818 0.3453894161447818 0.0 0.0 0.0 0.3453894161447818 0.0 + 3.2758115948278728 3.2758115948278728 0.0 0.0 0.0 3.2758115948278728 0.0] rtol = 1e-04 +end + +admit_agr = DataFrame(; count=[28.0, 97, 93, 55, 33, 54, 28, 12], + admit=repeat([false, true]; inner=[4]), + rank=categorical(repeat(1:4; outer=2))) + +@testset "Aggregated Binomial LogitLink (AnalyticWeights)" begin + for distr in (Binomial, Bernoulli) + gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(), + wts=aweights(admit_agr.count)) + @test dof(gm14) == 4 + @test nobs(gm14) == 8 + @test isapprox(deviance(gm14), 474.9667184280627) + @test isapprox(coef(gm14), + [0.164303051291, -0.7500299832, -1.36469792994, -1.68672866457], + atol=1e-5) + @test_throws ArgumentError loglikelihood(gm14) + @test_throws ArgumentError aic(gm14) + @test_throws ArgumentError aicc(gm14) + @test_throws ArgumentError bic(gm14) + end +end diff --git a/test/probability_weights.jl b/test/probability_weights.jl new file mode 100644 index 00000000..dbcfd563 --- /dev/null +++ b/test/probability_weights.jl @@ -0,0 +1,332 @@ +rng = StableRNG(123) +x1 = rand(rng, 50) +x2 = ifelse.(randn(rng, 50) .> 0, 1, 0) +y = ifelse.(0.004 .- 0.01 .* x1 .+ 1.5 .* x2 .+ randn(rng, 50) .> 0, 1, 0) +df = DataFrame(; y=y, x1=x1, x2=x2, pweights=floor.(rand(rng, 50) * 6) .+ 1) + +clotting = DataFrame(; u=log.([5, 10, 15, 20, 30, 40, 60, 80, 100]), + lot1=[118, 58, 42, 35, 27, 25, 21, 19, 18], + pweights=[1.5, 2.0, 1.1, 4.5, 2.4, 3.5, 5.6, 5.4, 6.7]) + +quine = RDatasets.dataset("MASS", "quine") +quine.pweights = log.(3 .+ 3 .* quine.Days) +dobson = DataFrame(; Counts=[18.0, 17, 15, 20, 10, 20, 25, 13, 12], + Outcome=categorical(repeat(string.('A':'C'); outer=3)), + Treatment=categorical(repeat(string.('a':'c'); inner=3)), + pweights=[1, 2, 1, 2, 3, 4, 3, 2, 1]) + +itr = Iterators.product((:qr, :cholesky), (true, false)) + +@testset "Linear Model ftest/loglikelihod with $dmethod method with dropcollinear=$drop" for (dmethod, + drop) in + itr + + model_1 = lm(@formula(y ~ x1 + x2), df; wts=pweights(df.pweights), method=dmethod) + X = hcat(ones(length(df.y)), df.x1, df.x2) + model_2 = lm(X, y; wts=pweights(df.pweights)) + @test_throws ArgumentError ftest(model_1) + @test_throws ArgumentError ftest(model_2) + @test_throws ArgumentError loglikelihood(model_1) + @test_throws ArgumentError loglikelihood(model_2) +end + +@testset "GLM: Binomial with LogitLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, + drop) in + itr + + model = glm(@formula(y ~ 1 + x1 + x2), + df, + Binomial(), + LogitLink(); + wts=pweights(df.pweights), + method=dmethod, + dropcollinear=drop, + rtol=1e-09, + atol=1e-09) + @test_throws ArgumentError loglikelihood(model) + @test deviance(model) ≈ 47.311214978934785 rtol = 1e-07 + @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 + @test coef(model) ≈ [-0.5241460813701, 0.14468927249342, 2.487500063309] rtol = 1e-06 + @test dof_residual(model) == 47.0 + @test stderror(model) ≈ [1.07077535201799, 1.4966446912323, 0.7679252464101] rtol = 1e-05 +end + +@testset "GLM: Binomial with ProbitLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, + drop) in + itr + + model = glm(@formula(y ~ 1 + x1 + x2), + df, + Binomial(), + ProbitLink(); + wts=pweights(df.pweights), + method=dmethod, + dropcollinear=drop, + rtol=1e-09, + atol=1e-09) + @test_throws ArgumentError loglikelihood(model) + @test deviance(model) ≈ 47.280413566179 rtol = 1e-07 + @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 + @test coef(model) ≈ [-0.379823362118, 0.17460125170132, 1.4927538978259] rtol = 1e-05 + @test dof_residual(model) == 47.0 + @test stderror(model) ≈ [0.6250657160317, 0.851366312489, 0.4423686640689] rtol = 1e-05 +end + +@testset "GLM: Binomial with CauchitLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, + drop) in + itr + + model = glm(@formula(y ~ 1 + x1 + x2), + df, + Binomial(), + CauchitLink(); + wts=pweights(df.pweights), + method=dmethod, + dropcollinear=drop, + rtol=1e-09, + atol=1e-09) + @test_throws ArgumentError loglikelihood(model) + @test deviance(model) ≈ 47.17915872474391 rtol = 1e-07 + @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 + @test coef(model) ≈ [-0.007674579802284, -0.5378132620063, 2.994759904353] rtol = 1e-04 + @test dof_residual(model) == 47.0 + @test stderror(model) ≈ [1.020489214335, 1.5748610330014, 1.5057621596148] rtol = 1e-03 +end + +@testset "GLM: Binomial with CloglogLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, + drop) in + itr + + model = glm(@formula(y ~ 1 + x1 + x2), + df, + Binomial(), + CloglogLink(); + wts=pweights(df.pweights), + method=dmethod, + dropcollinear=drop, + rtol=1e-09, + atol=1e-09) + @test_throws ArgumentError loglikelihood(model) + @test deviance(model) ≈ 47.063354817529856 rtol = 1e-07 + @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 + @test coef(model) ≈ [-0.9897210433718, 0.449902058467, 1.5467108410611] rtol = 1e-04 + @test dof_residual(model) == 47.0 + @test stderror(model) ≈ [0.647026270959, 0.74668663622095, 0.49056337945919] rtol = 1e-04 +end + +@testset "GLM: Gamma with LogLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, + drop) in + itr + + model = glm(@formula(lot1 ~ 1 + u), + clotting, + Gamma(), + LogLink(); + wts=pweights(clotting.pweights), + method=dmethod, + dropcollinear=drop, + rtol=1e-9, + atol=1e-9, + minstepfac=1e-05) + @test_throws ArgumentError loglikelihood(model) + @test deviance(model) ≈ 0.113412 rtol = 1e-06 + @test nulldeviance(model) ≈ 2.55 rtol = 1e-04 + @test coef(model) ≈ [5.32511, -0.549568] rtol = 1e-6 + @test dof_residual(model) == 7.0 + @test stderror(model) ≈ [0.265172, 0.0670627] rtol = 1e-05 +end + +@testset "GLM: NegativeBinomial(2) with LogLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, + drop) in + itr + + model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), + quine, + NegativeBinomial(2), + LogLink(); + wts=pweights(quine.pweights), + method=dmethod, + dropcollinear=drop, + atol=1e-09, + rtol=1e-09, + minstepfac=1e-04) + @test_throws ArgumentError loglikelihood(model) + @test deviance(model) ≈ 178.46174895746665 rtol = 1e-07 + @test nulldeviance(model) ≈ 214.52243528092782 rtol = 1e-07 + @test coef(model) ≈ [3.0241, + -0.464139, + 0.07186, + -0.4785, + 0.0967725, + 0.356318, + 0.348026] rtol = 1e-04 + @test dof_residual(model) == 139.0 + @test stderror(model) ≈ [0.20080, + 0.14069, + 0.14407, + 0.25335, + 0.24012, + 0.23211, + 0.19039] rtol = 1e-04 +end + +@testset "GLM: NegativeBinomial(1) with LogLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, + drop) in + itr + + model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), + quine, + NegativeBinomial(1), + LogLink(); + wts=pweights(quine.pweights), + method=dmethod, + dropcollinear=drop, + atol=1e-09, + rtol=1e-09, + minstepfac=1e-04) + ## Geometric Link is NegativeBinomial(1) + model_geom = glm(@formula(Days ~ Eth + Sex + Age + Lrn), + quine, + Geometric(), + LogLink(); + wts=pweights(quine.pweights), + method=dmethod, + dropcollinear=drop, + atol=1e-09, + rtol=1e-09, + minstepfac=1e-04) + @test_throws ArgumentError loglikelihood(model) + @test_throws ArgumentError loglikelihood(model_geom) + @test deviance(model) ≈ 98.45804 rtol = 1e-05 + @test nulldeviance(model) ≈ 117.407 rtol = 1e-05 + @test deviance(model_geom) ≈ 98.45804 rtol = 1e-05 + @test nulldeviance(model_geom) ≈ 117.407 rtol = 1e-05 + @test coef(model) ≈ [3.0312469487958, + -0.4659209078765, + 0.0676685535488, + -0.4817223025756, + 0.0931703051304, + 0.3543515249482, + 0.3437194303582] rtol = 1e-04 + @test coef(model_geom) ≈ coef(model) rtol = 1e-07 + @test dof_residual(model) == 139.0 + @test dof_residual(model) == dof_residual(model_geom) + @test stderror(model) ≈ [0.20007959342369136, + 0.14082290024757069, + 0.14407240634114291, + 0.25339272642416644, + 0.2402419933610615, + 0.23248097541210141, + 0.19122111799256292] rtol = 1e-04 + @test stderror(model) ≈ stderror(model_geom) rtol = 1e-06 +end + +@testset "GLM: NegaiveBinomial(2) with SqrtLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, + drop) in + itr + + model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), + quine, + NegativeBinomial(2), + SqrtLink(); + wts=pweights(quine.pweights), + method=dmethod, + dropcollinear=drop, + rtol=1e-08, + atol=1e-08, + minstepfac=1e-04) + @test_throws ArgumentError loglikelihood(model) + @test deviance(model) ≈ 178.99970038364276 rtol = 1e-07 + @test nulldeviance(model) ≈ 214.52243528092782 rtol = 1e-07 + @test coef(model) ≈ [4.733877229152367, + -1.0079778954713488, + 0.025223928185488836, + -0.985974316804644, + 0.2132095063819702, + 0.7456070470961171, + 0.5840284357554048] rtol = 1e-07 + + @test dof_residual(model) == 139.0 + @test stderror(model) ≈ [0.4156607040373307, + 0.30174203746555045, + 0.30609799754882105, + 0.526030598769091, + 0.5384102946567921, + 0.5328456049279787, + 0.4065359817407846] rtol = 1e-04 +end + +@testset "GLM: Poisson with LogLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, + drop) in + itr + + model = glm(@formula(Counts ~ 1 + Outcome + Treatment), + dobson, + Poisson(), + LogLink(); + wts=pweights(dobson.pweights), + method=dmethod, + dropcollinear=drop) + @test_throws ArgumentError loglikelihood(model) + @test deviance(model) ≈ 4.837327189925912 rtol = 1e-07 + @test nulldeviance(model) ≈ 12.722836814903907 rtol = 1e-07 + @test coef(model) ≈ [3.1097109912423444, + -0.5376892683400354, + -0.19731134600684794, + -0.05011966661241072, + 0.010415729161988225] rtol = 1e-07 + @test dof_residual(model) == 4.0 + @test stderror(model) ≈ [0.15474638805584298, + 0.13467582259453692, + 0.1482320418486368, + 0.17141304156534284, + 0.17488650070332398] rtol = 1e-06 +end + +@testset "InverseGaussian ProbabilityWeights with $dmethod" for dmethod in (:cholesky, :qr) + gm8a = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, InverseGaussian(); + wts=pweights(9 * clotting.pweights / sum(clotting.pweights)), method=dmethod, + rtol=1e-08, atol=1e-08, minstepfac=1e-04,) + @test dof(gm8a) == 3 + @test deviance(gm8a) ≈ 0.0058836 rtol = 1e-04 + @test nulldeviance(gm8a) ≈ 0.07531257 rtol = 1e-04 + @test coef(gm8a) ≈ [-0.001263439, 0.0008126671] rtol = 1e-04 + @test stderror(gm8a) ≈ [0.0001246, 7.655616888887675e-5] atol = 1e-06 +end + +@testset "Test sparse LM with ProbabilityWeights" for dmethod in (:cholesky, :qr) + # Test sparse with probability weights + rng = StableRNG(123) + X = sprand(rng, 20, 10, 0.2) + β = rand(rng, 10) + y = X * β .+ randn(20) + wts = rand(20) + model_sparse = lm(X, y; wts=pweights(wts), method=dmethod) + model_dense = lm(Matrix(X), y; wts=pweights(wts), method=dmethod) + @test deviance(model_sparse) ≈ deviance(model_dense) rtol = 1e-07 + @test nulldeviance(model_sparse) ≈ nulldeviance(model_dense) rtol = 1e-07 + @test coef(model_sparse) ≈ coef(model_dense) rtol = 1e-07 + @test stderror(model_sparse) ≈ stderror(model_dense) rtol = 1e-07 + @test dof_residual(model_sparse) ≈ dof_residual(model_dense) rtol = 1e-07 + @test dof(model_sparse) ≈ dof(model_dense) rtol = 1e-07 +end + +@testset "Test sparse Rank-deficient LM ProbabilityWeights" begin + # Test sparse with probability weights + rng = StableRNG(123) + X = sprand(rng, 20, 10, 0.2) + β = rand(rng, 10) + y = X * β .+ randn(20) + X = hcat(X[:, 1:7], X[:, 1:2], X[:, 8:9], X[:, 6], X[:, 10]) # make it rank deficient + wts = rand(20) + model_sparse = lm(X, y; wts=pweights(wts), method=:qr) + model_dense = lm(Matrix(X), y; wts=pweights(wts), method=:qr) + @test deviance(model_sparse) ≈ deviance(model_dense) rtol = 1e-07 + @test nulldeviance(model_sparse) ≈ nulldeviance(model_dense) rtol = 1e-07 + se_sparse = stderror(model_sparse) + se_dense = stderror(model_dense) + isnan_sparse = isnan.(se_sparse) + isnan_dense = isnan.(se_dense) + @test sort(coef(model_sparse)[.!isnan_sparse]) ≈ sort(coef(model_dense)[.!isnan_dense]) + @test sort(se_sparse[.!isnan_sparse]) ≈ sort(se_dense[.!isnan_dense]) rtol = 1e-07 +end diff --git a/test/runtests.jl b/test/runtests.jl index 1da5113d..168847f8 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -23,6 +23,36 @@ end linreg(x::AbstractVecOrMat, y::AbstractVector) = qr!(simplemm(x)) \ y +@testset "Test GLmResp/LmResp constructor" begin + y = ifelse.(randn(10) .< 0, 1.0, 0.0) + d = Binomial() + η = similar(y) + μ = similar(y) + wts = similar(y) + aw = aweights(wts) + uw = GLM.uweights(10) + o = ones(10) + oe = Float64[] + l = GLM.LogitLink() + + ## wts must be AbstractWeights + @test_throws ArgumentError GLM.GlmResp(y, d, l, η, μ, oe, wts) + ## Length of weights must match length of y + @test_throws DimensionMismatch GLM.GlmResp(y, d, l, η, μ, oe, uw[1:9]) + @test_throws DimensionMismatch GLM.LmResp{typeof(y),typeof(aw)}(μ, oe, aw[1:2], y) + @test_throws DimensionMismatch GLM.LmResp{typeof(y),typeof(aw)}(μ, oe, aw, y[1:2]) + ## Length of y must match length of η + @test_throws DimensionMismatch GLM.GlmResp(y[1:9], d, l, η, μ, oe, uw) + ## Length of y must match length of μ + @test_throws DimensionMismatch GLM.GlmResp(y, d, l, η, μ[1:9], oe, uw) + @test_throws DimensionMismatch GLM.LmResp{typeof(y),typeof(aw)}(μ[1:2], oe, aw, y) + ## Length of offset must match length of y + @test_throws DimensionMismatch GLM.GlmResp(y, d, l, η, μ, o[1:9], uw) + @test_throws DimensionMismatch GLM.LmResp{typeof(y),typeof(aw)}(μ, o[1:9], aw, y) + ## y must be in the support of d + @test_throws ArgumentError GLM.GlmResp([1.0, 2.0], d, l, η, μ, o, uw) +end + @testset "LM with $dmethod" for dmethod in (:cholesky, :qr) Σ = [6.136653061224592e-05 -9.464489795918525e-05 -9.464489795918525e-05 1.831836734693908e-04] @@ -30,9 +60,9 @@ linreg(x::AbstractVecOrMat, y::AbstractVector) = qr!(simplemm(x)) \ y lm1 = fit(LinearModel, @formula(OptDen ~ Carb), form; method=dmethod) test_show(lm1) @test isapprox(coef(lm1), linreg(form.Carb, form.OptDen)) - + @test residuals(lm1.rr) ≈ residuals(lm1) @test isapprox(vcov(lm1), Σ) - @test isapprox(cor(lm1), Diagonal(diag(Σ))^(-1/2)*Σ*Diagonal(diag(Σ))^(-1/2)) + @test isapprox(cor(lm1), Diagonal(diag(Σ))^(-1 / 2) * Σ * Diagonal(diag(Σ))^(-1 / 2)) @test dof(lm1) == 3 @test isapprox(deviance(lm1), 0.0002992000000000012) @test isapprox(loglikelihood(lm1), 21.204842144047973) @@ -45,6 +75,7 @@ linreg(x::AbstractVecOrMat, y::AbstractVector) = qr!(simplemm(x)) \ y @test isapprox(aic(lm1), -36.409684288095946) @test isapprox(aicc(lm1), -24.409684288095946) @test isapprox(bic(lm1), -37.03440588041178) + @test GLM.working_residuals(lm1) ≈ residuals(lm1) lm2 = fit(LinearModel, hcat(ones(6), 10form.Carb), form.OptDen; method=dmethod) if dmethod == :cholesky @test isa(lm2.pp.chol, CholeskyPivoted) @@ -61,11 +92,11 @@ linreg(x::AbstractVecOrMat, y::AbstractVector) = qr!(simplemm(x)) \ y @testset "low level constructors" begin X = [ones(10) randn(10)] - y = X*ones(2) + randn(10)*0.1 + y = X * ones(2) + randn(10) * 0.1 r = GLM.LmResp(y) pch = GLM.DensePredChol(X, false) pqr = GLM.DensePredQR(X) - β̂ = X\y + β̂ = X \ y @test coef(fit!(LinearModel(r, pch, nothing))) ≈ β̂ @test coef(fit!(LinearModel(r, pqr, nothing))) ≈ β̂ # Test last one once more to ensure that no state in pqr affects the result @@ -74,21 +105,22 @@ linreg(x::AbstractVecOrMat, y::AbstractVector) = qr!(simplemm(x)) \ y end @testset "Cook's Distance in Linear Model with $dmethod" for dmethod in (:cholesky, :qr) - st_df = DataFrame(Y=[6.4, 7.4, 10.4, 15.1, 12.3, 11.4], + st_df = DataFrame(; Y=[6.4, 7.4, 10.4, 15.1, 12.3, 11.4], XA=[1.5, 6.5, 11.5, 19.9, 17.0, 15.5], XB=[1.8, 7.8, 11.8, 20.5, 17.3, 15.8], XC=[3.0, 13.0, 23.0, 39.8, 34.0, 31.0], # values from SAS proc reg - CooksD_base=[1.4068501943, 0.176809102, 0.0026655177, 1.0704009915, - 0.0875726457, 0.1331183932], - CooksD_noint=[0.0076891801, 0.0302993877, 0.0410262965, 0.0294348488, - 0.0691589296, 0.0273045538], - CooksD_multi=[1.7122291956, 18.983407026, 0.000118078, 0.8470797843, - 0.0715921999, 0.1105843157]) + CooksD_base=[1.4068501943, 0.176809102, 0.0026655177, + 1.0704009915, 0.0875726457, 0.1331183932], + CooksD_noint=[0.0076891801, 0.0302993877, 0.0410262965, + 0.0294348488, 0.0691589296, 0.0273045538], + CooksD_multi=[1.7122291956, 18.983407026, 0.000118078, + 0.8470797843, 0.0715921999, 0.1105843157]) # linear regression t_lm_base = lm(@formula(Y ~ XA), st_df; method=dmethod) @test isapprox(st_df.CooksD_base, cooksdistance(t_lm_base)) + @test GLM.momentmatrix(t_lm_base) == modelmatrix(t_lm_base) .* residuals(t_lm_base) # linear regression, no intercept t_lm_noint = lm(@formula(Y ~ XA + 0), st_df; method=dmethod) @@ -100,19 +132,35 @@ end # linear regression, two full collinear variables (XC = 2 XA) hence should get the same results as the original # after pivoting - t_lm_colli = lm(@formula(Y ~ XA + XC), st_df; dropcollinear=true, method=dmethod) - # Currently fails as the collinear variable is not dropped from `modelmatrix(obj)` - @test_throws ArgumentError isapprox(st_df.CooksD_base, cooksdistance(t_lm_colli)) + t_lm_colli = lm(@formula(Y ~ XA + XC), st_df; dropcollinear=true) + t_lm_colli_b = lm(@formula(Y ~ XC), st_df; dropcollinear=true) + @test isapprox(cooksdistance(t_lm_colli), cooksdistance(t_lm_colli_b)) end @testset "Linear model with weights and $dmethod" for dmethod in (:cholesky, :qr) df = dataset("quantreg", "engel") N = nrow(df) - df.weights = repeat(1:5, Int(N/5)) + df.weights = fweights(repeat(1:5, Int(N / 5))) f = @formula(FoodExp ~ Income) - - lm_model = lm(f, df, wts=df.weights; method=dmethod) - glm_model = glm(f, df, Normal(), wts=df.weights; method=dmethod) + @test GLM.convert_weights(repeat(1:5, Int(N / 5))) == df.weights + @test_logs (:warn, + "Passing weights as vector is deprecated in favor of explicitly using " * + "`AnalyticWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * + "by coercing `wts` to `FrequencyWeights`") + lm_model = lm(f, df; wts=df.weights, method=dmethod) + glm_model = glm(f, df, Normal(); wts=df.weights, method=dmethod) + @test residuals(lm_model) ≈ df.FoodExp .- predict(lm_model) + @test residuals(lm_model) ≈ residuals(lm_model) + @test residuals(lm_model; weighted=true) ≈ + sqrt.(GLM.weights(lm_model)) .* (df.FoodExp .- predict(lm_model)) + @test residuals(glm_model; weighted=true) ≈ residuals(lm_model; weighted=true) + @test residuals(glm_model; weighted=false) ≈ residuals(lm_model; weighted=false) + @test residuals(glm_model.rr; weighted=false) ≈ residuals(lm_model; weighted=false) + @test residuals(glm_model.rr; weighted=true) ≈ residuals(lm_model; weighted=true) + + @test GLM.varstruct(lm_model) == + (GLM.working_weights(lm_model) .* GLM.working_residuals(lm_model), 1.0) + @test residuals(lm_model.rr) ≈ residuals(lm_model) @test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) @test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) @test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968]) @@ -123,9 +171,35 @@ end -0.06772589439264813 6.670664781664879e-5]) @test isapprox(first(predict(lm_model)), 357.57694841780994) @test isapprox(loglikelihood(lm_model), -4353.946729075838) + @test isapprox(loglikelihood(lm_model.rr), -4353.946729075838) @test isapprox(loglikelihood(glm_model), -4353.946729075838) @test isapprox(nullloglikelihood(lm_model), -4984.892139711452) @test isapprox(mean(residuals(lm_model)), -5.412966629787718) + @test GLM.working_residuals(lm_model) ≈ residuals(lm_model) + @test r2(lm_model) ≈ 0.8330258148644486 + @test adjr2(lm_model) ≈ 0.832788298242634 + + lm_model = fit(LinearModel, f, df; wts=uweights(0)) + @test_logs (:warn, + "Using `wts` of zero length for unweighted regression is deprecated in favor of " * + "explicitly using `UnitWeights(length(y))`." * + " Proceeding by coercing `wts` to UnitWeights of size $(N).") + @test GLM.weights(lm_model) == uweights(N) + + lm1 = fit(GeneralizedLinearModel, f, df, Normal(), IdentityLink(); + wts=pweights(df.weights)) + @test_logs (:warn, + "Passing weights as vector is deprecated in favor of explicitly using " * + "`AnalyticWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * + "by coercing `wts` to `FrequencyWeights`") + + @test_throws ArgumentError loglikelihood(lm1) + @test_throws ArgumentError nullloglikelihood(lm1) + lm1 = fit(LinearModel, f, df; wts=pweights(df.weights)) + @test_throws ArgumentError loglikelihood(lm1) + @test_throws ArgumentError nullloglikelihood(lm1) + @test residuals(lm1) == residuals(lm1.rr) + @test GLM.isweighted(lm1) == GLM.isweighted(lm1.rr) end @testset "Linear model with dropcollinearity and $dmethod" for dmethod in (:cholesky, :qr) @@ -150,10 +224,10 @@ end @testset "Linear model with $dmethod and rankdeficieny" for dmethod in (:cholesky, :qr) rng = StableRNG(1234321) # an example of rank deficiency caused by a missing cell in a table - dfrm = DataFrame([categorical(repeat(string.('A':'D'), inner=6)), - categorical(repeat(string.('a':'c'), inner=2, outer=4))], + dfrm = DataFrame([categorical(repeat(string.('A':'D'); inner=6)), + categorical(repeat(string.('a':'c'); inner=2, outer=4))], [:G, :H]) - f = @formula(0 ~ 1 + G*H) + f = @formula(0 ~ 1 + G * H) X = ModelMatrix(ModelFrame(f, dfrm)).m y = X * (1:size(X, 2)) + 0.1 * randn(rng, size(X, 1)) inds = deleteat!(collect(1:length(y)), 7:8) @@ -162,6 +236,7 @@ end @test isapprox(deviance(m1), 0.12160301538297297) Xmissingcell = X[inds, :] ymissingcell = y[inds] + m2p = fit(LinearModel, Xmissingcell, ymissingcell; method=dmethod) m2p_dep_kw = fit(LinearModel, Xmissingcell, ymissingcell; method=dmethod, dropcollinear=true) @@ -207,9 +282,8 @@ end @test isapprox(coef(m2p_dep_kw), coef(m2p)) end -@testset "Saturated linear model with $dmethod" for dmethod in (:cholesky, :qr) - df1 = DataFrame(x=["a", "b", "c"], y=[1, 2, 3]) - +@testset "saturated linear model" for dmethod in (:cholesky, :qr) + df1 = DataFrame(; x=["a", "b", "c"], y=[1, 2, 3]) model = lm(@formula(y ~ x), df1; method=dmethod) ct = coeftable(model) @test dof_residual(model) == 0 @@ -255,7 +329,7 @@ end Inf 0.0 1.0 -Inf Inf]) # Saturated and rank-deficient model - df2 = DataFrame(x1=["a", "b", "c"], x2=["a", "b", "c"], y=[1, 2, 3]) + df2 = DataFrame(; x1=["a", "b", "c"], x2=["a", "b", "c"], y=[1, 2, 3]) for model in (lm(@formula(y ~ x1 + x2), df2; method=dmethod), glm(@formula(y ~ x1 + x2), df2, Normal(), IdentityLink(); method=dmethod)) ct = coeftable(model) @@ -277,7 +351,7 @@ end # test case to test r2 for no intercept model # https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/NoInt1.dat - data = DataFrame(x=60:70, y=130:140) + data = DataFrame(; x=60:70, y=130:140) mdl = lm(@formula(y ~ 0 + x), data; method=:cholesky) @test coef(mdl) ≈ [2.07438016528926] @@ -301,7 +375,7 @@ end # test case to test r2 for no intercept model # https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/NoInt2.dat - data = DataFrame(x=[4, 5, 6], y=[3, 4, 4]) + data = DataFrame(; x=[4, 5, 6], y=[3, 4, 4]) mdl = lm(@formula(y ~ 0 + x), data; method=:cholesky) @test coef(mdl) ≈ [0.727272727272727] @@ -322,7 +396,7 @@ end @testset "Test with without formula" begin X = [4 5 6]' y = [3, 4, 4] - data = DataFrame(x=[4, 5, 6], y=[3, 4, 4]) + data = DataFrame(; x=[4, 5, 6], y=[3, 4, 4]) mdl1 = lm(@formula(y ~ 0 + x), data; method=dmethod) mdl2 = lm(X, y) @@ -345,9 +419,8 @@ end @testset "Linear model with QR method and NASTY data" begin x = [1, 2, 3, 4, 5, 6, 7, 8, 9] - nasty = DataFrame(X=x, TINY=1.0E-12*x) + nasty = DataFrame(; X=x, TINY=1.0E-12 * x) mdl = lm(@formula(X ~ TINY), nasty) - @test coef(mdl) ≈ [0, 1.0E+12] @test dof(mdl) ≈ 3 @test r2(mdl) ≈ 1.0 @@ -376,6 +449,8 @@ dobson = DataFrame(; Counts=[18.0, 17, 15, 20, 10, 20, 25, 13, 12], # Deprecated methods @test gm1.model === gm1 @test gm1.mf.f == formula(gm1) + @test cooksdistance(gm1) ≈ [0.35162220; 0.43125000; 0.01468043; 0.03906913; 0.35640497; + 0.62024818; 0.62510614; 0.00356405; 0.44408301] rtol = 1e-04 end ## Example from http://www.ats.ucla.edu/stat/r/dae/logit.htm @@ -458,7 +533,7 @@ end rng = StableRNG(127) X = [ones(n) randn(rng, n)] - y = logistic.(X*ones(2) + 1/10*randn(rng, n)) .> 1/2 + y = logistic.(X * ones(2) + 1 / 10 * randn(rng, n)) .> 1 / 2 @test coeftable(glm(X, y, Binomial(), CloglogLink(); method=dmethod)).cols[4][2] < 0.05 end @@ -469,7 +544,7 @@ anorexia = CSV.read(joinpath(glm_datadir, "anorexia.csv"), DataFrame) @testset "Normal offset with $dmethod" for dmethod in (:cholesky, :qr) gm6 = fit(GeneralizedLinearModel, @formula(Postwt ~ 1 + Prewt + Treat), anorexia, - Normal(), IdentityLink(), method=dmethod, + Normal(), IdentityLink(); method=dmethod, offset=Array{Float64}(anorexia.Prewt)) @test GLM.cancancel(gm6.rr) test_show(gm6) @@ -490,7 +565,7 @@ end @testset "Normal LogLink offset with $dmethod" for dmethod in (:cholesky, :qr) gm7 = fit(GeneralizedLinearModel, @formula(Postwt ~ 1 + Prewt + Treat), anorexia, - Normal(), LogLink(), method=dmethod, offset=anorexia.Prewt, rtol=1e-8) + Normal(), LogLink(); method=dmethod, offset=anorexia.Prewt, rtol=1e-8) @test !GLM.cancancel(gm7.rr) test_show(gm7) @test isapprox(deviance(gm7), 3265.207242977156) @@ -508,7 +583,7 @@ end @testset "Poisson LogLink offset with $dmethod" for dmethod in (:cholesky, :qr) gm7p = fit(GeneralizedLinearModel, @formula(round(Postwt) ~ 1 + Prewt + Treat), anorexia, - Poisson(), LogLink(), method=dmethod, offset=log.(anorexia.Prewt), rtol=1e-8) + Poisson(), LogLink(); method=dmethod, offset=log.(anorexia.Prewt), rtol=1e-8) @test GLM.cancancel(gm7p.rr) test_show(gm7p) @@ -525,8 +600,8 @@ end @testset "Poisson LogLink offset with weights with $dmethod" for dmethod in (:cholesky, :qr) gm7pw = fit(GeneralizedLinearModel, @formula(round(Postwt) ~ 1 + Prewt + Treat), anorexia, - Poisson(), LogLink(), method=dmethod, offset=log.(anorexia.Prewt), - wts=repeat(1:4, outer=18), rtol=1e-8) + Poisson(), LogLink(); method=dmethod, offset=log.(anorexia.Prewt), + wts=fweights(repeat(1:4; outer=18)), rtol=1e-8) @test GLM.cancancel(gm7pw.rr) test_show(gm7pw) @@ -583,7 +658,7 @@ end end @testset "Gamma LogLink with $dmethod" for dmethod in (:cholesky, :qr) - gm9 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(), LogLink(), + gm9 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(), LogLink(); method=dmethod, rtol=1e-8, atol=0.0) @test !GLM.cancancel(gm9.rr) test_show(gm9) @@ -624,10 +699,10 @@ admit_agr = DataFrame(; count=[28.0, 97, 93, 55, 33, 54, 28, 12], admit=repeat([false, true]; inner=[4]), rank=categorical(repeat(1:4; outer=2))) -@testset "Aggregated Binomial LogitLink with $dmethod" for dmethod in (:cholesky, :qr) +@testset "Aggregated Binomial LogitLink" begin for distr in (Binomial, Bernoulli) gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(); - method=dmethod, wts=Array(admit_agr.count)) + wts=fweights(Array(admit_agr.count))) @test dof(gm14) == 4 @test nobs(gm14) == 400 @test isapprox(deviance(gm14), 474.9667184280627) @@ -650,8 +725,8 @@ admit_agr2.p = admit_agr2.admit ./ admit_agr2.count ## The model matrix here is singular so tests like the deviance are just round off error @testset "Binomial LogitLink aggregated with $dmethod" for dmethod in (:cholesky, :qr) - gm15 = fit(GeneralizedLinearModel, @formula(p ~ rank), admit_agr2, Binomial(), - method=dmethod, wts=admit_agr2.count) + gm15 = fit(GeneralizedLinearModel, @formula(p ~ rank), admit_agr2, Binomial(); + wts=fweights(admit_agr2.count)) test_show(gm15) @test dof(gm15) == 4 @test nobs(gm15) == 400 @@ -668,8 +743,8 @@ end # Weighted Gamma example (weights are totally made up) @testset "Gamma InverseLink Weights with $dmethod" for dmethod in (:cholesky, :qr) - gm16 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(), - method=dmethod, wts=[1.5, 2.0, 1.1, 4.5, 2.4, 3.5, 5.6, 5.4, 6.7]) + gm16 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(); + wts=fweights([1.5, 2.0, 1.1, 4.5, 2.4, 3.5, 5.6, 5.4, 6.7])) test_show(gm16) @test dof(gm16) == 3 @test nobs(gm16) == 32.7 @@ -684,9 +759,10 @@ end end # Weighted Poisson example (weights are totally made up) -@testset "Poisson LogLink Weights with $dmethod" for dmethod in (:cholesky, :qr) - gm17 = glm(@formula(Counts ~ Outcome + Treatment), dobson, Poisson(), - method=dmethod, wts=[1.5, 2.0, 1.1, 4.5, 2.4, 3.5, 5.6, 5.4, 6.7]) +@testset "Poisson LogLink Weights" begin + gm17 = fit(GeneralizedLinearModel, @formula(Counts ~ Outcome + Treatment), dobson, + Poisson(); + wts=fweights([1.5, 2.0, 1.1, 4.5, 2.4, 3.5, 5.6, 5.4, 6.7])) test_show(gm17) @test dof(gm17) == 5 @test isapprox(deviance(gm17), 17.699857821414266) @@ -704,8 +780,8 @@ end # "quine" dataset discussed in Section 7.4 of "Modern Applied Statistics with S" quine = dataset("MASS", "quine") @testset "NegativeBinomial LogLink Fixed θ with $dmethod" for dmethod in (:cholesky, :qr) - gm18 = fit(GeneralizedLinearModel, @formula(Days ~ Eth+Sex+Age+Lrn), quine, - NegativeBinomial(2.0), LogLink(), method=dmethod) + gm18 = fit(GeneralizedLinearModel, @formula(Days ~ Eth + Sex + Age + Lrn), quine, + NegativeBinomial(2.0), LogLink(); method=dmethod) @test !GLM.cancancel(gm18.rr) test_show(gm18) @test dof(gm18) == 8 @@ -724,8 +800,8 @@ end @testset "NegativeBinomial NegativeBinomialLink Fixed θ" begin # the default/canonical link is NegativeBinomialLink - gm19 = fit(GeneralizedLinearModel, @formula(Days ~ Eth+Sex+Age+Lrn), quine, - NegativeBinomial(2.0)) + gm19 = fit(GeneralizedLinearModel, @formula(Days ~ Eth + Sex + Age + Lrn), + quine, NegativeBinomial(2.0)) @test GLM.cancancel(gm19.rr) test_show(gm19) @test dof(gm19) == 8 @@ -743,7 +819,7 @@ end end @testset "NegativeBinomial LogLink, θ to be estimated with Cholesky" begin - gm20 = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine, LogLink()) + gm20 = negbin(@formula(Days ~ Eth + Sex + Age + Lrn), quine, LogLink()) test_show(gm20) @test dof(gm20) == 8 @test isapprox(deviance(gm20), 167.9518430624193, rtol=1e-7) @@ -754,16 +830,16 @@ end @test isapprox(aicc(gm20), 1110.202113650851) @test isapprox(bic(gm20), 1133.0198717340068) @test isapprox(coef(gm20)[1:7], - [2.894527697811509, -0.5693411448715979, 0.08238813087070128, - -0.4484636623590206, + [2.894527697811509, -0.5693411448715979, + 0.08238813087070128, -0.4484636623590206, 0.08805060372902418, 0.3569553124412582, 0.2921383118842893]) @testset "NegativeBinomial Parameter estimation" begin # Issue #302 - df = DataFrame(y=[1, 1, 0, 2, 3, 0, 0, 1, 1, 0, 2, 1, 3, 1, 1, 1, 4]) + df = DataFrame(; y=[1, 1, 0, 2, 3, 0, 0, 1, 1, 0, 2, 1, 3, 1, 1, 1, 4]) for maxiter in [30, 50] try - negbin(@formula(y ~ 1), df, maxiter=maxiter, + negbin(@formula(y ~ 1), df; maxiter=maxiter, # set minstepfac to a very small value to avoid an ErrorException # instead of a ConvergenceException minstepfac=1e-20) @@ -779,9 +855,10 @@ end end @testset "Weighted NegativeBinomial LogLink, θ to be estimated with Cholesky" begin - halfn = round(Int, 0.5*size(quine, 1)) + halfn = round(Int, 0.5 * size(quine, 1)) wts = vcat(fill(0.8, halfn), fill(1.2, size(quine, 1) - halfn)) - gm20a = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine, LogLink(); wts=wts) + gm20a = negbin(@formula(Days ~ Eth + Sex + Age + Lrn), quine, LogLink(); + wts=fweights(wts)) test_show(gm20a) @test dof(gm20a) == 8 @test isapprox(deviance(gm20a), 164.45910399188858, rtol=1e-7) @@ -792,13 +869,13 @@ end @test isapprox(aicc(gm20a), 1110.244740690765) @test isapprox(bic(gm20a), 1133.0624987739207) @test isapprox(coef(gm20a)[1:7], - [2.894916710026395, -0.5694300339439156, 0.08215779733345588, - -0.44861865904551734, + [2.894916710026395, -0.5694300339439156, + 0.08215779733345588, -0.44861865904551734, 0.08783288494046998, 0.3568327292046044, 0.29190920267019166]) end @testset "NegativeBinomial LogLink, θ to be estimated with QR" begin - gm20 = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine, LogLink(); method=:qr) + gm20 = negbin(@formula(Days ~ Eth + Sex + Age + Lrn), quine, LogLink(); method=:qr) test_show(gm20) @test dof(gm20) == 8 @test isapprox(deviance(gm20), 167.9518430624193, rtol=1e-7) @@ -809,8 +886,8 @@ end @test isapprox(aicc(gm20), 1110.202113650851) @test isapprox(bic(gm20), 1133.0198717340068) @test isapprox(coef(gm20)[1:7], - [2.894527697811509, -0.5693411448715979, 0.08238813087070128, - -0.4484636623590206, + [2.894527697811509, -0.5693411448715979, + 0.08238813087070128, -0.4484636623590206, 0.08805060372902418, 0.3569553124412582, 0.2921383118842893]) end @@ -818,7 +895,7 @@ end (:cholesky, :qr) # the default/canonical link is NegativeBinomialLink - gm21 = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine; method=dmethod) + gm21 = negbin(@formula(Days ~ Eth + Sex + Age + Lrn), quine; method=dmethod) test_show(gm21) @test dof(gm21) == 8 @test isapprox(deviance(gm21), 168.0465485656672, rtol=1e-7) @@ -829,8 +906,8 @@ end @test isapprox(aicc(gm21), 1110.660815681978) @test isapprox(bic(gm21), 1133.4785737651337) @test isapprox(coef(gm21)[1:7], - [-0.08288628676491684, -0.03697387258037785, 0.010284124099280421, - -0.027411445371127288, + [-0.08288628676491684, -0.03697387258037785, + 0.010284124099280421, -0.027411445371127288, 0.01582155341041012, 0.029074956147127032, 0.023628812427424876]) end @@ -912,7 +989,7 @@ end nointglm3 = fit(GeneralizedLinearModel, @formula(round(Postwt) ~ 0 + Prewt + Treat), anorexia, Poisson(), LogLink(); offset=log.(anorexia.Prewt), - wts=repeat(1:4, outer=18), rtol=1e-8, dropcollinear=false) + wts=fweights(repeat(1:4; outer=18)), rtol=1e-8, dropcollinear=false) @test !hasintercept(nointglm3) @test GLM.cancancel(nointglm3.rr) test_show(nointglm3) @@ -970,7 +1047,7 @@ end nointglm3 = fit(GeneralizedLinearModel, @formula(round(Postwt) ~ 0 + Prewt + Treat), anorexia, Poisson(), LogLink(); method=dmethod, offset=log.(anorexia.Prewt), - wts=repeat(1:4, outer=18), rtol=1e-8, dropcollinear=false) + wts=fweights(repeat(1:4; outer=18)), rtol=1e-8, dropcollinear=false) @test !hasintercept(nointglm3) @test GLM.cancancel(nointglm3.rr) test_show(nointglm3) @@ -994,6 +1071,10 @@ end X = sprand(rng, 1000, 10, 0.01) β = randn(rng, 10) y = Bool[rand(rng) < logistic(x) for x in X * β] + @test_throws DimensionMismatch fit(GeneralizedLinearModel, Matrix(X)[1:20, :], y, + Binomial(); method=:cholesky) + @test_throws ArgumentError fit(GeneralizedLinearModel, Matrix(X), y, + Binomial(); method=:chol) gmsparse = fit(GeneralizedLinearModel, X, y, Binomial(); method=:cholesky) gmdense = fit(GeneralizedLinearModel, Matrix(X), y, Binomial(); method=:cholesky) @@ -1002,6 +1083,48 @@ end @test isapprox(vcov(gmsparse), vcov(gmdense)) end +@testset "Sparse GLM (rank deficient)" begin + rng = StableRNG(1) + X = sprand(rng, 1000, 10, 0.01) + β = randn(rng, 12) + X = [X[:, 1:8] X[:, 1:2] X[:, 9:10]] + y = Bool[rand(rng) < logistic(x) for x in X * β] + + gmsparse = fit(GeneralizedLinearModel, X, y, Binomial(); method=:qr, rtol=1.0e-8) + gmdense = fit(GeneralizedLinearModel, Matrix(X), y, Binomial(); method=:qr, rtol=1.0e-8) + + @test isapprox(deviance(gmsparse), deviance(gmdense)) + @test isapprox(coef(gmsparse), coef(gmdense)) + inan = isnan.(vcov(gmdense)) + @test isnan.(vcov(gmsparse)) == inan + @test vcov(gmsparse)[.!inan] ≈ vcov(gmdense)[.!inan] + se0 = [1.4287165; 1.0703834; 1.2612823; 1.0018692; 1.3699154; + 5.32805; 2.0248812; 0.9454645; 1.3078763; 2.0653020] + se = stderror(gmsparse) + @test se[.!isnan.(se)] ≈ se0 atol = 1e-05 +end + +@testset "Sparse GLM weights (rank deficient)" begin + rng = StableRNG(1) + X = sprand(rng, 1000, 10, 0.5) + β = randn(rng, 12) + X = [X[:, 1:8] X[:, 1:2] X[:, 9:10]] + y = Bool[rand(rng) < logistic(x) for x in X * β] + w = rand(rng, 1000) + gmsparse = fit(LinearModel, X, y; wts=aweights(w), method=:qr) + gmdense = fit(LinearModel, Matrix(X), y; wts=aweights(w), method=:qr) + isnans = isnan.(stderror(gmsparse)) + isnand = isnan.(stderror(gmdense)) + + @test isapprox(sort(coef(gmsparse)[.!isnans]), sort(coef(gmdense)[.!isnand])) + inand = isnan.(vcov(gmdense)) + inans = isnan.(vcov(gmsparse)) + se0 = [0.04834967; 0.04718502; 0.04734963; 0.04692182; 0.04834876; 0.04643505; + 0.04393111; 0.04591312; 0.04518676; 0.04779138] + se = stderror(gmsparse) + @test se[.!isnans] ≈ se0 atol = 1e-05 +end + @testset "Sparse LM" for method in (:qr, :cholesky) rng = StableRNG(1) X = sprand(rng, 1000, 10, 0.01) @@ -1020,17 +1143,41 @@ end @test isapprox(coef(gmsparse), coef(gmdense)) @test isapprox(vcov(gmsparse), vcov(gmdense)) end +end - @testset "weights" begin - wts = rand(1000) - ft_sparse_w = fit(LinearModel, X, y; wts, method) - ft_dense_w = fit(LinearModel, Matrix(X), y; wts, method) - @test coef(ft_sparse_w) ≈ coef(ft_dense_w) - @test vcov(ft_sparse_w) ≈ vcov(ft_dense_w) - end +@testset "Sparse LM/GLM weights" for dmethod in (:qr, :cholesky) + rng = StableRNG(1) + wts = rand(1000) + X = sprand(rng, 1000, 10, 0.01) + β = randn(rng, 10) + y = Bool[rand(rng) < logistic(x) for x in X * β] + ft_sparse_w = fit(LinearModel, X, y; wts=aweights(wts), method=dmethod) + ft_dense_w = fit(LinearModel, Matrix(X), y; wts=aweights(wts), method=dmethod) + @test coef(ft_sparse_w) ≈ coef(ft_dense_w) + @test vcov(ft_sparse_w) ≈ vcov(ft_dense_w) + + gmsparse = fit(GeneralizedLinearModel, X, y, Binomial(); method=dmethod, + wts=aweights(wts)) + gmdense = fit(GeneralizedLinearModel, Matrix(X), y, Binomial(); method=dmethod, + wts=aweights(wts)) + @test isapprox(deviance(gmsparse), deviance(gmdense)) + @test isapprox(coef(gmsparse), coef(gmdense)) + @test isapprox(vcov(gmsparse), vcov(gmdense)) + ft_sparse_w = fit(LinearModel, X, y; wts=pweights(wts), method=dmethod) + ft_dense_w = fit(LinearModel, Matrix(X), y; wts=pweights(wts), method=dmethod) + @test coef(ft_sparse_w) ≈ coef(ft_dense_w) + @test vcov(ft_sparse_w) ≈ vcov(ft_dense_w) + + gmsparse = fit(GeneralizedLinearModel, X, y, Binomial(); method=dmethod, + wts=pweights(wts)) + gmdense = fit(GeneralizedLinearModel, Matrix(X), y, Binomial(); method=dmethod, + wts=pweights(wts)) + @test isapprox(deviance(gmsparse), deviance(gmdense)) + @test isapprox(coef(gmsparse), coef(gmdense)) + @test isapprox(vcov(gmsparse), vcov(gmdense)) end -@testset "Predict with $dmethod" for dmethod in (:cholesky, :qr) +@testset "Predict" for dmethod in (:cholesky, :qr) # Binomial GLM rng = StableRNG(123) X = rand(rng, 10, 2) @@ -1046,8 +1193,8 @@ end gm11_pred2 = predict(gm11, newX; interval=:confidence, interval_method=:delta) gm11_pred3 = predict(gm11, newX; interval=:confidence, interval_method=:transformation) @test gm11_pred1 == gm11_pred2.prediction == gm11_pred3.prediction ≈ newY - J = newX .* getindex.(GLM.inverselink.(LogitLink(), newX*coef(gm11)), 2) - se_pred = sqrt.(diag(J*vcov(gm11)*J')) + J = newX .* getindex.(GLM.inverselink.(LogitLink(), newX * coef(gm11)), 2) + se_pred = sqrt.(diag(J * vcov(gm11) * J')) @test gm11_pred2.lower ≈ gm11_pred2.prediction .- quantile(Normal(), 0.975) .* se_pred ≈ [0.20478201781547786, 0.2894172253195125, 0.17487705636545708, 0.024943206131575357, 0.41670326978944977] @@ -1069,16 +1216,18 @@ end @test predict!((prediction=similar(Y, size(newX, 1)), lower=similar(Y, size(newX, 1)), upper=similar(Y, size(newX, 1))), - gm11, newX, interval=:confidence, interval_method=:delta) == - predict(gm11, newX, interval=:confidence, interval_method=:delta) + gm11, newX; interval=:confidence, interval_method=:delta) == + predict(gm11, newX; interval=:confidence, interval_method=:delta) @test predict!((prediction=similar(Y, size(newX, 1)), lower=similar(Y, size(newX, 1)), upper=similar(Y, size(newX, 1))), - gm11, newX, interval=:confidence, interval_method=:transformation) == - predict(gm11, newX, interval=:confidence, interval_method=:transformation) + gm11, newX; interval=:confidence, interval_method=:transformation) == + predict(gm11, newX; interval=:confidence, interval_method=:transformation) @test_throws ArgumentError predict!((prediction=similar(Y, size(newX, 1)), lower=similar(Y, size(newX, 1)), - upper=similar(Y, size(newX, 1))), gm11, newX) + upper=similar(Y, size(newX, 1))), + gm11, + newX) @test_throws ArgumentError predict!(similar(Y, size(newX, 1)), gm11, newX, interval=:confidence) @test_throws DimensionMismatch predict!([1], gm11, newX) @@ -1094,7 +1243,7 @@ end gm12 = fit(GeneralizedLinearModel, X, Y, Binomial(); method=dmethod, offset=off) @test_throws ArgumentError predict(gm12, newX) - @test isapprox(predict(gm12, newX, offset=newoff), + @test isapprox(predict(gm12, newX; offset=newoff), logistic.(newX * coef(gm12) .+ newoff)) # Prediction from DataFrames @@ -1144,12 +1293,14 @@ end ismissing.(gm13m_pred3.prediction) == ismissing.(expected_pred) expected_lower = allowmissing(predict(gm13m, drep; - interval=:confidence, interval_method=:delta).lower) + interval=:confidence, + interval_method=:delta).lower) expected_lower[3] = missing @test collect(skipmissing(gm13m_pred2.lower)) ≈ collect(skipmissing(expected_lower)) @test ismissing.(gm13m_pred2.lower) == ismissing.(expected_lower) expected_upper = allowmissing(predict(gm13m, drep; - interval=:confidence, interval_method=:delta).upper) + interval=:confidence, + interval_method=:delta).upper) expected_upper[3] = missing @test collect(skipmissing(gm13m_pred2.upper)) ≈ collect(skipmissing(expected_upper)) @test ismissing.(gm13m_pred2.upper) == ismissing.(expected_upper) @@ -1158,20 +1309,20 @@ end Ylm = X * [0.8, 1.6] + 0.8randn(rng, 10) mm = fit(LinearModel, X, Ylm; method=dmethod) pred1 = predict(mm, newX) - pred2 = predict(mm, newX, interval=:confidence) - se_pred = sqrt.(diag(newX*vcov(mm)*newX')) + pred2 = predict(mm, newX; interval=:confidence) + se_pred = sqrt.(diag(newX * vcov(mm) * newX')) @test pred1 == pred2.prediction ≈ - [1.1382137814295972, 1.2097057044789292, 1.7983095679661645, 1.0139576473310072, - 0.9738243263215998] + [1.1382137814295972, 1.2097057044789292, 1.7983095679661645, + 1.0139576473310072, 0.9738243263215998] @test pred2.lower ≈ - pred2.prediction - quantile(TDist(dof_residual(mm)), 0.975)*se_pred ≈ - [0.5483482828723035, 0.3252331944785751, 0.6367574076909834, 0.34715818536935505, - -0.41478974520958345] + pred2.prediction - quantile(TDist(dof_residual(mm)), 0.975) * se_pred ≈ + [0.5483482828723035, 0.3252331944785751, 0.6367574076909834, + 0.34715818536935505, -0.41478974520958345] @test pred2.upper ≈ - pred2.prediction + quantile(TDist(dof_residual(mm)), 0.975)*se_pred ≈ - [1.7280792799868907, 2.0941782144792835, 2.9598617282413455, 1.6807571092926594, - 2.362438397852783] + pred2.prediction + quantile(TDist(dof_residual(mm)), 0.975) * se_pred ≈ + [1.7280792799868907, 2.0941782144792835, + 2.9598617282413455, 1.6807571092926594, 2.362438397852783] @test ndims(pred1) == 1 @@ -1179,37 +1330,39 @@ end @test ndims(pred2.lower) == 1 @test ndims(pred2.upper) == 1 - pred3 = predict(mm, newX, interval=:prediction) + pred3 = predict(mm, newX; interval=:prediction) @test pred1 == pred3.prediction ≈ - [1.1382137814295972, 1.2097057044789292, 1.7983095679661645, 1.0139576473310072, - 0.9738243263215998] + [1.1382137814295972, 1.2097057044789292, 1.7983095679661645, + 1.0139576473310072, 0.9738243263215998] @test pred3.lower ≈ pred3.prediction - - quantile(TDist(dof_residual(mm)), - 0.975)*sqrt.(diag(newX*vcov(mm)*newX') .+ deviance(mm)/dof_residual(mm)) ≈ + quantile(TDist(dof_residual(mm)), 0.975) * + sqrt.(diag(newX * vcov(mm) * newX') .+ deviance(mm) / dof_residual(mm)) ≈ [-1.6524055967145255, -1.6576810549645142, -1.1662846080257512, -1.7939306570282658, -2.0868723667435027] @test pred3.upper ≈ pred3.prediction + - quantile(TDist(dof_residual(mm)), - 0.975)*sqrt.(diag(newX*vcov(mm)*newX') .+ deviance(mm)/dof_residual(mm)) ≈ - [3.9288331595737196, 4.077092463922373, 4.762903743958081, 3.82184595169028, - 4.034521019386702] + quantile(TDist(dof_residual(mm)), 0.975) * + sqrt.(diag(newX * vcov(mm) * newX') .+ deviance(mm) / dof_residual(mm)) ≈ + [3.9288331595737196, 4.077092463922373, + 4.762903743958081, 3.82184595169028, 4.034521019386702] @test predict!(similar(Y, size(newX, 1)), mm, newX) == predict(mm, newX) @test predict!((prediction=similar(Y, size(newX, 1)), lower=similar(Y, size(newX, 1)), upper=similar(Y, size(newX, 1))), - mm, newX, interval=:confidence) == - predict(mm, newX, interval=:confidence) + mm, newX; interval=:confidence) == + predict(mm, newX; interval=:confidence) @test predict!((prediction=similar(Y, size(newX, 1)), lower=similar(Y, size(newX, 1)), upper=similar(Y, size(newX, 1))), - mm, newX, interval=:prediction) == - predict(mm, newX, interval=:prediction) + mm, newX; interval=:prediction) == + predict(mm, newX; interval=:prediction) @test_throws ArgumentError predict!((prediction=similar(Y, size(newX, 1)), lower=similar(Y, size(newX, 1)), - upper=similar(Y, size(newX, 1))), mm, newX) + upper=similar(Y, size(newX, 1))), + mm, + newX) @test_throws ArgumentError predict!(similar(Y, size(newX, 1)), mm, newX, interval=:confidence) @test_throws ArgumentError predict!(similar(Y, size(newX, 1)), mm, newX, @@ -1278,11 +1431,11 @@ end 1.0 2.0 1.0 -1.0] y = [1.0, 3.0, -2.0] - m1 = lm(x, y, dropcollinear=true, method=dmethod) - m2 = lm(x, y, dropcollinear=false, method=dmethod) + m1 = lm(x, y; dropcollinear=true, method=dmethod) + m2 = lm(x, y; dropcollinear=false, method=dmethod) - p1 = predict(m1, x, interval=:confidence) - p2 = predict(m2, x, interval=:confidence) + p1 = predict(m1, x; interval=:confidence) + p2 = predict(m2, x; interval=:confidence) @test p1.prediction ≈ p2.prediction @test p1.upper ≈ p2.upper @@ -1294,11 +1447,11 @@ end 1.0 -1000.0 4.6 1.0 5000 2.4] y = [1.0, 3.0, -2.0, 4.5] - m1 = lm(x, y, dropcollinear=true, method=dmethod) - m2 = lm(x, y, dropcollinear=false, method=dmethod) + m1 = lm(x, y; dropcollinear=true, method=dmethod) + m2 = lm(x, y; dropcollinear=false, method=dmethod) - p1 = predict(m1, x, interval=:confidence) - p2 = predict(m2, x, interval=:confidence) + p1 = predict(m1, x; interval=:confidence) + p2 = predict(m2, x; interval=:confidence) @test p1.prediction ≈ p2.prediction @test p1.upper ≈ p2.upper @@ -1310,20 +1463,20 @@ end 1.0 2.0 3.0 1.0 -1.0 0.0] y = [1.0, 3.0, -2.0] - m1 = lm(x, y, method=dmethod) - m2 = lm(x[:, 1:2], y, method=dmethod) + m1 = lm(x, y; method=dmethod) + m2 = lm(x[:, 1:2], y; method=dmethod) @test predict(m1) ≈ predict(m2) - @test_broken predict(m1, interval=:confidence) ≈ - predict(m2, interval=:confidence) - @test_broken predict(m1, interval=:prediction) ≈ - predict(m2, interval=:prediction) + @test_broken predict(m1; interval=:confidence) ≈ + predict(m2; interval=:confidence) + @test_broken predict(m1; interval=:prediction) ≈ + predict(m2; interval=:prediction) @test_throws ArgumentError predict(m1, x, interval=:confidence) @test_throws ArgumentError predict(m1, x, interval=:prediction) end @testset "GLM confidence intervals with $dmethod" for dmethod in (:cholesky, :qr) - X = [fill(1, 50) range(0, 1, length=50)] + X = [fill(1, 50) range(0, 1; length=50)] Y = vec([0 0 0 1 0 1 1 0 0 0 0 0 0 0 1 0 1 1 0 1 1 0 1 0 0 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1]) gm = fit(GeneralizedLinearModel, X, Y, Binomial(); method=dmethod) @@ -1335,29 +1488,29 @@ end R_glm_se = [0.09748766, 0.09808412, 0.07963897, 0.07495792, 0.05177654] - preds_transformation = predict(gm, newX, interval=:confidence, + preds_transformation = predict(gm, newX; interval=:confidence, interval_method=:transformation) - preds_delta = predict(gm, newX, interval=:confidence, interval_method=:delta) + preds_delta = predict(gm, newX; interval=:confidence, interval_method=:delta) @test preds_transformation.prediction == preds_delta.prediction - @test preds_transformation.prediction ≈ ggplot_prediction atol=1e-3 - @test preds_transformation.lower ≈ ggplot_lower atol=1e-3 - @test preds_transformation.upper ≈ ggplot_upper atol=1e-3 + @test preds_transformation.prediction ≈ ggplot_prediction atol = 1e-3 + @test preds_transformation.lower ≈ ggplot_lower atol = 1e-3 + @test preds_transformation.upper ≈ ggplot_upper atol = 1e-3 - @test preds_delta.upper .- preds_delta.lower ≈ 2 .* 1.96 .* R_glm_se atol=1e-3 + @test preds_delta.upper .- preds_delta.lower ≈ 2 .* 1.96 .* R_glm_se atol = 1e-3 @test_throws ArgumentError predict(gm, newX, interval=:confidence, interval_method=:undefined_method) @test_throws ArgumentError predict(gm, newX, interval=:undefined) end @testset "F test with $dmethod comparing to null model" for dmethod in (:cholesky, :qr) - d = DataFrame(Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2.0], + d = DataFrame(; Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2.0], Result=[1.1, 1.2, 1, 2.2, 1.9, 2, 0.9, 1, 1, 2.2, 2, 2], Other=categorical([1, 1, 2, 1, 2, 1, 3, 1, 1, 2, 2, 1])) - mod = lm(@formula(Result~Treatment), d) - othermod = lm(@formula(Result~Other), d) - nullmod = lm(@formula(Result~1), d) - bothmod = lm(@formula(Result~Other+Treatment), d) + mod = lm(@formula(Result ~ Treatment), d) + othermod = lm(@formula(Result ~ Other), d) + nullmod = lm(@formula(Result ~ 1), d) + bothmod = lm(@formula(Result ~ Other + Treatment), d) nointerceptmod = lm(reshape(d.Treatment, :, 1), d.Result) ft1 = ftest(mod) @@ -1394,13 +1547,13 @@ end end @testset "F test for model comparison" begin - d = DataFrame(Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2.0], + d = DataFrame(; Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2.0], Result=[1.1, 1.2, 1, 2.2, 1.9, 2, 0.9, 1, 1, 2.2, 2, 2], Other=categorical([1, 1, 2, 1, 2, 1, 3, 1, 1, 2, 2, 1])) - mod = lm(@formula(Result~Treatment), d) - othermod = lm(@formula(Result~Other), d) - nullmod = lm(@formula(Result~1), d) - bothmod = lm(@formula(Result~Other+Treatment), d) + mod = lm(@formula(Result ~ Treatment), d) + othermod = lm(@formula(Result ~ Other), d) + nullmod = lm(@formula(Result ~ 1), d) + bothmod = lm(@formula(Result ~ Other + Treatment), d) @test StatsModels.isnested(nullmod, mod) @test !StatsModels.isnested(othermod, mod) @test StatsModels.isnested(mod, bothmod) @@ -1408,7 +1561,7 @@ end @test StatsModels.isnested(othermod, bothmod) d.Sum = d.Treatment + (d.Other .== 1) - summod = lm(@formula(Result~Sum), d) + summod = lm(@formula(Result ~ Sum), d) @test StatsModels.isnested(summod, bothmod) ft1a = ftest(mod, nullmod) @@ -1435,7 +1588,7 @@ end [2] 3 1 0.1283 -3.1008 0.9603 0.9603 241.6234 <1e-07 ─────────────────────────────────────────────────────────────────""" - bigmod = lm(@formula(Result~Treatment+Other), d) + bigmod = lm(@formula(Result ~ Treatment + Other), d) ft2a = ftest(nullmod, mod, bigmod) @test isnan(ft2a.pval[1]) @test ft2a.pval[2] ≈ 2.481215056713184e-8 @@ -1467,14 +1620,14 @@ end @test_throws ArgumentError ftest(mod, bigmod, nullmod) @test_throws ArgumentError ftest(nullmod, bigmod, mod) @test_throws ArgumentError ftest(bigmod, nullmod, mod) - mod2 = lm(@formula(Result~Treatment), d[2:end, :]) + mod2 = lm(@formula(Result ~ Treatment), d[2:end, :]) @test_throws ArgumentError ftest(mod, mod2) end @testset "F test rounding error" begin # Data and Regressors - Y = [8.95554, 10.7601, 11.6401, 6.53665, 9.49828, 10.5173, 9.34927, 5.95772, 6.87394, - 9.56881, 13.0369, 10.1762] + Y = [8.95554, 10.7601, 11.6401, 6.53665, 9.49828, 10.5173, + 9.34927, 5.95772, 6.87394, 9.56881, 13.0369, 10.1762] X = [1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.0 0.0 1.0 0.0]' # Correlation matrix @@ -1492,12 +1645,12 @@ end 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 7.0] # Cholesky RL = cholesky(V).L - Yc = RL\Y + Yc = RL \ Y # Fit 1 (intercept) - Xc1 = RL\X[:, [1]] + Xc1 = RL \ X[:, [1]] mod1 = lm(Xc1, Yc) # Fit 2 (both) - Xc2 = RL\X + Xc2 = RL \ X mod2 = lm(Xc2, Yc) @test StatsModels.isnested(mod1, mod2) end @@ -1511,8 +1664,8 @@ end @test hcat(t.cols[5:6]...) == confint(lm1) # TODO: call coeftable(gm1, ...) directly once DataFrameRegressionModel # supports keyword arguments - t = coeftable(lm1, level=0.99) - @test hcat(t.cols[5:6]...) == confint(lm1, level=0.99) + t = coeftable(lm1; level=0.99) + @test hcat(t.cols[5:6]...) == confint(lm1; level=0.99) gm1 = fit(GeneralizedLinearModel, @formula(Counts ~ 1 + Outcome + Treatment), dobson, Poisson()) @@ -1522,8 +1675,8 @@ end @test t.cols[4] ≈ [5.4267674619082684e-71, 0.024647114627808674, 0.12848651178787643, 0.9999999999999981, 0.9999999999999999] @test hcat(t.cols[5:6]...) == confint(gm1) - t = coeftable(gm1, level=0.99) - @test hcat(t.cols[5:6]...) == confint(gm1, level=0.99) + t = coeftable(gm1; level=0.99) + @test hcat(t.cols[5:6]...) == confint(gm1; level=0.99) end @testset "Issue 84" begin @@ -1537,7 +1690,7 @@ end end @testset "Issue 117" begin - data = DataFrame(x=[1, 2, 3, 4], y=[24, 34, 44, 54]) + data = DataFrame(; x=[1, 2, 3, 4], y=[24, 34, 44, 54]) @test isapprox(coef(glm(@formula(y ~ x), data, Normal(), IdentityLink())), [14.0, 10]) end @@ -1553,9 +1706,8 @@ end @testset "Issue 224" begin rng = StableRNG(1009) # Make X slightly ill conditioned to amplify rounding errors - X = Matrix(qr(randn(rng, 100, 5)).Q)*Diagonal(10 .^ (-2.0:1.0:2.0))*Matrix(qr(randn(rng, - 5, - 5)).Q)' + X = Matrix(qr(randn(rng, 100, 5)).Q) * Diagonal(10 .^ (-2.0:1.0:2.0)) * + Matrix(qr(randn(rng, 5, 5)).Q)' y = randn(rng, 100) @test coef(glm(X, y, Normal(), IdentityLink())) ≈ coef(lm(X, y)) end @@ -1581,10 +1733,10 @@ end @testset "Issue #286 (separable data)" begin x = rand(1000) - df = DataFrame(y=x .> 0.5, x₁=x, x₂=rand(1000)) + df = DataFrame(; y=x .> 0.5, x₁=x, x₂=rand(1000)) @testset "Binomial with $l" for l in (LogitLink(), ProbitLink(), CauchitLink(), CloglogLink()) - @test deviance(glm(@formula(y ~ x₁ + x₂), df, Binomial(), l, maxiter=40)) < 1e-6 + @test deviance(glm(@formula(y ~ x₁ + x₂), df, Binomial(), l; maxiter=40)) < 1e-6 end end @@ -1603,14 +1755,14 @@ end end @testset "hasintercept" begin - d = DataFrame(Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2.0], + d = DataFrame(; Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2.0], Result=[1.1, 1.2, 1, 2.2, 1.9, 2, 0.9, 1, 1, 2.2, 2, 2], Other=categorical([1, 1, 2, 1, 2, 1, 3, 1, 1, 2, 2, 1])) - mod = lm(@formula(Result~Treatment), d) + mod = lm(@formula(Result ~ Treatment), d) @test hasintercept(mod) - nullmod = lm(@formula(Result~1), d) + nullmod = lm(@formula(Result ~ 1), d) @test hasintercept(nullmod) nointerceptmod = lm(reshape(d.Treatment, :, 1), d.Result) @@ -1627,8 +1779,8 @@ end @testset "Views with $dmethod method" for dmethod in (:cholesky, :qr) @testset "#444" begin X = randn(10, 2) - y = X*ones(2) + randn(10) - @test coef(glm(X, y, Normal(), IdentityLink(), method=dmethod)) == + y = X * ones(2) + randn(10) + @test coef(glm(X, y, Normal(), IdentityLink(); method=dmethod)) == coef(glm(view(X, 1:10, :), view(y, 1:10), Normal(), IdentityLink(); method=dmethod)) @@ -1639,32 +1791,32 @@ end lm4 = lm(view(x, :, :), view(y, :); method=dmethod) @test coef(lm1) == coef(lm2) == coef(lm3) == coef(lm4) - lm5 = lm(x, y, wts=w, method=dmethod) - lm6 = lm(x, view(y, :), method=dmethod, wts=w) - lm7 = lm(view(x, :, :), y, method=dmethod, wts=w) - lm8 = lm(view(x, :, :), view(y, :), method=dmethod, wts=w) - lm9 = lm(x, y, method=dmethod, wts=view(w, :)) - lm10 = lm(x, view(y, :), method=dmethod, wts=view(w, :)) - lm11 = lm(view(x, :, :), y, method=dmethod, wts=view(w, :)) - lm12 = lm(view(x, :, :), view(y, :), method=dmethod, wts=view(w, :)) + lm5 = lm(x, y; wts=fweights(w), method=dmethod) + lm6 = lm(x, view(y, :); method=dmethod, wts=fweights(w)) + lm7 = lm(view(x, :, :), y; method=dmethod, wts=fweights(w)) + lm8 = lm(view(x, :, :), view(y, :); method=dmethod, wts=fweights(w)) + lm9 = lm(x, y; method=dmethod, wts=fweights(view(w, :))) + lm10 = lm(x, view(y, :); method=dmethod, wts=fweights(view(w, :))) + lm11 = lm(view(x, :, :), y; method=dmethod, wts=fweights(view(w, :))) + lm12 = lm(view(x, :, :), view(y, :); method=dmethod, wts=fweights(view(w, :))) @test coef(lm5) == coef(lm6) == coef(lm7) == coef(lm8) == coef(lm9) == coef(lm10) == coef(lm11) == coef(lm12) x, y, w = rand(100, 2), rand(Bool, 100), rand(100) - glm1 = glm(x, y, Binomial(), method=dmethod) - glm2 = glm(x, view(y, :), Binomial(), method=dmethod) - glm3 = glm(view(x, :, :), y, Binomial(), method=dmethod) - glm4 = glm(view(x, :, :), view(y, :), Binomial(), method=dmethod) + glm1 = glm(x, y, Binomial(); method=dmethod) + glm2 = glm(x, view(y, :), Binomial(); method=dmethod) + glm3 = glm(view(x, :, :), y, Binomial(); method=dmethod) + glm4 = glm(view(x, :, :), view(y, :), Binomial(); method=dmethod) @test coef(glm1) == coef(glm2) == coef(glm3) == coef(glm4) - glm5 = glm(x, y, Binomial(), method=dmethod, wts=w) - glm6 = glm(x, view(y, :), Binomial(), method=dmethod, wts=w) - glm7 = glm(view(x, :, :), y, Binomial(), method=dmethod, wts=w) - glm8 = glm(view(x, :, :), view(y, :), Binomial(), method=dmethod, wts=w) - glm9 = glm(x, y, Binomial(), method=dmethod, wts=view(w, :)) - glm10 = glm(x, view(y, :), Binomial(), method=dmethod, wts=view(w, :)) - glm11 = glm(view(x, :, :), y, Binomial(), method=dmethod, wts=view(w, :)) - glm12 = glm(view(x, :, :), view(y, :), Binomial(), method=dmethod, wts=view(w, :)) + glm5 = glm(x, y, Binomial(); wts=fweights(w)) + glm6 = glm(x, view(y, :), Binomial(); wts=fweights(w)) + glm7 = glm(view(x, :, :), y, Binomial(); wts=fweights(w)) + glm8 = glm(view(x, :, :), view(y, :), Binomial(); wts=fweights(w)) + glm9 = glm(x, y, Binomial(); wts=fweights(view(w, :))) + glm10 = glm(x, view(y, :), Binomial(); wts=fweights(view(w, :))) + glm11 = glm(view(x, :, :), y, Binomial(); wts=fweights(view(w, :))) + glm12 = glm(view(x, :, :), view(y, :), Binomial(); wts=fweights(view(w, :))) @test coef(glm5) == coef(glm6) == coef(glm7) == coef(glm8) == coef(glm9) == coef(glm10) == coef(glm11) == coef(glm12) @@ -1703,6 +1855,7 @@ end @test GLM.linkinv(InverseLink(), 10) ≈ GLM.linkinv(PowerLink(-1), 10) @test GLM.linkinv(InverseSquareLink(), 10) ≈ GLM.linkinv(PowerLink(-2), 10) @test GLM.linkinv(PowerLink(1 / 3), 10) ≈ 1000.0 + @test GLM.linkinv(CauchitLink(), 0.0) ≈ 0.5 @test PowerLink(1 / 3) == PowerLink(1 / 3) @test isequal(PowerLink(1 / 3), PowerLink(1 / 3)) @@ -1714,7 +1867,7 @@ end mdl = glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(1 / 3); method=dmethod, rtol=1.0e-12, atol=1.0e-12) @test coef(mdl) ≈ [-0.05132238692134761, 0.01428684676273272, 0.15033126098228242] - @test stderror(mdl) ≈ [0.224095414423756, 0.003342439119757, 0.005838227761632] atol=1.0e-8 + @test stderror(mdl) ≈ [0.224095414423756, 0.003342439119757, 0.005838227761632] atol = 1.0e-8 @test dof(mdl) == 4 @test GLM.dispersion(mdl, true) ≈ 6.577062388609384 @test loglikelihood(mdl) ≈ -71.60507986987612 @@ -1772,42 +1925,138 @@ end end end +@testset "momentmatrix" begin + @testset "Poisson" begin + dobson = DataFrame(; Counts=[18.0, 17, 15, 20, 10, 20, 25, 13, 12], + Outcome=categorical(repeat(string.('A':'C'); outer=3)), + Treatment=categorical(repeat(string.('a':'c'); inner=3)), + Weights=[0.3, 0.2, 0.9, 0.8, 0.2, 0.3, 0.4, 0.8, 0.9]) + + f = @formula(Counts ~ 1 + Outcome + Treatment) + + gm_pois = fit(GeneralizedLinearModel, f, dobson, Poisson()) + + mm0_pois = [-2.9999999792805436 -0.0 -0.0 -0.0 -0.0; + 3.666666776430482 3.666666776430482 0.0 0.0 0.0; + -0.6666666790442577 -0.0 -0.6666666790442577 -0.0 -0.0; + -1.0000000123284563 -0.0 -0.0 -1.0000000123284563 -0.0; + -3.3333334972350723 -3.3333334972350723 -0.0 -3.3333334972350723 -0.0; + 4.333333497138949 0.0 4.333333497138949 4.333333497138949 0.0; + 4.000000005907649 0.0 0.0 0.0 4.000000005907649; + -0.33333334610634496 -0.33333334610634496 -0.0 -0.0 -0.33333334610634496; + -3.6666667654825043 -0.0 -3.6666667654825043 -0.0 -3.6666667654825043] + + gm_poisw = fit(GeneralizedLinearModel, f, dobson, Poisson(); + wts=fweights(dobson.Weights)) + + mm0_poisw = [-0.9624647521850039 -0.0 -0.0 -0.0 -0.0; + 0.6901050904949885 0.6901050904949885 0.0 0.0 0.0; + 0.2723596655008255 0.0 0.2723596655008255 0.0 0.0; + -0.9062167634177802 -0.0 -0.0 -0.9062167634177802 -0.0; + -0.7002548908882033 -0.7002548908882033 -0.0 -0.7002548908882033 -0.0; + 1.606471661159352 0.0 1.606471661159352 1.606471661159352 0.0; + 1.8686815106332157 0.0 0.0 0.0 1.8686815106332157; + 0.010149793505874801 0.010149793505874801 0.0 0.0 0.010149793505874801; + -1.8788313148033928 -0.0 -1.8788313148033928 -0.0 -1.8788313148033928] + @test mm0_pois ≈ GLM.momentmatrix(gm_pois) atol = 1e-06 + @test mm0_poisw ≈ GLM.momentmatrix(gm_poisw) atol = 1e-06 + end + @testset "Binomial" begin + f = @formula(admit ~ 1 + rank) + gm_bin = fit(GeneralizedLinearModel, f, admit_agr, Binomial(); rtol=1e-8) + gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(); + wts=fweights(admit_agr.count), rtol=1e-08) + + mm0_bin = [-0.5 -0.0 -0.0 -0.0 + -0.5 -0.5 -0.0 -0.0 + -0.5 -0.0 -0.5 -0.0 + -0.5 -0.0 -0.0 -0.5 + 0.5 0.0 0.0 0.0 + 0.5 0.5 0.0 0.0 + 0.5 0.0 0.5 0.0 + 0.5 0.0 0.0 0.5] + + mm0_binw = [-15.1475 -0.0 -0.0 -0.0 + -34.6887 -34.6887 -0.0 -0.0 + -21.5207 -0.0 -21.5207 -0.0 + -9.85075 -0.0 -0.0 -9.85075 + 15.1475 0.0 0.0 0.0 + 34.6887 34.6887 0.0 0.0 + 21.5207 0.0 21.5207 0.0 + 9.85075 0.0 0.0 9.85075] + + @test mm0_bin ≈ GLM.momentmatrix(gm_bin) + @test mm0_binw ≈ GLM.momentmatrix(gm_binw) atol = 1e-03 + end + + @testset "Binomial ProbitLink" begin + f = @formula(admit ~ 1 + rank) + gm_bin = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), ProbitLink()) + gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), ProbitLink(); + wts=fweights(admit_agr.count), rtol=1e-8) + + mm0_bin = [-0.7978846 0.0000000 0.0000000 0.0000000 + -0.7978846 -0.7978846 0.0000000 0.0000000 + -0.7978846 0.0000000 -0.7978846 0.0000000 + -0.7978846 0.0000000 0.0000000 -0.7978846 + 0.7978846 0.0000000 0.0000000 0.0000000 + 0.7978846 0.7978846 0.0000000 0.0000000 + 0.7978846 0.0000000 0.7978846 0.0000000 + 0.7978846 0.0000000 0.0000000 0.7978846] + + mm0_binw = [-24.20695 0.00000 0.00000 0.00000 + -56.36158 -56.36158 0.00000 0.00000 + -36.86681 0.00000 -36.86681 0.00000 + -17.52584 0.00000 0.00000 -17.52584 + 24.20695 0.00000 0.00000 0.00000 + 56.36158 56.36158 0.00000 0.00000 + 36.86681 0.00000 36.86681 0.00000 + 17.52584 0.00000 0.00000 17.52584] + + @test mm0_bin ≈ GLM.momentmatrix(gm_bin) rtol = 1e-06 + @test mm0_binw ≈ GLM.momentmatrix(gm_binw) rtol = 1e-05 + end +end + +include("analytic_weights.jl") +include("probability_weights.jl") + @testset "contrasts argument" begin # DummyCoding (default) - m = lm(@formula(y~x), (y=1:25, x=repeat(1:5, 5)), - contrasts=Dict(:x=>DummyCoding())) - mcat = lm(@formula(y~x), (y=1:25, x=categorical(repeat(1:5, 5)))) - gm = glm(@formula(y~x), (y=1:25, x=repeat(1:5, 5)), Normal(), IdentityLink(), - contrasts=Dict(:x=>DummyCoding())) - gmcat = glm(@formula(y~x), (y=1:25, x=categorical(repeat(1:5, 5))), + m = lm(@formula(y ~ x), (y=1:25, x=repeat(1:5, 5)); + contrasts=Dict(:x => DummyCoding())) + mcat = lm(@formula(y ~ x), (y=1:25, x=categorical(repeat(1:5, 5)))) + gm = glm(@formula(y ~ x), (y=1:25, x=repeat(1:5, 5)), Normal(), IdentityLink(); + contrasts=Dict(:x => DummyCoding())) + gmcat = glm(@formula(y ~ x), (y=1:25, x=categorical(repeat(1:5, 5))), Normal(), IdentityLink()) @test coef(m) == coef(mcat) ≈ [11, 1, 2, 3, 4] @test coef(gm) == coef(gmcat) ≈ [11, 1, 2, 3, 4] - m = fit(LinearModel, @formula(y~x), (y=1:25, x=repeat(1:5, 5)), - contrasts=Dict(:x=>DummyCoding())) - mcat = fit(LinearModel, @formula(y~x), (y=1:25, x=categorical(repeat(1:5, 5)))) - gm = fit(GeneralizedLinearModel, @formula(y~x), - (y=1:25, x=repeat(1:5, 5)), Normal(), IdentityLink(), - contrasts=Dict(:x=>DummyCoding())) - gmcat = fit(GeneralizedLinearModel, @formula(y~x), + m = fit(LinearModel, @formula(y ~ x), (y=1:25, x=repeat(1:5, 5)); + contrasts=Dict(:x => DummyCoding())) + mcat = fit(LinearModel, @formula(y ~ x), (y=1:25, x=categorical(repeat(1:5, 5)))) + gm = fit(GeneralizedLinearModel, @formula(y ~ x), + (y=1:25, x=repeat(1:5, 5)), Normal(), IdentityLink(); + contrasts=Dict(:x => DummyCoding())) + gmcat = fit(GeneralizedLinearModel, @formula(y ~ x), (y=1:25, x=categorical(repeat(1:5, 5))), Normal(), IdentityLink()) @test coef(m) == coef(mcat) ≈ [11, 1, 2, 3, 4] @test coef(gm) == coef(gmcat) ≈ [11, 1, 2, 3, 4] # EffectsCoding - m = lm(@formula(y~x), (y=1:25, x=repeat(1:5, 5)), - contrasts=Dict(:x=>EffectsCoding())) - gm = glm(@formula(y~x), (y=1:25, x=repeat(1:5, 5)), Normal(), IdentityLink(), - contrasts=Dict(:x=>EffectsCoding())) + m = lm(@formula(y ~ x), (y=1:25, x=repeat(1:5, 5)); + contrasts=Dict(:x => EffectsCoding())) + gm = glm(@formula(y ~ x), (y=1:25, x=repeat(1:5, 5)), Normal(), IdentityLink(); + contrasts=Dict(:x => EffectsCoding())) @test coef(m) ≈ coef(gm) ≈ [13, -1, 0, 1, 2] - m = fit(LinearModel, @formula(y~x), (y=1:25, x=repeat(1:5, 5)), - contrasts=Dict(:x=>EffectsCoding())) - gm = fit(GeneralizedLinearModel, @formula(y~x), - (y=1:25, x=repeat(1:5, 5)), Normal(), IdentityLink(), - contrasts=Dict(:x=>EffectsCoding())) + m = fit(LinearModel, @formula(y ~ x), (y=1:25, x=repeat(1:5, 5)); + contrasts=Dict(:x => EffectsCoding())) + gm = fit(GeneralizedLinearModel, @formula(y ~ x), + (y=1:25, x=repeat(1:5, 5)), Normal(), IdentityLink(); + contrasts=Dict(:x => EffectsCoding())) @test coef(m) ≈ coef(gm) ≈ [13, -1, 0, 1, 2] end @@ -1818,17 +2067,17 @@ end m = glm(rand(10, 2), rand(10), Normal(), IdentityLink()) @test_throws ArgumentError formula(m) - df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3]) + df = DataFrame(; x=["a", "b", "c"], y=[1, 2, 3]) m = lm(@formula(y ~ x), df) @test formula(m)::FormulaTerm === m.formula - df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3]) + df = DataFrame(; x=["a", "b", "c"], y=[1, 2, 3]) m = glm(@formula(y ~ x), df, Normal(), IdentityLink()) @test formula(m)::FormulaTerm === m.formula end @testset "dropcollinear in GLM with Cholesky" begin - data = DataFrame(x1=[4, 5, 9, 6, 5], x2=[5, 3, 6, 7, 1], + data = DataFrame(; x1=[4, 5, 9, 6, 5], x2=[5, 3, 6, 7, 1], x3=[4.2, 4.6, 8.4, 6.2, 4.2], y=[14, 14, 24, 20, 11]) @testset "Check normal with identity link against equivalent linear model" begin @@ -1882,7 +2131,7 @@ end dfrm = DataFrame() dfrm.x1 = randn(StableRNG(123), num_rows) dfrm.x2 = randn(StableRNG(1234), num_rows) - dfrm.x3 = 2*dfrm.x1 + 3*dfrm.x2 + dfrm.x3 = 2 * dfrm.x1 + 3 * dfrm.x2 dfrm.y = Int.(randn(StableRNG(12345), num_rows) .> 0) @testset "Test Logistic Regression Outputs from R" begin @@ -1901,17 +2150,18 @@ end @test GLM.dispersion(mdl, true) ≈ 1 @test predict(mdl)[1:3] ≈ [0.4241893070433117, 0.3754516361306202, 0.6327877688720133] atol = 1.0E-6 - @test confint(mdl)[1:2, 1:2] ≈ [-0.5493329715011036 0.26350316142056085; - -0.3582545657827583 0.64313795309765587] atol = 1.0E-1 + @test confint(mdl)[1:2, + 1:2] ≈ [-0.5493329715011036 0.26350316142056085; + -0.3582545657827583 0.64313795309765587] atol = 1.0E-1 end @testset "`rankdeficient` test case of lm in glm" begin rng = StableRNG(1234321) # an example of rank deficiency caused by a missing cell in a table - dfrm = DataFrame([categorical(repeat(string.('A':'D'), inner=6)), - categorical(repeat(string.('a':'c'), inner=2, outer=4))], + dfrm = DataFrame([categorical(repeat(string.('A':'D'); inner=6)), + categorical(repeat(string.('a':'c'); inner=2, outer=4))], [:G, :H]) - f = @formula(0 ~ 1 + G*H) + f = @formula(0 ~ 1 + G * H) X = ModelMatrix(ModelFrame(f, dfrm)).m y = X * (1:size(X, 2)) + 0.1 * randn(rng, size(X, 1)) inds = deleteat!(collect(1:length(y)), 7:8) @@ -1938,10 +2188,10 @@ end @testset "`rankdeficient` test in GLM with Gamma distribution" begin rng = StableRNG(1234321) # an example of rank deficiency caused by a missing cell in a table - dfrm = DataFrame([categorical(repeat(string.('A':'D'), inner=6)), - categorical(repeat(string.('a':'c'), inner=2, outer=4))], + dfrm = DataFrame([categorical(repeat(string.('A':'D'); inner=6)), + categorical(repeat(string.('a':'c'); inner=2, outer=4))], [:G, :H]) - f = @formula(0 ~ 1 + G*H) + f = @formula(0 ~ 1 + G * H) X = ModelMatrix(ModelFrame(f, dfrm)).m y = X * (1:size(X, 2)) + 0.1 * randn(rng, size(X, 1)) inds = deleteat!(collect(1:length(y)), 7:8) @@ -2015,11 +2265,11 @@ end lm2 = lm(@formula(Prestige ~ 1 + Income + Education + Type), duncan) @test termnames(lm2)[2] != coefnames(lm2) - @test gvif(lm2; scale=true) ≈ [1.486330, 2.301648, 1.502666] atol=1e-4 + @test gvif(lm2; scale=true) ≈ [1.486330, 2.301648, 1.502666] atol = 1e-4 gm2 = glm(@formula(Prestige ~ 1 + Income + Education + Type), duncan, Normal()) @test termnames(gm2)[2] != coefnames(gm2) - @test gvif(gm2; scale=true) ≈ [1.486330, 2.301648, 1.502666] atol=1e-4 + @test gvif(gm2; scale=true) ≈ [1.486330, 2.301648, 1.502666] atol = 1e-4 # the VIF definition depends on modelmatrix, vcov and stderror returning valid # values. It doesn't care about links, offsets, etc. as long as the model matrix, @@ -2037,13 +2287,13 @@ end X = [filip_data_df.x[i]^j for i in 1:length(filip_data_df.x), j in 0:10] # No weights - f1 = lm(X, filip_data_df.y, dropcollinear=false, method=:qr) + f1 = lm(X, filip_data_df.y; dropcollinear=false, method=:qr) @test coef(f1) ≈ filip_estimates_df.estimate rtol = 1e-7 @test stderror(f1) ≈ filip_estimates_df.se rtol = 1e-7 # Weights - f2 = lm(X, filip_data_df.y, dropcollinear=false, method=:qr, - wts=ones(length(filip_data_df.y))) + f2 = lm(X, filip_data_df.y; dropcollinear=false, + method=:qr, wts=uweights(length(filip_data_df.y))) @test coef(f2) ≈ filip_estimates_df.estimate rtol = 1e-7 @test stderror(f2) ≈ filip_estimates_df.se rtol = 1e-7 end @@ -2051,12 +2301,104 @@ end @testset "Non-finite deviance. Issue 417" begin X_fn = Downloads.download("https://github.com/JuliaStats/GLM.jl/files/6279630/x.txt") y_fn = Downloads.download("https://github.com/JuliaStats/GLM.jl/files/6279632/y.txt") - X_df = CSV.read(X_fn, DataFrame, header=false) - y_df = CSV.read(y_fn, DataFrame, header=false) + X_df = CSV.read(X_fn, DataFrame; header=false) + y_df = CSV.read(y_fn, DataFrame; header=false) df = hcat(y_df, select(X_df, Not("Column1"))) ft = glm(@formula(Column1 ~ Column2 + Column3 + Column4), df, Gamma(), LogLink()) @test coef(ft) ≈ [9.648767705301294, -0.11274823562143056, 0.1907889126252095, -0.8123086879222496] - @test_throws DomainError glm(@formula(Column1 ~ Column2 + Column3 + Column4), df, - Gamma(), LogLink(), start=fill(NaN, 4)) + @test_throws DomainError glm(@formula(Column1 ~ Column2 + Column3 + Column4), + df, Gamma(), LogLink(), start=fill(NaN, 4)) +end + +@testset "Test weighting with collinear columns" begin + form.CarbC = form.Carb + form.awts = [0.6, 0.3, 0.6, 0.3, 0.6, 0.3] + form.fwts = [6, 3, 6, 3, 6, 3] + lm0 = fit(LinearModel, @formula(OptDen ~ Carb), form; wts=aweights(form.awts), + method=:qr) + lm1 = fit(LinearModel, @formula(OptDen ~ Carb + CarbC), + form; wts=aweights(form.awts), method=:qr) + @test coef(lm0) ≈ coef(lm1)[1:2] + @test stderror(lm0) ≈ stderror(lm1)[1:2] + @test isnan(stderror(lm1)[3]) + lm0 = fit(LinearModel, @formula(OptDen ~ Carb), form; wts=pweights(form.awts), + method=:qr) + lm1 = fit(LinearModel, @formula(OptDen ~ Carb + CarbC), + form; wts=pweights(form.awts), method=:qr) + @test coef(lm0) ≈ coef(lm1)[1:2] + @test stderror(lm0) ≈ stderror(lm1)[1:2] + @test isnan(stderror(lm1)[3]) + lm0 = fit(LinearModel, @formula(OptDen ~ Carb), form; wts=fweights(form.fwts), + method=:qr) + lm1 = fit(LinearModel, @formula(OptDen ~ Carb + CarbC), + form; wts=fweights(form.fwts), method=:qr) + @test coef(lm0) ≈ coef(lm1)[1:2] + @test stderror(lm0) ≈ stderror(lm1)[1:2] + @test isnan(stderror(lm1)[3]) +end + +rng = StableRNG(123) +df = DataFrame(; x_1=randn(rng, 10), x_2=randn(rng, 10), y=randn(rng, 10)) +df.xx_1 = df.x_1 +df.xx_2 = df.x_2 +df.d = rand(rng, 0:1, 10) +df.w = rand(rng, 10) +frm0 = @formula(y ~ x_1 + x_2) +frm1 = @formula(y ~ x_1 + xx_2 + +x_2 + xx_1) +frmp0 = @formula(d ~ x_1 + x_2) +frmp1 = @formula(d ~ x_1 + xx_2 + +x_2 + xx_1) + +@testset "Leverage unweighted" for method in (:qr, :cholesky) + lev0 = [0.2346366962678214; 0.6633984457928059; 0.32460236947851806; + 0.1543698142163501; 0.3762092067703499; 0.4887705577596249; + 0.15170408132550545; 0.15279492673405848; 0.16851296355750492; + 0.28500093809746074] + + lev0_pr = [0.28923503046426724; 0.0006968399611682526; + 0.6040870179390236; 0.222176173808432; + 0.06247295465277078; 0.8226912702704338; + 0.21412449742521156; 0.2160509316358758; + 0.23269115629631995; 0.33577412754649705] + + lm0 = fit(LinearModel, frm0, df; method=method) + lm1 = fit(LinearModel, frm1, df; method=method) + @test leverage(lm0) ≈ leverage(lm1) + @test lev0 ≈ leverage(lm1) + glm1 = fit(GeneralizedLinearModel, frm1, df, Normal(), IdentityLink(); method=method) + @test lev0 ≈ leverage(glm1) + probit0 = glm(frmp0, df, Binomial(), ProbitLink(); method=method) + probit = glm(frmp1, df, Binomial(), ProbitLink(); method=method) + @test leverage(probit) ≈ leverage(probit0) + @test lev0_pr ≈ leverage(probit0) rtol = 1e-03 +end + +@testset "Leverage weighted" for method in (:qr, :cholesky) + lm0 = fit(LinearModel, frm0, df; method=method, wts=fweights(df.w)) + lm1 = fit(LinearModel, frm1, df; method=method, wts=fweights(df.w)) + + lev0 = [0.4546669409864052, 0.39220506613766826, 0.31067464842874659, + 0.16105201633463462, 0.45458434896240396, 0.43751245519667181, + 0.12193399045053441, 0.12312180988271218, 0.22090225888834489, + 0.32334646473187784] + + lev0_pr = [0.28660353231987495, 2.6405172015486347e-05, 0.62180044570022475, + 0.25772930451725845, 0.058608663568656121, 0.78417628710278586, + 0.16615273053689983, 0.16787702318659792, 0.30440874803670093, + 0.35261685985898611] + + @test leverage(lm0) ≈ leverage(lm1) + @test lev0 ≈ leverage(lm1) + glm1 = fit(GeneralizedLinearModel, frm1, df, Normal(), + IdentityLink(); method=method, wts=fweights(df.w)) + @test lev0 ≈ leverage(glm1) + probit0 = glm(frmp0, df, Binomial(), ProbitLink(); method=method, wts=aweights(df.w), + atol=1e-10, rtol=1e-10, minstepfac=1e-10) + probit = glm(frmp1, df, Binomial(), ProbitLink(); method=method, wts=aweights(df.w), + atol=1e-10, rtol=1e-10, minstepfac=1e-10) + @test leverage(probit) ≈ leverage(probit0) + @test cooksdistance(probit0) ≈ [9.548585e-04; 3.483554e-13; 2.982745e-01; 4.166364e-01; + 2.100668e-05; 2.648063e+00; 2.165093e-03; 2.321434e-03; + 7.665988e-02; 3.416970e-02] rtol = 1e-04 + @test lev0_pr ≈ leverage(probit0) rtol = 1e-03 end