Coverage for nilearn/glm/regression.py: 33% (100 statements)
coverage.py v7.9.1, created at 2025-06-20 10:58 +0200
1"""Implement some standard regression models: OLS and WLS \
2models, as well as an AR(p) regression model.
4Models are specified with a design matrix and are fit using their
5'fit' method.
7Subclasses that have more complicated covariance matrices
8should write over the 'whiten' method as the fit method
9prewhitens the response by calling 'whiten'.
11General reference for regression models:
13'Introduction to Linear Regression Analysis', Douglas C. Montgomery,
14 Elizabeth A. Peck, G. Geoffrey Vining. Wiley, 2006.
16"""
__docformat__ = "restructuredtext en"

import numpy as np
import scipy.linalg as spl
from nibabel.onetime import auto_attr
from numpy.linalg import matrix_rank

from nilearn.glm._utils import positive_reciprocal
from nilearn.glm.model import LikelihoodModelResults


class OLSModel:
    """A simple ordinary least squares model.

    Parameters
    ----------
    design : array-like
        This is your design matrix. Data are assumed to be column ordered
        with observations in rows.

    Methods
    -------
    model.__init__(design)
    model.logL(beta, Y, nuisance=None)

    Attributes
    ----------
    design : ndarray
        This is the design, or X, matrix.

    whitened_design : ndarray
        This is the whitened design matrix.
        `design` == `whitened_design` by default for the OLSModel,
        though models that inherit from the OLSModel will whiten the design.

    calc_beta : ndarray
        This is the Moore-Penrose pseudoinverse of the whitened design matrix.

    normalized_cov_beta : ndarray
        ``np.dot(calc_beta, calc_beta.T)``

    df_residuals : scalar
        Degrees of freedom of the residuals. Number of observations less the
        rank of the design.

    df_model : scalar
        Degrees of freedom of the model. The rank of the design.
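
    Examples
    --------
    A minimal sketch with a toy design holding an intercept and one
    regressor (values illustrative only):

    >>> import numpy as np
    >>> X = np.column_stack([np.ones(4), np.arange(4)])
    >>> model = OLSModel(X)
    >>> results = model.fit(np.array([0.0, 1.1, 1.9, 3.2]))
    >>> results.theta.shape
    (2,)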
66 """

    def __init__(self, design):
        super().__init__()
        self.initialize(design)

    def initialize(self, design):
        """Construct instance."""
        # PLEASE don't assume we have a constant...
        # TODO: handle case for nonconstant regression
        self.design = design
        self.whitened_design = self.whiten(self.design)
        self.calc_beta = spl.pinv(self.whitened_design)
        self.normalized_cov_beta = np.dot(
            self.calc_beta, np.transpose(self.calc_beta)
        )
        self.df_total = self.whitened_design.shape[0]

        eps = np.abs(self.design).sum() * np.finfo(np.float64).eps
        self.df_model = matrix_rank(self.design, eps)
        self.df_residuals = self.df_total - self.df_model

    def logL(self, beta, Y, nuisance=None):  # noqa: N802
        r"""Return the value of the loglikelihood function at beta.

        Given the whitened design matrix, the loglikelihood is evaluated
        at the parameter vector, :term:`beta<Beta>`,
        for the dependent variable, Y,
        and the nuisance parameter, sigma :footcite:t:`Greene2003`.

        Parameters
        ----------
        beta : ndarray
            The parameter estimates. Must have one entry per column of the
            design (equal to ``df_model`` for a full-rank design).

        Y : ndarray
            The dependent variable.

        nuisance : :obj:`dict`, default=None
            A dict with key 'sigma', which is an optional estimate of sigma.
            If None, defaults to its maximum likelihood estimate
            (with beta fixed) as
            ``sum((Y - X*beta)**2) / n``, where n=Y.shape[0], X=self.design.

        Returns
        -------
        loglf : float
            The value of the loglikelihood function.

        Notes
        -----
        The log-likelihood function is defined as

        .. math::

            \ell(\beta,\sigma,Y)=
            -\frac{n}{2}\log(2\pi\sigma^2) - \|Y-X\beta\|^2/(2\sigma^2)

        The parameter :math:`\sigma` above is what is sometimes referred to
        as a nuisance parameter. That is, the likelihood is considered as a
        function of :math:`\beta`, but to evaluate it, a value of
        :math:`\sigma` is needed.

        If :math:`\sigma` is not provided,
        then its maximum likelihood estimate:

        .. math::

            \hat{\sigma}^2(\beta) = \frac{\text{SSE}(\beta)}{n}

        is plugged in. This likelihood is now a function of only :math:`\beta`
        and is technically referred to as a profile-likelihood.

        References
        ----------
        .. footbibliography::
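
        Examples
        --------
        A minimal sketch evaluating the profile log-likelihood at the
        fitted coefficients (toy data, illustrative only):

        >>> import numpy as np
        >>> X = np.column_stack([np.ones(4), np.arange(4)])
        >>> model = OLSModel(X)
        >>> Y = np.array([0.0, 1.1, 1.9, 3.2])
        >>> ll = model.logL(model.fit(Y).theta, Y)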
143 """
        # This is overwriting an abstract method of LikelihoodModel
        X = self.whitened_design
        wY = self.whiten(Y)
        r = wY - np.dot(X, beta)
        n = self.df_total
        SSE = (r**2).sum(0)
        sigmasq = SSE / n if nuisance is None else nuisance["sigma"]
        loglf = -n / 2.0 * np.log(2 * np.pi * sigmasq) - SSE / (2 * sigmasq)
        return loglf

    def whiten(self, X):
        """Whiten design matrix.

        Parameters
        ----------
        X : array
            Design matrix.

        Returns
        -------
        whitened_X : array
            This matrix is the matrix whose pseudoinverse is ultimately
            used in estimating the coefficients. For OLSModel, it
            returns X unchanged. For WLS and AR models, it pre-applies
            a square root of the inverse covariance matrix to X.

        """
        return X

    def fit(self, Y):
        """Fit model to data `Y`.

        Full fit of the model including estimate of covariance matrix,
        (whitened) residuals and scale.

        Parameters
        ----------
        Y : array-like
            The dependent variable for the Least Squares problem.

        Returns
        -------
        fit : RegressionResults

        """
        # Other estimates of the covariance matrix for a heteroscedastic
        # regression model can be implemented in WLSmodel. (Weighted least
        # squares models assume covariance is diagonal, i.e. heteroscedastic).
        wY = self.whiten(Y)
        beta = np.dot(self.calc_beta, wY)
        wresid = wY - np.dot(self.whitened_design, beta)
        dispersion = np.sum(wresid**2, 0) / (
            self.whitened_design.shape[0] - self.whitened_design.shape[1]
        )
        lfit = RegressionResults(
            beta,
            Y,
            self,
            wY,
            wresid,
            dispersion=dispersion,
            cov=self.normalized_cov_beta,
        )
        return lfit


class ARModel(OLSModel):
    """A regression model with an AR(p) covariance structure.

    In terms of a LikelihoodModel, the parameters
    are beta, the usual regression parameters,
    and sigma, a scalar nuisance parameter that
    shows up as a multiplier in front of the AR(p) covariance.

    Parameters
    ----------
    design : ndarray
        2D array with design matrix.

    rho : :obj:`int` or array-like
        If int, gives order of model, and initializes rho to zeros. If
        ndarray, gives initial estimate of rho. Be careful as ``ARModel(X,
        1) != ARModel(X, 1.0)``.
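
    Examples
    --------
    A short sketch of the int versus float distinction for ``rho``
    (toy design, illustrative only):

    >>> import numpy as np
    >>> X = np.column_stack([np.ones(4), np.arange(4)])
    >>> ARModel(X, 2).rho    # int: order 2, coefficients start at zero
    array([0., 0.])
    >>> ARModel(X, 0.5).rho  # float: order 1, initial coefficient 0.5
    array([0.5])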
229 """

    def __init__(self, design, rho):
        if isinstance(rho, int):
            self.order = rho
            self.rho = np.zeros(self.order, np.float64)
        else:
            self.rho = np.squeeze(np.asarray(rho))
            if len(self.rho.shape) not in [0, 1]:
                raise ValueError("AR parameters must be a scalar or a vector")
            if self.rho.shape == ():
                self.rho.shape = (1,)
            self.order = self.rho.shape[0]
        super().__init__(design)

    def whiten(self, X):
        """Whiten a series of columns according to AR(p) covariance structure.

        Parameters
        ----------
        X : array-like of shape (n_observations,) or (n_observations, n_features)
            Array to whiten along its first axis.

        Returns
        -------
        whitened_X : ndarray
            X whitened with order self.order AR.
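
        Examples
        --------
        A small sketch with a single AR(1) coefficient of 0.5
        (illustrative only):

        >>> import numpy as np
        >>> X = np.column_stack([np.ones(4), np.arange(4)])
        >>> model = ARModel(X, np.array([0.5]))
        >>> model.whiten(np.array([1.0, 2.0, 3.0, 4.0]))
        array([1. , 1.5, 2. , 2.5])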
257 """
        X = np.asarray(X, np.float64)
        whitened_X = X.copy()
        for i in range(self.order):
            whitened_X[(i + 1) :] = (
                whitened_X[(i + 1) :] - self.rho[i] * X[: -(i + 1)]
            )
        return whitened_X


class RegressionResults(LikelihoodModelResults):
    """Summarize the fit of a linear regression model.

    It handles the output of contrasts, estimates of covariance, etc.
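
    Examples
    --------
    Typically obtained from ``OLSModel.fit`` rather than constructed
    directly (toy data, illustrative only):

    >>> import numpy as np
    >>> X = np.column_stack([np.ones(4), np.arange(4)])
    >>> results = OLSModel(X).fit(np.array([0.0, 1.1, 1.9, 3.2]))
    >>> results.residuals.shape
    (4,)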
272 """

    def __init__(
        self,
        theta,
        Y,
        model,
        whitened_Y,
        whitened_residuals,
        cov=None,
        dispersion=1.0,
        nuisance=None,
    ):
        """See LikelihoodModelResults constructor.

        The only difference is that the whitened Y and residual values
        are stored for a regression model.

        """
        LikelihoodModelResults.__init__(
            self, theta, Y, model, cov, dispersion, nuisance
        )
        self.whitened_Y = whitened_Y
        self.whitened_residuals = whitened_residuals
        self.whitened_design = model.whitened_design

    # @auto_attr stores the value as an object attribute after the initial
    # call, which gives better performance than @property
    @auto_attr
    def residuals(self):
        """Residuals from the fit."""
        return self.Y - self.predicted

    @auto_attr
    def normalized_residuals(self):
        """Residuals, normalized to have unit length.

        See :footcite:t:`Montgomery2006` and :footcite:t:`Davidson2004`.

        Notes
        -----
        Is this supposed to return "standardized residuals,"
        residuals standardized
        to have mean zero and approximately unit variance?

            d_i = e_i / sqrt(MS_E)

        where MS_E = SSE / (n - k).

        References
        ----------
        .. footbibliography::

        """
        return self.residuals * positive_reciprocal(np.sqrt(self.dispersion))

    @auto_attr
    def predicted(self):
        """Return linear predictor values from a design matrix."""
        # the LikelihoodModelResults has parameters named 'theta'
        beta = self.theta
        X = self.whitened_design
        return np.dot(X, beta)

    @auto_attr
    def SSE(self):  # noqa: N802
        """Error sum of squares.

        If not from an OLS model this is "pseudo"-SSE.
        """
        return (self.whitened_residuals**2).sum(0)

    @auto_attr
    def r_square(self):
        """Proportion of explained variance.

        If not from an OLS model this is "pseudo"-R2.
        """
        return np.var(self.predicted, 0) / np.var(self.whitened_Y, 0)

    @auto_attr
    def MSE(self):  # noqa: N802
        """Return Mean square (error)."""
        return self.SSE / self.df_residuals


class SimpleRegressionResults(LikelihoodModelResults):
    """Contain only information of the model fit necessary \
    for :term:`contrast` computation.

    It is intended to save memory when details of the model are unnecessary.
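
    Examples
    --------
    A hedged sketch of shrinking a full fit down to a lighter results
    object (toy data, illustrative only):

    >>> import numpy as np
    >>> X = np.column_stack([np.ones(4), np.arange(4)])
    >>> full = OLSModel(X).fit(np.array([0.0, 1.1, 1.9, 3.2]))
    >>> light = SimpleRegressionResults(full)
    >>> light.predicted(X).shape
    (4,)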
364 """

    def __init__(self, results):
        """See LikelihoodModelResults constructor.

        Unlike RegressionResults, the data, whitened Y and residual
        values are not stored, to keep the object light-weight.
        """
        self.theta = results.theta
        self.cov = results.cov
        self.dispersion = results.dispersion
        self.nuisance = results.nuisance

        self.df_total = results.Y.shape[0]
        self.df_model = results.model.df_model
        # TODO: put this as a parameter of LikelihoodModel
        self.df_residuals = self.df_total - self.df_model

    def logL(self):  # noqa: N802
        """Return the maximized log-likelihood."""
        raise NotImplementedError(
            "logL is not implemented for SimpleRegressionResults. "
            "Use RegressionResults instead."
        )

    def residuals(self, Y, X):
        """Residuals from the fit."""
        return Y - self.predicted(X)

    def normalized_residuals(self, Y, X):
        """Residuals, normalized to have unit length.

        See :footcite:t:`Montgomery2006` and :footcite:t:`Davidson2004`.

        Notes
        -----
        Is this supposed to return "standardized residuals,"
        residuals standardized
        to have mean zero and approximately unit variance?

            d_i = e_i / sqrt(MS_E)

        where MS_E = SSE / (n - k).

        References
        ----------
        .. footbibliography::

        """
        return self.residuals(Y, X) * positive_reciprocal(
            np.sqrt(self.dispersion)
        )

    def predicted(self, X):
        """Return linear predictor values from a design matrix."""
        beta = self.theta
        return np.dot(X, beta)