Coverage for nilearn/decoding/_objective_functions.py: 15%
80 statements
1"""Common functions and base classes."""
3from functools import partial
5import numpy as np
6from scipy import linalg


def spectral_norm_squared(X):
    """Compute the square of the operator 2-norm (spectral norm) of X.

    This corresponds to the Lipschitz constant of the gradient of the
    squared-loss function:

        w -> .5 * ||y - Xw||^2

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Design matrix.

    Returns
    -------
    lipschitz_constant : float
        The square of the spectral norm of X.

    """
    # On big matrices like those encountered in neuroimaging, svdvals is
    # faster than power iteration (even ARPACK-based ones).
    return linalg.svdvals(X)[0] ** 2
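

# Illustrative sketch, not part of the original module (the `_demo_*` helper
# name is ours): for a small random design matrix, the value above should
# match the squared operator 2-norm computed directly via np.linalg.norm.
def _demo_spectral_norm_squared():
    rng = np.random.RandomState(42)
    X = rng.randn(20, 10)
    assert np.isclose(spectral_norm_squared(X), np.linalg.norm(X, 2) ** 2)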


def logistic_loss_lipschitz_constant(X):
    """Compute the Lipschitz constant (upper bound) for the gradient of the \
    logistic sum.

    .. code-block::

        w -> sum_i log(1 + exp(-y_i * (x_i.w + v)))

    """
    # N.B.: the intercept is handled by appending a column of ones to X.
    X = np.hstack((X, np.ones((X.shape[0], 1))))
    return spectral_norm_squared(X)
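

# Illustrative sketch, not part of the original module (the `_demo_*` helper
# name is ours): the constant above upper-bounds the spectral norm of the
# Hessian of the logistic sum, since that Hessian is Xa' diag(s * (1 - s)) Xa
# with s = sigmoid(y * Xa.w) in (0, 1) and Xa the design matrix with an
# intercept column appended.
def _demo_logistic_loss_lipschitz_constant():
    rng = np.random.RandomState(0)
    X = rng.randn(15, 5)
    y = np.sign(rng.randn(15))
    w = rng.randn(6)  # 5 coefficients + 1 intercept
    Xa = np.hstack((X, np.ones((X.shape[0], 1))))
    s = _sigmoid(y * np.dot(Xa, w))
    hessian = np.dot(Xa.T * (s * (1.0 - s)), Xa)
    assert np.linalg.norm(hessian, 2) <= logistic_loss_lipschitz_constant(X)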


def squared_loss(X, y, w, compute_energy=True, compute_grad=False):
    """Compute the squared-loss energy and, optionally, its gradient.

    The cost / energy function is

        .5 * ||y - Xw||^2

    Note that no (1 / n_samples) factor is applied to this energy.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Design matrix.

    y : ndarray, shape (n_samples,)
        Target / response vector.

    w : ndarray, shape (n_features,)
        Unmasked, ravelized weights map.

    compute_energy : bool, default=True
        If set, the energy (cost) is computed and returned.

    compute_grad : bool, default=False
        If set, the gradient is computed and returned.

    Returns
    -------
    energy : float
        Energy (returned if `compute_energy` is set).

    gradient : ndarray, shape (n_features,)
        Gradient of energy (returned if `compute_grad` is set).

    """
    if not compute_energy and not compute_grad:
        raise RuntimeError(
            "At least one of compute_energy or compute_grad must be True."
        )

    residual = np.dot(X, w) - y

    # compute energy
    if compute_energy:
        energy = 0.5 * np.dot(residual, residual)
        if not compute_grad:
            return energy

    grad = np.dot(X.T, residual)

    return (energy, grad) if compute_energy else grad
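

# Illustrative sketch, not part of the original module (the `_demo_*` helper
# name is ours): the gradient returned above is X'(Xw - y); a quick check
# against that closed form and against the energy-only call.
def _demo_squared_loss():
    rng = np.random.RandomState(1)
    X, y, w = rng.randn(10, 4), rng.randn(10), rng.randn(4)
    energy, grad = squared_loss(X, y, w, compute_grad=True)
    assert np.isclose(energy, squared_loss(X, y, w))
    assert np.allclose(grad, np.dot(X.T, np.dot(X, w) - y))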


def tv_l1_from_gradient(spatial_grad):
    """Compute energy contribution due to penalized gradient, in TV-L1 model.

    Parameters
    ----------
    spatial_grad : ndarray, shape (4, nx, ny, nz)
        Precomputed "gradient + id" array.

    Returns
    -------
    out : float
        Energy contribution due to the penalized gradient.
    """
    tv_term = np.sum(
        np.sqrt(np.sum(spatial_grad[:-1] * spatial_grad[:-1], axis=0))
    )
    l1_term = np.abs(spatial_grad[-1]).sum()
    return l1_term + tv_term
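

# Illustrative sketch, not part of the original module (the `_demo_*` helper
# name is ours): chaining gradient_id (defined below) with tv_l1_from_gradient
# yields the TV-L1 penalty of a weights map; with l1_ratio=1 the spatial term
# vanishes and only the L1 norm of the map remains.
def _demo_tv_l1_energy():
    rng = np.random.RandomState(2)
    img = rng.randn(4, 5, 6)
    energy = tv_l1_from_gradient(gradient_id(img, l1_ratio=1.0))
    assert np.isclose(energy, np.abs(img).sum())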


def divergence_id(grad, l1_ratio=0.5):
    """Compute divergence + id of image gradient + id.

    Parameters
    ----------
    grad : ndarray, shape (n_axes + 1, *img_shape)
        Precomputed "gradient + id" array, where `img_shape` is the shape of
        the brain bounding box and n_axes = len(img_shape).

    l1_ratio : float in the interval [0, 1]; default=0.5
        Constant that mixes L1 and spatial prior terms in the penalization.

    Returns
    -------
    res : ndarray, shape (nx, ny, nz, ...)
        The computed divergence + id operator.

    Raises
    ------
    RuntimeError
        If `l1_ratio` is not in the interval [0, 1].

    """
    if not (0.0 <= l1_ratio <= 1.0):
        raise RuntimeError(
            f"l1_ratio must be in the interval [0, 1]; got {l1_ratio}"
        )

    res = np.zeros(grad.shape[1:])

    # the divergence part
    for d in range(grad.shape[0] - 1):
        this_grad = np.rollaxis(grad[d], d)
        this_res = np.rollaxis(res, d)
        this_res[:-1] += this_grad[:-1]
        this_res[1:-1] -= this_grad[:-2]
        if len(this_grad) > 1:
            this_res[-1] -= this_grad[-2]

    res *= 1.0 - l1_ratio

    # the identity part
    res -= l1_ratio * grad[-1]

    return res


def gradient_id(img, l1_ratio=0.5):
    """Compute gradient + id of an image.

    Parameters
    ----------
    img : ndarray, shape (nx, ny, nz, ...)
        N-dimensional image.

    l1_ratio : float in the interval [0, 1]; default=0.5
        Constant that mixes L1 and spatial prior terms in the penalization.

    Returns
    -------
    gradient : ndarray, shape (img.ndim + 1, *img.shape)
        Spatial gradient of the image: the i-th component along the first
        axis is (1 - l1_ratio) times the gradient along the i-th axis of the
        original array img; the last component is l1_ratio times the image
        itself (the "id" part).

    Raises
    ------
    RuntimeError
        If `l1_ratio` is not in the interval [0, 1].

    """
    if not (0.0 <= l1_ratio <= 1.0):
        raise RuntimeError(
            f"l1_ratio must be in the interval [0, 1]; got {l1_ratio}"
        )

    shape = [img.ndim + 1, *img.shape]
    gradient = np.zeros(shape, dtype=np.float64)

    # the gradient part: 'clever' code to have a view of the gradient
    # with dimension i stopping at -1
    slice_all = [0, slice(None, -1)]
    for d in range(img.ndim):
        gradient[tuple(slice_all)] = np.diff(img, axis=d)
        slice_all[0] = d + 1
        slice_all.insert(1, slice(None))

    gradient[:-1] *= 1.0 - l1_ratio

    # the identity part
    gradient[-1] = l1_ratio * img

    return gradient
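

# Illustrative sketch, not part of the original module (the `_demo_*` helper
# name is ours): gradient_id and divergence_id are adjoint up to sign, i.e.
# <gradient_id(img), p> == -<img, divergence_id(p)> for any img and any
# "gradient + id"-shaped p.
def _demo_gradient_divergence_adjointness():
    rng = np.random.RandomState(3)
    img = rng.randn(4, 5, 6)
    p = rng.randn(img.ndim + 1, *img.shape)
    lhs = np.sum(gradient_id(img, l1_ratio=0.5) * p)
    rhs = -np.sum(img * divergence_id(p, l1_ratio=0.5))
    assert np.isclose(lhs, rhs)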


def _sigmoid(t, copy=True):
    """Return 1 / (1 + np.exp(-t)); `t` is overwritten in place if copy=False."""
    if copy:
        t = np.copy(t)
    t *= -1.0
    t = np.exp(t, t)
    t += 1.0
    t = np.reciprocal(t, t)
    return t


def logistic_loss(X, y, w):
    """Compute the logistic loss: sum_i log(1 + exp(-y_i * (x_i.w + b))).

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Design matrix.

    y : ndarray, shape (n_samples,)
        Target / response vector. Each entry must be +1 or -1.

    w : ndarray, shape (n_features + 1,)
        Unmasked, ravelized input map; the last entry is the intercept b.

    Returns
    -------
    energy : float
        Energy contribution due to the logistic data-fit term.
    """
    z = np.dot(X, w[:-1]) + w[-1]
    yz = y * z
    idx = yz > 0
    out = np.empty_like(yz)
    # numerically stable evaluation of log(1 + exp(-yz)) on both branches
    out[idx] = np.log1p(np.exp(-yz[idx]))
    out[~idx] = -yz[~idx] + np.log1p(np.exp(yz[~idx]))
    out = out.sum()
    return out
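

# Illustrative sketch, not part of the original module (the `_demo_*` helper
# name is ours): the branching on the sign of y * z above is a numerically
# stable way of evaluating log(1 + exp(-y * z)); it matches the naive formula
# for moderate margins and stays finite for strongly misclassified samples.
def _demo_logistic_loss():
    rng = np.random.RandomState(4)
    X = rng.randn(12, 3)
    y = np.sign(rng.randn(12))
    w = rng.randn(4)  # 3 coefficients + 1 intercept
    yz = y * (np.dot(X, w[:-1]) + w[-1])
    assert np.isclose(logistic_loss(X, y, w), np.log1p(np.exp(-yz)).sum())
    assert np.isfinite(logistic_loss(X, y, 1e4 * w))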


def logistic_loss_grad(X, y, w):
    """Compute the gradient of the logistic loss w.r.t. `w`."""
    z = np.dot(X, w[:-1]) + w[-1]
    yz = y * z
    z = _sigmoid(yz, copy=False)
    z0 = (z - 1.0) * y
    grad = np.empty(w.shape)
    grad[:-1] = np.dot(X.T, z0)
    grad[-1] = np.sum(z0)
    return grad
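

# Illustrative sketch, not part of the original module (the `_demo_*` helper
# name is ours): a central finite-difference check of logistic_loss_grad
# against logistic_loss.
def _demo_logistic_loss_grad():
    rng = np.random.RandomState(5)
    X = rng.randn(12, 3)
    y = np.sign(rng.randn(12))
    w = rng.randn(4)
    grad = logistic_loss_grad(X, y, w)
    eps = 1e-5
    for i in range(w.size):
        dw = np.zeros_like(w)
        dw[i] = eps
        numerical = (
            logistic_loss(X, y, w + dw) - logistic_loss(X, y, w - dw)
        ) / (2 * eps)
        assert np.isclose(grad[i], numerical, atol=1e-4)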


# gradient of the squared-loss function
squared_loss_grad = partial(
    squared_loss, compute_energy=False, compute_grad=True
)


def gradient(w):
    """Pure spatial gradient."""
    return gradient_id(w, l1_ratio=0.0)[:-1]  # pure nabla


def divergence(v):
    """Pure spatial divergence."""
    return divergence_id(np.vstack((v, [np.zeros_like(v[0])])), l1_ratio=0.0)
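

# Illustrative sketch, not part of the original module (the `_demo_*` helper
# name is ours): with l1_ratio=0 the pure operators satisfy
# <gradient(w), gradient(w)> == -<w, divergence(gradient(w))>, so that
# -divergence(gradient(.)) acts as a positive semi-definite, discrete
# Laplacian-like operator.
def _demo_pure_gradient_divergence():
    rng = np.random.RandomState(6)
    w = rng.randn(4, 5, 6)
    g = gradient(w)
    assert np.isclose(np.sum(g * g), -np.sum(w * divergence(g)))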