Coverage for nilearn/decoding/_objective_functions.py: 15%

80 statements  


1"""Common functions and base classes.""" 

2 

3from functools import partial 

4 

5import numpy as np 

6from scipy import linalg 

7 

8 

def spectral_norm_squared(X):
    """Compute the square of the operator 2-norm (spectral norm) of X.

    This corresponds to the Lipschitz constant of the gradient of the
    squared-loss function:

        w -> .5 * ||y - Xw||^2

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Design matrix.

    Returns
    -------
    lipschitz_constant : float
        The square of the spectral norm of X.

    """
    # On big matrices like those we have in neuroimaging, svdvals is
    # faster than a power iteration (even one using ARPACK).
    return linalg.svdvals(X)[0] ** 2
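A minimal sanity check (illustrative sketch only, not part of the module): the returned value should agree with the squared operator 2-norm computed directly by NumPy.

    rng = np.random.default_rng(0)
    X = rng.standard_normal((20, 30))
    assert np.isclose(spectral_norm_squared(X), np.linalg.norm(X, 2) ** 2)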

def logistic_loss_lipschitz_constant(X):
    """Compute the Lipschitz constant (upper bound) for the gradient of the \
    logistic sum.

    .. code-block::

        w -> sum_i log(1+exp(-y_i*(x_i*w + v)))

    """
    # N.B: we handle intercept!
    X = np.hstack((X, np.ones((X.shape[0], 1))))
    return spectral_norm_squared(X)
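Illustrative sketch (not part of the module): in ISTA/FISTA-style proximal-gradient solvers, the reciprocal of this constant gives a safe gradient step size for the logistic sum.

    rng = np.random.default_rng(0)
    X = rng.standard_normal((50, 100))
    # 1 / L is a valid step size for gradient steps on the logistic sum
    step_size = 1.0 / logistic_loss_lipschitz_constant(X)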

def squared_loss(X, y, w, compute_energy=True, compute_grad=False):
    """Compute the squared loss (energy) and, optionally, its gradient too.

    The cost / energy function is

        .5 * ||y - Xw||^2

    No (1 / n_samples) normalization is applied here.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Design matrix.

    y : ndarray, shape (n_samples,)
        Target / response vector.

    w : ndarray, shape (n_features,)
        Unmasked, raveled weights map.

    compute_energy : bool, default=True
        If set, the energy is computed; otherwise only the gradient is
        computed.

    compute_grad : bool, default=False
        If set, the gradient is computed; otherwise only the energy is
        computed.

    Returns
    -------
    energy : float
        Energy (returned if `compute_energy` is set).

    gradient : ndarray, shape (n_features,)
        Gradient of energy (returned if `compute_grad` is set).

    """
    if not compute_energy and not compute_grad:
        raise RuntimeError(
            "At least one of compute_energy or compute_grad must be True."
        )

    residual = np.dot(X, w) - y

    # compute energy
    if compute_energy:
        energy = 0.5 * np.dot(residual, residual)
        if not compute_grad:
            return energy

    grad = np.dot(X.T, residual)

    return (energy, grad) if compute_energy else grad
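An illustrative finite-difference check (not part of the module): the analytic gradient should match central differences of the energy.

    rng = np.random.default_rng(0)
    X = rng.standard_normal((15, 10))
    y = rng.standard_normal(15)
    w = rng.standard_normal(10)
    eps = 1e-6
    grad = squared_loss(X, y, w, compute_energy=False, compute_grad=True)
    fd = np.array([
        (squared_loss(X, y, w + eps * e) - squared_loss(X, y, w - eps * e))
        / (2 * eps)
        for e in np.eye(10)
    ])
    assert np.allclose(grad, fd, atol=1e-4)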

def tv_l1_from_gradient(spatial_grad):
    """Compute energy contribution due to penalized gradient, in TV-L1 model.

    Parameters
    ----------
    spatial_grad : ndarray, shape (4, nx, ny, nz)
        Precomputed "gradient + id" array.

    Returns
    -------
    out : float
        Energy contribution due to the penalized gradient.
    """
    tv_term = np.sum(
        np.sqrt(np.sum(spatial_grad[:-1] * spatial_grad[:-1], axis=0))
    )
    l1_term = np.abs(spatial_grad[-1]).sum()
    return l1_term + tv_term
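A small illustrative check (using gradient_id, defined further down in this module): for a constant image the spatial gradient vanishes, so the penalty reduces to the pure L1 part.

    img = np.full((3, 4, 5), 2.0)  # constant image -> zero spatial gradient
    penalty = tv_l1_from_gradient(gradient_id(img, l1_ratio=0.3))
    assert np.isclose(penalty, 0.3 * np.abs(img).sum())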

def divergence_id(grad, l1_ratio=0.5):
    """Compute the "divergence + id" of a "gradient + id" field.

    Parameters
    ----------
    grad : ndarray, shape (n_axes + 1, *img_shape)
        Precomputed "gradient + id" array, where `img_shape` is the shape of
        the brain bounding box (e.g. (nx, ny, nz)) and
        n_axes = len(img_shape).

    l1_ratio : float in the interval [0, 1]; default=0.5
        Constant that mixes L1 and spatial prior terms in the penalization.

    Returns
    -------
    res : ndarray, shape (nx, ny, nz, ...)
        The computed "divergence + id" of `grad`.

    Raises
    ------
    RuntimeError
        If `l1_ratio` is not in the interval [0, 1].

    """
    if not (0.0 <= l1_ratio <= 1.0):
        raise RuntimeError(
            f"l1_ratio must be in the interval [0, 1]; got {l1_ratio}"
        )

    res = np.zeros(grad.shape[1:])

    # the divergence part
    for d in range(grad.shape[0] - 1):
        this_grad = np.rollaxis(grad[d], d)
        this_res = np.rollaxis(res, d)
        this_res[:-1] += this_grad[:-1]
        this_res[1:-1] -= this_grad[:-2]
        if len(this_grad) > 1:
            this_res[-1] -= this_grad[-2]

    res *= 1.0 - l1_ratio

    # the identity part
    res -= l1_ratio * grad[-1]

    return res
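A tiny worked example (illustrative only) along a single axis, with the identity slot left at zero:

    p = np.zeros((2, 4))
    p[0] = [1.0, 2.0, 3.0, 0.0]  # spatial component; p[-1] (the "id" slot) stays zero
    print(divergence_id(p, l1_ratio=0.0))  # [ 1.  1.  1. -3.]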

def gradient_id(img, l1_ratio=0.5):
    """Compute the "gradient + id" of an image.

    Parameters
    ----------
    img : ndarray, shape (nx, ny, nz, ...)
        N-dimensional image.

    l1_ratio : float in the interval [0, 1]; default=0.5
        Constant that mixes L1 and spatial prior terms in the penalization.

    Returns
    -------
    gradient : ndarray, shape (img.ndim + 1, nx, ny, nz, ...)
        Spatial gradient of the image: the i-th component along the first
        axis is the gradient along the i-th axis of the original array img;
        the last component is the identity part, `l1_ratio * img`.

    Raises
    ------
    RuntimeError
        If `l1_ratio` is not in the interval [0, 1].

    """
    if not (0.0 <= l1_ratio <= 1.0):
        raise RuntimeError(
            f"l1_ratio must be in the interval [0, 1]; got {l1_ratio}"
        )

    shape = [img.ndim + 1, *img.shape]
    gradient = np.zeros(shape, dtype=np.float64)

    # the gradient part: 'clever' code to have a view of the gradient
    # with dimension i stopping at -1
    slice_all = [0, slice(None, -1)]
    for d in range(img.ndim):
        gradient[tuple(slice_all)] = np.diff(img, axis=d)
        slice_all[0] = d + 1
        slice_all.insert(1, slice(None))

    gradient[:-1] *= 1.0 - l1_ratio

    # the identity part
    gradient[-1] = l1_ratio * img

    return gradient
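A hedged adjointness check (illustrative sketch only): divergence_id is, up to sign, the adjoint of gradient_id, so <gradient_id(u), p> = -<u, divergence_id(p)> for matching l1_ratio.

    rng = np.random.default_rng(0)
    u = rng.standard_normal((4, 5, 6))
    p = rng.standard_normal((4, 4, 5, 6))  # shape (u.ndim + 1, *u.shape)
    lhs = np.sum(gradient_id(u, l1_ratio=0.4) * p)
    rhs = -np.sum(u * divergence_id(p, l1_ratio=0.4))
    assert np.isclose(lhs, rhs)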

def _sigmoid(t, copy=True):
    """Return 1 / (1 + np.exp(-t))."""
    if copy:
        t = np.copy(t)
    t *= -1.0
    t = np.exp(t, t)
    t += 1.0
    t = np.reciprocal(t, t)
    return t

def logistic_loss(X, y, w):
    """Compute the logistic loss: sum_i log(1 + exp(-y_i * (x_i * w + b))).

    The last entry of `w` is the intercept `b`.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Design matrix.

    y : ndarray, shape (n_samples,)
        Target / response vector. Each entry must be +1 or -1.

    w : ndarray, shape (n_features + 1,)
        Unmasked, raveled input map; the last entry is the intercept.

    Returns
    -------
    energy : float
        Energy contribution due to the logistic data-fit term.
    """
    z = np.dot(X, w[:-1]) + w[-1]
    yz = y * z
    idx = yz > 0
    out = np.empty_like(yz)
    out[idx] = np.log1p(np.exp(-yz[idx]))
    out[~idx] = -yz[~idx] + np.log1p(np.exp(yz[~idx]))
    out = out.sum()
    return out
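A quick illustrative check against the naive formula (safe here because the inputs are moderate; not part of the module):

    rng = np.random.default_rng(0)
    X = rng.standard_normal((10, 5))
    y = rng.choice([-1.0, 1.0], size=10)
    w = rng.standard_normal(6)  # 5 weights + intercept in the last entry
    naive = np.log1p(np.exp(-y * (X.dot(w[:-1]) + w[-1]))).sum()
    assert np.isclose(logistic_loss(X, y, w), naive)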

def logistic_loss_grad(X, y, w):
    """Compute the gradient of the logistic loss (weights and intercept)."""
    z = np.dot(X, w[:-1]) + w[-1]
    yz = y * z
    z = _sigmoid(yz, copy=False)
    z0 = (z - 1.0) * y
    grad = np.empty(w.shape)
    grad[:-1] = np.dot(X.T, z0)
    grad[-1] = np.sum(z0)
    return grad
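An illustrative finite-difference and Lipschitz check (sketch only, not part of the module):

    rng = np.random.default_rng(0)
    X = rng.standard_normal((12, 4))
    y = rng.choice([-1.0, 1.0], size=12)
    w = rng.standard_normal(5)
    eps = 1e-6
    fd = np.array([
        (logistic_loss(X, y, w + eps * e) - logistic_loss(X, y, w - eps * e))
        / (2 * eps)
        for e in np.eye(5)
    ])
    assert np.allclose(logistic_loss_grad(X, y, w), fd, atol=1e-4)

    # the constant from logistic_loss_lipschitz_constant bounds the gradient's variation
    w2 = rng.standard_normal(5)
    lips = logistic_loss_lipschitz_constant(X)
    assert np.linalg.norm(
        logistic_loss_grad(X, y, w) - logistic_loss_grad(X, y, w2)
    ) <= lips * np.linalg.norm(w - w2)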

# gradient of squared loss function
squared_loss_grad = partial(
    squared_loss, compute_energy=False, compute_grad=True
)
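Illustrative usage (not part of the module): squared_loss_grad(X, y, w) is just X.T @ (X @ w - y).

    rng = np.random.default_rng(0)
    X = rng.standard_normal((15, 10))
    y = rng.standard_normal(15)
    w = rng.standard_normal(10)
    assert np.allclose(squared_loss_grad(X, y, w), X.T @ (X @ w - y))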

def gradient(w):
    """Pure spatial gradient."""
    return gradient_id(w, l1_ratio=0.0)[:-1]  # pure nabla

def divergence(v):
    """Pure spatial divergence."""
    return divergence_id(np.vstack((v, [np.zeros_like(v[0])])), l1_ratio=0.0)
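A concrete 1-D illustration (not part of the module): gradient returns forward differences with a zero at the boundary, and divergence is its negative adjoint.

    u = np.array([0.0, 1.0, 3.0, 6.0])
    g = gradient(u)
    print(g)  # [[1. 2. 3. 0.]] : forward differences, zero in the last slot
    v = np.ones_like(g)
    assert np.isclose(np.sum(g * v), -np.sum(u * divergence(v)))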