Coverage for nilearn/decoding/tests/test_graph_net.py: 0%
144 statements
# Data used in almost all tests
import numpy as np
import pytest
import scipy as sp
from nibabel import Nifti1Image
from numpy.testing import assert_almost_equal
from scipy import linalg

from nilearn.decoding._objective_functions import divergence, gradient
from nilearn.decoding.space_net import BaseSpaceNet
from nilearn.decoding.space_net_solvers import (
    _graph_net_adjoint_data_function,
    _graph_net_data_function,
    _logistic_data_loss_and_spatial_grad,
    _logistic_data_loss_and_spatial_grad_derivative,
    _logistic_derivative_lipschitz_constant,
    _squared_loss_and_spatial_grad,
    _squared_loss_and_spatial_grad_derivative,
    _squared_loss_derivative_lipschitz_constant,
    mfista,
)
from nilearn.decoding.tests._testing import create_graph_net_simulation_data

from .test_same_api import to_niimgs


def _make_data(task="regression", size=4):
    X, y, w, mask = create_graph_net_simulation_data(
        snr=1.0,
        n_samples=10,
        size=size,
        n_points=5,
        random_state=42,
        task=task,
    )
    X_, _ = to_niimgs(X, [size] * 3)
    mask_ = Nifti1Image(mask.astype(float), X_.affine)
    return X, y, w, mask, mask_, X_
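
# Note on the simulated data (shapes inferred from how the fixtures are used
# below, not from the simulation code itself): X is (n_samples, n_features),
# y is (n_samples,), w is a flat weight vector with w.size == mask.sum(), and
# mask is a boolean (size, size, size) volume.  X_ and mask_ wrap the same
# data as Nifti1Image objects for the estimator-level tests.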


def get_gradient_matrix(w_size, mask):
    """Return the gradient operator in matrix form.

    Given a number of features and a mask (satisfying
    mask[mask].size == w_size), compute a matrix G such that for any
    vector w, np.dot(G, w) == gradient(unmasked_w)[grad_mask], where
    unmasked_w is the image obtained by writing w into the mask.
    """
    grad_matrix = np.zeros((mask.ndim * w_size, w_size))
    grad_mask = np.array([mask for _ in range(mask.ndim)])
    image_buffer = np.zeros(mask.shape)

    for i in range(w_size):
        base_vector = np.zeros(w_size)
        base_vector[i] = 1
        image_buffer[mask] = base_vector
        gradient_column = gradient(image_buffer)[grad_mask]
        grad_matrix[:, i] = gradient_column

    return grad_matrix
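
# Illustrative note (not part of the original module): with a fully True
# 2x2x2 mask and w_size == 8, the matrix G built above has shape
# (mask.ndim * w_size, w_size) == (24, 8); its i-th column is the stacked
# spatial gradient of the image that is 1 at the i-th masked voxel and 0
# elsewhere, so np.dot(G, w) reproduces gradient(image)[grad_mask] for any w.
# This is exactly the identity exercised by test_grad_matrix below.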


def test_grad_matrix(rng):
    """Test the matrix form of the gradient operator."""
    _, _, w, mask, *_ = _make_data()

    G = get_gradient_matrix(w.size, mask)

    image_buffer = np.zeros(mask.shape)
    grad_mask = np.array([mask for _ in range(mask.ndim)])
    for _ in range(10):
        v = rng.random(w.size) * rng.integers(1000)
        image_buffer[mask] = v
        assert_almost_equal(gradient(image_buffer)[grad_mask], np.dot(G, v))


def test_adjointness(rng, size=4):
    """Test for adjointness between gradient and divergence operators."""
    for _ in range(3):
        image_1 = rng.random((size, size, size))
        image_2 = rng.random((3, size, size, size))
        Axdoty = np.dot((gradient(image_1).ravel()), image_2.ravel())
        xdotAty = np.dot((divergence(image_2).ravel()), image_1.ravel())

        assert_almost_equal(Axdoty, -xdotAty)
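
# Background for the adjointness tests (a short reminder, not from the
# original file): an operator A and its adjoint A* satisfy
# <A x, y> == <x, A* y> for all x and y.  The adjoint of the discrete
# gradient is minus the divergence, hence the sign flip asserted above:
# <grad(u), v> == -<u, div(v)>.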


def test_identity_adjointness(rng, size=4):
    """Test adjointness between _graph_net_data_function and \
    _graph_net_adjoint_data_function, with an identity design matrix.
    """
    # A mask full of ones
    mask = np.ones((size, size, size), dtype=bool)

    # But with some zeros
    mask[0:3, 0:3, 0:3] = 0
    adjoint_mask = np.array([mask for _ in range(mask.ndim)])
    n_samples = np.sum(mask)
    X = np.eye(n_samples)
    l1_ratio = 0.5
    for _ in range(10):
        x = rng.random(np.sum(mask))
        y = rng.random(n_samples + np.sum(mask) * mask.ndim)
        Axdoty = np.dot(_graph_net_data_function(X, x, mask, l1_ratio), y)
        xdotAty = np.dot(
            _graph_net_adjoint_data_function(X, y, adjoint_mask, l1_ratio), x
        )

        assert_almost_equal(Axdoty, xdotAty)
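
# Reading of this test and the next one (an interpretation of the helpers,
# not a quote from their docs): _graph_net_data_function(X, x, mask, l1_ratio)
# appears to apply a stacked linear operator K = [X; weighted spatial
# gradient] to x, which is why y is drawn with length
# n_samples + mask.sum() * mask.ndim, while _graph_net_adjoint_data_function
# applies K.T; adjointness then means <K x, y> == <x, K.T y>.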


def test_operators_adjointness(rng, size=4):
    """Same as test_identity_adjointness, but with a generic design matrix."""
    # A mask full of ones
    mask = np.ones((size, size, size), dtype=bool)

    # But with some zeros
    mask[0:3, 0:3, 0:3] = 0
    adjoint_mask = np.array([mask for _ in range(mask.ndim)])
    n_samples = 200
    X = rng.random((n_samples, np.sum(mask)))
    l1_ratio = 0.5
    for _ in range(10):
        x = rng.random(np.sum(mask))
        y = rng.random(n_samples + np.sum(mask) * mask.ndim)
        Axdoty = np.dot(_graph_net_data_function(X, x, mask, l1_ratio), y)
        xdotAty = np.dot(
            _graph_net_adjoint_data_function(X, y, adjoint_mask, l1_ratio), x
        )

        assert_almost_equal(Axdoty, xdotAty)


def test_squared_loss_gradient_at_simple_points():
    """Test the gradient of the squared data loss at simple points.

    This is an easy test, meant only to catch gross errors.
    """
    X, y, w, mask = create_graph_net_simulation_data(n_samples=10, size=4)
    grad_weight = 1

    def func(w):
        return _squared_loss_and_spatial_grad(X, y, w, mask, grad_weight)

    def func_grad(w):
        return _squared_loss_and_spatial_grad_derivative(
            X, y, w, mask, grad_weight
        )

    for i in range(0, w.size, 2):
        point = np.zeros(w.shape)
        point[i] = 1

        assert_almost_equal(
            sp.optimize.check_grad(func, func_grad, point), 0, decimal=3
        )
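
# scipy.optimize.check_grad returns the 2-norm of the difference between the
# analytical gradient (func_grad) and a finite-difference approximation of
# func at the given point, so values near zero mean the hand-written
# derivative is consistent with the loss.  The same check is reused for the
# logistic loss below.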


def test_logistic_gradient_at_simple_points():
    """Test the gradient of the logistic data loss at simple points.

    This is an easy test, meant only to catch gross errors.
    """
    X, y, w, mask = create_graph_net_simulation_data(n_samples=10, size=4)
    grad_weight = 1
    # Add the intercept
    w = np.append(w, 0)

    def func(w):
        return _logistic_data_loss_and_spatial_grad(X, y, w, mask, grad_weight)

    def func_grad(w):
        return _logistic_data_loss_and_spatial_grad_derivative(
            X, y, w, mask, grad_weight
        )

    for i in range(0, w.size, 7):
        point = np.zeros(w.shape)
        point[i] = 1

        assert_almost_equal(
            sp.optimize.check_grad(func, func_grad, point), 0, decimal=3
        )


def test_squared_loss_derivative_lipschitz_constant(rng):
    """Test Lipschitz continuity of the derivative of the squared loss."""
    X, y, w, mask, *_ = _make_data()
    grad_weight = 2.08e-1

    lipschitz_constant = _squared_loss_derivative_lipschitz_constant(
        X, mask, grad_weight
    )

    for _ in range(20):
        x_1 = rng.random(w.shape) * rng.integers(1000)
        x_2 = rng.random(w.shape) * rng.integers(1000)
        gradient_difference = linalg.norm(
            _squared_loss_and_spatial_grad_derivative(
                X, y, x_1, mask, grad_weight
            )
            - _squared_loss_and_spatial_grad_derivative(
                X, y, x_2, mask, grad_weight
            )
        )
        point_difference = linalg.norm(x_1 - x_2)

        assert gradient_difference <= lipschitz_constant * point_difference
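
# This test and the logistic variant below check the defining inequality of
# a Lipschitz-continuous gradient,
#     ||grad f(x_1) - grad f(x_2)|| <= L * ||x_1 - x_2||,
# at randomly drawn pairs of points, with L the constant returned by the
# corresponding *_derivative_lipschitz_constant helper.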


def test_logistic_derivative_lipschitz_constant(rng):
    """Test Lipschitz continuity of the derivative of the logistic loss."""
    X, y, w, mask, *_ = _make_data()
    grad_weight = 2.08e-1

    lipschitz_constant = _logistic_derivative_lipschitz_constant(
        X, mask, grad_weight
    )

    for _ in range(20):
        x_1 = rng.random(w.shape[0] + 1) * rng.integers(1000)
        x_2 = rng.random(w.shape[0] + 1) * rng.integers(1000)
        gradient_difference = linalg.norm(
            _logistic_data_loss_and_spatial_grad_derivative(
                X, y, x_1, mask, grad_weight
            )
            - _logistic_data_loss_and_spatial_grad_derivative(
                X, y, x_2, mask, grad_weight
            )
        )
        point_difference = linalg.norm(x_1 - x_2)
        assert gradient_difference <= lipschitz_constant * point_difference


@pytest.mark.parametrize("l1_ratio", np.linspace(0.1, 1, 3))
def test_max_alpha_squared_loss(l1_ratio):
    """Test that models with L1 regularization above the theoretical bound \
    are full of zeros, for the squared loss.
    """
    X, y, _, _, mask_, X_ = _make_data()

    reg = BaseSpaceNet(
        mask=mask_,
        max_iter=10,
        penalty="graph-net",
        is_classif=False,
        verbose=0,
    )

    reg.l1_ratios = l1_ratio
    reg.alphas = np.max(np.dot(X.T, y)) / l1_ratio
    reg.fit(X_, y)

    assert_almost_equal(reg.coef_, 0.0)
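
# Rationale (a summary of the bound exercised above, not a quote from the
# solver docs): for an l1-penalized squared loss, soft-thresholding drives
# every coefficient to zero once the regularization reaches a critical value
# proportional to max(X.T @ y) / l1_ratio, so fitting at (or beyond) that
# value should produce an all-zero coef_ map.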


def test_tikhonov_regularization_vs_graph_net():
    """Test one of the extreme cases of Graph-Net.

    With l1_ratio = 0 (pure smooth penalty), we compare Graph-Net's
    performance with the analytical solution of Tikhonov regularization.
    """
    X, y, w, mask, mask_, X_ = _make_data()

    # XXX A small dataset here (this test is very lengthy)
    G = get_gradient_matrix(w.size, mask)
    optimal_model = np.dot(
        sp.linalg.pinv(np.dot(X.T, X) + y.size * np.dot(G.T, G)),
        np.dot(X.T, y),
    )
    graph_net = BaseSpaceNet(
        mask=mask_,
        alphas=1.0 * X.shape[0],
        l1_ratios=0.0,
        max_iter=400,
        fit_intercept=False,
        screening_percentile=100.0,
        standardize=False,
        verbose=0,
    )
    graph_net.fit(X_, y.copy())

    coef_ = graph_net.coef_[0]
    graph_net_perf = (
        0.5 / y.size * linalg.norm(np.dot(X, coef_) - y) ** 2
        + 0.5 * linalg.norm(np.dot(G, coef_)) ** 2
    )
    optimal_model_perf = (
        0.5 / y.size * linalg.norm(np.dot(X, optimal_model) - y) ** 2
        + 0.5 * linalg.norm(np.dot(G, optimal_model)) ** 2
    )

    assert_almost_equal(graph_net_perf, optimal_model_perf, decimal=1)
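
# Sanity check of the closed form used above (a derivation sketch, not part
# of the original test): minimizing
#     0.5 / n * ||X w - y||^2 + 0.5 * ||G w||^2
# and setting its gradient to zero gives
#     (X.T @ X + n * G.T @ G) w = X.T @ y,
# which is the pseudo-inverse expression computed for optimal_model
# (here n == y.size).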


def test_mfista_solver_graph_net_no_l1_term():
    """Check that mfista recovers the least-squares solution when the \
    proximal operator is the identity (no penalty).
    """
    w = np.zeros(2)
    X = np.array([[1, 0], [0, 4]])
    y = np.array([-10, 20])

    def f1(w):
        return 0.5 * np.dot(np.dot(X, w) - y, np.dot(X, w) - y)

    def f1_grad(w):
        return np.dot(X.T, np.dot(X, w) - y)

    def f2_prox(w, step_size, *args, **kwargs):  # noqa: ARG001
        return w, {"converged": True}

    lipschitz_constant = _squared_loss_derivative_lipschitz_constant(
        X, (np.eye(2) == 1).astype(bool), 1
    )
    estimate_solution, _, _ = mfista(
        f1_grad, f2_prox, f1, lipschitz_constant, w.size, tol=1e-8, verbose=0
    )

    # With an identity prox (no penalty), mfista should minimize f1 alone,
    # whose unique minimizer is X^{-1} y = [-10, 5].
    solution = np.array([-10, 5])

    assert_almost_equal(estimate_solution, solution, decimal=4)