Coverage for nilearn/decoding/tests/test_decoder.py: 0%

652 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-16 12:32 +0200

1"""Test the decoder module. 

2 

3Order of tests from top to bottom: 

4 

5- helper functions 

6- fixtures 

7- classification 

8- regression 

9- multiclass 

10 

11""" 

12 

13# ruff: noqa: ARG001 

14 

15import collections 

16import numbers 

17import warnings 

18 

19import numpy as np 

20import pytest 

21from nibabel import save 

22from numpy.testing import assert_array_almost_equal 

23from sklearn import clone 

24from sklearn.datasets import load_iris, make_classification, make_regression 

25from sklearn.dummy import DummyClassifier, DummyRegressor 

26from sklearn.ensemble import RandomForestClassifier 

27from sklearn.exceptions import ConvergenceWarning 

28from sklearn.linear_model import ( 

29 LassoCV, 

30 LogisticRegressionCV, 

31 RidgeClassifierCV, 

32 RidgeCV, 

33) 

34from sklearn.metrics import ( 

35 accuracy_score, 

36 check_scoring, 

37 get_scorer, 

38 r2_score, 

39 roc_auc_score, 

40) 

41from sklearn.model_selection import ( 

42 KFold, 

43 LeaveOneGroupOut, 

44 ParameterGrid, 

45 StratifiedKFold, 

46) 

47from sklearn.preprocessing import LabelBinarizer, StandardScaler 

48from sklearn.svm import SVR, LinearSVC 

49from sklearn.utils.estimator_checks import parametrize_with_checks 

50 

51from nilearn._utils.estimator_checks import ( 

52 check_estimator, 

53 nilearn_check_estimator, 

54 return_expected_failed_checks, 

55) 

56from nilearn._utils.param_validation import ( 

57 _get_mask_extent, 

58 check_feature_screening, 

59) 

60from nilearn._utils.tags import SKLEARN_LT_1_6 

61from nilearn.conftest import _rng 

62from nilearn.decoding import ( 

63 Decoder, 

64 DecoderRegressor, 

65 FREMClassifier, 

66 FREMRegressor, 

67) 

68from nilearn.decoding.decoder import ( 

69 SUPPORTED_ESTIMATORS, 

70 _BaseDecoder, 

71 _check_estimator, 

72 _check_param_grid, 

73 _parallel_fit, 

74 _wrap_param_grid, 

75) 

76from nilearn.decoding.tests.test_same_api import to_niimgs 

77from nilearn.maskers import NiftiMasker, SurfaceMasker 

78 

# Default number of samples used when generating synthetic data sets.
N_SAMPLES = 80

# Estimator names exercised by the parametrized regression tests.
ESTIMATOR_REGRESSION = ("ridge", "svr")

# Default-constructed estimators handed to the estimator compliance checks.
ESTIMATORS_TO_CHECK = [
    estimator_class()
    for estimator_class in (
        Decoder,
        DecoderRegressor,
        FREMClassifier,
        FREMRegressor,
    )
]

90 

91 

if SKLEARN_LT_1_6:
    # With the SKLEARN_LT_1_6 flag set, the checks come from nilearn's
    # ``check_estimator`` helper, collected in two groups: the valid ones
    # and the ones expected to fail.

    @pytest.mark.parametrize(
        "estimator, check, name",
        check_estimator(estimators=ESTIMATORS_TO_CHECK),
    )
    def test_check_estimator_sklearn_valid(estimator, check, name):
        """Run every check collected as valid on the estimator."""
        check(estimator)

    @pytest.mark.xfail(reason="invalid checks should fail")
    @pytest.mark.parametrize(
        "estimator, check, name",
        check_estimator(estimators=ESTIMATORS_TO_CHECK, valid=False),
    )
    def test_check_estimator_sklearn_invalid(estimator, check, name):
        """Run every check collected as invalid; each is marked xfail."""
        check(estimator)

else:
    # Otherwise sklearn's own decorator generates the checks and is given
    # nilearn's callback describing the expected failures.

    @parametrize_with_checks(
        estimators=ESTIMATORS_TO_CHECK,
        expected_failed_checks=return_expected_failed_checks,
    )
    def test_check_estimator_sklearn(estimator, check):
        """Run the sklearn-generated compliance checks on the estimator."""
        check(estimator)

119 

120 

@pytest.mark.parametrize(
    "estimator, check, name",
    nilearn_check_estimator(estimators=ESTIMATORS_TO_CHECK),
)
def test_check_estimator_nilearn(estimator, check, name):
    """Run the nilearn-specific estimator checks on the estimator."""
    check(estimator)

128 

129 

def _make_binary_classification_test_data(n_samples=N_SAMPLES, dim=5):
    """Generate a two-class data set and convert it with ``to_niimgs``.

    ``dim**3`` features are drawn with ``make_classification`` and then
    passed to ``to_niimgs`` with a ``[dim, dim, dim]`` shape.

    Returns the converted data, the class labels and the mask returned by
    ``to_niimgs``.
    """
    volume_shape = [dim] * 3
    features, labels = make_classification(
        n_samples=n_samples,
        n_features=dim**3,
        scale=3.0,
        n_informative=5,
        n_classes=2,
        random_state=42,
    )
    images, mask = to_niimgs(features, volume_shape)
    return images, labels, mask

141 

142 

@pytest.fixture()
def rand_x_y(rng):
    """Return a random (100, 10) array and 100 labels: 50 of -1, then 50 of 1."""
    features = rng.random((100, 10))
    labels = np.repeat([-1, 1], 50)
    return features, labels

148 

149 

def _make_multiclass_classification_test_data(n_samples=40, dim=5):
    """Generate a four-class data set and convert it with ``to_niimgs``.

    ``dim**3`` features are drawn with ``make_classification`` and then
    passed to ``to_niimgs`` with a ``[dim, dim, dim]`` shape.

    Returns the converted data, the class labels and the mask returned by
    ``to_niimgs``.
    """
    volume_shape = [dim] * 3
    features, labels = make_classification(
        n_samples=n_samples,
        n_features=dim**3,
        scale=3.0,
        n_informative=5,
        n_classes=4,
        random_state=42,
    )
    images, mask = to_niimgs(features, volume_shape)
    return images, labels, mask

161 

162 

@pytest.fixture(scope="session")
def tiny_binary_classification_data():
    """Provide a 20-sample binary data set for error-handling tests.

    The data set is kept very small because it is only meant for tests
    that check error handling, such as input validation.
    """
    tiny_data = _make_binary_classification_test_data(n_samples=20)
    return tiny_data

172 

173 

@pytest.fixture
def binary_classification_data():
    """Provide the binary data set for tests that really fit a classifier."""
    data = _make_binary_classification_test_data(n_samples=N_SAMPLES)
    return data

178 

179 

def _make_regression_test_data(n_samples=N_SAMPLES, dim=5):
    """Generate a regression data set and convert it with ``to_niimgs``.

    ``dim**3`` features (``dim`` of them informative) are drawn with
    ``make_regression``, standardized with ``StandardScaler`` and then
    passed to ``to_niimgs`` with a ``[dim, dim, dim]`` shape.

    Returns the converted data, the targets and the mask returned by
    ``to_niimgs``.
    """
    volume_shape = [dim] * 3
    features, targets = make_regression(
        n_samples=n_samples,
        n_features=dim**3,
        n_informative=dim,
        noise=1.5,
        bias=1.0,
        random_state=42,
    )
    scaled_features = StandardScaler().fit_transform(features)
    images, mask = to_niimgs(scaled_features, volume_shape)
    return images, targets, mask

192 

193 

@pytest.fixture
def regression_data():
    """Provide the regression data set built with N_SAMPLES samples and dim=5."""
    data = _make_regression_test_data(n_samples=N_SAMPLES, dim=5)
    return data

197 

198 

@pytest.fixture
def multiclass_data():
    """Provide the four-class data set built with N_SAMPLES samples."""
    data = _make_multiclass_classification_test_data(n_samples=N_SAMPLES)
    return data

202 

203 

@pytest.mark.parametrize(
    "regressor, param",
    [
        (RidgeCV(), ["alphas"]),
        (SVR(kernel="linear"), ["C"]),
        (LassoCV(), ["n_alphas"]),
    ],
)
def test_check_param_grid_regression(regressor, param, rng):
    """Check the default grid keys produced for several regressors.

    Each regressor is paired with the name of its own regularization
    parameter, which must be the only key of the returned grid.
    """
    features = rng.random((N_SAMPLES, 10))
    targets = rng.random(N_SAMPLES)

    grid = _check_param_grid(regressor, features, targets, None)
    grid_keys = list(grid.keys())

    assert grid_keys == list(param)

223 

224 

@pytest.mark.parametrize(
    "classifier, param",
    [
        (LogisticRegressionCV(penalty="l1"), ["Cs"]),
        (LogisticRegressionCV(penalty="l2"), ["Cs"]),
        (RidgeClassifierCV(), ["alphas"]),
    ],
)
def test_check_param_grid_classification(rand_x_y, classifier, param):
    """Check the default grid keys produced for several classifiers.

    Each classifier is paired with the name of its own regularization
    parameter, which must be the only key of the returned grid.
    """
    features, labels = rand_x_y

    grid = _check_param_grid(classifier, features, labels, None)
    grid_keys = list(grid.keys())

    assert grid_keys == list(param)

243 

244 

@pytest.mark.parametrize(
    "param_grid_input",
    [
        {"C": [1, 10, 100]},
        {"Cs": [1, 10, 100]},
        [{"C": [1, 10, 100]}, {"fit_intercept": [False]}],
    ],
)
def test_check_param_grid_replacement(rand_x_y, param_grid_input):
    """Check that "C" never survives in the grid of a LogisticRegressionCV.

    Rows of the output grid that do not carry "Cs" must be present
    unchanged in the input grid.
    """
    features, labels = rand_x_y
    old_name, new_name = "C", "Cs"

    output_grid = _check_param_grid(
        LogisticRegressionCV(),
        features,
        labels,
        param_grid_input,
    )

    for row in ParameterGrid(output_grid):
        assert old_name not in row
        if new_name in row:
            continue
        assert row in ParameterGrid(param_grid_input)

267 

268 

@pytest.mark.parametrize("estimator", ["log_l1", RandomForestClassifier()])
def test_non_supported_estimator_error(rand_x_y, estimator):
    """Check that an unsupported estimator makes _check_param_grid raise."""
    features, labels = rand_x_y
    expected_message = "Invalid estimator. The supported estimators are:"

    with pytest.raises(ValueError, match=expected_message):
        _check_param_grid(estimator, features, labels, None)

278 

279 

def test_check_parameter_grid_is_empty(rand_x_y):
    """Check that a DummyClassifier gets an empty parameter grid."""
    features, labels = rand_x_y

    grid = _check_param_grid(
        DummyClassifier(random_state=0), features, labels, None
    )

    assert grid == {}

287 

288 

@pytest.mark.parametrize(
    "param_grid",
    [
        {"alphas": [1, 10, 100, 1000]},
        {"alphas": [1, 10, 100, 1000], "fit_intercept": [True, False]},
        {"fit_intercept": [True, False]},
        {"alphas": [[1, 10, 100, 1000]]},
        {"alphas": (1, 10, 100, 1000)},
        {"alphas": [(1, 10, 100, 1000)]},
        {"alphas": ((1, 10, 100, 1000),)},
        {"alphas": np.array([1, 10, 100, 1000])},
        {"alphas": [np.array([1, 10, 100, 1000])]},
        [{"alphas": [1, 10]}, {"alphas": [[100, 1000]]}],
        [{"alphas": [1, 10]}, {"fit_intercept": [True, False]}],
    ],
)
def test_wrap_param_grid(param_grid):
    """Check the rows of a grid wrapped for the "alphas" parameter.

    Every row holding "alphas" must map it to an iterable of numbers;
    every other row must already exist in the original grid.
    """
    param_name = "alphas"
    original_grid = ParameterGrid(param_grid)
    wrapped_grid = ParameterGrid(_wrap_param_grid(param_grid, param_name))

    for row in wrapped_grid:
        if param_name not in row:
            assert row in original_grid
            continue

        candidates = row[param_name]
        assert isinstance(candidates, collections.abc.Iterable)
        assert all(isinstance(item, numbers.Number) for item in candidates)

318 

319 

@pytest.mark.parametrize(
    "param_grid, need_wrap",
    [
        ({"alphas": [1, 10, 100, 1000]}, True),
        ({"alphas": [[1, 10, 100, 1000]]}, False),
    ],
)
def test_wrap_param_grid_warning(param_grid, need_wrap):
    """Check the wrapping warning is emitted only when wrapping is needed."""
    expected_warning_substring = "should be a sequence of iterables"

    if need_wrap:
        with pytest.warns(UserWarning, match=expected_warning_substring):
            _wrap_param_grid(param_grid, param_name="alphas")
        return

    # No wrapping needed: none of the recorded warnings may mention it.
    with warnings.catch_warnings(record=True) as caught:
        _wrap_param_grid(param_grid, param_name="alphas")
        messages = [str(entry.message) for entry in caught]

    assert not any(
        expected_warning_substring in message for message in messages
    )

346 

347 

def test_wrap_param_grid_is_none():
    """Check that wrapping a ``None`` grid returns ``None``."""
    wrapped = _wrap_param_grid(None, "alphas")
    assert wrapped is None

350 

351 

@pytest.mark.parametrize(
    "model", [DecoderRegressor, Decoder, FREMRegressor, FREMClassifier]
)
def test_check_inputs_length(model):
    """Check that fit raises when X and y have different sample counts."""
    iris = load_iris()
    images, mask = to_niimgs(iris.data, (2, 2, 2))

    # Map the targets to -1 / 1, then drop the last ten of them so that
    # y is shorter than X.
    signed_targets = 2 * (iris.target > 0) - 1
    short_targets = signed_targets[:-10]

    with pytest.raises(ValueError, match="inconsistent numbers of samples"):
        model(mask=mask, screening_percentile=100.0).fit(
            images, short_targets
        )

366 

367 

@pytest.mark.parametrize(
    "estimator",
    [
        "svc",
        "svc_l2",
        "svc_l1",
        "logistic",
        "logistic_l1",
        "logistic_l2",
        "ridge",
        "ridge_classifier",
        "ridge_regressor",
        "svr",
        "dummy_classifier",
        "dummy_regressor",
    ],
)
def test_check_supported_estimator(estimator):
    """Check that supported estimator names do not trigger the custom warning.

    The recorded warning messages are searched for the expected text as a
    substring. An exact-equality membership test would pass vacuously if
    the emitted message carried any extra text.
    """
    expected_warning = (
        "Use a custom estimator at your own risk "
        "of the process not working as intended."
    )

    with warnings.catch_warnings(record=True) as raised_warnings:
        _check_estimator(_BaseDecoder(estimator=estimator).estimator)
    warning_messages = [str(warning.message) for warning in raised_warnings]

    assert not any(
        expected_warning in message for message in warning_messages
    )

397 

398 

@pytest.mark.parametrize("estimator", ["ridgo", "svb"])
def test_check_unsupported_estimator(estimator):
    """Check how unsupported estimators are handled.

    An unknown estimator name must raise a ValueError, while a custom
    estimator object must only trigger a UserWarning.
    """
    unknown_name = _BaseDecoder(estimator=estimator).estimator
    with pytest.raises(ValueError, match="Invalid estimator"):
        _check_estimator(unknown_name)

    expected_warning = (
        "Use a custom estimator at your own risk "
        "of the process not working as intended."
    )
    custom_estimator = RandomForestClassifier()
    with pytest.warns(UserWarning, match=expected_warning):
        _check_estimator(_BaseDecoder(estimator=custom_estimator).estimator)

416 

417 

def test_parallel_fit(rand_x_y):
    """Check that _parallel_fit gives the same output for two "C" grids.

    The second grid only adds the extra candidate 5e0 to the first one.
    """
    X, y = make_regression(
        n_samples=100,
        n_features=20,
        n_informative=5,
        noise=0.2,
        random_state=42,
    )
    _, y_classification = rand_x_y

    # The first 80 samples are used for training, the remaining ones for
    # testing.
    train = range(80)
    test = range(80, len(y_classification))

    estimator = SVR(kernel="linear")
    scorer = check_scoring(estimator, "r2")
    selector = check_feature_screening(
        screening_percentile=None, mask_img=None, is_classification=False
    )

    candidate_grids = ([1e-1, 1e0, 1e1], [1e-1, 1e0, 5e0, 1e1])
    outputs = [
        list(
            _parallel_fit(
                estimator=SVR(kernel="linear"),
                X=X,
                y=y,
                train=train,
                test=test,
                param_grid={"C": np.array(candidates)},
                scorer=scorer,
                mask_img=None,
                class_index=1,
                selector=selector,
                clustering_percentile=100,
            )
        )
        for candidates in candidate_grids
    ]

    # Every element of the output must be the same for both runs.
    first_run, second_run = outputs
    for left, right in zip(first_run, second_run):
        if isinstance(left, np.ndarray):
            assert_array_almost_equal(left, right)
        else:
            assert left == right

471 

472 

@pytest.mark.parametrize(
    "param_values",
    (
        [0.001, 0.01, 0.1, 1, 10, 100, 1000],
        [[0.001, 0.01, 0.1, 1, 10, 100, 1000]],
    ),
)
@pytest.mark.parametrize(
    "estimator, param_name, fitted_param_name, is_classification",
    [
        (RidgeCV(), "alphas", "best_alpha", False),
        (RidgeClassifierCV(), "alphas", "best_alpha", True),
        (LogisticRegressionCV(), "Cs", "best_C", True),
        (LassoCV(), "alphas", "best_alpha", False),
    ],
)
def test_parallel_fit_builtin_cv(
    rand_x_y,
    estimator,
    param_name,
    fitted_param_name,
    is_classification,
    param_values,
):
    """Check the best parameter reported for estimators with built-in CV.

    The `fitted_param_name` entry returned by _parallel_fit must be a
    single number, whether or not the candidate values are wrapped in an
    extra list.
    """
    X, y = make_regression(
        n_samples=N_SAMPLES,
        n_features=20,
        n_informative=5,
        noise=0.2,
        random_state=42,
    )

    # 80% of the indices are used for training, the rest for testing.
    split_index = int(0.8 * N_SAMPLES)
    train = range(split_index)
    test = range(split_index, N_SAMPLES)

    selector = check_feature_screening(
        screening_percentile=None, mask_img=None, is_classification=False
    )

    scorer = check_scoring(
        estimator, "accuracy" if is_classification else "r2"
    )
    if is_classification:
        # Classification runs use the fixture labels as targets instead.
        _, y = rand_x_y

    _, _, _, best_param, _, _ = _parallel_fit(
        estimator=estimator,
        X=X,
        y=y,
        train=train,
        test=test,
        param_grid={param_name: param_values},
        scorer=scorer,
        mask_img=None,
        class_index=1,
        selector=selector,
        clustering_percentile=100,
    )

    assert isinstance(best_param[fitted_param_name], numbers.Number)

543 

544 

def test_decoder_param_grid_sequence(binary_classification_data):
    """Check that a list of grids yields one best value per CV fold."""
    X, y, _ = binary_classification_data
    n_cv_folds = 10

    # "random_state" is fixed in both grids for consistent behavior.
    l2_grid = {
        "penalty": ["l2"],
        "C": [100, 1000],
        "random_state": [42],
    }
    # "dual" is deliberately present in this grid only.
    l1_grid = {
        "penalty": ["l1"],
        "dual": [False],
        "C": [100, 10],
        "random_state": [42],
    }

    decoder = Decoder(param_grid=[l2_grid, l1_grid], cv=n_cv_folds)
    decoder.fit(X, y)

    for best_params in decoder.cv_params_.values():
        for selected_values in best_params.values():
            assert len(selected_values) == n_cv_folds

568 

569 

def test_decoder_binary_classification_with_masker_object(
    binary_classification_data,
):
    """Check binary decoding when the mask is a NiftiMasker instance."""
    X, y, _ = binary_classification_data

    decoder = Decoder(mask=NiftiMasker())
    decoder.fit(X, y)
    predictions = decoder.predict(X)

    assert decoder.scoring == "roc_auc"
    assert decoder.score(X, y) == 1.0
    assert accuracy_score(y, predictions) > 0.95

582 

583 

def test_decoder_binary_classification_with_logistic_model(
    binary_classification_data,
):
    """Check binary decoding accuracy with the "logistic_l2" estimator."""
    X, y, mask = binary_classification_data

    decoder = Decoder(estimator="logistic_l2", mask=mask)
    decoder.fit(X, y)
    predictions = decoder.predict(X)

    assert accuracy_score(y, predictions) > 0.95

595 

596 

@pytest.mark.parametrize("screening_percentile", [100, 20, None])
def test_decoder_binary_classification_screening(
    binary_classification_data, screening_percentile
):
    """Check binary decoding accuracy for several screening percentiles."""
    X, y, mask = binary_classification_data

    decoder = Decoder(mask=mask, screening_percentile=screening_percentile)
    decoder.fit(X, y)
    predictions = decoder.predict(X)

    assert accuracy_score(y, predictions) > 0.95

608 

609 

@pytest.mark.parametrize("clustering_percentile", [100, 99])
def test_decoder_binary_classification_clustering(
    binary_classification_data, clustering_percentile
):
    """Check FREMClassifier accuracy for several clustering percentiles."""
    X, y, mask = binary_classification_data

    frem = FREMClassifier(
        estimator="logistic_l2",
        mask=mask,
        clustering_percentile=clustering_percentile,
        screening_percentile=90,
        cv=5,
    )
    frem.fit(X, y)
    predictions = frem.predict(X)

    assert accuracy_score(y, predictions) > 0.9

627 

628 

@pytest.mark.parametrize("cv", [KFold(n_splits=5), LeaveOneGroupOut()])
def test_decoder_binary_classification_cross_validation(
    binary_classification_data, cv, rng
):
    """Check binary decoding with explicit CV splitters.

    Groups are only drawn and passed to ``fit`` for LeaveOneGroupOut.
    """
    X, y, mask = binary_classification_data

    decoder = Decoder(
        estimator="svc", mask=mask, standardize="zscore_sample", cv=cv
    )

    if isinstance(cv, LeaveOneGroupOut):
        groups = rng.binomial(2, 0.3, size=len(y))
    else:
        groups = None

    decoder.fit(X, y, groups=groups)
    predictions = decoder.predict(X)

    assert accuracy_score(y, predictions) > 0.9

646 

647 

def test_decoder_dummy_classifier(binary_classification_data):
    """Check the share of 1.0 predictions made by the dummy classifier.

    The fixture labels are replaced by targets where 80% of the samples
    are 1.0, and the predicted share of 1.0 is compared with that
    proportion.
    """
    n_samples = N_SAMPLES
    X, _, mask = binary_classification_data

    proportion = 0.8
    n_positive = int(proportion * n_samples)
    y = np.zeros(n_samples)
    y[:n_positive] = 1.0

    decoder = Decoder(estimator="dummy_classifier", mask=mask)
    decoder.fit(X, y)
    predictions = decoder.predict(X)

    predicted_share = np.sum(predictions == 1.0) / n_samples
    # NOTE(review): this bound is one-sided (no abs), so a share far below
    # `proportion` also passes - confirm whether that is intended.
    assert predicted_share - proportion < 0.05

663 

664 

def test_decoder_dummy_classifier_with_callable(binary_classification_data):
    """Check that a scorer object can be passed as ``scoring``."""
    X, y, mask = binary_classification_data
    accuracy_scorer = get_scorer("accuracy")

    decoder = Decoder(
        estimator="dummy_classifier", mask=mask, scoring=accuracy_scorer
    )
    decoder.fit(X, y)
    predictions = decoder.predict(X)

    assert decoder.scoring == accuracy_scorer
    assert decoder.score(X, y) == accuracy_score(y, predictions)

677 

678 

def test_decoder_dummy_classifier_strategy_prior():
    """Check predictions of a DummyClassifier using the "prior" strategy.

    Every prediction must be 1.0, so the ROC AUC of these constant
    predictions is 0.5.
    """
    X, y, mask = _make_binary_classification_test_data(n_samples=300)

    param = {"strategy": "prior"}
    dummy_classifier = DummyClassifier(random_state=0)
    dummy_classifier.set_params(**param)
    model = Decoder(estimator=dummy_classifier, mask=mask)
    model.fit(X, y)
    y_pred = model.predict(X)

    # Compare element-wise before reducing: `np.all(y_pred) == 1.0` only
    # compared a single boolean with 1.0.
    assert np.all(y_pred == 1.0)
    assert roc_auc_score(y, y_pred) == 0.5

691 

692 

def test_decoder_dummy_classifier_strategy_most_frequent():
    """Check a DummyClassifier using the "most_frequent" strategy.

    Every prediction must be 1.0, and the fitted decoder must expose the
    attributes specific to dummy estimators.
    """
    X, y, mask = _make_binary_classification_test_data(n_samples=300)

    param = {"strategy": "most_frequent"}
    dummy_classifier = DummyClassifier(random_state=0)
    dummy_classifier.set_params(**param)

    model = Decoder(estimator=dummy_classifier, mask=mask)
    model.fit(X, y)
    y_pred = model.predict(X)

    # Compare element-wise before reducing: `np.all(y_pred) == 1.0` only
    # compared a single boolean with 1.0.
    assert np.all(y_pred == 1.0)

    # Returns model coefficients for dummy estimators as None
    assert model.coef_ is None
    # Dummy output are nothing but the attributes of the dummy estimators
    assert model.dummy_output_ is not None
    assert model.cv_scores_ is not None

711 

712 

def test_decoder_dummy_classifier_roc_scoring(binary_classification_data):
    """Check the mean CV "roc_auc" score of the dummy classifier."""
    X, y, mask = binary_classification_data

    decoder = Decoder(
        estimator="dummy_classifier", mask=mask, scoring="roc_auc"
    )
    decoder.fit(X, y)

    mean_cv_score = np.mean(decoder.cv_scores_[0])
    assert mean_cv_score >= 0.45

720 

721 

def test_decoder_error_not_implemented(tiny_binary_classification_data):
    """Check that the "constant" dummy strategy makes fit raise."""
    X, y, mask = tiny_binary_classification_data

    constant_classifier = DummyClassifier(strategy="constant", random_state=0)
    decoder = Decoder(estimator=constant_classifier, mask=mask)

    with pytest.raises(NotImplementedError):
        decoder.fit(X, y)

733 

734 

def test_decoder_error_unknown_scoring_metrics(
    tiny_binary_classification_data,
):
    """Check that an unknown scoring name makes fit raise a ValueError."""
    X, y, mask = tiny_binary_classification_data
    expected_message = (
        "The 'scoring' parameter of check_scoring must be a str among"
    )

    decoder = Decoder(
        estimator=DummyClassifier(random_state=0), mask=mask, scoring="foo"
    )

    with pytest.raises(ValueError, match=expected_message):
        decoder.fit(X, y)

749 

750 

def test_decoder_dummy_classifier_default_scoring():
    """Check that ``scoring=None`` falls back to accuracy after fitting."""
    X, y, _ = _make_binary_classification_test_data()

    decoder = Decoder(estimator="dummy_classifier", scoring=None)

    # The constructor argument itself is stored unchanged.
    assert decoder.scoring is None

    decoder.fit(X, y)

    reference_scorer = get_scorer("accuracy")
    assert decoder.scorer_._score_func == reference_scorer._score_func
    assert decoder.scorer_._sign == reference_scorer._sign
    assert decoder.score(X, y) > 0.5

763 

764 

def test_decoder_classification_string_label():
    """Check that the decoder accepts string class labels."""
    iris = load_iris()
    images, mask = to_niimgs(iris.data, [2, 2, 2])

    # Map each integer iris target to a string label.
    labels = ["red", "blue", "green"]
    y_str = [labels[target] for target in iris.target]

    decoder = Decoder(mask=mask)
    decoder.fit(images, y_str)
    predictions = decoder.predict(images)

    assert accuracy_score(y_str, predictions) > 0.95

777 

778 

@pytest.mark.parametrize("screening_percentile", [100, 20, 1, None])
@pytest.mark.parametrize("estimator", ESTIMATOR_REGRESSION)
def test_decoder_regression_screening(
    regression_data, screening_percentile, estimator
):
    """Check regression quality for several screening percentiles."""
    X, y, mask = regression_data

    regressor = DecoderRegressor(
        estimator=estimator,
        mask=mask,
        screening_percentile=screening_percentile,
    )
    regressor.fit(X, y)
    predictions = regressor.predict(X)

    assert r2_score(y, predictions) > 0.95

795 

796 

@pytest.mark.parametrize("clustering_percentile", [100, 99])
@pytest.mark.parametrize("estimator", ESTIMATOR_REGRESSION)
def test_decoder_regression_clustering(
    regression_data, clustering_percentile, estimator
):
    """Check FREMRegressor quality for several clustering percentiles."""
    X, y, mask = regression_data

    frem = FREMRegressor(
        estimator=estimator,
        mask=mask,
        clustering_percentile=clustering_percentile,
        screening_percentile=90,
        cv=10,
    )
    frem.fit(X, y)
    predictions = frem.predict(X)

    assert frem.scoring == "r2"
    assert r2_score(y, predictions) > 0.95
    assert frem.score(X, y) == r2_score(y, predictions)

817 

818 

def test_decoder_dummy_regression(regression_data):
    """Check regression with the "dummy_regressor" estimator and r2 scoring."""
    X, y, mask = regression_data

    regressor = DecoderRegressor(
        estimator="dummy_regressor",
        mask=mask,
        scoring="r2",
        screening_percentile=1,
    )
    regressor.fit(X, y)
    predictions = regressor.predict(X)

    assert regressor.scoring == "r2"
    assert r2_score(y, predictions) <= 0.0
    assert regressor.score(X, y) == r2_score(y, predictions)

835 

836 

def test_decoder_dummy_regression_default_scoring_metric_is_r2(
    regression_data,
):
    """Check that ``scoring=None`` makes the regressor score with r2."""
    X, y, mask = regression_data

    regressor = DecoderRegressor(
        estimator="dummy_regressor", mask=mask, scoring=None
    )
    regressor.fit(X, y)
    predictions = regressor.predict(X)

    assert regressor.score(X, y) == r2_score(y, predictions)

850 

851 

def test_decoder_dummy_regression_other_strategy(regression_data):
    """Check that the decoder accepts a DummyRegressor with another strategy."""
    X, y, mask = regression_data

    median_regressor = DummyRegressor(strategy="median")

    regressor = DecoderRegressor(estimator=median_regressor, mask=mask)
    regressor.fit(X, y)
    predictions = regressor.predict(X)

    assert r2_score(y, predictions) <= 0.0
    # Dummy estimators report their model coefficients as None ...
    assert regressor.coef_ is None
    # ... and expose the dummy estimator attributes plus the CV scores.
    assert regressor.dummy_output_ is not None
    assert regressor.cv_scores_ is not None

870 

871 

def test_decoder_multiclass_classification_masker(multiclass_data):
    """Check multiclass decoding when the mask is a NiftiMasker instance."""
    X, y, _ = multiclass_data

    decoder = Decoder(mask=NiftiMasker())
    decoder.fit(X, y)
    predictions = decoder.predict(X)

    assert accuracy_score(y, predictions) > 0.95

880 

881 

def test_decoder_multiclass_classification_masker_dummy_classifier(
    multiclass_data,
):
    """Check multiclass decoding with the dummy classifier and accuracy."""
    X, y, _ = multiclass_data

    decoder = Decoder(
        estimator="dummy_classifier", mask=NiftiMasker(), scoring="accuracy"
    )
    decoder.fit(X, y)
    predictions = decoder.predict(X)
    accuracy = accuracy_score(y, predictions)

    assert decoder.scoring == "accuracy"
    # The data set has 4 classes.
    assert accuracy > 0.2
    assert decoder.score(X, y) == accuracy_score(y, predictions)

897 

898 

@pytest.mark.parametrize("screening_percentile", [100, 20, None])
def test_decoder_multiclass_classification_screening(
    multiclass_data, screening_percentile
):
    """Check multiclass decoding accuracy for several screening percentiles."""
    X, y, mask = multiclass_data

    decoder = Decoder(mask=mask, screening_percentile=screening_percentile)
    decoder.fit(X, y)
    predictions = decoder.predict(X)

    assert accuracy_score(y, predictions) > 0.95

910 

911 

@pytest.mark.parametrize("clustering_percentile", [100, 99])
@pytest.mark.parametrize("estimator", ["svc_l2", "svc_l1"])
def test_decoder_multiclass_classification_clustering(
    multiclass_data, clustering_percentile, estimator
):
    """Check multiclass FREMClassifier for several clustering percentiles."""
    X, y, mask = multiclass_data

    frem = FREMClassifier(
        estimator=estimator,
        mask=mask,
        clustering_percentile=clustering_percentile,
        screening_percentile=90,
        cv=5,
    )
    frem.fit(X, y)
    predictions = frem.predict(X)

    assert frem.scoring == "roc_auc"
    assert accuracy_score(y, predictions) > 0.9

931 

932 

@pytest.mark.parametrize("cv", [KFold(n_splits=5), LeaveOneGroupOut()])
def test_decoder_multiclass_classification_cross_validation(
    multiclass_data, cv
):
    """Check multiclass decoding with explicit CV splitters.

    Groups are only drawn and passed to ``fit`` for LeaveOneGroupOut.
    """
    X, y, mask = multiclass_data

    decoder = Decoder(
        estimator="svc", mask=mask, standardize="zscore_sample", cv=cv
    )

    if isinstance(cv, LeaveOneGroupOut):
        groups = _rng(0).binomial(2, 0.3, size=len(y))
    else:
        groups = None

    decoder.fit(X, y, groups=groups)
    predictions = decoder.predict(X)

    assert accuracy_score(y, predictions) > 0.9

950 

951 

def test_decoder_multiclass_classification_apply_mask_shape():
    """Check that _apply_mask output has the shape of the original matrix."""
    dim = 5
    original_matrix, _ = make_classification(
        n_samples=200,
        n_features=dim**3,
        scale=3.0,
        n_informative=5,
        n_classes=4,
        random_state=42,
    )
    images, _ = to_niimgs(original_matrix, [dim] * 3)

    decoder = Decoder(mask=NiftiMasker())
    masked_matrix = decoder._apply_mask(images)

    assert masked_matrix.shape == original_matrix.shape

972 

973 

def test_decoder_multiclass_classification_apply_mask_attributes(affine_eye):
    """Check that masker_ carries the masking parameters given to the decoder.

    The parameters are set on the Decoder, and after calling _apply_mask
    each of them must be found unchanged on ``model.masker_``.
    """
    X, _, _ = _make_multiclass_classification_test_data()

    target_affine = 2 * affine_eye
    target_shape = (1, 1, 1)
    t_r = 1
    high_pass = 1
    low_pass = 2
    smoothing_fwhm = 0.5

    model = Decoder(
        target_affine=target_affine,
        target_shape=target_shape,
        t_r=t_r,
        high_pass=high_pass,
        low_pass=low_pass,
        smoothing_fwhm=smoothing_fwhm,
    )

    model._apply_mask(X)

    # The whole affine must match: `np.any` would already pass when a
    # single entry of the two matrices is equal.
    assert np.array_equal(model.masker_.target_affine, target_affine)
    assert model.masker_.target_shape == target_shape
    assert model.masker_.t_r == t_r
    assert model.masker_.high_pass == high_pass
    assert model.masker_.low_pass == low_pass
    assert model.masker_.smoothing_fwhm == smoothing_fwhm

1006 

1007 

def test_decoder_multiclass_error_incorrect_cv(multiclass_data):
    """Check that an invalid ``cv`` value makes fit raise a ValueError."""
    X, y, _ = multiclass_data
    invalid_cv_values = ["abc", LinearSVC(dual=True)]

    for invalid_cv in invalid_cv_values:
        decoder = Decoder(mask=NiftiMasker(), cv=invalid_cv)
        with pytest.raises(ValueError, match="Expected cv as an integer"):
            decoder.fit(X, y)

1016 

1017 

def test_decoder_multiclass_warnings(multiclass_data):
    """Check two UserWarnings raised while fitting multiclass decoders."""
    X, y, _ = multiclass_data
    groups = _rng(0).binomial(2, 0.3, size=len(y))

    # First warning: groups are given but no custom CV splitter is set.
    groups_warning = (
        "groups parameter is specified but "
        "cv parameter is not set to custom CV splitter. "
        "Using default object LeaveOneGroupOut()."
    )
    with pytest.warns(UserWarning, match=groups_warning):
        decoder = Decoder(mask=NiftiMasker())
        decoder.fit(X, y, groups=groups)

    # Second warning: fewer than 50 features remain after screening and
    # clustering for FREM.
    with pytest.warns(UserWarning, match=".*screening_percentile parameters"):
        frem = FREMClassifier(
            clustering_percentile=10,
            screening_percentile=10,
            mask=NiftiMasker(),
            cv=1,
        )
        frem.fit(X, y)

1043 

1044 

def test_decoder_tags_classification():
    """Check that the Decoder tags declare y as required."""
    tags = Decoder().__sklearn_tags__()
    # TODO
    # remove if block when bumping sklearn_version to > 1.5
    if SKLEARN_LT_1_6:
        assert tags["require_y"] is True
    else:
        assert tags.target_tags.required is True

1054 

1055 

def test_decoder_tags_regression():
    """Check that the tags of ``DecoderRegressor`` flag multi-output."""
    tags = DecoderRegressor().__sklearn_tags__()
    # remove the subscript branch when bumping sklearn_version to > 1.5
    if SKLEARN_LT_1_6:
        multi_output = tags["multioutput"]
    else:
        multi_output = tags.target_tags.multi_output
    assert multi_output is True

1064 

1065 

def test_decoder_decision_function(binary_classification_data):
    """Call ``decision_function`` on an already-masked ndarray.

    Kept as a backward-compatibility check.
    """
    imgs, y, mask = binary_classification_data

    decoder = Decoder(mask=mask)
    decoder.fit(imgs, y)

    # Mask the images by hand, then feed the resulting array directly.
    masked = decoder.masker_.transform(imgs)
    n_features = masked.shape[1]
    assert n_features == decoder.coef_.shape[1]
    decoder.decision_function(masked)

1075 

1076 

@pytest.mark.timeout(0)
def test_decoder_strings_filepaths_input(
    tiny_binary_classification_data, tmp_path
):
    """Smoke test: decoder methods accept a list of file paths as input.

    Every volume along the last axis of the image is saved to its own
    file under ``tmp_path``; the list of paths is then given to ``fit``,
    ``predict`` and ``score``.

    See https://github.com/nilearn/nilearn/issues/4226
    """
    X, y, _ = tiny_binary_classification_data

    n_volumes = X.shape[-1]
    volume_paths = [tmp_path / f"niimg{i}.nii" for i in range(n_volumes)]
    for index, path in enumerate(volume_paths):
        save(X.slicer[..., index], path)

    decoder = Decoder(mask=NiftiMasker())
    decoder.fit(volume_paths, y)
    decoder.predict(volume_paths)
    decoder.score(volume_paths, y)

1094 

1095 

def test_decoder_decision_function_raises_value_error(
    binary_classification_data,
):
    """``decision_function`` must reject an array missing one feature."""
    imgs, y, _ = binary_classification_data

    decoder = Decoder(mask=NiftiMasker())
    decoder.fit(imgs, y)

    # Drop the first column so the feature count no longer matches.
    masked = decoder.masker_.transform(imgs)
    truncated = np.delete(masked, 0, axis=1)

    expected_message = f"X has {truncated.shape[1]} features per sample"
    with pytest.raises(ValueError, match=expected_message):
        decoder.decision_function(truncated)

1111 

1112 

1113# ------------------------ surface tests ------------------------------------ # 

1114 

1115 

@pytest.fixture()
def _make_surface_class_data(rng, surf_img_2d, n_samples=50):
    """Return ``(images, labels)`` for surface classification tests.

    Labels are drawn from ``{0, 1}`` with ``rng``; the images come from
    calling the ``surf_img_2d`` fixture with ``n_samples``.
    """
    # Labels are drawn before the images are built, as in the original order.
    labels = rng.choice([0, 1], size=n_samples)
    images = surf_img_2d(n_samples)
    return images, labels

1121 

1122 

@pytest.fixture()
def _make_surface_reg_data(rng, surf_img_2d, n_samples=50):
    """Return ``(images, targets)`` for surface regression tests.

    Targets are ``n_samples`` values from ``rng.random``; the images come
    from calling the ``surf_img_2d`` fixture with ``n_samples``.
    """
    # Targets are drawn before the images are built, as in the original order.
    targets = rng.random(n_samples)
    images = surf_img_2d(n_samples)
    return images, targets

1128 

1129 

@pytest.mark.filterwarnings("ignore:Overriding provided")
def test_decoder_apply_mask_surface(_make_surface_class_data):
    """Check the output of ``_apply_mask`` on a surface image.

    The masked array must have the reversed shape of the input image and
    the decoder must expose a ``SurfaceImage`` as ``mask_img_``.
    """
    X, _ = _make_surface_class_data
    decoder = Decoder(mask=SurfaceMasker())

    masked = decoder._apply_mask(X)

    expected_shape = X.shape[::-1]
    assert masked.shape == expected_shape
    mask_type_name = type(decoder.mask_img_).__name__
    assert mask_type_name == "SurfaceImage"

1139 

1140 

@pytest.mark.filterwarnings("ignore:Overriding provided")
@pytest.mark.filterwarnings("ignore:After clustering")
def test_decoder_screening_percentile_surface_default(
    _make_surface_class_data,
):
    """Check ``screening_percentile_`` on surface data when left at default.

    With no ``screening_percentile`` passed, the fitted value must be 20.
    """
    warnings.simplefilter("ignore", ConvergenceWarning)
    X, y = _make_surface_class_data

    decoder = Decoder(mask=SurfaceMasker())
    decoder.fit(X, y)

    fitted_percentile = decoder.screening_percentile_
    assert fitted_percentile == 20

1153 

1154 

@pytest.mark.filterwarnings("ignore:Overriding provided")
@pytest.mark.filterwarnings("ignore:After clustering")
@pytest.mark.parametrize("perc", [None, 100, 0])
def test_decoder_screening_percentile_surface(perc, _make_surface_class_data):
    """Check ``screening_percentile_`` for explicit values on surface data.

    ``None`` must be stored as 100; any other value is stored unchanged.
    """
    warnings.simplefilter("ignore", ConvergenceWarning)
    X, y = _make_surface_class_data

    decoder = Decoder(mask=SurfaceMasker(), screening_percentile=perc)
    decoder.fit(X, y)

    expected = 100 if perc is None else perc
    assert decoder.screening_percentile_ == expected

1169 

1170 

@pytest.mark.parametrize("surf_mask_dim", [1, 2])
@pytest.mark.filterwarnings("ignore:After clustering and screening")
def test_decoder_adjust_screening_lessthan_mask_surface(
    surf_mask_dim,
    surf_mask_1d,
    surf_mask_2d,
    _make_surface_class_data,
    screening_percentile=30,
):
    """Check screening adjustment when the percentile fits inside the mask.

    When the requested percentile (relative to the mesh) is less than or
    equal to the share of the mesh covered by the mask, the fitted
    percentile is the requested one scaled by the mesh-to-mask ratio.
    """
    img, y = _make_surface_class_data
    if surf_mask_dim == 1:
        surf_mask = surf_mask_1d
    else:
        surf_mask = surf_mask_2d()

    mask_n_vertices = _get_mask_extent(surf_mask)
    mesh_n_vertices = img.mesh.n_vertices

    # Precondition of this scenario: the mask covers at least the
    # requested percentage of the mesh.
    mask_to_mesh_ratio = (mask_n_vertices / mesh_n_vertices) * 100
    assert screening_percentile <= mask_to_mesh_ratio

    decoder = Decoder(
        mask=surf_mask,
        param_grid={"C": [0.01, 0.1]},
        cv=3,
        screening_percentile=screening_percentile,
    )
    decoder.fit(img, y)

    expected = screening_percentile * (mesh_n_vertices / mask_n_vertices)
    assert decoder.screening_percentile_ == expected

1200 

1201 

@pytest.mark.parametrize("surf_mask_dim", [1, 2])
@pytest.mark.filterwarnings("ignore:After clustering and screening")
def test_decoder_adjust_screening_greaterthan_mask_surface(
    surf_mask_dim,
    surf_mask_1d,
    surf_mask_2d,
    _make_surface_class_data,
    screening_percentile=80,
):
    """Check screening adjustment when the percentile exceeds the mask.

    When the requested percentile (relative to the mesh) is greater than
    the share of the mesh covered by the mask, the fitted percentile is
    set to 100, i.e. the whole mask is kept.
    """
    img, y = _make_surface_class_data
    surf_mask = surf_mask_1d if surf_mask_dim == 1 else surf_mask_2d()
    mask_n_vertices = _get_mask_extent(surf_mask)
    mesh_n_vertices = img.mesh.n_vertices

    # Precondition of this scenario: the mask covers less of the mesh
    # than the requested percentage.
    mask_to_mesh_ratio = (mask_n_vertices / mesh_n_vertices) * 100
    assert screening_percentile > mask_to_mesh_ratio

    decoder = Decoder(
        # Use the mask selected by ``surf_mask_dim`` so that the 2D case is
        # really exercised (``surf_mask_1d`` used to be passed for both).
        mask=surf_mask,
        param_grid={"C": [0.01, 0.1]},
        cv=3,
        screening_percentile=screening_percentile,
    )
    decoder.fit(img, y)

    adjusted = decoder.screening_percentile_
    assert adjusted == 100

1229 

1230 

@pytest.mark.parametrize("mask", [None, SurfaceMasker()])
@pytest.mark.parametrize("decoder", [_BaseDecoder, Decoder, DecoderRegressor])
def test_decoder_fit_surface(decoder, _make_surface_class_data, mask):
    """Fit each decoder class on surface data, with and without a masker."""
    warnings.simplefilter("ignore", ConvergenceWarning)
    X, y = _make_surface_class_data

    fitted = decoder(mask=mask)
    fitted.fit(X, y)

    # Fitting must have produced coefficients.
    assert fitted.coef_ is not None

1241 

1242 

@pytest.mark.filterwarnings("ignore:After clustering and screening")
@pytest.mark.parametrize("surf_mask_dim", [1, 2])
@pytest.mark.parametrize("decoder", [_BaseDecoder, Decoder, DecoderRegressor])
def test_decoder_fit_surface_with_mask_image(
    _make_surface_class_data,
    decoder,
    surf_mask_dim,
    surf_mask_1d,
    surf_mask_2d,
):
    """Fit each decoder class on surface data using a surface mask image.

    The mask is either ``surf_mask_1d`` or the result of calling
    ``surf_mask_2d``, depending on ``surf_mask_dim``.
    """
    warnings.simplefilter("ignore", ConvergenceWarning)
    X, y = _make_surface_class_data

    if surf_mask_dim == 1:
        surf_mask = surf_mask_1d
    else:
        surf_mask = surf_mask_2d()

    fitted = decoder(mask=surf_mask)
    fitted.fit(X, y)

    # Fitting must have produced coefficients.
    assert fitted.coef_ is not None

1261 

1262 

@pytest.mark.filterwarnings("ignore:Overriding provided")
@pytest.mark.parametrize("decoder", [_BaseDecoder, Decoder, DecoderRegressor])
def test_decoder_error_incompatible_surface_mask_and_volume_data(
    decoder, surf_mask_1d, tiny_binary_classification_data
):
    """Fitting volume data with a surface mask must raise a TypeError.

    Checked for a surface mask image first, then for a ``SurfaceMasker``.
    """
    data_volume, y, _ = tiny_binary_classification_data
    expected_message = "Mask and images to fit must be of compatible types."

    # Surface mask image.
    with_mask_image = decoder(mask=surf_mask_1d)
    with pytest.raises(TypeError, match=expected_message):
        with_mask_image.fit(data_volume, y)

    # Surface masker object.
    with_masker = decoder(mask=SurfaceMasker())
    with pytest.raises(TypeError, match=expected_message):
        with_masker.fit(data_volume, y)

1283 

1284 

@pytest.mark.parametrize("decoder", [_BaseDecoder, Decoder, DecoderRegressor])
def test_decoder_error_incompatible_surface_data_and_volume_mask(
    _make_surface_class_data, decoder, tiny_binary_classification_data
):
    """Fitting surface data with a volume mask must raise a TypeError."""
    data_surface, y = _make_surface_class_data
    # Only the mask of the volume dataset is needed here.
    volume_mask = tiny_binary_classification_data[2]

    estimator = decoder(mask=volume_mask)
    expected_message = "Mask and images to fit must be of compatible types."

    with pytest.raises(TypeError, match=expected_message):
        estimator.fit(data_surface, y)

1298 

1299 

def test_decoder_predict_score_surface(_make_surface_class_data):
    """Check classification ``predict`` and ``score`` on surface data.

    The decoder is fitted and evaluated on the same data; ``scoring`` must
    be ``"roc_auc"`` and the accuracy of the predictions must lie strictly
    between 0.3 and 0.7.
    """
    warnings.simplefilter("ignore", ConvergenceWarning)
    X, y = _make_surface_class_data

    decoder = Decoder(mask=SurfaceMasker())
    decoder.fit(X, y)
    predictions = decoder.predict(X)

    assert decoder.scoring == "roc_auc"

    # Smoke call: the returned score itself is not checked.
    decoder.score(X, y)

    accuracy = accuracy_score(y, predictions)
    assert 0.3 < accuracy < 0.7

1313 

1314 

@pytest.mark.filterwarnings("ignore:Overriding provided")
@pytest.mark.filterwarnings("ignore:After clustering and screening")
@pytest.mark.filterwarnings("ignore:Solver terminated early")
def test_decoder_regressor_predict_score_surface(_make_surface_reg_data):
    """Check regression ``predict`` and ``score`` on surface data.

    The regressor is fitted and evaluated on the same data; ``scoring``
    must be ``"r2"`` and the R2 of the predictions must not be positive.
    """
    X, y = _make_surface_reg_data

    regressor = DecoderRegressor(mask=SurfaceMasker())
    regressor.fit(X, y)
    predictions = regressor.predict(X)

    assert regressor.scoring == "r2"

    # Smoke call: the returned score itself is not checked.
    regressor.score(X, y)

    r2 = r2_score(y, predictions)
    assert r2 <= 0

1330 

1331 

@pytest.mark.filterwarnings("ignore:After clustering and screening")
@pytest.mark.filterwarnings("ignore:divide by zero encountered in divide")
@pytest.mark.filterwarnings("ignore:Liblinear failed to converge")
@pytest.mark.filterwarnings("ignore:Solver terminated early")
@pytest.mark.parametrize("frem", [FREMRegressor, FREMClassifier])
def test_frem_decoder_fit_surface(
    frem,
    _make_surface_class_data,
    surf_mask_1d,
):
    """Smoke test: FREM estimators can be fitted on surface data.

    Both the regressor and the classifier are fitted on the
    classification fixture with a surface mask image; only the absence of
    an error is checked.
    """
    X, y = _make_surface_class_data

    estimator = frem(mask=surf_mask_1d, clustering_percentile=90)
    estimator.fit(X, y)

1346 

1347 

1348# ------------------------ test decoder vs sklearn -------------------------- # 

1349 

1350 

@pytest.mark.timeout(0)
@pytest.mark.parametrize(
    "classifier_penalty",
    ["svc_l1", "svc_l2", "logistic_l1", "logistic_l2", "ridge_classifier"],
)
def test_decoder_vs_sklearn(
    classifier_penalty, strings_to_sklearn=SUPPORTED_ESTIMATORS
):
    """Compare nilearn ``Decoder`` scores with plain sklearn classifiers.

    The decoder is fitted once with an explicit splitter and scorer. The
    same folds are then replayed by hand with the corresponding sklearn
    estimator, one binary problem per class, and the mean scores of both
    approaches are required to agree.
    """
    X, y, mask = _make_multiclass_classification_test_data(
        n_samples=100, dim=10
    )
    n_classes = len(np.unique(y))
    base_estimator = strings_to_sklearn[classifier_penalty]

    # 10-fold stratified splitter, shuffled with a fixed seed; the same
    # object is used for nilearn and for the manual sklearn loop.
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    # accuracy is used for scoring on both sides
    scorer = check_scoring(base_estimator, "accuracy")

    ## nilearn decoding
    nilearn_decoder = Decoder(
        estimator=classifier_penalty,
        mask=mask,
        standardize=True,
        cv=cv,
        scoring=scorer,
        screening_percentile=100,  # disable screening
    )
    nilearn_decoder.fit(X, y)
    scores_nilearn = nilearn_decoder.cv_scores_

    ## start decoding with sklearn
    masker = NiftiMasker(mask_img=mask, standardize=True)
    features = masker.fit_transform(X)

    # convert multiclass to n_classes binary classifications
    y_binary = LabelBinarizer().fit_transform(y)

    estimator = base_estimator
    scores_sklearn = {}
    for klass in range(n_classes):
        fold_scores = []
        for fold, (train_idx, test_idx) in enumerate(cv.split(features, y)):
            # set best hyperparameters for each fold
            estimator = _set_best_hyperparameters(
                klass,
                estimator,
                nilearn_decoder,
                classifier_penalty,
                fold,
            )
            estimator.fit(features[train_idx], y_binary[train_idx, klass])
            fold_scores.append(
                scorer(
                    estimator, features[test_idx], y_binary[test_idx, klass]
                )
            )
        scores_sklearn[klass] = fold_scores

    # Flatten scores
    mean_sklearn = np.mean(np.concatenate(list(scores_sklearn.values())))
    mean_nilearn = np.mean(np.concatenate(list(scores_nilearn.values())))

    # check average scores are within 0.02 of each other
    assert np.isclose(mean_sklearn, mean_nilearn, atol=0.02)

1420 

1421 

1422def _set_best_hyperparameters( 

1423 klass, sklearn_classifier, nilearn_decoder, classifier_penalty, count 

1424): 

1425 if classifier_penalty in ["svc_l1", "svc_l2"]: 

1426 # LinearSVC does not have a CV variant, so we use exactly the 

1427 # parameter selected by nilearn 

1428 sklearn_classifier = clone(sklearn_classifier).set_params( 

1429 C=nilearn_decoder.cv_params_[klass]["C"][count] 

1430 ) 

1431 elif classifier_penalty in ["logistic_l1", "logistic_l2"]: 

1432 # this sets the list of Cs as coded within nilearn and 

1433 # LogisticRegressionCV will select the best one using 

1434 # cross-validation 

1435 sklearn_classifier = clone(sklearn_classifier).set_params( 

1436 Cs=nilearn_decoder.cv_params_[klass]["Cs"][count], 

1437 ) 

1438 elif classifier_penalty in ["ridge_classifier"]: 

1439 # same as logistic regression 

1440 sklearn_classifier = clone(sklearn_classifier).set_params( 

1441 alphas=nilearn_decoder.cv_params_[klass]["alphas"][count] 

1442 ) 

1443 return sklearn_classifier 

1444 

1445 

@pytest.mark.parametrize("regressor", ["svr", "lasso", "ridge"])
def test_regressor_vs_sklearn(
    regressor, strings_to_sklearn=SUPPORTED_ESTIMATORS
):
    """Compare nilearn ``DecoderRegressor`` scores with sklearn regressors.

    The regressor is fitted once with an explicit splitter and scorer. The
    same folds are then replayed by hand with the corresponding sklearn
    estimator, and both the mean score and the per-fold scores of the two
    approaches are required to agree.
    """
    X, y, mask = _make_regression_test_data(n_samples=100, dim=10)
    base_estimator = strings_to_sklearn[regressor]

    # for regression default cv in nilearn is KFold with 10 splits
    # shuffling is False by default but we use it here with a fixed seed
    # to reduce variability in the test
    cv = KFold(n_splits=10, shuffle=True, random_state=42)
    # r2 is the default scoring for regression
    scorer = check_scoring(base_estimator, "r2")

    ## nilearn decoding
    nilearn_regressor = DecoderRegressor(
        estimator=regressor,
        mask=mask,
        standardize=True,
        cv=cv,
        scoring=scorer,
        screening_percentile=100,  # disable screening
    )
    nilearn_regressor.fit(X, y)
    scores_nilearn = nilearn_regressor.cv_scores_["beta"]

    ## start decoding with sklearn
    masker = NiftiMasker(mask_img=mask, standardize=True)
    features = masker.fit_transform(X)

    # Hyperparameter copied from nilearn for each fold:
    # - svr: SVR does not have a CV variant, so exactly the parameter
    #   selected by nilearn is used
    # - lasso: n_alphas as coded within nilearn; LassoCV will select the
    #   best one using cross-validation
    # - ridge: same as lasso but with alphas
    tuned_param = {"svr": "C", "lasso": "n_alphas", "ridge": "alphas"}.get(
        regressor
    )

    estimator = base_estimator
    scores_sklearn = []
    for fold, (train_idx, test_idx) in enumerate(cv.split(features, y)):
        # set best hyperparameters for each fold
        if tuned_param is not None:
            fresh = clone(estimator)
            value = nilearn_regressor.cv_params_["beta"][tuned_param][fold]
            estimator = fresh.set_params(**{tuned_param: value})
        estimator.fit(features[train_idx], y[train_idx])
        scores_sklearn.append(
            scorer(estimator, features[test_idx], y[test_idx])
        )

    # check average scores are within 0.01 of each other
    assert np.isclose(
        np.mean(scores_sklearn), np.mean(scores_nilearn), atol=0.01
    )
    # also check individual scores are within 0.01 of each other
    assert np.allclose(scores_sklearn, scores_nilearn, atol=0.01)