Coverage for nilearn/connectome/group_sparse_cov.py: 10%

408 statements  


1"""Implementation of algorithm for sparse multi-subjects learning of Gaussian \ 

2graphical models. 

3""" 

4 

5import collections.abc 

6import itertools 

7import operator 

8import warnings 

9 

10import numpy as np 

11import scipy.linalg 

12from joblib import Memory, Parallel, delayed 

13from sklearn.base import BaseEstimator 

14from sklearn.covariance import empirical_covariance 

15from sklearn.model_selection import check_cv 

16from sklearn.utils import check_array 

17from sklearn.utils.extmath import fast_logdet 

18 

19from nilearn._utils import CacheMixin, fill_doc, logger 

20from nilearn._utils.extmath import is_spd 

21from nilearn._utils.logger import find_stack_level 

22from nilearn._utils.param_validation import check_params 

23from nilearn._utils.tags import SKLEARN_LT_1_6 

24 

25 

26def compute_alpha_max(emp_covs, n_samples): 

27 """Compute the critical value of the regularization parameter. 

28 

29 Above this value, the precision matrices computed by

30 group_sparse_covariance are diagonal (complete sparsity).

31 

32 This function also returns the value below which the precision 

33 matrices are fully dense (i.e. minimal number of zero coefficients). 

34 

35 The formula used in this function was derived using the same method 

36 as in :footcite:t:`Duchi2012`. 

37 

38 Parameters 

39 ---------- 

40 emp_covs : array-like, shape (n_features, n_features, n_subjects) 

41 covariance matrix for each subject. 

42 

43 n_samples : array-like, shape (n_subjects,) 

44 number of samples used in the computation of every covariance matrix. 

45 n_samples.sum() can be arbitrary. 

46 

47 Returns 

48 ------- 

49 alpha_max : float 

50 minimal value for the regularization parameter that gives a 

51 fully sparse matrix. 

52 

53 alpha_min : float 

54 value of the regularization parameter below which the precision

55 matrices are fully dense (minimal number of zero coefficients).

56 

57 References 

58 ---------- 

59 .. footbibliography:: 

60 

61 """ 

62 A = np.copy(emp_covs) 

63 n_samples = np.asarray(n_samples, dtype=np.float64).copy()

64 n_samples /= n_samples.sum() 

65 

66 for k in range(emp_covs.shape[-1]): 

67 # Set diagonal to zero 

68 A[..., k].flat[:: A.shape[0] + 1] = 0 

69 A[..., k] *= n_samples[k] 

70 

71 norms = np.sqrt((A**2).sum(axis=-1)) 

72 

73 return np.max(norms), np.min(norms[norms > 0]) 

74 
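# A minimal usage sketch for compute_alpha_max, assuming covariances obtained
# from empirical_covariances() defined further below (hypothetical data):
#
#     >>> import numpy as np
#     >>> rng = np.random.default_rng(0)
#     >>> subjects = [rng.standard_normal((30, 4)) for _ in range(3)]
#     >>> emp_covs, n_samples = empirical_covariances(subjects)
#     >>> alpha_max, alpha_min = compute_alpha_max(emp_covs, n_samples)
#     >>> # alpha >= alpha_max gives diagonal (fully sparse) precisions;
#     >>> # alpha < alpha_min gives fully dense precisions.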

75 

76def _update_submatrix(full, sub, sub_inv, p, h, v): 

77 """Update submatrix and its inverse. 

78 

79 On input, "sub_inv" must be the inverse of the submatrix of "full" obtained

80 by removing the (p-1)-th row and column.

81

82 "sub" and "sub_inv" are modified in-place. After execution of this function,

83 "sub_inv" contains the inverse of the submatrix of "full" obtained by

84 removing the p-th row and column.

85

86 This computation is based on the Sherman-Morrison-Woodbury identity.

87 

88 """ 

89 n = p - 1 

90 v[: n + 1] = full[: n + 1, n] 

91 v[n + 1 :] = full[n + 2 :, n] 

92 h[: n + 1] = full[n, : n + 1] 

93 h[n + 1 :] = full[n, n + 2 :] 

94 

95 # change row: first usage of SWM identity 

96 coln = sub_inv[:, n : n + 1] # 2d array, useful for sub_inv below 

97 V = h - sub[n, :] 

98 coln = coln / (1.0 + np.dot(V, coln)) 

99 # The following line is equivalent to 

100 # sub_inv -= np.outer(coln, np.dot(V, sub_inv)) 

101 sub_inv -= np.dot(coln, np.dot(V, sub_inv)[np.newaxis, :]) 

102 sub[n, :] = h 

103 

104 # change column: second usage of SWM identity 

105 rown = sub_inv[n : n + 1, :] # 2d array, useful for sub_inv below 

106 U = v - sub[:, n] 

107 rown = rown / (1.0 + np.dot(rown, U)) 

108 # The following line is equivalent to (but faster) 

109 # sub_inv -= np.outer(np.dot(sub_inv, U), rown) 

110 sub_inv -= np.dot(np.dot(sub_inv, U)[:, np.newaxis], rown) 

111 sub[:, n] = v # equivalent to sub[:, n] += U

112 

113 # Make sub_inv symmetric (overcome some numerical limitations) 

114 sub_inv += sub_inv.T.copy() 

115 sub_inv /= 2.0 

116 
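# A small self-check sketch for _update_submatrix (hypothetical values):
# starting from the submatrix and its inverse with row/column p-1 removed,
# the call leaves "sub" and "sub_inv" describing removal of row/column p.
#
#     >>> import numpy as np
#     >>> import scipy.linalg
#     >>> rng = np.random.default_rng(0)
#     >>> X = rng.standard_normal((20, 6))
#     >>> full = X.T @ X + np.eye(6)          # symmetric positive definite
#     >>> p = 3
#     >>> keep = [i for i in range(6) if i != p - 1]
#     >>> sub = full[np.ix_(keep, keep)].copy()
#     >>> sub_inv = scipy.linalg.inv(sub)
#     >>> h, v = np.empty(5), np.empty(5)
#     >>> _update_submatrix(full, sub, sub_inv, p, h, v)
#     >>> keep = [i for i in range(6) if i != p]
#     >>> np.allclose(sub, full[np.ix_(keep, keep)])
#     True
#     >>> np.allclose(sub_inv, scipy.linalg.inv(sub))
#     True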

117 

118def _assert_submatrix(full, sub, n): 

119 """Check that "sub" is the matrix obtained \ 

120 by removing the n-th column and row in "full".

121 

122 Used only for debugging. 

123 

124 """ 

125 true_sub = np.empty_like(sub) 

126 true_sub[:n, :n] = full[:n, :n] 

127 true_sub[n:, n:] = full[n + 1 :, n + 1 :] 

128 true_sub[:n, n:] = full[:n, n + 1 :] 

129 true_sub[n:, :n] = full[n + 1 :, :n] 

130 

131 np.testing.assert_almost_equal(true_sub, sub) 

132 

133 

134@fill_doc 

135def group_sparse_covariance( 

136 subjects, 

137 alpha, 

138 max_iter=50, 

139 tol=1e-3, 

140 verbose=0, 

141 probe_function=None, 

142 precisions_init=None, 

143 debug=False, 

144): 

145 """Compute sparse precision matrices and covariance matrices. 

146 

147 The precision matrices returned by this function are sparse, and share a 

148 common sparsity pattern: all have zeros at the same location. This is 

149 achieved by simultaneous computation of all precision matrices at the 

150 same time. 

151 

152 Running time is linear in max_iter and in the number of subjects

153 (len(subjects)), but cubic in the number of features (subjects[0].shape[1]).

154 

155 The present algorithm is based on :footcite:t:`Honorio2012`. 

156 

157 Parameters 

158 ---------- 

159 subjects : :obj:`list` of numpy.ndarray 

160 input subjects. Each subject is a 2D array, whose columns contain 

161 signals. Each array shape must be (sample number, feature number). 

162 The sample number can vary from subject to subject, but all subjects 

163 must have the same number of features (i.e. of columns). 

164 

165 alpha : :obj:`float` 

166 regularization parameter. With normalized covariance matrices and

167 sample counts, sensible values lie in the [0, 1] range (zero means

168 no regularization: the output is not sparse).

169 

170 max_iter : :obj:`int`, default=50 

171 maximum number of iterations. 

172 

173 tol : positive :obj:`float` or None, default=0.001 

174 The tolerance to declare convergence: if the maximum change in the precision

175 matrices between two iterations goes below this value, optimization is stopped. If None, no check is performed.

176 

177 %(verbose0)s 

178 

179 probe_function : callable or None, default=None 

180 This value is called before the first iteration and after each 

181 iteration. If it returns True, then optimization is stopped 

182 prematurely. 

183 The function is given as arguments (in that order): 

184 

185 - empirical covariances (ndarray), 

186 - number of samples for each subject (ndarray), 

187 - regularization parameter (float) 

188 - maximum iteration number (integer) 

189 - tolerance (float) 

190 - current iteration number (integer). -1 means "before first iteration" 

191 - current value of precisions (ndarray). 

192 - previous value of precisions (ndarray). None before first iteration. 

193 

194 precisions_init : numpy.ndarray, default=None 

195 initial value of the precision matrices. If not provided, a diagonal

196 matrix with the inverse of each input signal's variance on its diagonal is used.

197 

198 debug : :obj:`bool`, default=False 

199 if True, perform checks during computation. It can help find 

200 numerical problems, but increases computation time a lot. 

201 

202 Returns 

203 ------- 

204 emp_covs : numpy.ndarray, shape (n_features, n_features, n_subjects) 

205 empirical covariance matrices.

206 

207 precisions : numpy.ndarray, shape (n_features, n_features, n_subjects) 

208 estimated precision matrices 

209 

210 References 

211 ---------- 

212 .. footbibliography:: 

213 

214 """ 

215 emp_covs, n_samples = empirical_covariances( 

216 subjects, assume_centered=False 

217 ) 

218 

219 precisions = _group_sparse_covariance( 

220 emp_covs, 

221 n_samples, 

222 alpha, 

223 max_iter=max_iter, 

224 tol=tol, 

225 verbose=verbose, 

226 precisions_init=precisions_init, 

227 probe_function=probe_function, 

228 debug=debug, 

229 ) 

230 

231 return emp_covs, precisions 

232 
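# A minimal end-to-end sketch of group_sparse_covariance on synthetic data
# (hypothetical values), including an optional probe_function that merely
# records the iteration numbers it receives:
#
#     >>> import numpy as np
#     >>> rng = np.random.default_rng(42)
#     >>> subjects = [rng.standard_normal((50, 6)) for _ in range(4)]
#     >>> subjects = [s / s.std(axis=0) for s in subjects]   # unit variance
#     >>> seen = []
#     >>> def probe(emp_covs, n_samples, alpha, max_iter, tol, n, omega, prev):
#     ...     seen.append(n)    # returning None/False never stops early
#     >>> emp_covs, precisions = group_sparse_covariance(
#     ...     subjects, alpha=0.5, max_iter=5, probe_function=probe
#     ... )
#     >>> emp_covs.shape, precisions.shape
#     ((6, 6, 4), (6, 6, 4))
#     >>> seen[0]     # -1 means "called before the first iteration"
#     -1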

233 

234def _group_sparse_covariance( 

235 emp_covs, 

236 n_samples, 

237 alpha, 

238 max_iter=10, 

239 tol=1e-3, 

240 precisions_init=None, 

241 probe_function=None, 

242 verbose=0, 

243 debug=False, 

244): 

245 """Implement an internal version of group_sparse_covariance. 

246 

247 See its docstring for details. 

248 

249 """ 

250 if tol == -1: 

251 tol = None 

252 

253 _check_alpha(alpha) 

254 

255 n_subjects = emp_covs.shape[-1] 

256 n_features = emp_covs[0].shape[0] 

257 n_samples = np.asarray(n_samples) 

258 n_samples /= n_samples.sum() # essential for numerical stability 

259 

260 _check_diagonal_normalization(emp_covs, n_subjects) 

261 

262 omega = _init_omega(emp_covs, precisions_init) 

263 

264 # Preallocate arrays 

265 y = np.ndarray(shape=(n_subjects, n_features - 1), dtype=np.float64) 

266 u = np.ndarray(shape=(n_subjects, n_features - 1), dtype=np.float64) 

267 y_1 = np.ndarray(shape=(n_subjects, n_features - 2), dtype=np.float64) 

268 h_12 = np.ndarray(shape=(n_subjects, n_features - 2), dtype=np.float64) 

269 q = np.ndarray(shape=(n_subjects,), dtype=np.float64) 

270 aq = np.ndarray(shape=(n_subjects,), dtype=np.float64) # temp. array 

271 c = np.ndarray(shape=(n_subjects,), dtype=np.float64) 

272 W = np.ndarray( 

273 shape=(omega.shape[0] - 1, omega.shape[1] - 1, omega.shape[2]), 

274 dtype=np.float64, 

275 order="F", 

276 ) 

277 W_inv = np.ndarray(shape=W.shape, dtype=np.float64, order="F") 

278 

279 # Auxiliary arrays. 

280 v = np.ndarray((omega.shape[0] - 1,), dtype=np.float64) 

281 h = np.ndarray((omega.shape[1] - 1,), dtype=np.float64) 

282 

283 # Optional. 

284 tolerance_reached = False 

285 max_norm = None 

286 

287 omega_old = np.empty_like(omega) 

288 

289 if probe_function is not None: 

290 # iteration number -1 means called before iteration loop. 

291 probe_function( 

292 emp_covs, n_samples, alpha, max_iter, tol, -1, omega, None 

293 ) 

294 

295 probe_interrupted = False 

296 

297 # Start optimization loop. Variables are named following (mostly) the 

298 # Honorio-Samaras paper notations. 

299 

300 # Used in the innermost loop. Computed here to save some computation. 

301 alpha2 = alpha**2 

302 

303 for n in range(max_iter): 

304 suffix = ( 

305 f" variation (max norm): {max_norm:.3e} " 

306 if max_norm is not None 

307 else "" 

308 ) 

309 

310 logger.log( 

311 f"* iteration {n:d} ({100.0 * n / max_iter:.0f} %){suffix} ...", 

312 verbose=verbose, 

313 ) 

314 

315 omega_old[...] = omega 

316 for p in range(n_features): 

317 if p == 0: 

318 W, W_inv = _set_initial_state_w_and_w_inv(omega, debug, p) 

319 

320 else: 

321 if debug: 

322 omega_orig = omega.copy() 

323 

324 _update_w_and_w_inv( 

325 omega, debug, W, W_inv, n_subjects, p, h, v 

326 ) 

327 

328 if debug: 

329 # Check that omega has not been modified. 

330 np.testing.assert_almost_equal(omega_orig, omega) 

331 

332 # In the following lines, implicit loop on k (subjects) 

333 # Extract y and u 

334 y[:, :p] = omega[:p, p, :].T 

335 y[:, p:] = omega[p + 1 :, p, :].T 

336 

337 u[:, :p] = emp_covs[:p, p, :].T 

338 u[:, p:] = emp_covs[p + 1 :, p, :].T 

339 

340 for m in range(n_features - 1): 

341 # Coordinate descent on y 

342 

343 # T(k) -> n_samples[k] 

344 # v(k) -> emp_covs[p, p, k] 

345 # h_22(k) -> W_inv[m, m, k] 

346 # h_12(k) -> W_inv[:m, m, k], W_inv[m+1:, m, k] 

347 # y_1(k) -> y[k, :m], y[k, m+1:] 

348 # u_2(k) -> u[k, m] 

349 h_12[:, :m] = W_inv[:m, m, :].T 

350 h_12[:, m:] = W_inv[m + 1 :, m, :].T 

351 y_1[:, :m] = y[:, :m] 

352 y_1[:, m:] = y[:, m + 1 :] 

353 

354 c[:] = -n_samples * ( 

355 emp_covs[p, p, :] * (h_12 * y_1).sum(axis=1) + u[:, m] 

356 ) 

357 c2 = np.sqrt(np.dot(c, c)) 

358 

359 # x -> y[:][m] 

360 if c2 <= alpha: 

361 y[:, m] = 0 # x* = 0 

362 else: 

363 # q(k) -> T(k) * v(k) * h_22(k) 

364 # \lambda -> gamma (lambda is a Python keyword) 

365 q[:] = n_samples * emp_covs[p, p, :] * W_inv[m, m, :] 

366 

367 if debug: 

368 assert np.all(q > 0) 

369 # x* = \lambda* diag(1 + \lambda q)^{-1} c 

370 

371 # Newton-Raphson loop. Loosely based on Scipy's. 

372 # Tolerance does not seem to be important for numerical 

373 # stability (tolerance of 1e-2 works) but has an effect on 

374 # overall convergence rate (the tighter the better.) 

375 
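# In other words, the loop below finds the root gamma >= 0 of
#     F(gamma) = sum_k c_k**2 / (1 + gamma * q_k)**2 - alpha**2
# by Newton's method: "fder" is -F'(gamma) and "fval" is the Newton
# increment -F(gamma) / F'(gamma), added to gamma at each step.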

376 gamma = 0.0 # initial value 

377 # Precompute some quantities 

378 cc = c * c 

379 two_ccq = 2.0 * cc * q 

380 for _ in itertools.repeat(None, 100): 

381 # Function whose zero must be determined (fval) and 

382 # its derivative (fder). 

383 # Written inplace to save some function calls. 

384 aq = 1.0 + gamma * q 

385 aq2 = aq * aq 

386 fder = (two_ccq / (aq2 * aq)).sum() 

387 

388 if fder == 0: 

389 msg = "derivative was zero." 

390 warnings.warn( 

391 msg, 

392 RuntimeWarning, 

393 stacklevel=find_stack_level(), 

394 ) 

395 break 

396 fval = -(alpha2 - (cc / aq2).sum()) / fder 

397 gamma = fval + gamma 

398 if abs(fval) < 1.5e-8: 

399 break 

400 

401 if abs(fval) > 0.1: 

402 warnings.warn( 

403 "Newton-Raphson step did not converge.\n" 

404 "This may indicate a badly conditioned system.", 

405 stacklevel=find_stack_level(), 

406 ) 

407 

408 if debug: 

409 assert gamma >= 0.0, gamma 

410 

411 y[:, m] = (gamma * c) / aq # x* 

412 

413 # Copy back y in omega (column and row) 

414 omega[:p, p, :] = y[:, :p].T 

415 omega[p + 1 :, p, :] = y[:, p:].T 

416 omega[p, :p, :] = y[:, :p].T 

417 omega[p, p + 1 :, :] = y[:, p:].T 

418 

419 for k in range(n_subjects): 

420 omega[p, p, k] = 1.0 / emp_covs[p, p, k] + np.dot( 

421 np.dot(y[k, :], W_inv[..., k]), y[k, :] 

422 ) 

423 

424 if debug: 

425 assert is_spd(omega[..., k]) 

426 

427 if probe_function is not None and probe_function( 

428 emp_covs, 

429 n_samples, 

430 alpha, 

431 max_iter, 

432 tol, 

433 n, 

434 omega, 

435 omega_old, 

436 ): 

437 probe_interrupted = True 

438 logger.log( 

439 "probe_function interrupted loop", verbose=verbose, msg_level=2 

440 ) 

441 break 

442 

443 # Compute max of variation 

444 omega_old -= omega 

445 omega_old = abs(omega_old) 

446 max_norm = omega_old.max() 

447 

448 tolerance_reached = _check_if_tolerance_reached( 

449 tol, max_norm, verbose, n 

450 ) 

451 if tolerance_reached: 

452 break 

453 

454 if tol is not None and not tolerance_reached and not probe_interrupted: 

455 warnings.warn( 

456 "Maximum number of iterations reached without getting " 

457 "to the requested tolerance level.", 

458 stacklevel=find_stack_level(), 

459 ) 

460 

461 return omega 

462 

463 

464def _init_omega(emp_covs, precisions_init): 

465 """Initialize omega value.""" 

466 if precisions_init is None: 

467 n_subjects = emp_covs.shape[-1] 

468 # Fortran order makes omega[..., k] contiguous, which is often useful.

469 omega = np.ndarray(shape=emp_covs.shape, dtype=np.float64, order="F") 

470 for k in range(n_subjects): 

471 # Values on main diagonals are far from zero, because they 

472 # are timeseries energy. 

473 omega[..., k] = np.diag(1.0 / np.diag(emp_covs[..., k])) 

474 else: 

475 omega = precisions_init.copy() 

476 

477 return omega 

478 

479 

480def _check_alpha(alpha): 

481 if not isinstance(alpha, (int, float)) or alpha < 0: 

482 raise ValueError( 

483 "Regularization parameter alpha must be a positive number.\n" 

484 f"You provided: {alpha=}" 

485 ) 

486 

487 

488def _check_diagonal_normalization(emp_covs, n_subjects): 

489 ones = np.ones(emp_covs.shape[0]) 

490 for k in range(n_subjects): 

491 if ( 

492 abs(emp_covs[..., k].flat[:: emp_covs.shape[0] + 1] - ones) > 0.1 

493 ).any(): 

494 warnings.warn( 

495 "Input signals do not all have unit variance. " 

496 "This can lead to numerical instability.", 

497 stacklevel=find_stack_level(), 

498 ) 

499 break 

500 

501 

502def _set_initial_state_w_and_w_inv(omega, debug, p): 

503 """Set initial state by removing first col/row.""" 

504 W = omega[1:, 1:, :].copy() # stack of W(k) 

505 W_inv = np.ndarray(shape=W.shape, dtype=np.float64) 

506 for k in range(W.shape[2]): 

507 # stack of W^-1(k) 

508 W_inv[..., k] = scipy.linalg.inv(W[..., k]) 

509 

510 if debug: 

511 np.testing.assert_almost_equal( 

512 np.dot(W_inv[..., k], W[..., k]), 

513 np.eye(W_inv[..., k].shape[0]), 

514 decimal=10, 

515 ) 

516 _assert_submatrix(omega[..., k], W[..., k], p) 

517 assert is_spd(W_inv[..., k]) 

518 

519 return W, W_inv 

520 

521 

522def _update_w_and_w_inv(omega, debug, W, W_inv, n_subjects, p, h, v): 

523 for k in range(n_subjects): 

524 _update_submatrix(omega[..., k], W[..., k], W_inv[..., k], p, h, v) 

525 

526 if debug: 

527 _assert_submatrix(omega[..., k], W[..., k], p) 

528 assert is_spd(W_inv[..., k], decimal=14) 

529 np.testing.assert_almost_equal( 

530 np.dot(W[..., k], W_inv[..., k]), 

531 np.eye(W_inv[..., k].shape[0]), 

532 decimal=10, 

533 ) 

534 

535 

536def _check_if_tolerance_reached(tol, max_norm, verbose, n): 

537 tolerance_reached = tol is not None and max_norm < tol 

538 if tolerance_reached: 

539 logger.log( 

540 f"tolerance reached at iteration number {n + 1:d}: {max_norm:.3e}", 

541 verbose=verbose, 

542 ) 

543 return tolerance_reached 

544 

545 

546@fill_doc 

547class GroupSparseCovariance(CacheMixin, BaseEstimator): 

548 """Covariance and precision matrix estimator. 

549 

550 The model used has been introduced in :footcite:t:`Varoquaux2010a`, and the 

551 algorithm used is based on what is described in :footcite:t:`Honorio2012`. 

552 

553 Parameters 

554 ---------- 

555 alpha : :obj:`float`, default=0.1 

556 regularization parameter. With normalized covariance matrices and

557 sample counts, sensible values lie in the [0, 1] range (zero means

558 no regularization: the output is not sparse).

559 

560 tol : positive :obj:`float`, default=1e-3 

561 The tolerance to declare convergence: if the maximum change in the precision

562 matrices between two iterations goes below this value, iterations are stopped.

563 

564 max_iter : :obj:`int`, default=10 

565 maximum number of iterations. The default value is rather 

566 conservative. 

567 

568 %(verbose0)s 

569 

570 %(memory)s 

571 

572 %(memory_level)s 

573 

574 Attributes 

575 ---------- 

576 covariances_ : numpy.ndarray, shape (n_features, n_features, n_subjects) 

577 empirical covariance matrices. 

578 

579 precisions_ : numpy.ndarray, shape (n_features, n_features, n_subjects)

580 precision matrices estimated using the group-sparse algorithm.

581 

582 References 

583 ---------- 

584 .. footbibliography:: 

585 

586 """ 

587 

588 def __init__( 

589 self, 

590 alpha=0.1, 

591 tol=1e-3, 

592 max_iter=10, 

593 verbose=0, 

594 memory=None, 

595 memory_level=0, 

596 ): 

597 self.alpha = alpha 

598 self.tol = tol 

599 self.max_iter = max_iter 

600 

601 self.memory = memory 

602 self.memory_level = memory_level 

603 self.verbose = verbose 

604 

605 def _more_tags(self): 

606 """Return estimator tags. 

607 

608 TODO remove when bumping sklearn_version > 1.5 

609 """ 

610 return self.__sklearn_tags__() 

611 

612 def __sklearn_tags__(self): 

613 """Return estimator tags. 

614 

615 See the sklearn documentation for more details on tags 

616 https://scikit-learn.org/1.6/developers/develop.html#estimator-tags 

617 """ 

618 if SKLEARN_LT_1_6: 

619 from nilearn._utils.tags import tags 

620 

621 return tags(niimg_like=False) 

622 

623 from nilearn._utils.tags import InputTags 

624 

625 tags = super().__sklearn_tags__() 

626 tags.input_tags = InputTags(niimg_like=False) 

627 return tags 

628 

629 @fill_doc 

630 def fit(self, subjects, y=None): 

631 """Fits the group sparse precision model according \ 

632 to the given training data and parameters. 

633 

634 Parameters 

635 ---------- 

636 subjects : :obj:`list` of numpy.ndarray \ 

637 with shapes (n_samples, n_features) 

638 input subjects. Each subject is a 2D array, whose columns contain 

639 signals. Sample number can vary from subject to subject, but all 

640 subjects must have the same number of features (i.e. of columns). 

641 

642 %(y_dummy)s 

643 

644 Returns 

645 ------- 

646 self : GroupSparseCovariance instance 

647 the object itself. Useful for chaining operations. 

648 

649 """ 

650 del y 

651 check_params(self.__dict__) 

652 for x in subjects: 

653 check_array(x, accept_sparse=False) 

654 

655 if self.memory is None: 

656 self.memory = Memory(location=None) 

657 

658 logger.log("Computing covariance matrices", verbose=self.verbose) 

659 self.covariances_, n_samples = empirical_covariances( 

660 subjects, assume_centered=False 

661 ) 

662 

663 logger.log("Computing precision matrices", verbose=self.verbose) 

664 ret = self._cache(_group_sparse_covariance)( 

665 self.covariances_, 

666 n_samples, 

667 self.alpha, 

668 tol=self.tol, 

669 max_iter=self.max_iter, 

670 verbose=max(0, self.verbose - 1), 

671 debug=False, 

672 ) 

673 

674 self.precisions_ = ret 

675 return self 

676 

677 def __sklearn_is_fitted__(self): 

678 return hasattr(self, "precisions_") and hasattr(self, "covariances_") 

679 
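# A minimal fitting sketch for the GroupSparseCovariance estimator
# (hypothetical data):
#
#     >>> import numpy as np
#     >>> rng = np.random.default_rng(0)
#     >>> subjects = [rng.standard_normal((40, 5)) for _ in range(3)]
#     >>> subjects = [s / s.std(axis=0) for s in subjects]
#     >>> gsc = GroupSparseCovariance(alpha=0.2, max_iter=20).fit(subjects)
#     >>> gsc.covariances_.shape, gsc.precisions_.shape
#     ((5, 5, 3), (5, 5, 3))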

680 

681def empirical_covariances(subjects, assume_centered=False, standardize=False): 

682 """Compute empirical covariances for several signals. 

683 

684 Parameters 

685 ---------- 

686 subjects : :obj:`list` of numpy.ndarray, \ 

687 shape for each (n_samples, n_features) 

688 input subjects. Each subject is a 2D array, whose columns contain 

689 signals. Sample number can vary from subject to subject, but all 

690 subjects must have the same number of features (i.e. of columns). 

691 

692 assume_centered : :obj:`bool`, default=False 

693 if True, assume that all input signals are centered. This slightly 

694 decreases computation time by avoiding useless computation. 

695 

696 standardize : :obj:`bool`, default=False 

697 if True, set every signal variance to one before computing their 

698 covariance matrix (i.e. compute a correlation matrix). 

699 

700 Returns 

701 ------- 

702 emp_covs : numpy.ndarray, \ 

703 shape : (feature number, feature number, subject number) 

704 empirical covariances. 

705 

706 n_samples : numpy.ndarray, shape: (subject number,) 

707 number of samples for each subject. dtype is np.float64. 

708 

709 """ 

710 if not hasattr(subjects, "__iter__"): 

711 raise ValueError( 

712 "'subjects' input argument must be an iterable. " 

713 f"You provided {subjects.__class__}" 

714 ) 

715 

716 n_features_per_subject = [s.shape[1] for s in subjects]

717 if len(set(n_features_per_subject)) > 1:

718 raise ValueError(

719 "All subjects must have the same number of "

720 f"features.\nYou provided: {n_features_per_subject}"

721 ) 

722 n_subjects = len(subjects) 

723 n_features = subjects[0].shape[1] 

724 

725 # The dtype can be changed here if needed: depending on the user's input,

726 # conversion from single to double precision may or may not be required.

727 emp_covs = np.empty((n_features, n_features, n_subjects), order="F") 

728 for k, s in enumerate(subjects): 

729 if standardize: 

730 s = s / s.std(axis=0) # copy on purpose 

731 M = empirical_covariance(s, assume_centered=assume_centered) 

732 

733 # Force matrix symmetry, for numerical stability 

734 # of _group_sparse_covariance 

735 emp_covs[..., k] = M + M.T 

736 emp_covs /= 2 

737 

738 n_samples = np.asarray([s.shape[0] for s in subjects], dtype=np.float64) 

739 

740 return emp_covs, n_samples 

741 
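# A short sketch of empirical_covariances (hypothetical data): it stacks one
# symmetrized covariance matrix per subject along the last axis.
#
#     >>> import numpy as np
#     >>> rng = np.random.default_rng(1)
#     >>> subjects = [rng.standard_normal((20, 3)), rng.standard_normal((35, 3))]
#     >>> emp_covs, n_samples = empirical_covariances(subjects, standardize=True)
#     >>> emp_covs.shape
#     (3, 3, 2)
#     >>> n_samples
#     array([20., 35.])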

742 

743def group_sparse_scores( 

744 precisions, n_samples, emp_covs, alpha, duality_gap=False, debug=False 

745): 

746 """Compute scores used by group_sparse_covariance. 

747 

748 The log-likelihood of a given list of empirical covariances / 

749 precisions. 

750 

751 Parameters 

752 ---------- 

753 precisions : numpy.ndarray, shape (n_features, n_features, n_subjects) 

754 estimated precisions. 

755 

756 n_samples : array-like, shape (n_subjects,) 

757 number of samples used in estimating each subject in "precisions". 

758 n_samples.sum() must be equal to 1. 

759 

760 emp_covs : numpy.ndarray, shape (n_features, n_features, n_subjects) 

761 empirical covariance matrix 

762 

763 alpha : :obj:`float` 

764 regularization parameter 

765 

766 duality_gap : :obj:`bool`, default=False 

767 if True, also returns a duality gap upper bound. 

768 

769 debug : :obj:`bool`, default=False 

770 if True, some consistency checks are performed to help solving 

771 numerical problems. 

772 

773 Returns 

774 ------- 

775 log_lik : float 

776 log-likelihood of precisions on the given covariances. This is the 

777 opposite of the loss function, without the regularization term 

778 

779 objective : float 

780 value of objective function. This is the value minimized by 

781 group_sparse_covariance() 

782 

783 duality_gap : float 

784 duality gap upper bound. The returned bound is tight: it vanishes for 

785 the optimal precision matrices 

786 

787 """ 

788 n_features, _, n_subjects = emp_covs.shape 

789 

790 log_lik = 0 

791 for k in range(n_subjects): 

792 log_lik_k = -np.sum(emp_covs[..., k] * precisions[..., k]) 

793 log_lik_k += fast_logdet(precisions[..., k]) 

794 log_lik += n_samples[k] * log_lik_k 

795 

796 l2 = np.sqrt((precisions**2).sum(axis=-1)) 

797 l12 = l2.sum() - np.diag(l2).sum() # Do not count diagonal terms 

798 objective = alpha * l12 - log_lik 

799 ret = (log_lik, objective) 

800 

801 # Compute duality gap if requested 

802 if duality_gap is True: 

803 A = np.empty(precisions.shape, dtype=np.float64, order="F") 

804 for k in range(n_subjects): 

805 # TODO: can be computed more efficiently using W_inv. See 

806 # Friedman, Jerome, Trevor Hastie, and Robert Tibshirani. 

807 # 'Sparse Inverse Covariance Estimation with the Graphical Lasso'. 

808 # Biostatistics 9, no. 3 (1 July 2008): 432-441. 

809 precisions_inv = scipy.linalg.inv(precisions[..., k]) 

810 if debug: 

811 assert is_spd(precisions_inv) 

812 

813 A[..., k] = n_samples[k] * (precisions_inv - emp_covs[..., k]) 

814 

815 if debug: 

816 np.testing.assert_almost_equal(A[..., k], A[..., k].T) 

817 

818 # Project A on the set of feasible points 

819 alpha_max = np.sqrt((A**2).sum(axis=-1)) 

820 mask = alpha_max > alpha 

821 for k in range(A.shape[-1]): 

822 A[mask, k] *= alpha / alpha_max[mask] 

823 # Set zeros on diagonals. Essential to get an always positive 

824 # duality gap. 

825 A[..., k].flat[:: A.shape[0] + 1] = 0 

826 

827 dual_obj = 0 # dual objective 

828 for k in range(n_subjects): 

829 B = emp_covs[..., k] + A[..., k] / n_samples[k] 

830 dual_obj += n_samples[k] * (n_features + fast_logdet(B)) 

831 

832 # The previous computation can lead to a non-feasible point, because 

833 # one of the Bs may not be positive definite. 

834 # Use another value in this case, one that ensures positive definiteness

835 # of B. The upper bound on the duality gap is not tight in the 

836 # following, but is smaller than infinity, which is better in any case. 

837 if not np.isfinite(dual_obj): 

838 for k in range(n_subjects): 

839 A[..., k] = -n_samples[k] * emp_covs[..., k] 

840 A[..., k].flat[:: A.shape[0] + 1] = 0 

841 alpha_max = np.sqrt((A**2).sum(axis=-1)).max() 

842 # the second value (0.05 is arbitrary: positive in ]0,1[) 

843 gamma = min((alpha / alpha_max, 0.05)) 

844 dual_obj = 0 

845 for k in range(n_subjects): 

846 # add gamma on the diagonal 

847 B = (1.0 - gamma) * emp_covs[..., k] + gamma * np.eye( 

848 emp_covs.shape[0] 

849 ) 

850 dual_obj += n_samples[k] * (n_features + fast_logdet(B)) 

851 

852 gap = objective - dual_obj 

853 ret = (*ret, gap) 

854 return ret 

855 
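# Written out, the quantities computed by group_sparse_scores above are
# (with K_k = precisions[..., k], S_k = emp_covs[..., k], n_k = n_samples[k]):
#
#     log_lik   = sum_k n_k * (logdet(K_k) - trace(S_k @ K_k))
#     objective = -log_lik + alpha * sum_{i != j} sqrt(sum_k K_k[i, j] ** 2)
#
# i.e. the objective is the negated log-likelihood plus an l_{1,2} penalty on
# the off-diagonal entries, grouped across subjects.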

856 

857@fill_doc 

858def group_sparse_covariance_path( 

859 train_subjs, 

860 alphas, 

861 test_subjs=None, 

862 tol=1e-3, 

863 max_iter=10, 

864 precisions_init=None, 

865 verbose=0, 

866 debug=False, 

867 probe_function=None, 

868): 

869 """Get estimated precision matrices for different values of alpha. 

870 

871 Calling this function is faster than calling group_sparse_covariance() 

872 repeatedly, because it makes use of the first result to initialize the 

873 next computation. 

874 

875 Parameters 

876 ---------- 

877 train_subjs : :obj:`list` of numpy.ndarray 

878 list of signals. 

879 

880 alphas : :obj:`list` of :obj:`float` 

881 values of alpha to use. Best results are obtained for sorted (decreasing) values.

882 

883 test_subjs : :obj:`list` of numpy.ndarray, default=None 

884 list of signals, independent from those in train_subjs, on which to 

885 compute a score. If None, no score is computed. 

886 

887 %(verbose0)s 

888 

889 tol, max_iter, debug, precisions_init : 

890 Passed to group_sparse_covariance(). See the corresponding docstring 

891 for details. 

892 

893 probe_function : callable, default=None 

894 This value is called before the first iteration and after each 

895 iteration. If it returns True, then optimization is stopped 

896 prematurely. 

897 The function is given as arguments (in that order): 

898 

899 - empirical covariances (ndarray), 

900 - number of samples for each subject (ndarray), 

901 - regularization parameter (float) 

902 - maximum iteration number (integer) 

903 - tolerance (float) 

904 - current iteration number (integer). -1 means "before first iteration" 

905 - current value of precisions (ndarray). 

906 - previous value of precisions (ndarray). None before first iteration. 

907 

908 Returns 

909 ------- 

910 precisions_list : :obj:`list` of numpy.ndarray 

911 estimated precisions for each value of alpha provided. The length of 

912 this list is the same as that of parameter "alphas". 

913 

914 scores : :obj:`list` of float 

915 for each estimated precision, score obtained on the test set. Output 

916 only if test_subjs is not None. 

917 

918 """ 

919 train_covs, train_n_samples = empirical_covariances( 

920 train_subjs, assume_centered=False, standardize=True 

921 ) 

922 

923 scores = [] 

924 precisions_list = [] 

925 for alpha in alphas: 

926 precisions = _group_sparse_covariance( 

927 train_covs, 

928 train_n_samples, 

929 alpha, 

930 tol=tol, 

931 max_iter=max_iter, 

932 precisions_init=precisions_init, 

933 verbose=max(0, verbose - 1), 

934 debug=debug, 

935 probe_function=probe_function, 

936 ) 

937 

938 # Compute log-likelihood 

939 if test_subjs is not None: 

940 test_covs, _ = empirical_covariances( 

941 test_subjs, assume_centered=False, standardize=True 

942 ) 

943 scores.append( 

944 group_sparse_scores(precisions, train_n_samples, test_covs, 0)[ 

945 0 

946 ] 

947 ) 

948 precisions_list.append(precisions) 

949 precisions_init = precisions 

950 

951 return ( 

952 (precisions_list, scores) 

953 if test_subjs is not None 

954 else precisions_list 

955 ) 

956 
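# A brief sketch of group_sparse_covariance_path (hypothetical data): estimates
# are warm-started along a decreasing sequence of alphas, and a test score is
# returned for each alpha when test_subjs is given.
#
#     >>> import numpy as np
#     >>> rng = np.random.default_rng(2)
#     >>> train = [rng.standard_normal((30, 4)) for _ in range(3)]
#     >>> test = [rng.standard_normal((30, 4)) for _ in range(3)]
#     >>> precisions_list, scores = group_sparse_covariance_path(
#     ...     train, alphas=[0.8, 0.4, 0.2], test_subjs=test, max_iter=5
#     ... )
#     >>> len(precisions_list), len(scores)
#     (3, 3)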

957 

958class EarlyStopProbe: 

959 """Callable probe for early stopping in GroupSparseCovarianceCV. 

960 

961 Stop optimizing as soon as the score on the test set starts decreasing. 

962 An instance of this class is supposed to be passed in the probe_function 

963 argument of group_sparse_covariance(). 

964 

965 """ 

966 

967 def __init__(self, test_subjs, verbose=0): 

968 self.test_emp_covs, _ = empirical_covariances(test_subjs) 

969 self.verbose = verbose 

970 

971 def __call__( # noqa: D102 

972 self, 

973 emp_covs, # noqa: ARG002 

974 n_samples, 

975 alpha, 

976 max_iter, # noqa: ARG002 

977 tol, # noqa: ARG002 

978 iter_n, 

979 omega, 

980 prev_omega, # noqa: ARG002 

981 ): 

982 log_lik, _ = group_sparse_scores( 

983 omega, n_samples, self.test_emp_covs, alpha 

984 ) 

985 if iter_n > -1 and self.last_log_lik > log_lik: 

986 logger.log( 

987 "Log-likelihood on test set is decreasing. " 

988 f"Stopping at iteration {iter_n}", 

989 verbose=self.verbose, 

990 ) 

991 return True 

992 self.last_log_lik = log_lik 

993 
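# A short sketch of how EarlyStopProbe is meant to be plugged in (hypothetical
# data): passed as probe_function, it stops the optimization once the test-set
# log-likelihood starts decreasing.
#
#     >>> import numpy as np
#     >>> rng = np.random.default_rng(3)
#     >>> train = [rng.standard_normal((30, 4)) for _ in range(2)]
#     >>> train = [s / s.std(axis=0) for s in train]
#     >>> test = [rng.standard_normal((30, 4)) for _ in range(2)]
#     >>> probe = EarlyStopProbe(test)
#     >>> emp_covs, precisions = group_sparse_covariance(
#     ...     train, alpha=0.3, max_iter=20, probe_function=probe
#     ... )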

994 

995@fill_doc 

996class GroupSparseCovarianceCV(CacheMixin, BaseEstimator): 

997 """Sparse inverse covariance w/ cross-validated choice of the parameter. 

998 

999 A cross-validated value for the regularization parameter is first 

1000 determined using several calls to group_sparse_covariance. Then a final 

1001 optimization is run to get a value for the precision matrices, using the 

1002 selected value of the parameter. Different values of tolerance and of 

1003 maximum iteration number can be used in these two phases (see the tol 

1004 and tol_cv keywords below, for example).

1005 

1006 Parameters 

1007 ---------- 

1008 alphas : :obj:`int`, default=4 

1009 initial number of points in the grid of regularization parameter 

1010 values. Each step of grid refinement adds that many points as well. 

1011 

1012 n_refinements : :obj:`int`, default=4 

1013 number of times the initial grid should be refined. 

1014 

1015 cv : :obj:`int` or None, default=None

1016 number of folds in a K-fold cross-validation scheme (None means the scikit-learn default, 5-fold).

1017 

1018 tol_cv : :obj:`float`, default=1e-2 

1019 tolerance used to get the optimal alpha value. It has the same meaning 

1020 as the `tol` parameter in :func:`group_sparse_covariance`. 

1021 

1022 max_iter_cv : :obj:`int`, default=50 

1023 maximum number of iterations for each optimization, during the alpha- 

1024 selection phase. 

1025 

1026 tol : :obj:`float`, default=1e-3 

1027 tolerance used during the final optimization for determining precision 

1028 matrices value. 

1029 

1030 max_iter : :obj:`int`, default=100 

1031 maximum number of iterations in the final optimization. 

1032 

1033 %(verbose0)s 

1034 

1035 %(n_jobs)s 

1036 

1037 debug : :obj:`bool`, default=False 

1038 if True, activates some internal checks for consistency. Only useful 

1039 for nilearn developers, not users. 

1040 

1041 early_stopping : :obj:`bool`, default=True 

1042 if True, reduce computation time by using a heuristic to reduce the 

1043 number of iterations required to get the optimal value for alpha. Be 

1044 aware that this can lead to slightly different values for the optimal 

1045 alpha compared to early_stopping=False. 

1046 

1047 Attributes 

1048 ---------- 

1049 covariances_ : numpy.ndarray, shape (n_features, n_features, n_subjects) 

1050 covariance matrices, one per subject. 

1051 

1052 precisions_ : numpy.ndarray, shape (n_features, n_features, n_subjects) 

1053 precision matrices, one per subject. All matrices have the same 

1054 sparsity pattern (if a coefficient is zero for a given matrix, it 

1055 is also zero for every other.) 

1056 

1057 alpha_ : float 

1058 penalization parameter value selected. 

1059 

1060 cv_alphas_ : list of floats 

1061 all values of the penalization parameter explored. 

1062 

1063 cv_scores_ : numpy.ndarray, shape (n_alphas, n_folds) 

1064 scores obtained on test set for each value of the penalization 

1065 parameter explored. 

1066 

1067 See Also 

1068 -------- 

1069 GroupSparseCovariance, 

1070 sklearn.covariance.GraphicalLassoCV 

1071 

1072 Notes 

1073 ----- 

1074 The search for the optimal penalization parameter (alpha) is done on an 

1075 iteratively refined grid: first the cross-validated scores on a grid are 

1076 computed, then a new refined grid is centered around the maximum, and so 

1077 on. 

1078 

1079 """ 

1080 

1081 def __init__( 

1082 self, 

1083 alphas=4, 

1084 n_refinements=4, 

1085 cv=None, 

1086 tol_cv=1e-2, 

1087 max_iter_cv=50, 

1088 tol=1e-3, 

1089 max_iter=100, 

1090 verbose=0, 

1091 n_jobs=1, 

1092 debug=False, 

1093 early_stopping=True, 

1094 ): 

1095 self.alphas = alphas 

1096 self.n_refinements = n_refinements 

1097 self.tol_cv = tol_cv 

1098 self.max_iter_cv = max_iter_cv 

1099 self.cv = cv 

1100 self.tol = tol 

1101 self.max_iter = max_iter 

1102 

1103 self.verbose = verbose 

1104 self.n_jobs = n_jobs 

1105 self.debug = debug 

1106 self.early_stopping = early_stopping 

1107 

1108 def _more_tags(self): 

1109 """Return estimator tags. 

1110 

1111 TODO remove when bumping sklearn_version > 1.5 

1112 """ 

1113 return self.__sklearn_tags__() 

1114 

1115 def __sklearn_tags__(self): 

1116 """Return estimator tags. 

1117 

1118 See the sklearn documentation for more details on tags 

1119 https://scikit-learn.org/1.6/developers/develop.html#estimator-tags 

1120 """ 

1121 if SKLEARN_LT_1_6: 

1122 from nilearn._utils.tags import tags 

1123 

1124 return tags(niimg_like=False) 

1125 

1126 from nilearn._utils.tags import InputTags 

1127 

1128 tags = super().__sklearn_tags__() 

1129 tags.input_tags = InputTags(niimg_like=False) 

1130 return tags 

1131 

1132 @fill_doc 

1133 def fit(self, subjects, y=None): 

1134 """Compute cross-validated group-sparse precisions. 

1135 

1136 Parameters 

1137 ---------- 

1138 subjects : :obj:`list` of numpy.ndarray \ 

1139 with shapes (n_samples, n_features) 

1140 input subjects. Each subject is a 2D array, whose columns contain 

1141 signals. Sample number can vary from subject to subject, but all 

1142 subjects must have the same number of features (i.e. of columns.) 

1143 

1144 %(y_dummy)s 

1145 

1146 Returns 

1147 ------- 

1148 self : GroupSparseCovarianceCV 

1149 the object instance itself. 

1150 

1151 """ 

1152 del y 

1153 check_params(self.__dict__) 

1154 

1155 for x in subjects: 

1156 check_array(x, accept_sparse=False) 

1157 

1158 # Empirical covariances 

1159 emp_covs, n_samples = empirical_covariances( 

1160 subjects, assume_centered=False 

1161 ) 

1162 n_subjects = emp_covs.shape[2] 

1163 

1164 # One cv generator per subject must be created, because each subject 

1165 # can have a different number of samples from the others. 

1166 cv = [ 

1167 check_cv( 

1168 self.cv, np.ones(subjects[k].shape[0]), classifier=False 

1169 ).split(subjects[k]) 

1170 for k in range(n_subjects) 

1171 ] 

1172 path = [] # List of (alpha, scores, covs) 

1173 n_alphas = self.alphas 

1174 

1175 if isinstance(n_alphas, collections.abc.Sequence): 

1176 alphas = list(self.alphas) 

1177 n_refinements = 1 

1178 else: 

1179 n_refinements = self.n_refinements 

1180 alpha_1, _ = compute_alpha_max(emp_covs, n_samples) 

1181 alpha_0 = 1e-2 * alpha_1 

1182 alphas = np.logspace( 

1183 np.log10(alpha_0), np.log10(alpha_1), n_alphas 

1184 )[::-1] 

1185 

1186 covs_init = itertools.repeat(None) 

1187 

1188 # Copying the cv generators to use them n_refinements times. 

1189 cv_ = zip(*cv) 

1190 

1191 for i, this_cv in enumerate(itertools.tee(cv_, n_refinements)):

1192 # Compute the cross-validated loss on the current grid 

1193 train_test_subjs = [] 

1194 for train_test in this_cv: 

1195 assert len(train_test) == n_subjects 

1196 train_test_subjs.append( 

1197 list( 

1198 zip( 

1199 *[ 

1200 (subject[train, :], subject[test, :]) 

1201 for subject, (train, test) in zip( 

1202 subjects, train_test 

1203 ) 

1204 ] 

1205 ) 

1206 ) 

1207 ) 

1208 if self.early_stopping: 

1209 probes = [ 

1210 EarlyStopProbe( 

1211 test_subjs, verbose=max(0, self.verbose - 1) 

1212 ) 

1213 for _, test_subjs in train_test_subjs 

1214 ] 

1215 else: 

1216 probes = itertools.repeat(None) 

1217 

1218 this_path = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( 

1219 delayed(group_sparse_covariance_path)( 

1220 train_subjs, 

1221 alphas, 

1222 test_subjs=test_subjs, 

1223 max_iter=self.max_iter_cv, 

1224 tol=self.tol_cv, 

1225 verbose=max(0, self.verbose - 1), 

1226 debug=self.debug, 

1227 # Warm restart is useless with early stopping. 

1228 precisions_init=None if self.early_stopping else prec_init, 

1229 probe_function=probe, 

1230 ) 

1231 for (train_subjs, test_subjs), prec_init, probe in zip( 

1232 train_test_subjs, covs_init, probes 

1233 ) 

1234 ) 

1235 

1236 # this_path[i] is a tuple (precisions_list, scores) 

1237 # - scores: scores obtained with the i-th folding, for each value 

1238 # of alpha. 

1239 # - precisions_list: corresponding precisions matrices, for each 

1240 # value of alpha. 

1241 precisions_list, scores = list(zip(*this_path)) 

1242 # now scores[i][j] is the score for the i-th folding, j-th value of 

1243 # alpha (analogous for precisions_list) 

1244 precisions_list = list(zip(*precisions_list)) 

1245 scores = [np.mean(sc) for sc in zip(*scores)] 

1246 # scores[i] is the mean score obtained for the i-th value of alpha. 

1247 

1248 path.extend(list(zip(alphas, scores, precisions_list))) 

1249 path = sorted(path, key=operator.itemgetter(0), reverse=True) 

1250 

1251 # Find the maximum score (avoid using the built-in 'max' function 

1252 # to have a fully-reproducible selection of the smallest alpha in 

1253 # case of equality) 

1254 best_score = -np.inf 

1255 last_finite_idx = 0 

1256 for index, (_, this_score, _) in enumerate(path): 

1257 if this_score >= 0.1 / np.finfo(np.float64).eps: 

1258 this_score = np.nan 

1259 if np.isfinite(this_score): 

1260 last_finite_idx = index 

1261 if this_score >= best_score: 

1262 best_score = this_score 

1263 best_index = index 

1264 

1265 # Refine the grid 

1266 if best_index == 0: 

1267 # We do not need to go back: we have chosen 

1268 # the highest value of alpha for which there are 

1269 # non-zero coefficients 

1270 alpha_1 = path[0][0] 

1271 alpha_0 = path[1][0] 

1272 covs_init = path[0][2] 

1273 elif best_index == last_finite_idx and best_index != len(path) - 1: 

1274 # We have non-converged models on the upper bound of the 

1275 # grid, we need to refine the grid there 

1276 alpha_1 = path[best_index][0] 

1277 alpha_0 = path[best_index + 1][0] 

1278 covs_init = path[best_index][2] 

1279 elif best_index == len(path) - 1: 

1280 alpha_1 = path[best_index][0] 

1281 alpha_0 = 0.01 * path[best_index][0] 

1282 covs_init = path[best_index][2] 

1283 else: 

1284 alpha_1 = path[best_index - 1][0] 

1285 alpha_0 = path[best_index + 1][0] 

1286 covs_init = path[best_index - 1][2] 

1287 alphas = np.logspace( 

1288 np.log10(alpha_1), np.log10(alpha_0), len(alphas) + 2 

1289 ) 

1290 alphas = alphas[1:-1] 

1291 if n_refinements > 1: 

1292 logger.log( 

1293 "[GroupSparseCovarianceCV] Done refinement " 

1294 f"{i: 2} out of {n_refinements}", 

1295 verbose=self.verbose, 

1296 ) 

1297 

1298 path = list(zip(*path)) 

1299 cv_scores_ = list(path[1]) 

1300 alphas = list(path[0]) 

1301 

1302 self.cv_scores_ = np.array(cv_scores_) 

1303 self.alpha_ = alphas[best_index] 

1304 self.cv_alphas_ = alphas 

1305 

1306 # Finally, fit the model with the selected alpha 

1307 logger.log("Final optimization", verbose=self.verbose) 

1308 self.covariances_ = emp_covs 

1309 self.precisions_ = _group_sparse_covariance( 

1310 emp_covs, 

1311 n_samples, 

1312 self.alpha_, 

1313 tol=self.tol, 

1314 max_iter=self.max_iter, 

1315 verbose=max(0, self.verbose - 1), 

1316 debug=self.debug, 

1317 ) 

1318 return self 

1319 

1320 def __sklearn_is_fitted__(self): 

1321 return hasattr(self, "precisions_") and hasattr(self, "covariances_")
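# A closing sketch of the cross-validated estimator (hypothetical data); the
# grid of alphas is refined n_refinements times around the best scoring value:
#
#     >>> import numpy as np
#     >>> rng = np.random.default_rng(4)
#     >>> subjects = [rng.standard_normal((60, 4)) for _ in range(3)]
#     >>> subjects = [s / s.std(axis=0) for s in subjects]
#     >>> gsc_cv = GroupSparseCovarianceCV(alphas=2, n_refinements=1, cv=3,
#     ...                                  max_iter=10, max_iter_cv=5)
#     >>> gsc_cv = gsc_cv.fit(subjects)
#     >>> gsc_cv.precisions_.shape
#     (4, 4, 3)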