1"""Implementation of algorithm for sparse multi-subjects learning of Gaussian \
2graphical models.
3"""
5import collections.abc
6import itertools
7import operator
8import warnings
10import numpy as np
11import scipy.linalg
12from joblib import Memory, Parallel, delayed
13from sklearn.base import BaseEstimator
14from sklearn.covariance import empirical_covariance
15from sklearn.model_selection import check_cv
16from sklearn.utils import check_array
17from sklearn.utils.extmath import fast_logdet
19from nilearn._utils import CacheMixin, fill_doc, logger
20from nilearn._utils.extmath import is_spd
21from nilearn._utils.logger import find_stack_level
22from nilearn._utils.param_validation import check_params
23from nilearn._utils.tags import SKLEARN_LT_1_6


def compute_alpha_max(emp_covs, n_samples):
    """Compute the critical value of the regularization parameter.

    Above this value, the precision matrices computed by
    group_sparse_covariance are diagonal (complete sparsity).

    This function also returns the value below which the precision
    matrices are fully dense (i.e. minimal number of zero coefficients).

    The formula used in this function was derived using the same method
    as in :footcite:t:`Duchi2012`.

    Parameters
    ----------
    emp_covs : array-like, shape (n_features, n_features, n_subjects)
        covariance matrix for each subject.

    n_samples : array-like, shape (n_subjects,)
        number of samples used in the computation of every covariance matrix.
        n_samples.sum() can be arbitrary.

    Returns
    -------
    alpha_max : float
        minimal value for the regularization parameter that gives a
        fully sparse matrix.

    alpha_min : float
        maximal value for the regularization parameter that gives a fully
        dense matrix.

    References
    ----------
    .. footbibliography::

    """
    A = np.copy(emp_covs)
    n_samples = np.asarray(n_samples).copy()
    n_samples /= n_samples.sum()

    for k in range(emp_covs.shape[-1]):
        # Set diagonal to zero
        A[..., k].flat[:: A.shape[0] + 1] = 0
        A[..., k] *= n_samples[k]

    norms = np.sqrt((A**2).sum(axis=-1))

    return np.max(norms), np.min(norms[norms > 0])
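

# Illustrative sketch (not part of the original module): how compute_alpha_max
# is typically combined with empirical_covariances() (defined further below) to
# bracket the useful range of the regularization parameter. The helper name and
# the synthetic data are hypothetical.
def _example_compute_alpha_max(rng_seed=0):
    """Hypothetical usage illustration only; never called by nilearn."""
    rng = np.random.default_rng(rng_seed)
    # Three subjects, 40 samples each, 5 features (columns are signals).
    subjects = [rng.standard_normal((40, 5)) for _ in range(3)]
    emp_covs, n_samples = empirical_covariances(subjects, standardize=True)
    alpha_max, alpha_min = compute_alpha_max(emp_covs, n_samples)
    # alpha >= alpha_max -> fully sparse (diagonal) precision matrices;
    # alpha <= alpha_min -> fully dense precision matrices.
    return alpha_max, alpha_min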


def _update_submatrix(full, sub, sub_inv, p, h, v):
    """Update submatrix and its inverse.

    On entry, sub_inv is the inverse of the submatrix of "full" obtained by
    removing the (p-1)-th row and column.

    sub and sub_inv are modified in-place. After execution of this function,
    sub_inv contains the inverse of the submatrix of "full" obtained by
    removing the p-th row and column.

    This computation is based on the Sherman-Morrison-Woodbury identity.

    """
    n = p - 1
    v[: n + 1] = full[: n + 1, n]
    v[n + 1 :] = full[n + 2 :, n]
    h[: n + 1] = full[n, : n + 1]
    h[n + 1 :] = full[n, n + 2 :]

    # change row: first usage of the SMW identity
    coln = sub_inv[:, n : n + 1]  # 2d array, useful for sub_inv below
    V = h - sub[n, :]
    coln = coln / (1.0 + np.dot(V, coln))
    # The following line is equivalent to
    # sub_inv -= np.outer(coln, np.dot(V, sub_inv))
    sub_inv -= np.dot(coln, np.dot(V, sub_inv)[np.newaxis, :])
    sub[n, :] = h

    # change column: second usage of the SMW identity
    rown = sub_inv[n : n + 1, :]  # 2d array, useful for sub_inv below
    U = v - sub[:, n]
    rown = rown / (1.0 + np.dot(rown, U))
    # The following line is equivalent to (but faster than)
    # sub_inv -= np.outer(np.dot(sub_inv, U), rown)
    sub_inv -= np.dot(np.dot(sub_inv, U)[:, np.newaxis], rown)
    sub[:, n] = v  # equivalent to sub[:, n] += U

    # Make sub_inv symmetric (overcome some numerical limitations)
    sub_inv += sub_inv.T.copy()
    sub_inv /= 2.0


def _assert_submatrix(full, sub, n):
    """Check that "sub" is the matrix obtained \
    by removing the n-th col and row in "full".

    Used only for debugging.

    """
    true_sub = np.empty_like(sub)
    true_sub[:n, :n] = full[:n, :n]
    true_sub[n:, n:] = full[n + 1 :, n + 1 :]
    true_sub[:n, n:] = full[:n, n + 1 :]
    true_sub[n:, :n] = full[n + 1 :, :n]

    np.testing.assert_almost_equal(true_sub, sub)
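

# Illustrative sketch (not part of the original module): a minimal check that
# _update_submatrix() walks the "leave one row/column out" submatrices of a
# symmetric positive definite matrix while keeping their inverses in sync.
# The helper name and the synthetic matrix are hypothetical.
def _example_update_submatrix(n_features=5, rng_seed=0):
    """Hypothetical usage illustration only; never called by nilearn."""
    rng = np.random.default_rng(rng_seed)
    chol = rng.standard_normal((n_features, n_features))
    full = np.dot(chol, chol.T) + n_features * np.eye(n_features)  # SPD
    # Initial state: submatrix with the 0-th row/column removed.
    sub = full[1:, 1:].copy()
    sub_inv = scipy.linalg.inv(sub)
    h = np.empty(n_features - 1)
    v = np.empty(n_features - 1)
    for p in range(1, n_features):
        _update_submatrix(full, sub, sub_inv, p, h, v)
        _assert_submatrix(full, sub, p)
        np.testing.assert_almost_equal(
            np.dot(sub, sub_inv), np.eye(n_features - 1), decimal=8
        )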


@fill_doc
def group_sparse_covariance(
    subjects,
    alpha,
    max_iter=50,
    tol=1e-3,
    verbose=0,
    probe_function=None,
    precisions_init=None,
    debug=False,
):
    """Compute sparse precision matrices and covariance matrices.

    The precision matrices returned by this function are sparse, and share a
    common sparsity pattern: all have zeros at the same locations. This is
    achieved by computing all precision matrices at the same time.

    Running time is linear in max_iter and in the number of subjects
    (len(subjects)), but cubic in the number of features
    (subjects[0].shape[1]).

    The present algorithm is based on :footcite:t:`Honorio2012`.

    Parameters
    ----------
    subjects : :obj:`list` of numpy.ndarray
        input subjects. Each subject is a 2D array, whose columns contain
        signals. Each array shape must be (sample number, feature number).
        The sample number can vary from subject to subject, but all subjects
        must have the same number of features (i.e. of columns).

    alpha : :obj:`float`
        regularization parameter. With normalized covariance matrices and
        number of samples, sensible values lie in the [0, 1] range (zero is
        no regularization: output is not sparse).

    max_iter : :obj:`int`, default=50
        maximum number of iterations.

    tol : positive :obj:`float` or None, default=0.001
        The tolerance to declare convergence: if the duality gap goes below
        this value, optimization is stopped. If None, no check is performed.

    %(verbose0)s

    probe_function : callable or None, default=None
        This value is called before the first iteration and after each
        iteration. If it returns True, then optimization is stopped
        prematurely.
        The function is given as arguments (in that order):

        - empirical covariances (ndarray),
        - number of samples for each subject (ndarray),
        - regularization parameter (float)
        - maximum iteration number (integer)
        - tolerance (float)
        - current iteration number (integer). -1 means "before first iteration"
        - current value of precisions (ndarray).
        - previous value of precisions (ndarray). None before first iteration.

    precisions_init : numpy.ndarray, default=None
        initial value of the precision matrices. If not provided, a diagonal
        matrix with the variances of each input signal is used.

    debug : :obj:`bool`, default=False
        if True, perform checks during computation. It can help find
        numerical problems, but increases computation time a lot.

    Returns
    -------
    emp_covs : numpy.ndarray, shape (n_features, n_features, n_subjects)
        empirical covariance matrices.

    precisions : numpy.ndarray, shape (n_features, n_features, n_subjects)
        estimated precision matrices.

    References
    ----------
    .. footbibliography::

    """
    emp_covs, n_samples = empirical_covariances(
        subjects, assume_centered=False
    )

    precisions = _group_sparse_covariance(
        emp_covs,
        n_samples,
        alpha,
        max_iter=max_iter,
        tol=tol,
        verbose=verbose,
        precisions_init=precisions_init,
        probe_function=probe_function,
        debug=debug,
    )

    return emp_covs, precisions
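

# Illustrative sketch (not part of the original module): calling
# group_sparse_covariance() directly on synthetic signals. The helper name and
# the synthetic data are hypothetical.
def _example_group_sparse_covariance(alpha=0.2, rng_seed=0):
    """Hypothetical usage illustration only; never called by nilearn."""
    rng = np.random.default_rng(rng_seed)
    # Two subjects with different sample counts but the same 6 features.
    subjects = [rng.standard_normal((50, 6)), rng.standard_normal((35, 6))]
    # Scale each signal to unit variance to avoid the normalization warning.
    subjects = [s / s.std(axis=0) for s in subjects]
    emp_covs, precisions = group_sparse_covariance(
        subjects, alpha, max_iter=20, tol=1e-3
    )
    # Both outputs are stacked along the last axis (one matrix per subject);
    # the zero pattern of `precisions` is shared across subjects.
    assert emp_covs.shape == (6, 6, 2)
    assert precisions.shape == (6, 6, 2)
    return precisions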


def _group_sparse_covariance(
    emp_covs,
    n_samples,
    alpha,
    max_iter=10,
    tol=1e-3,
    precisions_init=None,
    probe_function=None,
    verbose=0,
    debug=False,
):
    """Implement an internal version of group_sparse_covariance.

    See its docstring for details.

    """
    if tol == -1:
        tol = None

    _check_alpha(alpha)

    n_subjects = emp_covs.shape[-1]
    n_features = emp_covs[0].shape[0]
    n_samples = np.asarray(n_samples)
    n_samples /= n_samples.sum()  # essential for numerical stability

    _check_diagonal_normalization(emp_covs, n_subjects)

    omega = _init_omega(emp_covs, precisions_init)

    # Preallocate arrays
    y = np.ndarray(shape=(n_subjects, n_features - 1), dtype=np.float64)
    u = np.ndarray(shape=(n_subjects, n_features - 1), dtype=np.float64)
    y_1 = np.ndarray(shape=(n_subjects, n_features - 2), dtype=np.float64)
    h_12 = np.ndarray(shape=(n_subjects, n_features - 2), dtype=np.float64)
    q = np.ndarray(shape=(n_subjects,), dtype=np.float64)
    aq = np.ndarray(shape=(n_subjects,), dtype=np.float64)  # temp. array
    c = np.ndarray(shape=(n_subjects,), dtype=np.float64)
    W = np.ndarray(
        shape=(omega.shape[0] - 1, omega.shape[1] - 1, omega.shape[2]),
        dtype=np.float64,
        order="F",
    )
    W_inv = np.ndarray(shape=W.shape, dtype=np.float64, order="F")

    # Auxiliary arrays.
    v = np.ndarray((omega.shape[0] - 1,), dtype=np.float64)
    h = np.ndarray((omega.shape[1] - 1,), dtype=np.float64)

    # Optional.
    tolerance_reached = False
    max_norm = None

    omega_old = np.empty_like(omega)

    if probe_function is not None:
        # iteration number -1 means called before iteration loop.
        probe_function(
            emp_covs, n_samples, alpha, max_iter, tol, -1, omega, None
        )
    probe_interrupted = False

    # Start optimization loop. Variables are named following (mostly) the
    # Honorio-Samaras paper notations.

    # Used in the innermost loop. Computed here to save some computation.
    alpha2 = alpha**2

    for n in range(max_iter):
        suffix = (
            f" variation (max norm): {max_norm:.3e} "
            if max_norm is not None
            else ""
        )

        logger.log(
            f"* iteration {n:d} ({100.0 * n / max_iter:.0f} %){suffix} ...",
            verbose=verbose,
        )

        omega_old[...] = omega
        for p in range(n_features):
            if p == 0:
                W, W_inv = _set_initial_state_w_and_w_inv(omega, debug, p)
            else:
                if debug:
                    omega_orig = omega.copy()

                _update_w_and_w_inv(
                    omega, debug, W, W_inv, n_subjects, p, h, v
                )

                if debug:
                    # Check that omega has not been modified.
                    np.testing.assert_almost_equal(omega_orig, omega)

            # In the following lines, implicit loop on k (subjects)
            # Extract y and u
            y[:, :p] = omega[:p, p, :].T
            y[:, p:] = omega[p + 1 :, p, :].T

            u[:, :p] = emp_covs[:p, p, :].T
            u[:, p:] = emp_covs[p + 1 :, p, :].T

            for m in range(n_features - 1):
                # Coordinate descent on y

                # T(k) -> n_samples[k]
                # v(k) -> emp_covs[p, p, k]
                # h_22(k) -> W_inv[m, m, k]
                # h_12(k) -> W_inv[:m, m, k], W_inv[m+1:, m, k]
                # y_1(k) -> y[k, :m], y[k, m+1:]
                # u_2(k) -> u[k, m]
                h_12[:, :m] = W_inv[:m, m, :].T
                h_12[:, m:] = W_inv[m + 1 :, m, :].T
                y_1[:, :m] = y[:, :m]
                y_1[:, m:] = y[:, m + 1 :]

                c[:] = -n_samples * (
                    emp_covs[p, p, :] * (h_12 * y_1).sum(axis=1) + u[:, m]
                )
                c2 = np.sqrt(np.dot(c, c))

                # x -> y[:][m]
                if c2 <= alpha:
                    y[:, m] = 0  # x* = 0
                else:
                    # q(k) -> T(k) * v(k) * h_22(k)
                    # \lambda -> gamma (lambda is a Python keyword)
                    q[:] = n_samples * emp_covs[p, p, :] * W_inv[m, m, :]
                    if debug:
                        assert np.all(q > 0)
                    # x* = \lambda* diag(1 + \lambda q)^{-1} c

                    # Newton-Raphson loop. Loosely based on Scipy's.
                    # Tolerance does not seem to be important for numerical
                    # stability (tolerance of 1e-2 works) but has an effect
                    # on the overall convergence rate (the tighter, the
                    # better).
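                    # In the notation above, this finds the root gamma >= 0 of
                    #   f(gamma) = sum_k c(k)**2 / (1 + gamma*q(k))**2 - alpha**2
                    # fder below is -f'(gamma) and fval is f(gamma) / fder, so
                    # "gamma = fval + gamma" is one Newton step. The root is
                    # the \lambda* used in x* = \lambda* diag(1 + \lambda* q)^{-1} c.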
                    gamma = 0.0  # initial value
                    # Precompute some quantities
                    cc = c * c
                    two_ccq = 2.0 * cc * q
                    for _ in itertools.repeat(None, 100):
                        # Function whose zero must be determined (fval) and
                        # its derivative (fder).
                        # Written inplace to save some function calls.
                        aq = 1.0 + gamma * q
                        aq2 = aq * aq
                        fder = (two_ccq / (aq2 * aq)).sum()

                        if fder == 0:
                            msg = "derivative was zero."
                            warnings.warn(
                                msg,
                                RuntimeWarning,
                                stacklevel=find_stack_level(),
                            )
                            break
                        fval = -(alpha2 - (cc / aq2).sum()) / fder
                        gamma = fval + gamma
                        if abs(fval) < 1.5e-8:
                            break

                    if abs(fval) > 0.1:
                        warnings.warn(
                            "Newton-Raphson step did not converge.\n"
                            "This may indicate a badly conditioned system.",
                            stacklevel=find_stack_level(),
                        )

                    if debug:
                        assert gamma >= 0.0, gamma
                    y[:, m] = (gamma * c) / aq  # x*

            # Copy back y in omega (column and row)
            omega[:p, p, :] = y[:, :p].T
            omega[p + 1 :, p, :] = y[:, p:].T
            omega[p, :p, :] = y[:, :p].T
            omega[p, p + 1 :, :] = y[:, p:].T

            for k in range(n_subjects):
                omega[p, p, k] = 1.0 / emp_covs[p, p, k] + np.dot(
                    np.dot(y[k, :], W_inv[..., k]), y[k, :]
                )

                if debug:
                    assert is_spd(omega[..., k])

        if probe_function is not None and probe_function(
            emp_covs,
            n_samples,
            alpha,
            max_iter,
            tol,
            n,
            omega,
            omega_old,
        ):
            probe_interrupted = True
            logger.log(
                "probe_function interrupted loop", verbose=verbose, msg_level=2
            )
            break

        # Compute max of variation
        omega_old -= omega
        omega_old = abs(omega_old)
        max_norm = omega_old.max()

        tolerance_reached = _check_if_tolerance_reached(
            tol, max_norm, verbose, n
        )
        if tolerance_reached:
            break

    if tol is not None and not tolerance_reached and not probe_interrupted:
        warnings.warn(
            "Maximum number of iterations reached without getting "
            "to the requested tolerance level.",
            stacklevel=find_stack_level(),
        )

    return omega


def _init_omega(emp_covs, precisions_init):
    """Initialize omega value."""
    if precisions_init is None:
        n_subjects = emp_covs.shape[-1]
        # Fortran order makes omega[..., k] contiguous, which is often useful.
        omega = np.ndarray(shape=emp_covs.shape, dtype=np.float64, order="F")
        for k in range(n_subjects):
            # Values on the main diagonal are far from zero, because they
            # are timeseries energy.
            omega[..., k] = np.diag(1.0 / np.diag(emp_covs[..., k]))
    else:
        omega = precisions_init.copy()

    return omega


def _check_alpha(alpha):
    if not isinstance(alpha, (int, float)) or alpha < 0:
        raise ValueError(
            "Regularization parameter alpha must be a positive number.\n"
            f"You provided: {alpha=}"
        )


def _check_diagonal_normalization(emp_covs, n_subjects):
    ones = np.ones(emp_covs.shape[0])
    for k in range(n_subjects):
        if (
            abs(emp_covs[..., k].flat[:: emp_covs.shape[0] + 1] - ones) > 0.1
        ).any():
            warnings.warn(
                "Input signals do not all have unit variance. "
                "This can lead to numerical instability.",
                stacklevel=find_stack_level(),
            )
            break


def _set_initial_state_w_and_w_inv(omega, debug, p):
    """Set initial state by removing the first col/row."""
    W = omega[1:, 1:, :].copy()  # stack of W(k)
    W_inv = np.ndarray(shape=W.shape, dtype=np.float64)
    for k in range(W.shape[2]):
        # stack of W^-1(k)
        W_inv[..., k] = scipy.linalg.inv(W[..., k])

        if debug:
            np.testing.assert_almost_equal(
                np.dot(W_inv[..., k], W[..., k]),
                np.eye(W_inv[..., k].shape[0]),
                decimal=10,
            )
            _assert_submatrix(omega[..., k], W[..., k], p)
            assert is_spd(W_inv[..., k])

    return W, W_inv


def _update_w_and_w_inv(omega, debug, W, W_inv, n_subjects, p, h, v):
    for k in range(n_subjects):
        _update_submatrix(omega[..., k], W[..., k], W_inv[..., k], p, h, v)

        if debug:
            _assert_submatrix(omega[..., k], W[..., k], p)
            assert is_spd(W_inv[..., k], decimal=14)
            np.testing.assert_almost_equal(
                np.dot(W[..., k], W_inv[..., k]),
                np.eye(W_inv[..., k].shape[0]),
                decimal=10,
            )


def _check_if_tolerance_reached(tol, max_norm, verbose, n):
    tolerance_reached = tol is not None and max_norm < tol
    if tolerance_reached:
        logger.log(
            f"tolerance reached at iteration number {n + 1:d}: {max_norm:.3e}",
            verbose=verbose,
        )
    return tolerance_reached


@fill_doc
class GroupSparseCovariance(CacheMixin, BaseEstimator):
    """Covariance and precision matrix estimator.

    The model used has been introduced in :footcite:t:`Varoquaux2010a`, and
    the algorithm used is based on what is described in
    :footcite:t:`Honorio2012`.

    Parameters
    ----------
    alpha : :obj:`float`, default=0.1
        regularization parameter. With normalized covariance matrices and
        number of samples, sensible values lie in the [0, 1] range (zero is
        no regularization: output is not sparse).

    tol : positive :obj:`float`, default=1e-3
        The tolerance to declare convergence: if the dual gap goes below
        this value, iterations are stopped.

    max_iter : :obj:`int`, default=10
        maximum number of iterations. The default value is rather
        conservative.

    %(verbose0)s

    %(memory)s

    %(memory_level)s

    Attributes
    ----------
    covariances_ : numpy.ndarray, shape (n_features, n_features, n_subjects)
        empirical covariance matrices.

    precisions_ : numpy.ndarray, shape (n_features, n_features, n_subjects)
        precision matrices estimated using the group-sparse algorithm.

    References
    ----------
    .. footbibliography::

    """

    def __init__(
        self,
        alpha=0.1,
        tol=1e-3,
        max_iter=10,
        verbose=0,
        memory=None,
        memory_level=0,
    ):
        self.alpha = alpha
        self.tol = tol
        self.max_iter = max_iter

        self.memory = memory
        self.memory_level = memory_level
        self.verbose = verbose

    def _more_tags(self):
        """Return estimator tags.

        TODO remove when bumping sklearn_version > 1.5
        """
        return self.__sklearn_tags__()

    def __sklearn_tags__(self):
        """Return estimator tags.

        See the sklearn documentation for more details on tags
        https://scikit-learn.org/1.6/developers/develop.html#estimator-tags
        """
        if SKLEARN_LT_1_6:
            from nilearn._utils.tags import tags

            return tags(niimg_like=False)

        from nilearn._utils.tags import InputTags

        tags = super().__sklearn_tags__()
        tags.input_tags = InputTags(niimg_like=False)
        return tags

    @fill_doc
    def fit(self, subjects, y=None):
        """Fit the group sparse precision model according \
        to the given training data and parameters.

        Parameters
        ----------
        subjects : :obj:`list` of numpy.ndarray \
            with shapes (n_samples, n_features)
            input subjects. Each subject is a 2D array, whose columns contain
            signals. Sample number can vary from subject to subject, but all
            subjects must have the same number of features (i.e. of columns).

        %(y_dummy)s

        Returns
        -------
        self : GroupSparseCovariance instance
            the object itself. Useful for chaining operations.

        """
        del y
        check_params(self.__dict__)
        for x in subjects:
            check_array(x, accept_sparse=False)

        if self.memory is None:
            self.memory = Memory(location=None)

        logger.log("Computing covariance matrices", verbose=self.verbose)
        self.covariances_, n_samples = empirical_covariances(
            subjects, assume_centered=False
        )

        logger.log("Computing precision matrices", verbose=self.verbose)
        ret = self._cache(_group_sparse_covariance)(
            self.covariances_,
            n_samples,
            self.alpha,
            tol=self.tol,
            max_iter=self.max_iter,
            verbose=max(0, self.verbose - 1),
            debug=False,
        )

        self.precisions_ = ret
        return self

    def __sklearn_is_fitted__(self):
        return hasattr(self, "precisions_") and hasattr(self, "covariances_")
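

# Illustrative sketch (not part of the original module): fitting the
# GroupSparseCovariance estimator on synthetic signals. The helper name and the
# synthetic data are hypothetical.
def _example_group_sparse_covariance_estimator(rng_seed=0):
    """Hypothetical usage illustration only; never called by nilearn."""
    rng = np.random.default_rng(rng_seed)
    subjects = [rng.standard_normal((60, 4)) for _ in range(3)]
    subjects = [s / s.std(axis=0) for s in subjects]  # unit variance
    estimator = GroupSparseCovariance(alpha=0.3, max_iter=20)
    estimator.fit(subjects)
    # One covariance and one precision matrix per subject, stacked along the
    # last axis; all precision matrices share the same sparsity pattern.
    assert estimator.covariances_.shape == (4, 4, 3)
    assert estimator.precisions_.shape == (4, 4, 3)
    return estimator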


def empirical_covariances(subjects, assume_centered=False, standardize=False):
    """Compute empirical covariances for several signals.

    Parameters
    ----------
    subjects : :obj:`list` of numpy.ndarray, \
        shape for each (n_samples, n_features)
        input subjects. Each subject is a 2D array, whose columns contain
        signals. Sample number can vary from subject to subject, but all
        subjects must have the same number of features (i.e. of columns).

    assume_centered : :obj:`bool`, default=False
        if True, assume that all input signals are centered. This slightly
        decreases computation time by avoiding useless computation.

    standardize : :obj:`bool`, default=False
        if True, set every signal variance to one before computing their
        covariance matrix (i.e. compute a correlation matrix).

    Returns
    -------
    emp_covs : numpy.ndarray, \
        shape (feature number, feature number, subject number)
        empirical covariances.

    n_samples : numpy.ndarray, shape (subject number,)
        number of samples for each subject. dtype is np.float64.

    """
    if not hasattr(subjects, "__iter__"):
        raise ValueError(
            "'subjects' input argument must be an iterable. "
            f"You provided {subjects.__class__}"
        )

    n_subjects = [s.shape[1] for s in subjects]
    if len(set(n_subjects)) > 1:
        raise ValueError(
            "All subjects must have the same number of "
            f"features.\nYou provided: {n_subjects}"
        )
    n_subjects = len(subjects)
    n_features = subjects[0].shape[1]

    # The dtype may change here because, depending on the input, conversion
    # from single to double precision may or may not be required.
    emp_covs = np.empty((n_features, n_features, n_subjects), order="F")
    for k, s in enumerate(subjects):
        if standardize:
            s = s / s.std(axis=0)  # copy on purpose
        M = empirical_covariance(s, assume_centered=assume_centered)

        # Force matrix symmetry, for numerical stability
        # of _group_sparse_covariance
        emp_covs[..., k] = M + M.T
    emp_covs /= 2

    n_samples = np.asarray([s.shape[0] for s in subjects], dtype=np.float64)

    return emp_covs, n_samples
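

# Illustrative sketch (not part of the original module): what
# empirical_covariances() returns for subjects with unequal sample counts.
# The helper name and the synthetic data are hypothetical.
def _example_empirical_covariances(rng_seed=0):
    """Hypothetical usage illustration only; never called by nilearn."""
    rng = np.random.default_rng(rng_seed)
    subjects = [rng.standard_normal((30, 4)), rng.standard_normal((45, 4))]
    emp_covs, n_samples = empirical_covariances(subjects, standardize=True)
    # Covariances are stacked along the last axis, one slice per subject;
    # with standardize=True each slice is a correlation matrix (unit diagonal).
    assert emp_covs.shape == (4, 4, 2)
    np.testing.assert_allclose(n_samples, [30.0, 45.0])
    return emp_covs, n_samples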


def group_sparse_scores(
    precisions, n_samples, emp_covs, alpha, duality_gap=False, debug=False
):
    """Compute scores used by group_sparse_covariance.

    The log-likelihood of a given list of empirical covariances /
    precisions.

    Parameters
    ----------
    precisions : numpy.ndarray, shape (n_features, n_features, n_subjects)
        estimated precisions.

    n_samples : array-like, shape (n_subjects,)
        number of samples used in estimating each subject in "precisions".
        n_samples.sum() must be equal to 1.

    emp_covs : numpy.ndarray, shape (n_features, n_features, n_subjects)
        empirical covariance matrices.

    alpha : :obj:`float`
        regularization parameter.

    duality_gap : :obj:`bool`, default=False
        if True, also return a duality gap upper bound.

    debug : :obj:`bool`, default=False
        if True, some consistency checks are performed to help solving
        numerical problems.

    Returns
    -------
    log_lik : float
        log-likelihood of precisions on the given covariances. This is the
        opposite of the loss function, without the regularization term.

    objective : float
        value of the objective function. This is the value minimized by
        group_sparse_covariance().

    duality_gap : float
        duality gap upper bound. The returned bound is tight: it vanishes for
        the optimal precision matrices.

    """
    n_features, _, n_subjects = emp_covs.shape

    log_lik = 0
    for k in range(n_subjects):
        log_lik_k = -np.sum(emp_covs[..., k] * precisions[..., k])
        log_lik_k += fast_logdet(precisions[..., k])
        log_lik += n_samples[k] * log_lik_k

    l2 = np.sqrt((precisions**2).sum(axis=-1))
    l12 = l2.sum() - np.diag(l2).sum()  # Do not count diagonal terms
    objective = alpha * l12 - log_lik
    ret = (log_lik, objective)

    # Compute duality gap if requested
    if duality_gap is True:
        A = np.empty(precisions.shape, dtype=np.float64, order="F")
        for k in range(n_subjects):
            # TODO: can be computed more efficiently using W_inv. See
            # Friedman, Jerome, Trevor Hastie, and Robert Tibshirani.
            # 'Sparse Inverse Covariance Estimation with the Graphical Lasso'.
            # Biostatistics 9, no. 3 (1 July 2008): 432-441.
            precisions_inv = scipy.linalg.inv(precisions[..., k])
            if debug:
                assert is_spd(precisions_inv)

            A[..., k] = n_samples[k] * (precisions_inv - emp_covs[..., k])

            if debug:
                np.testing.assert_almost_equal(A[..., k], A[..., k].T)

        # Project A on the set of feasible points
        alpha_max = np.sqrt((A**2).sum(axis=-1))
        mask = alpha_max > alpha
        for k in range(A.shape[-1]):
            A[mask, k] *= alpha / alpha_max[mask]
            # Set zeros on diagonals. Essential to get an always positive
            # duality gap.
            A[..., k].flat[:: A.shape[0] + 1] = 0

        dual_obj = 0  # dual objective
        for k in range(n_subjects):
            B = emp_covs[..., k] + A[..., k] / n_samples[k]
            dual_obj += n_samples[k] * (n_features + fast_logdet(B))

        # The previous computation can lead to a non-feasible point, because
        # one of the Bs may not be positive definite.
        # Use another value in this case, that ensures positive definiteness
        # of B. The upper bound on the duality gap is not tight in the
        # following, but is smaller than infinity, which is better in any
        # case.
        if not np.isfinite(dual_obj):
            for k in range(n_subjects):
                A[..., k] = -n_samples[k] * emp_covs[..., k]
                A[..., k].flat[:: A.shape[0] + 1] = 0
            alpha_max = np.sqrt((A**2).sum(axis=-1)).max()
            # the second value (0.05) is arbitrary: positive, in ]0, 1[
            gamma = min((alpha / alpha_max, 0.05))
            dual_obj = 0
            for k in range(n_subjects):
                # add gamma on the diagonal
                B = (1.0 - gamma) * emp_covs[..., k] + gamma * np.eye(
                    emp_covs.shape[0]
                )
                dual_obj += n_samples[k] * (n_features + fast_logdet(B))

        gap = objective - dual_obj
        ret = (*ret, gap)
    return ret
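

# Illustrative sketch (not part of the original module): scoring a set of
# precision matrices with group_sparse_scores(), including the duality gap.
# The helper name and the synthetic data are hypothetical.
def _example_group_sparse_scores(alpha=0.2, rng_seed=0):
    """Hypothetical usage illustration only; never called by nilearn."""
    rng = np.random.default_rng(rng_seed)
    subjects = [rng.standard_normal((50, 5)) for _ in range(2)]
    emp_covs, n_samples = empirical_covariances(subjects, standardize=True)
    n_samples /= n_samples.sum()  # group_sparse_scores expects sum == 1
    precisions = _group_sparse_covariance(
        emp_covs, n_samples, alpha, max_iter=20, tol=1e-3
    )
    log_lik, objective, gap = group_sparse_scores(
        precisions, n_samples, emp_covs, alpha, duality_gap=True
    )
    # The gap upper-bounds the distance between `objective` and the optimal
    # objective value; it shrinks toward zero as the precisions converge.
    return log_lik, objective, gap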


@fill_doc
def group_sparse_covariance_path(
    train_subjs,
    alphas,
    test_subjs=None,
    tol=1e-3,
    max_iter=10,
    precisions_init=None,
    verbose=0,
    debug=False,
    probe_function=None,
):
    """Get estimated precision matrices for different values of alpha.

    Calling this function is faster than calling group_sparse_covariance()
    repeatedly, because it makes use of the first result to initialize the
    next computation.

    Parameters
    ----------
    train_subjs : :obj:`list` of numpy.ndarray
        list of signals.

    alphas : :obj:`list` of :obj:`float`
        values of alpha to use. Best results for sorted values (decreasing).

    test_subjs : :obj:`list` of numpy.ndarray, default=None
        list of signals, independent from those in train_subjs, on which to
        compute a score. If None, no score is computed.

    %(verbose0)s

    tol, max_iter, debug, precisions_init :
        Passed to group_sparse_covariance(). See the corresponding docstring
        for details.

    probe_function : callable, default=None
        This value is called before the first iteration and after each
        iteration. If it returns True, then optimization is stopped
        prematurely.
        The function is given as arguments (in that order):

        - empirical covariances (ndarray),
        - number of samples for each subject (ndarray),
        - regularization parameter (float)
        - maximum iteration number (integer)
        - tolerance (float)
        - current iteration number (integer). -1 means "before first iteration"
        - current value of precisions (ndarray).
        - previous value of precisions (ndarray). None before first iteration.

    Returns
    -------
    precisions_list : :obj:`list` of numpy.ndarray
        estimated precisions for each value of alpha provided. The length of
        this list is the same as that of parameter "alphas".

    scores : :obj:`list` of float
        for each estimated precision, score obtained on the test set. Output
        only if test_subjs is not None.

    """
    train_covs, train_n_samples = empirical_covariances(
        train_subjs, assume_centered=False, standardize=True
    )

    scores = []
    precisions_list = []
    for alpha in alphas:
        precisions = _group_sparse_covariance(
            train_covs,
            train_n_samples,
            alpha,
            tol=tol,
            max_iter=max_iter,
            precisions_init=precisions_init,
            verbose=max(0, verbose - 1),
            debug=debug,
            probe_function=probe_function,
        )

        # Compute log-likelihood
        if test_subjs is not None:
            test_covs, _ = empirical_covariances(
                test_subjs, assume_centered=False, standardize=True
            )
            scores.append(
                group_sparse_scores(precisions, train_n_samples, test_covs, 0)[
                    0
                ]
            )
        precisions_list.append(precisions)
        precisions_init = precisions

    return (
        (precisions_list, scores)
        if test_subjs is not None
        else precisions_list
    )
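

# Illustrative sketch (not part of the original module): computing a
# warm-started path of precision matrices over a decreasing grid of alphas,
# scored on held-out subjects. The helper name and the data are hypothetical.
def _example_group_sparse_covariance_path(rng_seed=0):
    """Hypothetical usage illustration only; never called by nilearn."""
    rng = np.random.default_rng(rng_seed)
    train_subjs = [rng.standard_normal((40, 4)) for _ in range(3)]
    test_subjs = [rng.standard_normal((20, 4)) for _ in range(3)]
    alphas = [0.8, 0.4, 0.2, 0.1]  # best used in decreasing order
    precisions_list, scores = group_sparse_covariance_path(
        train_subjs, alphas, test_subjs=test_subjs, max_iter=20, tol=1e-3
    )
    # One stack of precision matrices and one test log-likelihood per alpha.
    assert len(precisions_list) == len(alphas) == len(scores)
    return precisions_list, scores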


class EarlyStopProbe:
    """Callable probe for early stopping in GroupSparseCovarianceCV.

    Stop optimizing as soon as the score on the test set starts decreasing.
    An instance of this class is supposed to be passed in the probe_function
    argument of group_sparse_covariance().

    """

    def __init__(self, test_subjs, verbose=0):
        self.test_emp_covs, _ = empirical_covariances(test_subjs)
        self.verbose = verbose

    def __call__(  # noqa: D102
        self,
        emp_covs,  # noqa: ARG002
        n_samples,
        alpha,
        max_iter,  # noqa: ARG002
        tol,  # noqa: ARG002
        iter_n,
        omega,
        prev_omega,  # noqa: ARG002
    ):
        log_lik, _ = group_sparse_scores(
            omega, n_samples, self.test_emp_covs, alpha
        )
        if iter_n > -1 and self.last_log_lik > log_lik:
            logger.log(
                "Log-likelihood on test set is decreasing. "
                f"Stopping at iteration {iter_n}",
                verbose=self.verbose,
            )
            return True
        self.last_log_lik = log_lik


@fill_doc
class GroupSparseCovarianceCV(CacheMixin, BaseEstimator):
    """Sparse inverse covariance w/ cross-validated choice of the parameter.

    A cross-validated value for the regularization parameter is first
    determined using several calls to group_sparse_covariance. Then a final
    optimization is run to get a value for the precision matrices, using the
    selected value of the parameter. Different values of tolerance and of
    maximum iteration number can be used in these two phases (see the tol
    and tol_cv keywords below for example).

    Parameters
    ----------
    alphas : :obj:`int`, default=4
        initial number of points in the grid of regularization parameter
        values. Each step of grid refinement adds that many points as well.

    n_refinements : :obj:`int`, default=4
        number of times the initial grid should be refined.

    cv : :obj:`int`, default=None
        number of folds in a K-fold cross-validation scheme.

    tol_cv : :obj:`float`, default=1e-2
        tolerance used to get the optimal alpha value. It has the same meaning
        as the `tol` parameter in :func:`group_sparse_covariance`.

    max_iter_cv : :obj:`int`, default=50
        maximum number of iterations for each optimization, during the alpha-
        selection phase.

    tol : :obj:`float`, default=1e-3
        tolerance used during the final optimization for determining the
        precision matrices values.

    max_iter : :obj:`int`, default=100
        maximum number of iterations in the final optimization.

    %(verbose0)s

    %(n_jobs)s

    debug : :obj:`bool`, default=False
        if True, activates some internal checks for consistency. Only useful
        for nilearn developers, not users.

    early_stopping : :obj:`bool`, default=True
        if True, reduce computation time by using a heuristic to reduce the
        number of iterations required to get the optimal value for alpha. Be
        aware that this can lead to slightly different values for the optimal
        alpha compared to early_stopping=False.

    Attributes
    ----------
    covariances_ : numpy.ndarray, shape (n_features, n_features, n_subjects)
        covariance matrices, one per subject.

    precisions_ : numpy.ndarray, shape (n_features, n_features, n_subjects)
        precision matrices, one per subject. All matrices have the same
        sparsity pattern (if a coefficient is zero for a given matrix, it
        is also zero for every other one).

    alpha_ : float
        penalization parameter value selected.

    cv_alphas_ : list of floats
        all values of the penalization parameter explored.

    cv_scores_ : numpy.ndarray, shape (n_alphas, n_folds)
        scores obtained on the test set for each value of the penalization
        parameter explored.

    See Also
    --------
    GroupSparseCovariance,
    sklearn.covariance.GraphicalLassoCV

    Notes
    -----
    The search for the optimal penalization parameter (alpha) is done on an
    iteratively refined grid: first the cross-validated scores on a grid are
    computed, then a new refined grid is centered around the maximum, and so
    on.

    """

    def __init__(
        self,
        alphas=4,
        n_refinements=4,
        cv=None,
        tol_cv=1e-2,
        max_iter_cv=50,
        tol=1e-3,
        max_iter=100,
        verbose=0,
        n_jobs=1,
        debug=False,
        early_stopping=True,
    ):
        self.alphas = alphas
        self.n_refinements = n_refinements
        self.tol_cv = tol_cv
        self.max_iter_cv = max_iter_cv
        self.cv = cv
        self.tol = tol
        self.max_iter = max_iter

        self.verbose = verbose
        self.n_jobs = n_jobs
        self.debug = debug
        self.early_stopping = early_stopping

    def _more_tags(self):
        """Return estimator tags.

        TODO remove when bumping sklearn_version > 1.5
        """
        return self.__sklearn_tags__()

    def __sklearn_tags__(self):
        """Return estimator tags.

        See the sklearn documentation for more details on tags
        https://scikit-learn.org/1.6/developers/develop.html#estimator-tags
        """
        if SKLEARN_LT_1_6:
            from nilearn._utils.tags import tags

            return tags(niimg_like=False)

        from nilearn._utils.tags import InputTags

        tags = super().__sklearn_tags__()
        tags.input_tags = InputTags(niimg_like=False)
        return tags

    @fill_doc
    def fit(self, subjects, y=None):
        """Compute cross-validated group-sparse precisions.

        Parameters
        ----------
        subjects : :obj:`list` of numpy.ndarray \
            with shapes (n_samples, n_features)
            input subjects. Each subject is a 2D array, whose columns contain
            signals. Sample number can vary from subject to subject, but all
            subjects must have the same number of features (i.e. of columns).

        %(y_dummy)s

        Returns
        -------
        self : GroupSparseCovarianceCV
            the object instance itself.

        """
        del y
        check_params(self.__dict__)

        for x in subjects:
            check_array(x, accept_sparse=False)

        # Empirical covariances
        emp_covs, n_samples = empirical_covariances(
            subjects, assume_centered=False
        )
        n_subjects = emp_covs.shape[2]

        # One cv generator per subject must be created, because each subject
        # can have a different number of samples from the others.
        cv = [
            check_cv(
                self.cv, np.ones(subjects[k].shape[0]), classifier=False
            ).split(subjects[k])
            for k in range(n_subjects)
        ]
        path = []  # List of (alpha, scores, covs)
        n_alphas = self.alphas

        if isinstance(n_alphas, collections.abc.Sequence):
            alphas = list(self.alphas)
            n_refinements = 1
        else:
            n_refinements = self.n_refinements
            alpha_1, _ = compute_alpha_max(emp_covs, n_samples)
            alpha_0 = 1e-2 * alpha_1
            alphas = np.logspace(
                np.log10(alpha_0), np.log10(alpha_1), n_alphas
            )[::-1]

        covs_init = itertools.repeat(None)

        # Copying the cv generators to use them n_refinements times.
        cv_ = zip(*cv)

        for i, (this_cv) in enumerate(itertools.tee(cv_, n_refinements)):
            # Compute the cross-validated loss on the current grid
            train_test_subjs = []
            for train_test in this_cv:
                assert len(train_test) == n_subjects
                train_test_subjs.append(
                    list(
                        zip(
                            *[
                                (subject[train, :], subject[test, :])
                                for subject, (train, test) in zip(
                                    subjects, train_test
                                )
                            ]
                        )
                    )
                )
            if self.early_stopping:
                probes = [
                    EarlyStopProbe(
                        test_subjs, verbose=max(0, self.verbose - 1)
                    )
                    for _, test_subjs in train_test_subjs
                ]
            else:
                probes = itertools.repeat(None)

            this_path = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
                delayed(group_sparse_covariance_path)(
                    train_subjs,
                    alphas,
                    test_subjs=test_subjs,
                    max_iter=self.max_iter_cv,
                    tol=self.tol_cv,
                    verbose=max(0, self.verbose - 1),
                    debug=self.debug,
                    # Warm restart is useless with early stopping.
                    precisions_init=None if self.early_stopping else prec_init,
                    probe_function=probe,
                )
                for (train_subjs, test_subjs), prec_init, probe in zip(
                    train_test_subjs, covs_init, probes
                )
            )

            # this_path[i] is a tuple (precisions_list, scores)
            # - scores: scores obtained with the i-th folding, for each value
            #   of alpha.
            # - precisions_list: corresponding precision matrices, for each
            #   value of alpha.
            precisions_list, scores = list(zip(*this_path))
            # now scores[i][j] is the score for the i-th folding, j-th value
            # of alpha (analogous for precisions_list)
            precisions_list = list(zip(*precisions_list))
            scores = [np.mean(sc) for sc in zip(*scores)]
            # scores[i] is the mean score obtained for the i-th value of
            # alpha.

            path.extend(list(zip(alphas, scores, precisions_list)))
            path = sorted(path, key=operator.itemgetter(0), reverse=True)

            # Find the maximum score (avoid using the built-in 'max' function
            # to have a fully-reproducible selection of the smallest alpha in
            # case of equality)
            best_score = -np.inf
            last_finite_idx = 0
            for index, (_, this_score, _) in enumerate(path):
                if this_score >= 0.1 / np.finfo(np.float64).eps:
                    this_score = np.nan
                if np.isfinite(this_score):
                    last_finite_idx = index
                if this_score >= best_score:
                    best_score = this_score
                    best_index = index

            # Refine the grid
            if best_index == 0:
                # We do not need to go back: we have chosen
                # the highest value of alpha for which there are
                # non-zero coefficients
                alpha_1 = path[0][0]
                alpha_0 = path[1][0]
                covs_init = path[0][2]
            elif best_index == last_finite_idx and best_index != len(path) - 1:
                # We have non-converged models on the upper bound of the
                # grid, we need to refine the grid there
                alpha_1 = path[best_index][0]
                alpha_0 = path[best_index + 1][0]
                covs_init = path[best_index][2]
            elif best_index == len(path) - 1:
                alpha_1 = path[best_index][0]
                alpha_0 = 0.01 * path[best_index][0]
                covs_init = path[best_index][2]
            else:
                alpha_1 = path[best_index - 1][0]
                alpha_0 = path[best_index + 1][0]
                covs_init = path[best_index - 1][2]
            alphas = np.logspace(
                np.log10(alpha_1), np.log10(alpha_0), len(alphas) + 2
            )
            alphas = alphas[1:-1]
            if n_refinements > 1:
                logger.log(
                    "[GroupSparseCovarianceCV] Done refinement "
                    f"{i: 2} out of {n_refinements}",
                    verbose=self.verbose,
                )

        path = list(zip(*path))
        cv_scores_ = list(path[1])
        alphas = list(path[0])

        self.cv_scores_ = np.array(cv_scores_)
        self.alpha_ = alphas[best_index]
        self.cv_alphas_ = alphas

        # Finally, fit the model with the selected alpha
        logger.log("Final optimization", verbose=self.verbose)
        self.covariances_ = emp_covs
        self.precisions_ = _group_sparse_covariance(
            emp_covs,
            n_samples,
            self.alpha_,
            tol=self.tol,
            max_iter=self.max_iter,
            verbose=max(0, self.verbose - 1),
            debug=self.debug,
        )
        return self

    def __sklearn_is_fitted__(self):
        return hasattr(self, "precisions_") and hasattr(self, "covariances_")
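

# Illustrative sketch (not part of the original module): cross-validated
# selection of alpha with GroupSparseCovarianceCV. The helper name and the
# synthetic data are hypothetical.
def _example_group_sparse_covariance_cv(rng_seed=0):
    """Hypothetical usage illustration only; never called by nilearn."""
    rng = np.random.default_rng(rng_seed)
    subjects = [rng.standard_normal((60, 4)) for _ in range(3)]
    subjects = [s / s.std(axis=0) for s in subjects]  # unit variance
    model = GroupSparseCovarianceCV(alphas=4, n_refinements=2, cv=3)
    model.fit(subjects)
    # model.alpha_ holds the selected regularization parameter,
    # model.cv_alphas_ / model.cv_scores_ the explored grid and its scores,
    # and model.precisions_ the final per-subject precision matrices.
    assert model.precisions_.shape == (4, 4, 3)
    return model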