Coverage for nilearn/reporting/get_clusters_table.py: 12%

128 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-18 13:00 +0200

1"""Implement plotting functions useful to report analysis results.""" 

2 

3import warnings 

4from collections import OrderedDict 

5from decimal import Decimal 

6from string import ascii_lowercase 

7 

8import numpy as np 

9import pandas as pd 

10from nibabel import affines 

11from scipy.ndimage import ( 

12 center_of_mass, 

13 generate_binary_structure, 

14 label, 

15 maximum_filter, 

16 minimum_filter, 

17) 

18 

19from nilearn._utils import check_niimg_3d 

20from nilearn._utils.logger import find_stack_level 

21from nilearn._utils.niimg import safe_get_data 

22from nilearn.image import new_img_like, threshold_img 

23from nilearn.image.resampling import coord_transform 

24 

25 

def _local_max(data, affine, min_distance):
    """Locate the local maxima of a cluster that are far enough apart.

    Adapted from https://stackoverflow.com/a/22631583/2589328

    The work is delegated to three helpers: candidate peaks are identified,
    ordered by decreasing value, and finally pared down according to
    ``min_distance``.

    Parameters
    ----------
    data : array_like
        3D array with masked values for the cluster.

    affine : np.ndarray
        Square matrix specifying the position of the image array data
        in a reference space.

    min_distance : float
        Minimum distance between local maxima in ``data``, in mm.

    Returns
    -------
    ijk : `numpy.ndarray`
        (n_foci, 3) array of local maxima indices for the cluster.

    vals : `numpy.ndarray`
        (n_foci,) array of values from ``data`` at ``ijk``.

    """
    cand_ijk, cand_vals = _identify_subpeaks(data)
    cand_xyz, cand_ijk, cand_vals = _sort_subpeaks(cand_ijk, cand_vals, affine)
    return _pare_subpeaks(cand_xyz, cand_ijk, cand_vals, min_distance)

56 

57 

58def _identify_subpeaks(data): 

59 """Identify cluster peak and subpeaks based on minimum distance. 

60 

61 Parameters 

62 ---------- 

63 data : `numpy.ndarray` 

64 3D array of with masked values for cluster. 

65 

66 Returns 

67 ------- 

68 ijk : `numpy.ndarray` 

69 (n_foci, 3) array of local maximum indices for cluster. 

70 vals : `numpy.ndarray` 

71 (n_foci,) array of values from data at ijk. 

72 

73 Notes 

74 ----- 

75 When a cluster's local maximum corresponds to contiguous voxels with the 

76 same values (as in a binary cluster), this function determines the center 

77 of mass for those voxels. If the center of mass falls outside the cluster, 

78 we instead report the nearest cluster voxel. 

79 """ 

80 data_max = maximum_filter(data, 3) 

81 maxima = data == data_max 

82 zero_mask = data == 0 

83 maxima[zero_mask] = 0 

84 

85 # Don't treat constant patches as maxima unless the entire cluster is 

86 # constant (as in a binary cluster). 

87 is_constant = np.isclose(data[~zero_mask].max(), data[~zero_mask].min()) 

88 if not is_constant: 

89 data_min = minimum_filter(data, 3) 

90 diff = (data_max - data_min) > 0 

91 maxima[diff == 0] = 0 

92 

93 labeled, n_subpeaks = label(maxima) 

94 labels_index = np.arange(1, n_subpeaks + 1) 

95 ijk = np.array(center_of_mass(data, labeled, labels_index)) 

96 ijk = np.round(ijk).astype(int) 

97 # Determine if all subpeaks are within the cluster 

98 # They may not be if the cluster is binary and has a shape where the COM is 

99 # outside the cluster, like a donut. 

100 subpeaks_outside_cluster = ( 

101 labeled[ijk[:, 0], ijk[:, 1], ijk[:, 2]] != labels_index 

102 ) 

103 if np.any(subpeaks_outside_cluster): 

104 warnings.warn( 

105 ( 

106 "Attention: At least one of the (sub)peaks " 

107 "falls outside of the cluster body. " 

108 "Identifying the nearest in-cluster voxel." 

109 ), 

110 stacklevel=find_stack_level(), 

111 ) 

112 # Replace centers of mass with their nearest neighbor points in the 

113 # corresponding clusters. Note this is also equivalent to computing the 

114 # centers of mass constrained to points within the cluster. 

115 ijk[subpeaks_outside_cluster] = _cluster_nearest_neighbor( 

116 ijk[subpeaks_outside_cluster], 

117 labels_index[subpeaks_outside_cluster], 

118 labeled, 

119 ) 

120 vals = data[ijk[:, 0], ijk[:, 1], ijk[:, 2]] 

121 return ijk, vals 

122 

123 

124def _cluster_nearest_neighbor(ijk, labels_index, labeled): 

125 """Find the nearest neighbor for given points in the corresponding cluster. 

126 

127 Parameters 

128 ---------- 

129 ijk : :obj:`numpy.ndarray` 

130 (n_pts, 3) array of query points. 

131 labels_index : :obj:`numpy.ndarray` 

132 (n_pts,) array of corresponding cluster indices. 

133 labeled : :obj:`numpy.ndarray` 

134 3D array with voxels labeled according to cluster index. 

135 

136 Returns 

137 ------- 

138 nbrs : :obj:`numpy.ndarray` 

139 (n_pts, 3) nearest neighbor points. 

140 """ 

141 labels = labeled[labeled > 0] 

142 clusters_ijk = np.array(labeled.nonzero()).T 

143 nbrs = np.zeros_like(ijk) 

144 for ii, (lab, point) in enumerate(zip(labels_index, ijk)): 

145 lab_ijk = clusters_ijk[labels == lab] 

146 dist = np.linalg.norm(lab_ijk - point, axis=1) 

147 nbrs[ii] = lab_ijk[np.argmin(dist)] 

148 return nbrs 

149 

150 

def _sort_subpeaks(ijk, vals, affine):
    """Order the subpeaks of a cluster from largest to smallest stat value.

    Parameters
    ----------
    ijk : 2D numpy.ndarray
        The matrix indices of the subpeaks to sort.
    vals : 1D numpy.ndarray
        The statistical value associated with each subpeak in ``ijk``.
    affine : (4x4) numpy.ndarray
        The affine of the img from which the subpeaks were extracted.
        Used to convert IJK indices to XYZ coordinates.

    Returns
    -------
    xyz : 2D numpy.ndarray
        The sorted coordinates of the subpeaks.
    ijk : 2D numpy.ndarray
        The sorted matrix indices of the subpeaks.
    vals : 1D numpy.ndarray
        The sorted statistical value associated with each subpeak in ``ijk``.
    """
    # Sorting the negated values ascending yields a descending ranking.
    descending = np.argsort(-vals)
    sorted_ijk = ijk[descending, :]
    sorted_vals = vals[descending]
    # Map the matrix indices to xyz coordinates in mm.
    sorted_xyz = affines.apply_affine(affine, sorted_ijk)
    return sorted_xyz, sorted_ijk, sorted_vals

178 

179 

180def _pare_subpeaks(xyz, ijk, vals, min_distance): 

181 """Reduce list of subpeaks based on distance. 

182 

183 Parameters 

184 ---------- 

185 xyz : 2D numpy.ndarray 

186 Subpeak coordinates to reduce. Rows correspond to peaks, columns 

187 correspond to x, y, and z dimensions. 

188 ijk : 2D numpy.ndarray 

189 The subpeak coordinates in ``xyz``, but converted to matrix indices. 

190 vals : 1D numpy.ndarray 

191 The statistical value associated with each subpeak in ``xyz``/``ijk``. 

192 min_distance : float 

193 The minimum distance between subpeaks, in millimeters. 

194 

195 Returns 

196 ------- 

197 ijk : 2D numpy.ndarray 

198 The reduced index of subpeaks. 

199 vals : 1D numpy.ndarray 

200 The statistical values associated with the reduced set of subpeaks. 

201 """ 

202 keep_idx = np.ones(xyz.shape[0]).astype(bool) 

203 for i in range(xyz.shape[0]): 

204 for j in range(i + 1, xyz.shape[0]): 

205 if keep_idx[i] == 1: 

206 dist = np.linalg.norm(xyz[i, :] - xyz[j, :]) 

207 keep_idx[j] = dist > min_distance 

208 ijk = ijk[keep_idx, :] 

209 vals = vals[keep_idx] 

210 return ijk, vals 

211 

212 

def get_clusters_table(
    stat_img,
    stat_threshold,
    cluster_threshold=None,
    two_sided=False,
    min_distance=8.0,
    return_label_maps=False,
):
    """Summarize the clusters of a statistical image in a pandas dataframe.

    Any statistical map in which more extreme values indicate greater
    statistical significance is a suitable input.
    For example, z-statistic or -log10(p) maps are valid inputs, but a p-value
    map is not.

    .. important::

        For binary clusters (clusters comprised of only one value),
        the table reports the center of mass of the cluster,
        rather than any peaks/subpeaks.

        This center of mass may, in some cases, appear outside of the cluster.

        .. versionchanged:: 0.9.2
            In this case, the cluster voxel nearest to the center of mass is
            reported.

    .. seealso::

        This function does not report any named anatomical location
        for the clusters.
        To get the names of the location of the clusters
        according to one or several atlases,
        we recommend using
        the `atlasreader package <https://github.com/miykael/atlasreader>`_.


    Parameters
    ----------
    stat_img : Niimg-like object
        Statistical image to threshold and summarize.

    stat_threshold : :obj:`float`
        Cluster forming threshold. This value must be in the same scale as
        ``stat_img``.

    cluster_threshold : :obj:`int` or None, default=None
        Cluster size threshold, in :term:`voxels<voxel>`.
        If None, then no cluster size threshold will be applied.

    two_sided : :obj:`bool`, default=False
        Whether to employ two-sided thresholding or to evaluate positive values
        only.

    min_distance : :obj:`float`, default=8.0
        Minimum distance between subpeaks, in millimeters.

        .. note::
            If two different clusters are closer than ``min_distance``, it can
            result in peaks closer than ``min_distance``.

    return_label_maps : :obj:`bool`, default=False
        Whether or not to additionally output cluster label map images.

        .. versionadded:: 0.10.1

    Returns
    -------
    result_table : :obj:`pandas.DataFrame`
        Table with peaks and subpeaks from thresholded ``stat_img``.
        The columns in this table include:

        ================== ====================================================
        Cluster ID         The cluster number. Subpeaks have letters after the
                           number.
        X/Y/Z              The coordinate for the peak, in millimeters.
        Peak Stat          The statistical value associated with the peak.
                           The statistic type is dependent on the type of the
                           statistical image.
        Cluster Size (mm3) The size of the cluster, in millimeters cubed.
                           Rows corresponding to subpeaks will not have a value
                           in this column.
        ================== ====================================================

    label_maps : :obj:`list`
        Returned if return_label_maps=True
        List of Niimg-like objects of cluster label maps.
        If two_sided==True, first and second maps correspond
        to positive and negative tails.

        .. versionadded:: 0.10.1

    """
    cols = ["Cluster ID", "X", "Y", "Z", "Peak Stat", "Cluster Size (mm3)"]
    # No cluster size threshold is expressed as a threshold of 0 voxels.
    if cluster_threshold is None:
        cluster_threshold = 0

    # Make sure the input is a 3D niimg-like object and keep its geometry.
    stat_img = check_niimg_3d(stat_img)
    affine = stat_img.affine
    shape = stat_img.shape

    # Threshold the image (height and, when requested, cluster extent).
    stat_img = threshold_img(
        img=stat_img,
        threshold=stat_threshold,
        cluster_threshold=cluster_threshold,
        two_sided=two_sided,
        mask_img=None,
        copy=True,
        copy_header=True,
    )

    # When a cluster threshold is in use the data may end up being modified,
    # so a copy is requested in that case.
    stat_map = safe_get_data(
        stat_img,
        ensure_finite=True,
        copy_data=(cluster_threshold != 0),
    )

    # Structuring element for 6-connectivity, aka NN1 or "faces".
    bin_struct = generate_binary_structure(rank=3, connectivity=1)

    voxel_size = np.prod(stat_img.header.get_zooms())

    rows = []
    label_maps = []
    no_clusters_found = True
    for sign in [1, -1] if two_sided else [1]:
        # Work on the flipped map for the negative tail so that the same
        # "greater than" logic applies to both tails.
        signed_map = stat_map * sign

        # Voxels surviving the cluster-defining threshold.
        suprathreshold = (signed_map > stat_threshold).astype(int)

        # Nothing survives in this tail: warn and move on to the next one.
        if np.sum(suprathreshold) == 0:
            warnings.warn(
                "Attention: No clusters "
                f"with stat {'higher' if sign == 1 else 'lower'} "
                f"than {stat_threshold * sign}",
                category=UserWarning,
                stacklevel=find_stack_level(),
            )
            continue

        # Label connected components, then rank them by descending peak.
        label_map = label(suprathreshold, bin_struct)[0]
        clust_ids = sorted(np.unique(label_map)[1:])
        peak_vals = np.array(
            [np.max(signed_map * (label_map == c)) for c in clust_ids]
        )
        clust_ids = [clust_ids[c] for c in (-peak_vals).argsort()]

        if return_label_maps:
            # Renumber the label map so that labels follow the sorted order,
            # then store it as a nifti-like image.
            relabel_idx = np.insert(clust_ids, 0, 0).argsort().astype(np.int32)
            relabel_map = relabel_idx[label_map.ravel()].reshape(shape)
            label_maps.append(
                new_img_like(stat_img, relabel_map, affine=affine)
            )

        for rank, cluster_label in enumerate(clust_ids, start=1):
            cluster_mask = label_map == cluster_label
            masked_data = signed_map * cluster_mask

            cluster_size_mm = int(np.sum(cluster_mask) * voxel_size)

            # Peak, subpeaks and their statistics for this cluster.
            subpeak_ijk, subpeak_vals = _local_max(
                masked_data,
                stat_img.affine,
                min_distance=min_distance,
            )
            subpeak_vals *= sign  # restore the original sign
            world_coords = coord_transform(
                subpeak_ijk[:, 0],
                subpeak_ijk[:, 1],
                subpeak_ijk[:, 2],
                stat_img.affine,
            )
            subpeak_xyz = np.array(np.asarray(world_coords).tolist()).T

            # Report the peak and, at most, the top 3 subpeaks.
            for peak_no in range(min(len(subpeak_vals), 4)):
                if peak_no == 0:
                    row_id = rank
                    row_size = cluster_size_mm
                else:
                    # Subpeaks are named after the cluster number followed
                    # by a letter: 1a, 1b, etc. They carry no cluster size.
                    row_id = f"{rank}{ascii_lowercase[peak_no - 1]}"
                    row_size = ""
                rows.append(
                    [
                        row_id,
                        *subpeak_xyz[peak_no],
                        subpeak_vals[peak_no],
                        row_size,
                    ]
                )

        # Reaching this point means this tail contains clusters.
        no_clusters_found = False

    if no_clusters_found:
        result_table = pd.DataFrame(columns=cols)
    else:
        result_table = pd.DataFrame(columns=cols, data=rows)

    if return_label_maps:
        return result_table, label_maps
    return result_table

438 

439 

def clustering_params_to_dataframe(
    threshold,
    cluster_threshold,
    min_distance,
    height_control,
    alpha,
    is_volume_glm,
):
    """Gather the clustering parameters in a single-column Pandas DataFrame.

    For use as part of the Cluster Table.

    Parameters
    ----------
    threshold : float
        Cluster forming threshold. It is reported rounded to 3 decimals.

    cluster_threshold : int or None
        Cluster size threshold, in voxels.
        Only reported when ``is_volume_glm`` is true.

    min_distance : float
        For display purposes only.
        Minimum distance between subpeaks in mm.
        Only reported when ``is_volume_glm`` is true.

    height_control : string or None
        False positive control meaning of cluster forming
        threshold: 'fpr' or 'fdr' or 'bonferroni' or None.
        A falsy value is reported as the string "None".

    alpha : float
        Number controlling the thresholding (either a p-value or q-value).
        Its actual meaning depends on the height_control parameter.
        Only reported when ``height_control`` is set; values below 0.001
        are shown in scientific notation.

    is_volume_glm: bool
        True if we are dealing with volume data.

    Returns
    -------
    table_details : Pandas.DataFrame
        Dataframe with clustering parameters, one row per parameter.

    """
    rounded_threshold = np.around(threshold, 3)

    if not height_control:
        entries = [
            ("Height control", "None"),
            ("Threshold Z", rounded_threshold),
        ]
    else:
        # Very small alphas are rendered in scientific notation so that they
        # stay readable in the table.
        shown_alpha = f"{Decimal(alpha):.2E}" if alpha < 0.001 else alpha
        entries = [
            ("Height control", height_control),
            ("\u03b1", shown_alpha),
            ("Threshold (computed)", rounded_threshold),
        ]

    # Cluster extent and peak distance are only reported for volume data.
    if is_volume_glm:
        entries += [
            ("Cluster size threshold (voxels)", cluster_threshold),
            ("Minimum distance (mm)", min_distance),
        ]

    return pd.DataFrame.from_dict(OrderedDict(entries), orient="index")