Coverage for nilearn/glm/first_level/experimental_paradigm.py: 20%

54 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-16 12:32 +0200

1"""An experimental protocol is handled as a pandas DataFrame \ 

2that includes an 'onset' field. 

3 

4This yields the onset time of the events in the experimental paradigm. 

5It can also contain: 

6 

7 * a 'trial_type' field that yields the condition identifier. 

8 * a 'duration' field that yields event duration (for so-called block 

9 paradigms). 

10 * a 'modulation' field that associates a scalar value to each event. 

11""" 

12 

13import warnings 

14 

15import pandas as pd 

16from pandas.api.types import is_numeric_dtype 

17 

18from nilearn._utils import logger 

19from nilearn._utils.logger import find_stack_level 

20 

21 

def check_events(events):
    """Test that the events data describes a valid experimental paradigm.

    It is valid if the events data has ``'onset'`` and ``'duration'`` keys
    with numeric non NaN values.

    Parameters
    ----------
    events : pandas DataFrame
        Events data that describes a functional experimental paradigm.

    Returns
    -------
    events : pandas DataFrame
        Events data that describes a functional experimental paradigm.

        The dataframe has the following columns:

        trial_type : array of shape (n_events,), dtype='s'
            Per-event experimental conditions identifier.
            Defaults to np.repeat('dummy', len(onsets)).

        onset : array of shape (n_events,), dtype='f'
            Per-event onset time (in seconds).

        duration : array of shape (n_events,), dtype='f'
            Per-event duration (in seconds),
            defaults to zeros(n_events) when no duration is provided.

        modulation : array of shape (n_events,), dtype='f'
            Per-event modulation,
            defaults to ones(n_events) when no modulation is provided.

    Raises
    ------
    TypeError
        If the events data is not a pandas DataFrame.

    ValueError
        If the events data has:

        - no ``'onset'`` or ``'duration'`` column,
        - has non numeric values
          in the ``'onset'`` or ``'duration'`` columns
        - has nan values in the ``'onset'`` or ``'duration'`` columns.

    Warns
    -----
    UserWarning
        If the events data:

        - has no ``'trial_type'`` column,
        - has any event with a duration equal to 0,
        - contains columns other than ``'onset'``, ``'duration'``,
          ``'trial_type'`` or ``'modulation'``,
        - contains duplicated events, meaning event with same:

          - ``'trial_type'``
          - ``'onset'``
          - ``'duration'``

    """
    # Fail fast on anything that is not a DataFrame.
    if not isinstance(events, pd.DataFrame):
        raise TypeError(
            "Events should be a Pandas DataFrame. "
            f"A {type(events)} was provided instead."
        )

    # Validate the mandatory columns, then work on a copy so the caller's
    # DataFrame is never mutated by the fix-up steps below.
    checked = _check_columns(events).copy()

    checked = _handle_missing_trial_types(checked)
    _check_null_duration(checked)
    _check_unexpected_columns(checked)

    return _handle_modulation(checked)

102 

103 

104def _check_columns(events): 

105 """Check events has onset and duration numeric columns with no NaN.""" 

106 for col_name in ["onset", "duration"]: 

107 if col_name not in events.columns: 

108 raise ValueError( 

109 f"The provided events data has no {col_name} column." 

110 ) 

111 if events[col_name].isna().any(): 

112 raise ValueError( 

113 f"The following column must not contain nan values: {col_name}" 

114 ) 

115 # Make sure we have a numeric type for duration 

116 if not is_numeric_dtype(events[col_name]): 

117 try: 

118 events = events.astype({col_name: float}) 

119 except ValueError as e: 

120 raise ValueError( 

121 f"Could not cast {col_name} to float in events data." 

122 ) from e 

123 return events 

124 

125 

126def _handle_missing_trial_types(events): 

127 """Create 'dummy' events trial_type if the column is not present.""" 

128 if "trial_type" not in events.columns: 

129 warnings.warn( 

130 "'trial_type' column not found in the given events data.", 

131 stacklevel=find_stack_level(), 

132 ) 

133 events["trial_type"] = "dummy" 

134 return events 

135 

136 

137def _check_null_duration(events): 

138 """Warn if there are events with null duration.""" 

139 conditions_with_null_duration = events["trial_type"][ 

140 events["duration"] == 0 

141 ].unique() 

142 if len(conditions_with_null_duration) > 0: 

143 ordered_list = [ 

144 f"- '{x}'\n" for x in sorted(conditions_with_null_duration) 

145 ] 

146 ordered_list = "".join(ordered_list) 

147 warnings.warn( 

148 ( 

149 "The following conditions contain events with null duration:\n" 

150 f"{ordered_list}" 

151 ), 

152 stacklevel=find_stack_level(), 

153 ) 

154 

155 

156def _handle_modulation(events): 

157 """Set the modulation column to 1 if it is not present.""" 

158 if "modulation" in events.columns: 

159 logger.log( 

160 "A 'modulation' column was found in " 

161 "the given events data and is used.", 

162 ) 

163 else: 

164 events["modulation"] = 1 

165 return events 

166 

167 

168VALID_FIELDS = {"onset", "duration", "trial_type", "modulation"} 

169 

170 

def _check_unexpected_columns(events):
    """Warn for each unexpected column that will not be used afterwards.

    Parameters
    ----------
    events : pandas DataFrame
        Events data that describes a functional experimental paradigm.
    """
    # Sort the set difference so the warning message is deterministic:
    # iteration order of a set is arbitrary across runs.
    unexpected_columns = sorted(set(events.columns).difference(VALID_FIELDS))
    if unexpected_columns:
        warnings.warn(
            "The following unexpected columns "
            "in events data will be ignored: "
            f"{', '.join(unexpected_columns)}",
            stacklevel=find_stack_level(),
        )

181 

182 

# Two events are duplicates if they have the same:
# - trial type
# - onset
# - duration
COLUMN_DEFINING_EVENT_IDENTITY = ["trial_type", "onset", "duration"]

# Duplicate handling strategy
# Sum the modulation values of duplicate events
# (used as the aggregation spec passed to DataFrame.agg).
STRATEGY = {"modulation": "sum"}

192 

193 

def handle_modulation_of_duplicate_events(events):
    """Deal with modulation of duplicate events if they have one.

    Currently the strategy is to sum the modulation values of duplicate events.
    """
    # Group rows sharing the same identity (trial_type, onset, duration)
    # and aggregate their modulation per STRATEGY.
    grouped = events.groupby(COLUMN_DEFINING_EVENT_IDENTITY, sort=False)
    cleaned_events = grouped.agg(STRATEGY).reset_index()

    # A row-count change means at least one duplicate was collapsed.
    if len(cleaned_events) != len(events):
        warnings.warn(
            "Duplicated events were detected. "
            "Amplitudes of these events will be summed. "
            "You might want to verify your inputs.",
            stacklevel=find_stack_level(),
        )

    return cleaned_events